diff --git a/.codespellrc b/.codespellrc new file mode 100644 index 0000000000..2f725806c3 --- /dev/null +++ b/.codespellrc @@ -0,0 +1,3 @@ +[codespell] +ignore-words = .github/linters/codespell.txt +skip = ./build,./doc/api,./doc/capi diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000..237f445691 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,14 @@ +.DS_Store +.idea +.vscode +.yardoc +*.bak +*.iml +*.ipr +*.swp +*.tmp +bin +build +doc/api +doc/capi +node_modules diff --git a/.editorconfig b/.editorconfig index 70cc78f0be..081e8c6b90 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,36 +8,23 @@ root = true [*] charset = utf-8 end_of_line = lf -indent_size = 8 -indent_style = tab +indent_size = 2 +indent_style = space insert_final_newline = true tab_width = 8 +trim_trailing_whitespace = true [{Makefile,Makefile.*,makefile,*.mk}] -trim_trailing_whitespace = true +indent_style = tab #max_line_length = 80 -[*.{c,cc,C,cxx,cpp,h,hh,H,hxx,hpp,inc,y}] -indent_size = 2 -indent_style = space -trim_trailing_whitespace = true +#[*.{c,cc,C,cxx,cpp,h,hh,H,hxx,hpp,inc,y}] #max_line_length = 120 -[{*.rb,Rakefile,rakefile,*.rake,*.gemspec,*.gembox}] -indent_size = 2 -indent_style = space -trim_trailing_whitespace = true +#[{*.rb,Rakefile,rakefile,*.rake,*.gemspec,*.gembox}] #max_line_length = 120 +# limitation to US-ASCII [*.bat] -charset = latin1 end_of_line = crlf #max_line_length = 80 - -[*.{yaml,yml}] -indent_size = 2 -indent_style = space - -[*.md] -indent_size = 2 -indent_style = space diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 7d2ec818ee..1b3e367de8 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -4,3 +4,5 @@ 61e8a540869c57ebddac53cf9d243db407d57fff # reindent parse.y 650ffb9d8410e1ae2206890ef6e2a8643c600454 +# reindent build_config/playstationportable.rb +90ab714fda6825c4e951d47d7e6ecff033dd14e3 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000..0f776312d9 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +* text=auto eol=lf +*.bat text eol=crlf +*.cmd text eol=crlf +*.png binary diff --git a/CODEOWNERS b/.github/CODEOWNERS similarity index 100% rename from CODEOWNERS rename to .github/CODEOWNERS diff --git a/.github/dependabot.yml b/.github/dependabot.yml index dac4cac336..b81f669526 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,10 +1,33 @@ -# Basic set up - +# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates version: 2 updates: - - # Maintain dependencies for GitHub Actions + - package-ecosystem: "bundler" + directory: "/" + schedule: + interval: "daily" + groups: + bundler-dependencies: + patterns: + - "*" + cooldown: + default-days: 7 - package-ecosystem: "github-actions" directory: "/" schedule: interval: "daily" + groups: + github-actions-dependencies: + patterns: + - "*" + cooldown: + default-days: 7 + - package-ecosystem: "pre-commit" + directory: "/" + schedule: + interval: "daily" + groups: + pre-commit-hooks: + patterns: + - "*" + cooldown: + default-days: 7 diff --git a/.github/labeler.yml b/.github/labeler.yml new file mode 100644 index 0000000000..cdd8fcf55a --- /dev/null +++ b/.github/labeler.yml @@ -0,0 +1,81 @@ +# https://github.com/actions/labeler +benchmark: + - any: + - changed-files: + - any-glob-to-any-file: + - benchmark/**/* +build: + - any: + - changed-files: + - any-glob-to-any-file: + - Dockerfile + - Doxyfile + - Makefile + - Rakefile + - appveyor.yml + - build_config.rb + - build_config/**/* + - docker-compose.yml + - lib/**/* + - minirake + - tasks/**/* + - .codespellrc + - .dockerignore + - .pre-commit-config.yaml + - .prettierignore + - .prettierrc + - .travis.yml + - .yardopts +core: + - any: + - changed-files: + - any-glob-to-any-file: + - include/**/* + - mrblib/**/* + - src/**/* + - test/**/* +doc: + - any: + - changed-files: + - any-glob-to-any-file: + - CONTRIBUTING.md + - LEGAL + - LICENSE + - NEWS + - README.md + - SECURITY.md + - TODO.md + - doc/**/* + - examples/**/* +github: + - any: + - changed-files: + - any-glob-to-any-file: + - .github/**/* +miscellaneous: + - any: + - changed-files: + - any-glob-to-any-file: + - .editorconfig + - .git-blame-ignore-revs + - .gitattributes + - .gitignore + - AUTHORS + - Gemfile + - Gemfile.lock + - mruby-source.gemspec +mrbgems: + - any: + - changed-files: + - any-glob-to-any-file: + - mrbgems/**/* +oss-fuzz: + - any: + - changed-files: + - any-glob-to-any-file: + - oss-fuzz/**/* +tools: + - any: + - changed-files: + - any-glob-to-any-file: + - tools/**/* diff --git a/.github/linters/.hadolint.yaml b/.github/linters/.hadolint.yaml new file mode 100644 index 0000000000..b418cc3876 --- /dev/null +++ b/.github/linters/.hadolint.yaml @@ -0,0 +1,4 @@ +failure-threshold: info +ignored: + - DL3008 # https://github.com/hadolint/hadolint/wiki/DL3008 + - DL3013 # https://github.com/hadolint/hadolint/wiki/DL3013 diff --git a/.github/linters/.ls-lint.yml b/.github/linters/.ls-lint.yml new file mode 100644 index 0000000000..f0492cc6b8 --- /dev/null +++ b/.github/linters/.ls-lint.yml @@ -0,0 +1,12 @@ +ls: + .c: snake_case + .cpp: snake_case + .h: snake_case + .json: snake_case + .rb: snake_case + .yaml: regex:\.?[a-z]+(-[a-z]+)* + .yml: regex:\.?[a-z]+(-[a-z]+)* + +ignore: + - build_config + - mrbgems/mruby-compiler/core/y.tab.c diff --git a/.github/linters/.markdown-lint.yml b/.github/linters/.markdown-lint.yml index 5c6cbec4a3..ae0213306e 100644 --- a/.github/linters/.markdown-lint.yml +++ b/.github/linters/.markdown-lint.yml @@ -1,4 +1,5 @@ # https://github.com/DavidAnson/markdownlint#rules--aliases +# https://github.com/igorshubovych/markdownlint-cli # markdownlint -c .github/linters/.markdown-lint.yml . # MD001 heading-increment/header-increment - Heading levels should only increment by one level at a time @@ -19,5 +20,14 @@ MD025: false # MD026 no-trailing-punctuation - Trailing punctuation in heading MD026: false +# MD029/ol-prefix Ordered list item prefix +MD029: false + +# MD033/no-inline-html - Inline HTML +MD033: false + # MD040 fenced-code-language - Fenced code blocks should have a language specified MD040: false + +# MD041 first-line-heading/first-line-h1 - First line in a file should be a top-level heading +MD041: false diff --git a/.github/linters/.rubocop.yml b/.github/linters/.rubocop.yml new file mode 100644 index 0000000000..5475548be2 --- /dev/null +++ b/.github/linters/.rubocop.yml @@ -0,0 +1,13 @@ +AllCops: + DisabledByDefault: true + TargetRubyVersion: 3.4 + +Layout/AssignmentIndentation: + Enabled: true + +Layout/BlockEndNewline: + Enabled: true + +Layout/IndentationStyle: + Enabled: true + EnforcedStyle: spaces diff --git a/.github/linters/.yaml-lint.yml b/.github/linters/.yaml-lint.yml index be4c887988..f5e0ee4623 100644 --- a/.github/linters/.yaml-lint.yml +++ b/.github/linters/.yaml-lint.yml @@ -4,6 +4,8 @@ extends: default rules: + comments: + min-spaces-from-content: 1 document-start: disable line-length: disable truthy: false diff --git a/codespell.txt b/.github/linters/codespell.txt similarity index 66% rename from codespell.txt rename to .github/linters/codespell.txt index d6d80b0791..9a4d7548ce 100644 --- a/codespell.txt +++ b/.github/linters/codespell.txt @@ -1,20 +1,16 @@ ans -ba +celler clen -creat delet disabl -falsy filetest fo hel -hist ist -methid nd quitt remore -ro runn sting -upto +strin +wronly diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 870efdd243..e9f1863587 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,97 +1,71 @@ +# https://github.com/actions/runner-images#available-images name: Build & Test on: [push, pull_request] -jobs: - Ubuntu-1804-gcc: - runs-on: ubuntu-18.04 - timeout-minutes: 10 - env: - MRUBY_CONFIG: ci/gcc-clang - CC: gcc - steps: - - uses: actions/checkout@v3 - - name: Ruby version - run: ruby -v - - name: Compiler version - run: ${{ env.CC }} --version - - name: Build and test - run: rake -m test:build && rake test:run - - Ubuntu-1804-clang: - runs-on: ubuntu-18.04 - timeout-minutes: 10 - env: - MRUBY_CONFIG: ci/gcc-clang - CC: clang - steps: - - uses: actions/checkout@v3 - - name: Ruby version - run: ruby -v - - name: Compiler version - run: ${{ env.CC }} --version - - name: Build and test - run: rake -m test:build && rake test:run - - Ubuntu-2004-gcc: - runs-on: ubuntu-20.04 - timeout-minutes: 10 - env: - MRUBY_CONFIG: ci/gcc-clang - CC: gcc - steps: - - uses: actions/checkout@v3 - - name: Ruby version - run: ruby -v - - name: Compiler version - run: ${{ env.CC }} --version - - name: Build and test - run: rake -m test:build && rake test:run - - Ubuntu-2004-clang: - runs-on: ubuntu-20.04 - timeout-minutes: 10 - env: - MRUBY_CONFIG: ci/gcc-clang - CC: clang - steps: - - uses: actions/checkout@v3 - - name: Ruby version - run: ruby -v - - name: Compiler version - run: ${{ env.CC }} --version - - name: Build and test - run: rake -m test:build && rake test:run +permissions: + contents: read - macOS: - runs-on: macos-latest +jobs: + GCC-CLANG: + name: "${{ matrix.os }}-${{ matrix.altname || matrix.cc }}" + runs-on: ${{ matrix.os }} timeout-minutes: 10 + strategy: + fail-fast: false + max-parallel: 8 + matrix: + include: + - {os: ubuntu-24.04, cc: gcc, cxx: g++} + - {os: ubuntu-24.04, cc: clang, cxx: clang++} + - {os: ubuntu-22.04, cc: gcc, cxx: g++} + - {os: ubuntu-22.04, cc: clang, cxx: clang++} + - {os: macos-15, cc: clang, cxx: clang++} + - {os: macos-14, cc: clang, cxx: clang++} + - {os: windows-2025, cc: gcc, cxx: g++, altname: "mingw-gcc"} + - {os: windows-2022, cc: gcc, cxx: g++, altname: "mingw-gcc"} env: MRUBY_CONFIG: ci/gcc-clang - CC: clang + CC: ${{ matrix.cc }} + CXX: ${{ matrix.cxx }} + LD: ${{ matrix.cc }} steps: - - uses: actions/checkout@v3 + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - name: Ruby version run: ruby -v - name: Compiler version run: ${{ env.CC }} --version - name: Build and test - run: rake -m test:build && rake test:run + run: rake -m test:run:serial - Windows-MinGW: - runs-on: windows-latest - timeout-minutes: 10 - env: - MRUBY_CONFIG: ci/gcc-clang - CC: gcc + Cosmopolitan: + runs-on: ubuntu-24.04 + timeout-minutes: 15 steps: - - uses: actions/checkout@v3 + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - name: Ruby version run: ruby -v - - name: Compiler version - run: ${{ env.CC }} --version + - name: Cache cosmocc + uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + id: cache-cosmocc + with: + path: ~/cosmo + key: cosmocc-${{ runner.os }}-20260104 + - name: Install cosmocc + if: steps.cache-cosmocc.outputs.cache-hit != 'true' + run: | + mkdir -p ~/cosmo && cd ~/cosmo + wget https://cosmo.zip/pub/cosmocc/cosmocc.zip + unzip cosmocc.zip - name: Build and test - run: rake -m test:build && rake test:run + run: | + COSMO_ROOT=~/cosmo rake -m test:run:serial MRUBY_CONFIG=cosmopolitan Windows-VC: runs-on: windows-2022 @@ -99,11 +73,14 @@ jobs: env: MRUBY_CONFIG: ci/msvc steps: - - uses: actions/checkout@v3 + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - name: Ruby version run: ruby -v - name: Build and test shell: cmd run: | call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat" - rake -m test:build && rake test:run + rake -m test:run:serial diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 56a0d0fc15..1901f512b9 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -1,46 +1,33 @@ -name: Code scanning - action - +name: CodeQL Analysis on: push: - branches-ignore: - - dependabot/** + branches: [master] pull_request: - schedule: - - cron: '0 19 * * 4' - + branches: [master] +permissions: + actions: read + contents: read + security-events: write jobs: - CodeQL-Build: + codeql: + name: CodeQL runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + language: ["actions"] steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: - # We must fetch at least the immediate parents so that if this is - # a pull request then we can checkout the head. - fetch-depth: 2 - - # Initializes the CodeQL tools for scanning. + persist-credentials: false - name: Initialize CodeQL - uses: github/codeql-action/init@v2 - # Override language selection by uncommenting this and choosing your languages - # with: - # languages: go, javascript, csharp, python, cpp, java - - # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). - # If this step fails, then you should remove it and run the build manually (see below) + uses: github/codeql-action/init@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0 + with: + languages: ${{ matrix.language }} - name: Autobuild - uses: github/codeql-action/autobuild@v2 - - # ℹ️ Command-line programs to run using the OS shell. - # 📚 https://git.io/JvXDl - - # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines - # and modify them (or add more) to build your code if your project - # uses a compiled language - - # - run: | - # make bootstrap - # make release - + uses: github/codeql-action/autobuild@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0 + with: + category: "Security" diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml new file mode 100644 index 0000000000..22302ce1d5 --- /dev/null +++ b/.github/workflows/coverage.yml @@ -0,0 +1,46 @@ +name: Coverage + +on: [push] + +permissions: + contents: read + +jobs: + coverage: + name: Coverage + runs-on: ubuntu-24.04 + env: + MRUBY_CONFIG: ci/gcc-clang + CC: clang-18 + CXX: clang++-18 + LD: clang-18 + CFLAGS: -O0 -g --coverage + CXXFLAGS: -O0 -g --coverage + LDFLAGS: --coverage + steps: + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - name: Ruby version + run: ruby -v + - name: Compiler version + run: ${{ env.CC }} --version + - name: Build and test + run: rake test + - name: Install gcovr + run: pip install gcovr + - name: Generate coverage report + run: | + { + echo "# Coverage report" + echo \`\`\` + gcovr --gcov-executable "llvm-cov-18 gcov" -e ".*_test.c" -e ".*/test/.*" -s --html-details -o coverage/ build + echo \`\`\` + } > "$GITHUB_STEP_SUMMARY" + - name: Upload coverage report + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: coverage-${{ github.sha }} + path: coverage/ + retention-days: 3 diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml new file mode 100644 index 0000000000..f665c8fc88 --- /dev/null +++ b/.github/workflows/labeler.yml @@ -0,0 +1,15 @@ +# https://github.com/actions/labeler +name: Pull Request Labeler +on: + - pull_request_target +jobs: + triage: + permissions: + contents: read + pull-requests: write + runs-on: ubuntu-latest + steps: + - uses: actions/labeler@f27b608878404679385c85cfa523b85ccb86e213 # v6.1.0 + with: + repo-token: "${{ secrets.GITHUB_TOKEN }}" + sync-labels: true diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml deleted file mode 100644 index da6bc784ac..0000000000 --- a/.github/workflows/lint.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: Lint - -on: [pull_request] - -jobs: - misspell: - name: Check spelling with misspell - runs-on: ubuntu-latest - steps: - - name: Check Out - uses: actions/checkout@v3 - - name: Install - run: wget -O - -q https://git.io/misspell | sh -s -- -b . - - name: Run misspell - run: git ls-files --empty-directory | xargs ./misspell -error - pre-commit: - name: Run pre-commit - runs-on: ubuntu-latest - steps: - - name: Check Out - uses: actions/checkout@v3 - - name: Install - run: | - python -m pip install --upgrade pip - pip install pre-commit - - name: Set PY - run: echo "PY=$(python -VV | sha256sum | cut -d' ' -f1)" >> $GITHUB_ENV - - uses: actions/cache@v3 - with: - path: ~/.cache/pre-commit - key: pre-commit|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml') }} - - name: Run pre-commit - run: pre-commit run --all-files diff --git a/.github/workflows/ls-lint.yml b/.github/workflows/ls-lint.yml new file mode 100644 index 0000000000..86644ba904 --- /dev/null +++ b/.github/workflows/ls-lint.yml @@ -0,0 +1,19 @@ +# https://github.com/loeffel-io/ls-lint +name: Lint + +on: [pull_request] + +permissions: + contents: read + +jobs: + ls-lint: + name: Run ls-lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - uses: ls-lint/action@02e380fe8733d499cbfc9e22276de5085508a5bd # v2.3.1 + with: + config: .github/linters/.ls-lint.yml diff --git a/.github/workflows/oss-fuzz.yml b/.github/workflows/oss-fuzz.yml index 687b807ddd..26fade42c8 100644 --- a/.github/workflows/oss-fuzz.yml +++ b/.github/workflows/oss-fuzz.yml @@ -1,5 +1,9 @@ +# https://github.com/google/oss-fuzz name: CIFuzz on: [pull_request] +permissions: + contents: read + jobs: Fuzzing: runs-on: ubuntu-latest @@ -7,16 +11,16 @@ jobs: - name: Build Fuzzers uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master with: - oss-fuzz-project-name: 'mruby' + oss-fuzz-project-name: "mruby" dry-run: false - name: Run Fuzzers uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master with: - oss-fuzz-project-name: 'mruby' + oss-fuzz-project-name: "mruby" fuzz-seconds: 600 dry-run: false - name: Upload Crash - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 if: failure() with: name: artifacts diff --git a/.github/workflows/pre-commit-manual.yml b/.github/workflows/pre-commit-manual.yml new file mode 100644 index 0000000000..bd7ac01c62 --- /dev/null +++ b/.github/workflows/pre-commit-manual.yml @@ -0,0 +1,22 @@ +# https://github.com/j178/prek +name: Manual hooks + +on: [pull_request] + +permissions: + contents: read + +jobs: + pre-commit: + name: Run pre-commit + runs-on: ubuntu-latest + steps: + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - uses: j178/prek-action@bdca6f102f98e2b4c7029491a53dfd366469e33d # v2.0.4 + with: + install-only: true + - name: Run manual pre-commit hooks + run: prek run --color=always --all-files --hook-stage manual diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 0000000000..cec73055db --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,20 @@ +# https://github.com/j178/prek +name: pre-commit + +on: [pull_request] + +permissions: + contents: read + +jobs: + pre-commit: + name: Run pre-commit + runs-on: ubuntu-latest + steps: + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - uses: j178/prek-action@bdca6f102f98e2b4c7029491a53dfd366469e33d # v2.0.4 + with: + extra-args: --all-files diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000000..3eb4f79945 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,48 @@ +name: Release Builds + +on: + push: + tags: + - "*.*.*" + +permissions: read-all + +jobs: + source: + name: Source Code Releasing + if: ${{ github.repository == 'mruby/mruby' }} + permissions: + contents: write + runs-on: ubuntu-22.04 + timeout-minutes: 10 + strategy: + fail-fast: false + steps: + - name: "Checkout ${{ github.ref_name }} ( ${{ github.sha }} )" + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - name: Builds + id: builds + run: | + tagname="$GITHUB_REF_NAME" + packagename="mruby-$tagname" + destdir=packages + + mkdir -p "$destdir" + git archive --format zip --prefix "$packagename/" -o "$destdir/$packagename.zip" "$GITHUB_REF" + git archive --format tar.gz --prefix "$packagename/" -o "$destdir/$packagename.tar.gz" "$GITHUB_REF" + gunzip -c "$destdir/$packagename.tar.gz" | xz > "$destdir/$packagename.tar.xz" + + ( + cd "$destdir" || exit + sha256sum -- * > .sha256 + mv .sha256 "$packagename.sha256" + ) + - name: Release + uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3.0.0 + with: + draft: true + prerelease: ${{ contains(github.ref_name, '-rc') }} + body_path: NEWS + files: packages/* diff --git a/.github/workflows/super-linter.yml b/.github/workflows/super-linter.yml index 563758473f..3dd83ddbf8 100644 --- a/.github/workflows/super-linter.yml +++ b/.github/workflows/super-linter.yml @@ -1,25 +1,32 @@ +# https://github.com/super-linter/super-linter name: Super-Linter on: [pull_request] +permissions: + contents: read + jobs: build: + permissions: + contents: read # for actions/checkout to fetch code + statuses: write # for super-linter/super-linter/slim to mark status of each linter run name: Lint Code Base runs-on: ubuntu-latest steps: - - name: Checkout Code - uses: actions/checkout@v3 + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: # Full git history is needed to get a proper list of changed files within `super-linter` fetch-depth: 0 + persist-credentials: false - name: Lint Code Base - uses: github/super-linter/slim@v4.9.6 + uses: super-linter/super-linter/slim@9e863354e3ff62e0727d37183162c4a88873df41 # v8.6.0 env: - ERROR_ON_MISSING_EXEC_BIT: true - VALIDATE_BASH: true # VALIDATE_BASH_EXEC: true + VALIDATE_DOCKERFILE_HADOLINT: true # VALIDATE_EDITORCONFIG: true # VALIDATE_SHELL_SHFMT: true DEFAULT_BRANCH: master GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - VALIDATE_ALL_CODEBASE: false + VALIDATE_ALL_CODEBASE: true diff --git a/.gitignore b/.gitignore index 6a0e7e46b8..76861bb041 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,8 @@ -*.lock *.bak *.bc *.d *.i +*.lock *.o *.orig *.pdb @@ -13,25 +13,27 @@ *.tmp *~ .DS_Store +.cache +.ccls* .ccmalloc +.git .svn .vscode .yardoc -.ccls* -compile_flags.txt +benchmark/**/*.dat +benchmark/*.pdf +benchmark/*.png +bin +/build +doc/api +doc/capi compile_commands.json +compile_flags.txt cscope.files cscope.out +gmon.out +mruby-source-*.gem +node_modules +perf.data* tags - -/.git -/bin -/build -/mruby-source-*.gem - -/benchmark/**/*.dat -/benchmark/*.pdf -/benchmark/*.png - -/doc/api -/doc/capi +!Gemfile.lock diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b38ae58dd9..928f2df469 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,58 +1,142 @@ --- -default_stages: [commit, push] +# https://github.com/j178/prek +default_stages: [pre-commit, pre-push] default_language_version: - # force all unspecified Python hooks to run python3 python: python3 -minimum_pre_commit_version: '2.18.1' + node: 24.14.0 +minimum_pre_commit_version: "3.2.0" +exclude: "^tools/lrama/" repos: - repo: meta hooks: - id: identity + name: run identity + description: check your identity - id: check-hooks-apply + name: run check-hooks-apply + description: check hooks apply to the repository + - id: check-useless-excludes + name: run check-useless-excludes + description: clean up unnecessary exclusion patterns + - repo: local + hooks: + - id: prettier + name: run prettier + description: format files with prettier + entry: prettier --write '**/*.md' '**/*.yaml' '**/*.yml' + files: \.(md|ya?ml)$ + language: node + additional_dependencies: ["prettier@3.7.4"] + pass_filenames: false + stages: [manual] + - id: check-makefile-indentation + name: check Makefiles are indented with tabs + description: ensures that Makefiles are indented with tabs + entry: ./scripts/check_makefiles_for_tabs.sh + language: system + files: "(?i)^makefile$" + pass_filenames: true # <-- Crucial change: pass filenames to the script + types: [file] # Ensure only regular files are passed, not directories + stages: [manual] + - id: check-zip-file-is-not-committed + name: disallow zip files + description: Zip files are not allowed in the repository + language: fail + entry: | + Zip files are not allowed in the repository as they are hard to + track and have security implications. Please remove the zip file from the repository. + files: \.zip$ + - repo: https://github.com/gitleaks/gitleaks + rev: v8.30.1 + hooks: + - id: gitleaks + name: run gitleaks + description: detect hardcoded secrets with gitleaks + - repo: https://github.com/oxipng/oxipng + rev: v10.1.1 + hooks: + - id: oxipng + name: run oxipng + description: use lossless compression to optimize PNG files + args: ["--fix", "-o", "4", "--strip", "safe", "--alpha"] + stages: [manual] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 + rev: v6.0.0 hooks: - id: check-added-large-files - id: check-case-conflict - id: check-executables-have-shebangs exclude: ^test/t/lang\.rb$ + - id: check-illegal-windows-names - id: check-merge-conflict + - id: check-shebang-scripts-are-executable - id: check-vcs-permalinks - id: check-yaml + - id: destroyed-symlinks + - id: detect-aws-credentials + args: [--allow-missing-credentials] - id: detect-private-key - id: end-of-file-fixer - id: file-contents-sorter - files: ^codespell\.txt$ + args: [--unique] + files: ^\.github/linters/codespell\.txt$ - id: fix-byte-order-marker + - id: forbid-submodules - id: mixed-line-ending - id: trailing-whitespace - # - repo: git://github.com/Lucas-C/pre-commit-hooks - # rev: v1.1.9 - # hooks: - # - id: forbid-tabs - # - id: remove-tabs + - repo: https://github.com/Lucas-C/pre-commit-hooks + rev: v1.5.6 + hooks: + - id: forbid-tabs + name: run no-tabs checker + description: check the codebase for tabs + exclude: Makefile$ + - id: remove-tabs + name: run tabs remover + description: find and convert tabs to spaces + args: [--whitespaces-count, "2"] + exclude: Makefile$ + - repo: https://github.com/rhysd/actionlint + rev: v1.7.12 + hooks: + - id: actionlint + name: run actionlint + description: lint GitHub Actions workflow files - repo: https://github.com/codespell-project/codespell - rev: v2.2.1 + rev: v2.4.2 hooks: - id: codespell - name: Run codespell - description: Check spelling with codespell - entry: codespell --ignore-words=codespell.txt + name: run codespell + description: check spelling with codespell - repo: https://github.com/igorshubovych/markdownlint-cli - rev: v0.32.2 + rev: v0.48.0 hooks: - id: markdownlint - name: Run markdownlint - description: Checks the style of Markdown files - entry: markdownlint -c .github/linters/.markdown-lint.yml . + name: run markdownlint + description: checks the style of Markdown files + args: [--config=.github/linters/.markdown-lint.yml] types: [markdown] - files: \.(md|mdown|markdown)$ + files: \.md$ + - repo: https://github.com/rubocop/rubocop + rev: v1.86.2 + hooks: + - id: rubocop + name: run rubocop + description: RuboCop is a Ruby code style checker (linter) and formatter based on the community-driven Ruby Style Guide + exclude: ^test/t/syntax\.rb$ + args: [--config=.github/linters/.rubocop.yml] + - repo: https://github.com/shellcheck-py/shellcheck-py + rev: v0.11.0.1 + hooks: + - id: shellcheck + name: run shellcheck + description: check shell scripts with a static analysis tool - repo: https://github.com/adrienverge/yamllint - rev: v1.28.0 + rev: v1.38.0 hooks: - id: yamllint - name: Run yamllint - description: Check YAML files with yamllint - entry: yamllint --strict -c .github/linters/.yaml-lint.yml + name: run yamllint + description: check YAML files with yamllint + args: [--strict, -c=.github/linters/.yaml-lint.yml] types: [yaml] - files: \.(yaml|yml)$ + files: \.ya?ml$ diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 0000000000..8ed001b585 --- /dev/null +++ b/.prettierignore @@ -0,0 +1,6 @@ +# Ignore artifacts: +build +coverage +doc/internal/opcode.md +tools/lrama +.venv diff --git a/.prettierrc b/.prettierrc new file mode 100644 index 0000000000..81b81d20de --- /dev/null +++ b/.prettierrc @@ -0,0 +1,3 @@ +{ + "bracketSpacing": false +} diff --git a/.travis.yml b/.travis.yml index c47a54494e..23d9728634 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,4 +8,4 @@ env: - MRUBY_CONFIG=ci/gcc-clang script: - - rake -m test:build && rake test:run + - rake -m test:run:serial diff --git a/.yardopts b/.yardopts index 4499bf513a..f87c265254 100644 --- a/.yardopts +++ b/.yardopts @@ -11,7 +11,13 @@ mrbgems/*/src/**/*.c mrbgems/*/mrblib/**/*.rb mrbgems/*/include/**/*.h - -AUTHORS -LICENSE CONTRIBUTING.md +SECURITY.md +TODO.md +doc/*.md doc/guides/*.md +doc/internal/*.md +AUTHORS +LEGAL +LICENSE +NEWS diff --git a/AUTHORS b/AUTHORS index 5ba39c87ba..25b3359af5 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,90 +1,100 @@ # Authors of mruby (mruby developers) -## The List of Contributors sorted by number of commits (as of 2022-09-06 b06fef6) +## The List of Contributors sorted by number of commits (as of 2026-05-31 9d084b0) - 8532 Yukihiro "Matz" Matsumoto (@matz)* - 586 KOBAYASHI Shuji (@shuujii) - 408 dearblue (@dearblue)* - 376 Daniel Bovensiepen (@bovi) - 346 Takeshi Watanabe (@take-cheeze)* - 334 Masaki Muranaka (@monaka) - 308 Tomoyuki Sahara (@tsahara)* + 7747 Yukihiro "Matz" Matsumoto (@matz)* + 749 dearblue (@dearblue)* + 587 KOBAYASHI Shuji (@shuujii) + 353 Daniel Bovensiepen (@bovi)* + 345 Takeshi Watanabe (@take-cheeze)* + 333 Masaki Muranaka (@monaka) + 266 John Bampton (@jbampton) 234 Jun Hiroe (@suzukaze) + 228 Tomoyuki Sahara (@tsahara)* 220 Cremno (@cremno)* 209 Yuki Kurihara (@ksss)+ - 148 Yasuhiro Matsumoto (@mattn)* + 146 Yasuhiro Matsumoto (@mattn)* 113 Carson McDonald (@carsonmcdonald) - 103 Tomasz Dąbrowski (@dabroz)* - 84 skandhas (@skandhas) + 104 Tomasz Pędraszewski (@dabroz)* + 83 Akira Yumiyama (@akiray03)* + 83 skandhas (@skandhas) 80 Masamitsu MURASE (@masamitsu-murase) - 74 Yuichiro MASUI (@masuidrive) - 73 Hiroshi Mimaki (@mimaki)* + 79 Hiroshi Mimaki (@mimaki)* 71 Tatsuhiko Kubo (@cubicdaiya)* - 70 John Bampton (@jbampton) - 67 Akira Yumiyama (@akiray03)* - 62 Paolo Bosetti (@pbosetti)* + 71 Yuichiro MASUI (@masuidrive) + 62 Yuichiro Kaneko (@yui-knk)+ 59 Kurebayashi, Takahiro (@crimsonwoods)* 56 h2so5 (@h2so5) 52 Ralph Desir (@Mav7)* + 48 Paolo Bosetti (@pbosetti)* 45 Rory O'Connell (@RoryO)* - 43 Yuichiro Kaneko (@yui-knk)+ 42 fleuria (@flaneur2020) 40 Christopher Aue (@christopheraue) 40 Seba Gamboa (@sagmor) + 39 Kouhei Sutou (@kou)* 38 Koji Yoshioka (@kyab)*+ - 38 Kouhei Sutou (@kou)* + 32 Masayoshi Takahashi (@takahashim)+ 31 MATSUMOTO Ryosuke (@matsumotory)* 30 Nobuyoshi Nakada (@nobu) - 28 Masayoshi Takahashi (@takahashim)+ - 24 Julian Aron Prenner (@furunkel)* - 24 Ryan Scott (@ryan-scott-dev)* + 29 HASUMI Hitoshi (@hasumikin) + 26 Hoshiumi Arata (@hoshiumiarata)* + 25 Julian Aron Prenner (@furunkel)* + 23 leviongit (@leviongit) 22 Clayton Smith (@clayton-shopify) + 22 Uchio Kondo (@udzura)* 22 Zachary Scott (@zzak)* - 22 mirichi (@mirichi) - 21 Jared Breeden (@jbreeden)* 21 Ryan Lopopolo (@lopopolo) - 20 Uchio Kondo (@udzura)* + 20 Ryan Scott (@ryan-scott-dev)* + 20 google-labs-jules[bot] (@google-jules) 19 Bouke van der Bijl (@bouk) - 19 Felix Jones (@felixjones)* - 19 Hidetaka Takano (@TJ-Hidetaka-Takano) + 19 Jared Breeden (@jbreeden)* 19 go kikuta (@gkta)* 18 Corey Powell (@IceDragon200) + 18 Hidetaka Takano (@TJ-Hidetaka-Takano) 18 Jon Maken (@jonforums)+ - 18 Mitchell Blank Jr (@mitchblank)* + 18 mirichi (@mirichi) + 17 Mitchell Blank Jr (@mitchblank)* + 16 Hendrik (@Asmod4n) 16 bggd (@bggd) 16 kano4 (@kano4) + 15 Felix Jones (@felixjones)* 14 Blaž Hrastnik (@archseer)* - 14 Jose Narvaez (@goyox86) 14 Kazuki Tsujimoto (@k-tsj) 14 Tadashi FUKUZAWA (@FUKUZAWA-Tadashi)+ + 14 fn ⌃ ⌥ (@FnControlOption) + 13 Jose Narvaez (@goyox86) 13 Patrick Hogan (@pbhogan) 12 Akira Kuroda (@akuroda) 12 Kouki Ooyatsu (kaishuu0123)* 12 NAKAMURA Usaku (@unak)* - 12 RIZAL Reckordp (@Reckordp)+ + 12 Ray Chason (@chasonr)* + 12 SiZiOUS (@suzious) 12 Takashi Sawanaka (@sdottaka)* 12 Ukrainskiy Sergey (@ukrainskiysergey) 12 Xuejie "Rafael" Xiao (@xxuejie)* 11 Julien Ammous (@schmurfy) 11 Kazuho Oku (@kazuho) + 11 RIZAL Reckordp (@Reckordp)+ 11 Seeker (@SeekingMeaning) - 11 YAMAMOTO Masaya (pandax381) 11 takkaw (@takkaw) + 10 Chris Hasiński (@khasinski) 10 Miura Hideki (@miura1729) 10 Narihiro Nakamura (@authorNari) - 10 Ray Chason (@chasonr)* + 10 YAMAMOTO Masaya (pandax381) 10 Yuichi Nishiwaki (@nyuichi) 9 Akira Mitsui (@murasesyuka)* 9 Frank Celler (@fceller) 9 Tatsuya Matsumoto (@tmash06)* 8 Takashi Sogabe (@sogabe) + 8 Wataru Ashihara (@wataash)* 7 Bhargava Shastry (@bshastry)* 7 Kouichi Nakanishi (@keizo042) + 7 Paweł Świątkowski (@katafrakt) 7 Rubyist (@expeditiousRubyist) 7 Simon Génier (@simon-shopify) 7 Terence Lee (@hone) - 7 Wataru Ashihara (@wataash)* 7 roco (@rystyle)* + 7 vickash (@vickash) 6 Akito Mochizuki (@ak-mochi) 6 Beoran (@beoran) 6 David Siaw (@davidsiaw)* @@ -95,53 +105,60 @@ 6 Kenji Okimoto (@okkez)+ 6 Selman ULUG (@selman) 6 Yusuke Endoh (@mame)* + 6 buty4649 (@buty4649) + 6 masahino (@masahino) 5 Chris Reuter (@suetanvil) 5 Davide D'Agostino (@DAddYE) 5 Eric Hodel (@drbrain) - 5 Hendrik (@Asmod4n) 5 Ichito Nagata (@i110) 5 Keita Obo (@ktaobo)* 5 Max Anselm (@silverhammermba) - 5 SiZiOUS (@sizious) + 5 Rodrigo Malizia (@rmalizia44)+ + 5 Ryan Davis (@zenspider) 5 Syohei YOSHIDA (@syohex) 5 TOMITA Masahiro (@tmtm) 5 Yurie Yamane (@yurie)+ 5 dreamedge (@dreamedge) 5 nkshigeru (@nkshigeru) + 5 xuejianqing (@joans321) 4 Dante Catalfamo (@dantecatalfamo) 4 Goro Kikuchi (@gorogit) 4 Herwin Weststrate (@herwinw) + 4 Horimoto Yasuhiro (@komainu8) 4 Jon Moss (@maclover7) 4 Ken Muramatsu (@ken-mu)+ 4 Kohei Suzuki (@eagletmt) + 4 Lanza (@LanzaSchneider) 4 Li Yazhou (@flaneur2020) 4 Marcus Stollsteimer (@stomar) + 4 Mark Delk (@jethrodaniel) 4 NARUSE, Yui (@nurse) 4 Ravil Bayramgalin (@brainopia)*+ 4 Satoshi Odawara (@SatoshiOdawara) + 4 UENO, M. (@eunos-1128) 4 Yuhei Okazaki (@Yuuhei-Okazaki)* 4 Yuji Yamano (@yyamano) 4 kurodash (@kurodash)* - 4 masahino (@masahino) 4 wanabe (@wanabe)* - 4 xuejianqing (@joans321) + 2 0x1eef (@0x1eef) 3 Anton Davydov (@davydovanton) + 3 Aurora Nockert (@auroranockert) 3 Carlo Prelz (@asfluido)* + 3 Daniel K. Sierpiński (@513ry)+ 3 David Turnbull (@AE9RB) 3 Franck Verrot (@franckverrot) - 3 HASUMI Hitoshi (@hasumikin) - 3 Horimoto Yasuhiro (@komainu8) + 3 Hirohito Higashi (@HirohitoHigashi) 3 J. Mutua (@katmutua)+ 3 Jan Berdajs (@mrbrdo) 3 Jonas Minnberg (@sasq64) 3 Joseph McCullough (@joequery) 3 Mark McCurry (@fundamental) + 3 Meder Kydyraliev (@meder) 3 Nobuhiro Iwamatsu (@iwamatsu) 3 Per Lundberg (@perlun)* 3 Rob Fors (@robfors)* - 3 Rodrigo Malizia (@rmalizia44)+ + 3 Robert Rowe (@CaptainJet) 3 Sebastián Katzer (@katzer)* - 3 Shouji Kuboyama (@Shokuji)* 3 Shuta Kimura (@kimushu)+ 3 TERAJIMA, Motoyuki (@trmmy) 3 Taichi AOKI (@aoki1980taichi) @@ -152,6 +169,7 @@ 3 William Light (@wrl) 3 bamchoh (@bamchoh) 3 sasaki takeru (@takeru) + 3 windwiny (@windwiny) 2 Akira Moroo (@retrage) 2 Artur K (@nemerle) 2 Christian Mauceri (@mauceri) @@ -162,7 +180,6 @@ 2 Francois Chagnon (@EiNSTeiN-)* 2 Gilad Zohari (@gzohari) 2 Go Saito (@govm) - 2 Hirohito Higashi (@HirohitoHigashi) 2 Hiroyuki Iwatsuki (@iwadon) 2 Huei-Horng Yo (@hiroshiyui) 2 Jonas Kulla (@Ancurio) @@ -171,19 +188,19 @@ 2 Kazuhiko Yamashita (@pyama86)+ 2 Kazuhiro Sera (@seratch) 2 Kuroda Daisuke (@dycoon)+ - 2 Lanza (@LanzaSchneider) 2 Lothar Scholz (@llothar) 2 Lukas Joeressen (@kext) 2 Masahiro Wakame (@vvkame)+ 2 Minao Yamamoto (@tarosay)+ 2 Nihad Abbasov (@NARKOZ) + 2 Pete Kinnecom (@petekinnecom) 2 Robert Mosolgo (@rmosolgo) 2 Russel Hunter Yukawa (@rhykw)+ 2 Ryunosuke SATO (@tricknotes) 2 Santa Zhang (@santazhang) - 2 Satoru Naba (@snaba)+ 2 Serg Podtynnyi (@shtirlic) 2 Shannen Saez (@shancat) + 2 Shouji Kuboyama (@Shokuji)* 2 SouthWolf (@southwolf) 2 TJ Singleton (@tjsingleton) 2 Taiyo Mizuhashi (@taiyoslime)+ @@ -191,22 +208,29 @@ 2 Yutaka HARA (@yhara)*+ 2 Zhang Xiaohui (@hifoolno) 2 icm7216 (@icm7216) - 2 windwiny (@windwiny) 1 A-Sat (@asatou)+ + 1 AN Long (@aisk) 1 Abinoam Praxedes Marques Junior (@abinoam) 1 Alex Wang (@nanamiwang)+ + 1 AlexDenisov (@AlexDenisov) 1 Andrew Nordman (@cadwallion) + 1 Ashish Kurmi (@boahc077) 1 Atsushi Morimoto (@mynz) 1 Ben A Morgan (@BenMorganIO) 1 Benoit Daloze (@eregon) + 1 Bradley Whited (@esotericpig) + 1 Colin MacKenzie IV (@sinisterchipmunk) 1 Daehyub Kim (@lateau) 1 Daniel Varga (@vargad) + 1 David Korczynski (@DavidKorczynski) + 1 Diamond Rivero (@diamant3) 1 Edgar Boda-Majer (@eboda) 1 Fangrui Song (@MaskRay) 1 Flavio Medeiros (@flaviommedeiros) 1 Francis Bogsanyi (@fbogsany) 1 Guo Xiao (@guoxiao) 1 Gwen Boatrite (@boatrit) + 1 Gwendolyn Boatrite (@boatrite) 1 HARADA Makoto (@haramako) 1 HAYASHI Kentaro (@kenhys) 1 Hiroki Mori (@yamori813)+ @@ -236,10 +260,11 @@ 1 Lukas Elmer (@lukaselmer) 1 Lukas Stabe (@Ahti) 1 M.Naruoka (@fenrir-naru) - 1 Mark Delk (@jethrodaniel) + 1 Marcelo Juchem (@juchem) 1 Martin Bosslet (@emboss)+ 1 Masahiko Sawada (@MasahikoSawada) 1 Matt Aimonetti (@mattetti) + 1 Max Base (@MaxFork) 1 Maxim Abramchuk (@MaximAbramchuck) 1 Megumi Tomita (@tomykaira)+ 1 Mitchell Hashimoto (@mitchellh) @@ -248,24 +273,30 @@ 1 Nicholas (@knf) 1 Nozomi SATO (@nozomiS) 1 Okumura Takahiro (@hfm) + 1 Oliver Chang (@oliverchang) 1 Patrick Ellis (@pje) 1 Patrick Pokatilo (@SHyx0rmZ) 1 Pavel Evstigneev (@Paxa)+ + 1 Piotr Usewicz (@pusewicz) 1 Prayag Verma (@pra85) 1 Ranmocy (@ranmocy) 1 Robert McNally (@wolfmcnally) - 1 Robert Rowe (@CaptainJet) 1 Ryan Scott Lewis (@RyanScottLewis) 1 Ryo Okubo (@syucream) 1 SAkira a.k.a. Akira Suzuki (@sakisakira) + 1 SaekiMototsune (@saeki-mototsune) 1 Santiago Rodriguez (@sanrodari) 1 Satoh, Hiroh (@cho45)+ + 1 Satoru Naba (@snaba)+ 1 Sayed Abdelhaleem (@visualsayed) + 1 Sergey Ukrainskiy (@ukrainskiysergey) 1 Shugo Maeda (@shugo) + 1 Sinkevich Artem (@ArtSin) 1 Sorah Fukumori (@sorah) 1 Stephen Jothen (@sjothen) 1 Stuart Hinson (@stuarth) 1 Takuma Kume (@takumakume)+ + 1 Takuya ASADA (@syuu1228) 1 Thomas Schmidt (@digitaltom) 1 Timo Schilling (@timoschilling) 1 Tom Black (@blacktm) @@ -276,23 +307,28 @@ 1 Yevhen Viktorov (@yevgenko) 1 Yoji SHIDARA (@darashi) 1 Yoshiori SHOJI (@yoshiori) + 1 Yuji Yokoo (@yujiyokoo) 1 Yukang (@chenyukang) 1 Yurii Nakonechnyi (@inobelar) 1 Yusuke Suzuki (@Constellation)+ 1 Yusuke Tanaka (@csouls) + 1 alpha.netzilla (@alpha-netzilla) 1 arton (@arton) 1 duangsuse (@duangsuse) 1 fl0l0u (@fl0l0u) + 1 hasse (@hasse09052) 1 hhc0null (@hhc0null) 1 iTitou (@titouanc) 1 javier ramírez (@javier) 1 liyuray (@liyuray) 1 lucas dicioccio (@lucasdicioccio) 1 n4o847 (@n4o847) + 1 niyarin (@niyarin) 1 robert (@R-obert) 1 sbsoftware (@sbsoftware) 1 ssmallkirby (@smallkirby) 1 taku toyama (@tsuichu) + 1 vobloeb (@vobloeb) `*` - Entries unified according to names and addresses `+` - Entries with names different from commits diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 99297d7a34..12f5df7f14 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -8,55 +8,127 @@ Contributors agree to license their contribution(s) under MIT license. To make it easy to review and understand your change please keep the following things in mind before submitting your pull request: -* Work on the latest possible state of **mruby/master** -* Create a branch which is dedicated to your change -* Test your changes before creating a pull request (`rake test`) -* If possible write a test case which confirms your change -* Don't mix several features or bug-fixes in one pull request -* Create a meaningful commit message -* Explain your change (i.e. with a link to the issue you are fixing) -* Use mrbgem to provide non ISO features (classes, modules and methods) unless +- Work on the latest possible state of **mruby/master** +- Create a branch which is dedicated to your change +- Test your changes before creating a pull request (`rake test`) +- If possible write a test case which confirms your change +- Don't mix several features or bugfixes in one pull request +- Create a meaningful commit message +- Explain your change (i.e. with a link to the issue you are fixing) +- Use mrbgem to provide non ISO features (classes, modules and methods) unless you have a special reason to implement them in the core -## pre-commit +## Security Issues -A framework for managing and maintaining multi-language `pre-commit` hooks. -`pre-commit` can be [installed](https://pre-commit.com/#installation) with `pip`, `curl`, `brew` or `conda`. +If you discover a security vulnerability: -You need to first install `pre-commit` and then install the `pre-commit` hooks with `pre-commit install`. -Now `pre-commit` will run automatically on git commit! +- **High priority security vulnerabilities** (RCE): Report via email to +- **VM crashes from valid Ruby code**: Please report as regular bug reports on our issue tracker -It's usually a good idea to run the hooks against all the files when adding new hooks (usually `pre-commit` will only run on the changed files during git hooks). -Use `pre-commit run --all-files` to check all files. +For detailed guidance on what qualifies as a security issue and what doesn't, see [SECURITY.md](SECURITY.md). -To run a single hook use `pre-commit run --all-files ` +## prek -To update use `pre-commit autoupdate` +We use [prek](https://github.com/j178/prek), a fast Rust-based pre-commit hook manager. +It reads the standard `.pre-commit-config.yaml` format. + +Install `prek` following the [installation guide](https://github.com/j178/prek#installation), +then install the hooks with `prek install`. +Now `prek` will run automatically on git commit! + +It's usually a good idea to run the hooks against all the files when adding new hooks (usually `prek` +will only run on the changed files during git hooks). Use `prek run --all-files` to check all files. + +To run a single hook use `prek run --all-files ` + +To update use `prek autoupdate` Sometimes you might need to skip one or more hooks which can be done with the `SKIP` environment variable. `$ SKIP=yamllint git commit -m "foo"` -* [Quick start](https://pre-commit.com/#quick-start) -* [Usage](https://pre-commit.com/#usage) -* [pre-commit autoupdate](https://pre-commit.com/#pre-commit-autoupdate) -* [Temporarily disabling hooks](https://pre-commit.com/#temporarily-disabling-hooks) +For convenience, we have added `prek run --all-files`, `prek install` and `prek autoupdate` +to both the Makefile and the Rakefile. Run them with: -## Spell Checking +- `make check` or `rake check` +- `make checkinstall` or `rake checkinstall` +- `make checkupdate` or `rake checkupdate` + +To configure hooks you can modify the config file [.pre-commit-config.yaml](.pre-commit-config.yaml). +We use [GitHub Actions](.github/workflows/pre-commit.yml) to run `prek` on every pull request. + +### prek quick links + +- [prek GitHub](https://github.com/j178/prek) +- [Installation](https://github.com/j178/prek#installation) +- [Usage](https://github.com/j178/prek#usage) + +## Docker + +We have both a `Dockerfile` and `docker-compose.yml` files in the repository root. +You can run these with the command line or use +[Docker Desktop](https://www.docker.com/products/docker-desktop/). + +The Docker image is running Debian bullseye with Ruby and Python installed. +You can build the Docker image with: + +`$ docker-compose build test` + +So far we just have one service: `test`. Running the default `docker-compose` +command will create the Docker image, spin up a container and then build and +run all mruby tests. + +The default `docker-compose` command is: + +`$ docker-compose -p mruby run test` + +You can also use Make or Rake to run the default `docker-compose` +command from above: + +- `make composetest` +- `rake composetest` -We are running [misspell](https://github.com/client9/misspell) which is mainly written in -[Golang](https://golang.org/) to check spelling with [GitHub Actions](.github/workflows/lint.yml). -Correct commonly misspelled English words quickly with `misspell`. You can run `misspell` locally -against all files with: +List your Docker images with: -```bash -find . -type f | xargs ./misspell -error +```console +$ docker images +REPOSITORY TAG IMAGE ID CREATED SIZE +mruby-test latest ec60f9536948 29 seconds ago 1.29GB ``` -Notable `misspell` help options or flags are: +You can also run any custom `docker-compose` command which will override +the default. For example to run `prek run --all-files` type: -* `-i` string: ignore the following corrections, comma separated -* `-w`: Overwrite file with corrections (default is just to display) +`$ docker-compose -p mruby run test prek run --all-files` + +For convenience, you can also run `prek` with: + +- `make composecheck` +- `rake composecheck` + +The bonus of running `prek` with `docker-compose` is that you won't need +to install `prek` and the hooks on your local machine. + +Note limitation: currently running `prek` with `docker-compose` we +skip the `check-executables-have-shebangs` hook. + +Two more examples of custom `docker-compose` commands are: + +- `$ docker-compose -p mruby run test ls` +- `$ docker-compose -p mruby run test rake doc:api` + +If you want to test using a different `docker-compose` YAML config file you +can use the `-f` flag: + +`$ docker-compose -p mruby -f docker-compose.test.yml run test` + +- +- + +## Spell Checking + +We are using `prek` to run [codespell](https://github.com/codespell-project/codespell) +to check code for common misspellings. We have a small custom dictionary file [codespell.txt](.github/linters/codespell.txt). ## Coding conventions @@ -104,5 +176,18 @@ language itself. Please note the following hints for your Ruby code: #### Comply with the Ruby standard (ISO/IEC 30170:2012) mruby is currently targeting to execute Ruby code which complies to ISO/IEC -30170:2012 (), +30170:2012 (), unless there's a clear reason, e.g. the latest Ruby has changed behavior from ISO. + +## Building documentation + +### mruby API + +- [YARD](https://yardoc.org/) - YARD is a documentation generation tool for the Ruby programming language +- [yard-mruby](https://rubygems.org/gems/yard-mruby) - Document mruby sources with YARD +- [yard-coderay](https://rubygems.org/gems/yard-coderay) - Adds coderay syntax highlighting to YARD docs + +### C API + +- [Doxygen](https://www.doxygen.nl/) - Generate documentation from source code +- [Graphviz](https://graphviz.org/) - Graphviz is open source graph visualization software diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000..34936c9370 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,13 @@ +FROM ruby:3.2.2-bullseye + +RUN apt-get update && apt-get install --no-install-recommends -y python3-pip shellcheck \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +COPY Gemfile Gemfile.lock .pre-commit-config.yaml ./ + +RUN bundle install && pip3 install --no-cache-dir pre-commit && git init . && pre-commit install-hooks + +COPY . . diff --git a/Doxyfile b/Doxyfile index 0dd49db430..aaaf13bcf9 100644 --- a/Doxyfile +++ b/Doxyfile @@ -1,75 +1,97 @@ -# Doxyfile 1.8.13 +# Doxyfile 1.9.6 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a double hash (##) is considered a comment and is placed in +# front of the TAG it is preceding. +# +# All text after a single hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists, items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (\" \"). +# +# Note: +# +# Use doxygen to compare the used configuration file with the template +# configuration file: +# doxygen -x [configFile] +# Use doxygen to compare the used configuration file with the template +# configuration file without replacing the environment variables or CMake type +# replacement variables: +# doxygen -x_noenv [configFile] #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- -DOXYFILE_ENCODING = UTF-8 -PROJECT_NAME = "mruby" -PROJECT_NUMBER = 3.1.0 - -PROJECT_BRIEF = "mruby is the lightweight implementation of the Ruby language" - -PROJECT_LOGO = doc/mruby_logo_red_icon.png - -OUTPUT_DIRECTORY = doc/capi +# This tag specifies the encoding used for all characters in the configuration +# file that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# https://www.gnu.org/software/libiconv/ for the list of possible encodings. +# The default value is: UTF-8. -USE_MDFILE_AS_MAINPAGE = README.md +DOXYFILE_ENCODING = UTF-8 -INPUT = README.md \ - src \ - include \ - include/mruby \ - mrblib \ - doc \ - doc/guides +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by +# double-quotes, unless you are using Doxywizard) that should identify the +# project for which the documentation is generated. This name is used in the +# title of most generated pages and in a few other places. +# The default value is: My Project. -# Red for Ruby -HTML_COLORSTYLE_HUE = 359 +PROJECT_NAME = mruby -# The following expansions -ENABLE_PREPROCESSING = YES -MACRO_EXPANSION = YES -EXPAND_ONLY_PREDEF = NO -PREDEFINED = -EXPAND_AS_DEFINED = -SKIP_FUNCTION_MACROS = NO +# The PROJECT_NUMBER tag can be used to enter a project or revision number. This +# could be handy for archiving the generated documentation or if some version +# control system is used. -# This tells doxygen to search the places that make sense -SEARCH_INCLUDES = YES -INCLUDE_PATH = include include/mruby -INCLUDE_FILE_PATTERNS = *.h +PROJECT_NUMBER = 4.0.0 -CLANG_ASSISTED_PARSING = NO -CLANG_OPTIONS = -I./include +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give viewer a +# quick idea about the purpose of the project. Keep the description short. -# This thing creates documentation elements for everything, even when its not documented. Its a little ugly to do it right now because huge swathes of code aren't documented. -EXTRACT_ALL = NO +PROJECT_BRIEF = "mruby is the lightweight implementation of the Ruby language" -# Document MRB_INLINE functions -EXTRACT_STATIC = YES +# With the PROJECT_LOGO tag one can specify a logo or an icon that is included +# in the documentation. The maximum height of the logo should not exceed 55 +# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy +# the logo to the output directory. -JAVADOC_AUTOBRIEF = YES -QT_AUTOBRIEF = NO +PROJECT_LOGO = doc/mruby_logo_red_icon.png -QUIET = YES -WARN_IF_UNDOCUMENTED = NO +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path +# into which the generated documentation will be written. If a relative path is +# entered, it will be relative to the location where doxygen was started. If +# left blank the current directory will be used. -#=========================================================================== -# BELOW THIS LINE IS CRUFT GENERATED BY doxygen -g -# If you edit anything below this, bring it up here so its easier to read. -#=========================================================================== +OUTPUT_DIRECTORY = doc/capi -# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- -# directories (in 2 levels) under the output directory of each output format and -# will distribute the generated files over these directories. Enabling this +# If the CREATE_SUBDIRS tag is set to YES then doxygen will create up to 4096 +# sub-directories (in 2 levels) under the output directory of each output format +# and will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise causes -# performance problems for the file system. +# performance problems for the file system. Adapt CREATE_SUBDIRS_LEVEL to +# control the number of sub-directories. # The default value is: NO. CREATE_SUBDIRS = NO +# Controls the number of sub-directories that will be created when +# CREATE_SUBDIRS tag is set to YES. Level 0 represents 16 directories, and every +# level increment doubles the number of directories, resulting in 4096 +# directories at level 8 which is the default and also the maximum value. The +# sub-directories are organized in 2 levels, the first level always has a fixed +# number of 16 directories. +# Minimum value: 0, maximum value: 8, default value: 8. +# This tag requires that the tag CREATE_SUBDIRS is set to YES. + +CREATE_SUBDIRS_LEVEL = 8 + # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode @@ -81,14 +103,14 @@ ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. -# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, -# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), -# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, -# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), -# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, -# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, -# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, -# Ukrainian and Vietnamese. +# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Bulgarian, +# Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch, English +# (United States), Esperanto, Farsi (Persian), Finnish, French, German, Greek, +# Hindi, Hungarian, Indonesian, Italian, Japanese, Japanese-en (Japanese with +# English messages), Korean, Korean-en (Korean with English messages), Latvian, +# Lithuanian, Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, +# Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, +# Swedish, Turkish, Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English @@ -180,6 +202,32 @@ STRIP_FROM_INC_PATH = SHORT_NAMES = NO +# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the +# first line (until the first dot) of a Javadoc-style comment as the brief +# description. If set to NO, the Javadoc-style will behave just like regular Qt- +# style comments (thus requiring an explicit @brief command for a brief +# description.) +# The default value is: NO. + +JAVADOC_AUTOBRIEF = YES + +# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line +# such as +# /*************** +# as being the beginning of a Javadoc-style comment "banner". If set to NO, the +# Javadoc-style will behave just like regular comments and it will not be +# interpreted by doxygen. +# The default value is: NO. + +JAVADOC_BANNER = NO + +# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first +# line (until the first dot) of a Qt-style comment as the brief description. If +# set to NO, the Qt-style will behave just like regular Qt-style comments (thus +# requiring an explicit \brief command for a brief description.) +# The default value is: NO. + +QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a # multi-line C++ special comment block (i.e. a block of //! or /// comments) as @@ -193,6 +241,14 @@ SHORT_NAMES = NO MULTILINE_CPP_IS_BRIEF = NO +# By default Python docstrings are displayed as preformatted text and doxygen's +# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the +# doxygen's special commands can be used and the contents of the docstring +# documentation blocks is shown as doxygen documentation. +# The default value is: YES. + +PYTHON_DOCSTRING = YES + # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. @@ -216,20 +272,19 @@ TAB_SIZE = 4 # the documentation. An alias has the form: # name=value # For example adding -# "sideeffect=@par Side Effects:\n" +# "sideeffect=@par Side Effects:^^" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading -# "Side Effects:". You can put \n's in the value part of an alias to insert -# newlines. +# "Side Effects:". Note that you cannot put \n's in the value part of an alias +# to insert newlines (in the resulting output). You can put ^^ in the value part +# of an alias to insert a newline as if a physical newline was in the original +# file. When you need a literal { or } or , in the value part of an alias you +# have to escape them by means of a backslash (\), this can lead to conflicts +# with the commands \{ and \} for these it is advised to use the version @{ and +# @} or use a double escape (\\{ and \\}) ALIASES = -# This tag can be used to specify a number of word-keyword mappings (TCL only). -# A mapping has the form "name=value". For example adding "class=itcl::class" -# will allow you to use the command class in the itcl::class meaning. - -TCL_SUBST = - # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all @@ -258,28 +313,40 @@ OPTIMIZE_FOR_FORTRAN = NO OPTIMIZE_OUTPUT_VHDL = NO +# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice +# sources only. Doxygen will then generate output that is more tailored for that +# language. For instance, namespaces will be presented as modules, types will be +# separated into more groups, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_SLICE = NO + # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and -# language is one of the parsers supported by doxygen: IDL, Java, Javascript, -# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: -# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: -# Fortran. In the later case the parser tries to guess whether the code is fixed -# or free formatted code, this is the default for Fortran type files), VHDL. For -# instance to make doxygen treat .inc files as Fortran files (default is PHP), -# and .f files as C (default is Fortran), use: inc=Fortran f=C. +# language is one of the parsers supported by doxygen: IDL, Java, JavaScript, +# Csharp (C#), C, C++, Lex, D, PHP, md (Markdown), Objective-C, Python, Slice, +# VHDL, Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: +# FortranFree, unknown formatted Fortran: Fortran. In the later case the parser +# tries to guess whether the code is fixed or free formatted code, this is the +# default for Fortran type files). For instance to make doxygen treat .inc files +# as Fortran files (default is PHP), and .f files as C (default is Fortran), +# use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise -# the files are not read by doxygen. +# the files are not read by doxygen. When specifying no_extension you should add +# * to the FILE_PATTERNS. +# +# Note see also the list of default file extension mappings. -EXTENSION_MAPPING = +EXTENSION_MAPPING = no_extension=md # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable -# documentation. See http://daringfireball.net/projects/markdown/ for details. +# documentation. See https://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibilities issues. @@ -291,7 +358,7 @@ MARKDOWN_SUPPORT = YES # to that level are automatically included in the table of contents, even if # they do not have an id attribute. # Note: This feature currently applies only to Markdown headings. -# Minimum value: 0, maximum value: 99, default value: 0. +# Minimum value: 0, maximum value: 99, default value: 5. # This tag requires that the tag MARKDOWN_SUPPORT is set to YES. TOC_INCLUDE_HEADINGS = 0 @@ -321,7 +388,7 @@ BUILTIN_STL_SUPPORT = NO CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: -# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen +# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. @@ -407,6 +474,19 @@ TYPEDEF_HIDES_STRUCT = NO LOOKUP_CACHE_SIZE = 0 +# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use +# during processing. When set to 0 doxygen will based this on the number of +# cores available in the system. You can set it explicitly to a value larger +# than 0 to get more control over the balance between CPU load and processing +# speed. At this moment only the input processing can be done using multiple +# threads. Since this is still an experimental feature the default is set to 1, +# which effectively disables parallel processing. Please report any issues you +# encounter. Generating dot graphs in parallel is controlled by the +# DOT_NUM_THREADS setting. +# Minimum value: 0, maximum value: 32, default value: 1. + +NUM_PROC_THREADS = 1 + #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- @@ -419,6 +499,7 @@ LOOKUP_CACHE_SIZE = 0 # normally produced when WARNINGS is set to YES. # The default value is: NO. +EXTRACT_ALL = NO # If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will # be included in the documentation. @@ -426,6 +507,12 @@ LOOKUP_CACHE_SIZE = 0 EXTRACT_PRIVATE = NO +# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual +# methods of a class will be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIV_VIRTUAL = NO + # If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal # scope will be included in the documentation. # The default value is: NO. @@ -436,6 +523,7 @@ EXTRACT_PACKAGE = NO # included in the documentation. # The default value is: NO. +EXTRACT_STATIC = YES # If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined # locally in source files will be included in the documentation. If set to NO, @@ -462,6 +550,13 @@ EXTRACT_LOCAL_METHODS = NO EXTRACT_ANON_NSPACES = NO +# If this flag is set to YES, the name of an unnamed parameter in a declaration +# will be determined by the corresponding definition. By default unnamed +# parameters remain unnamed in the output. +# The default value is: YES. + +RESOLVE_UNNAMED_PARAMS = YES + # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation @@ -473,14 +568,15 @@ HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO, these classes will be included in the various overviews. This option -# has no effect if EXTRACT_ALL is enabled. +# will also hide undocumented C++ concepts if enabled. This option has no effect +# if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend -# (class|struct|union) declarations. If set to NO, these declarations will be -# included in the documentation. +# declarations. If set to NO, these declarations will be included in the +# documentation. # The default value is: NO. HIDE_FRIEND_COMPOUNDS = NO @@ -499,12 +595,20 @@ HIDE_IN_BODY_DOCS = NO INTERNAL_DOCS = NO -# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file -# names in lower-case letters. If set to YES, upper-case letters are also -# allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows -# and Mac users are advised to set this option to NO. -# The default value is: system dependent. +# With the correct setting of option CASE_SENSE_NAMES doxygen will better be +# able to match the capabilities of the underlying filesystem. In case the +# filesystem is case sensitive (i.e. it supports files in the same directory +# whose names only differ in casing), the option must be set to YES to properly +# deal with such files in case they appear in the input. For filesystems that +# are not case sensitive the option should be set to NO to properly deal with +# output files written for symbols that only differ in casing, such as for two +# classes, one named CLASS and the other named Class, and to also support +# references to files without having to specify the exact matching casing. On +# Windows (including Cygwin) and MacOS, users should typically set this option +# to NO, whereas on Linux or other Unix flavors it should typically be set to +# YES. +# Possible values are: SYSTEM, NO and YES. +# The default value is: SYSTEM. CASE_SENSE_NAMES = YES @@ -522,6 +626,12 @@ HIDE_SCOPE_NAMES = NO HIDE_COMPOUND_REFERENCE= NO +# If the SHOW_HEADERFILE tag is set to YES then the documentation for a class +# will show which file needs to be included to use the class. +# The default value is: YES. + +SHOW_HEADERFILE = YES + # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. @@ -679,7 +789,8 @@ FILE_VERSION_FILTER = # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml -# will be used as the name of the layout file. +# will be used as the name of the layout file. See also section "Changing the +# layout of pages" for information. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE @@ -690,7 +801,7 @@ LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib # extension is automatically appended if omitted. This requires the bibtex tool -# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. +# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info. # For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. See also \cite for info how to create references. @@ -701,6 +812,12 @@ CITE_BIB_FILES = # Configuration options related to warning and progress messages #--------------------------------------------------------------------------- +# The QUIET tag can be used to turn on/off the messages that are generated to +# standard output by doxygen. If QUIET is set to YES this implies that the +# messages are off. +# The default value is: NO. + +QUIET = YES # The WARNINGS tag can be used to turn on/off the warning messages that are # generated to standard error (stderr) by doxygen. If WARNINGS is set to YES @@ -711,25 +828,51 @@ CITE_BIB_FILES = WARNINGS = YES +# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate +# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: YES. + +WARN_IF_UNDOCUMENTED = NO # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some parameters -# in a documented function, or documenting parameters that don't exist or using -# markup commands wrongly. +# potential errors in the documentation, such as documenting some parameters in +# a documented function twice, or documenting parameters that don't exist or +# using markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES +# If WARN_IF_INCOMPLETE_DOC is set to YES, doxygen will warn about incomplete +# function parameter documentation. If set to NO, doxygen will accept that some +# parameters have no documentation without warning. +# The default value is: YES. + +WARN_IF_INCOMPLETE_DOC = YES + # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return -# value. If set to NO, doxygen will only warn about wrong or incomplete -# parameter documentation, but not about the absence of documentation. +# value. If set to NO, doxygen will only warn about wrong parameter +# documentation, but not about the absence of documentation. If EXTRACT_ALL is +# set to YES then this flag will automatically be disabled. See also +# WARN_IF_INCOMPLETE_DOC # The default value is: NO. WARN_NO_PARAMDOC = NO +# If WARN_IF_UNDOC_ENUM_VAL option is set to YES, doxygen will warn about +# undocumented enumeration values. If set to NO, doxygen will accept +# undocumented enumeration values. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: NO. + +WARN_IF_UNDOC_ENUM_VAL = NO + # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when -# a warning is encountered. +# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS +# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but +# at the end of the doxygen process doxygen will return with a non-zero status. +# Possible values are: NO, YES and FAIL_ON_WARNINGS. # The default value is: NO. WARN_AS_ERROR = NO @@ -740,13 +883,27 @@ WARN_AS_ERROR = NO # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) +# See also: WARN_LINE_FORMAT # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" +# In the $text part of the WARN_FORMAT command it is possible that a reference +# to a more specific place is given. To make it easier to jump to this place +# (outside of doxygen) the user can define a custom "cut" / "paste" string. +# Example: +# WARN_LINE_FORMAT = "'vi $file +$line'" +# See also: WARN_FORMAT +# The default value is: at line $line of file $file. + +WARN_LINE_FORMAT = "at line $line of file $file" + # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard -# error (stderr). +# error (stderr). In case the file specified cannot be opened for writing the +# warning and error messages are written to standard error. When as file - is +# specified the warning and error messages are written to standard output +# (stdout). WARN_LOGFILE = @@ -760,16 +917,41 @@ WARN_LOGFILE = # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. +INPUT = CONTRIBUTING.md \ + README.md \ + SECURITY.md \ + TODO.md \ + src \ + include \ + include/mruby \ + mrblib \ + doc \ + doc/guides \ + doc/internal \ + LEGAL \ + LICENSE \ + NEWS # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv -# documentation (see: http://www.gnu.org/software/libiconv) for the list of -# possible encodings. +# documentation (see: +# https://www.gnu.org/software/libiconv/) for the list of possible encodings. +# See also: INPUT_FILE_ENCODING # The default value is: UTF-8. INPUT_ENCODING = UTF-8 +# This tag can be used to specify the character encoding of the source files +# that doxygen parses The INPUT_FILE_ENCODING tag can be used to specify +# character encoding on a per file pattern basis. Doxygen will compare the file +# name with each pattern and apply the encoding instead of the default +# INPUT_ENCODING) if there is a match. The character encodings are a list of the +# form: pattern=encoding (like *.php=ISO-8859-1). See cfg_input_encoding +# "INPUT_ENCODING" for further information on supported encodings. + +INPUT_FILE_ENCODING = + # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. @@ -778,11 +960,15 @@ INPUT_ENCODING = UTF-8 # need to set EXTENSION_MAPPING for the extension otherwise the files are not # read by doxygen. # +# Note the list of default checked file patterns might differ from the list of +# default file extension mappings. +# # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, -# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, -# *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, -# *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf and *.qsf. +# *.hh, *.hxx, *.hpp, *.h++, *.l, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, +# *.inc, *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C +# comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, +# *.vhdl, *.ucf, *.qsf and *.ice. FILE_PATTERNS = *.c \ *.cc \ @@ -864,7 +1050,7 @@ EXCLUDE_PATTERNS = # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test +# ANamespace::AClass, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* @@ -912,6 +1098,11 @@ IMAGE_PATH = # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. # +# Note that doxygen will use the data processed and written to standard output +# for further processing, therefore nothing else, like debug statements or used +# commands (so in case of a Windows batch file always use @echo OFF), should be +# written to standard output. +# # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. @@ -946,6 +1137,22 @@ FILTER_SOURCE_FILES = NO FILTER_SOURCE_PATTERNS = +# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that +# is part of the input, its contents will be placed on the main page +# (index.html). This can be useful if you have a project on for instance GitHub +# and want to reuse the introduction page also for the doxygen output. + +USE_MDFILE_AS_MAINPAGE = README.md + +# The Fortran standard specifies that for fixed formatted Fortran code all +# characters from position 72 are to be considered as comment. A common +# extension is to allow longer lines before the automatic comment starts. The +# setting FORTRAN_COMMENT_AFTER will also make it possible that longer lines can +# be processed before the automatic comment starts. +# Minimum value: 7, maximum value: 10000, default value: 72. + +FORTRAN_COMMENT_AFTER = 72 + #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- @@ -973,7 +1180,7 @@ INLINE_SOURCES = NO STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES then for each documented -# function all documented functions referencing it will be listed. +# entity all documented functions referencing it will be listed. # The default value is: NO. REFERENCED_BY_RELATION = NO @@ -1005,12 +1212,12 @@ SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen built-in # source browser. The htags tool is part of GNU's global source tagging system -# (see http://www.gnu.org/software/global/global.html). You will need version +# (see https://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. # # To use it do the following: # - Install the latest version of global -# - Enable SOURCE_BROWSER and USE_HTAGS in the config file +# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # @@ -1032,23 +1239,6 @@ USE_HTAGS = NO VERBATIM_HEADERS = YES -# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the -# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the -# cost of reduced performance. This can be particularly helpful with template -# rich C++ code for which doxygen's built-in parser lacks the necessary type -# information. -# Note: The availability of this option depends on whether or not doxygen was -# generated with the -Duse-libclang=ON option for CMake. -# The default value is: NO. - - -# If clang assisted parsing is enabled you can provide the compiler with command -# line options that you would normally use when invoking the compiler. Note that -# the include paths will already be set by doxygen for the files and directories -# specified with INPUT and INCLUDE_PATH. -# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. - - #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- @@ -1060,17 +1250,11 @@ VERBATIM_HEADERS = YES ALPHABETICAL_INDEX = YES -# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in -# which the alphabetical index list will be split. -# Minimum value: 1, maximum value: 20, default value: 5. -# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. - -COLS_IN_ALPHA_INDEX = 5 - -# In case all classes in a project start with a common prefix, all classes will -# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag -# can be used to specify a prefix (or a list of prefixes) that should be ignored -# while generating the index headers. +# The IGNORE_PREFIX tag can be used to specify a prefix (or a list of prefixes) +# that should be ignored while generating the index headers. The IGNORE_PREFIX +# tag works for classes, function and member names. The entity will be placed in +# the alphabetical list under the first letter of the entity name that remains +# after removing the prefix. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = @@ -1149,7 +1333,12 @@ HTML_STYLESHEET = # Doxygen will copy the style sheet files to the output directory. # Note: The order of the extra style sheet files is of importance (e.g. the last # style sheet in the list overrules the setting of the previous ones in the -# list). For an example see the documentation. +# list). +# Note: Since the styling of scrollbars can currently not be overruled in +# Webkit/Chromium, the styling will be left out of the default doxygen.css if +# one or more extra stylesheets have been specified. So if scrollbar +# customization is desired it has to be added explicitly. For an example see the +# documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = @@ -1164,8 +1353,32 @@ HTML_EXTRA_STYLESHEET = HTML_EXTRA_FILES = +# The HTML_COLORSTYLE tag can be used to specify if the generated HTML output +# should be rendered with a dark or light theme. +# Possible values are: LIGHT always generate light mode output, DARK always +# generate dark mode output, AUTO_LIGHT automatically set the mode according to +# the user preference, use light mode if no preference is set (the default), +# AUTO_DARK automatically set the mode according to the user preference, use +# dark mode if no preference is set and TOGGLE allow to user to switch between +# light and dark mode via a button. +# The default value is: AUTO_LIGHT. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE = AUTO_LIGHT + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen +# will adjust the colors in the style sheet and background images according to +# this color. Hue is specified as an angle on a color-wheel, see +# https://en.wikipedia.org/wiki/Hue for more information. For instance the value +# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 +# purple, and 360 is red again. +# Minimum value: 0, maximum value: 359, default value: 220. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_HUE = 359 + # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors -# in the HTML output. For a value of 0 the output will use grayscales only. A +# in the HTML output. For a value of 0 the output will use gray-scales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. @@ -1192,6 +1405,17 @@ HTML_COLORSTYLE_GAMMA = 80 HTML_TIMESTAMP = NO +# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML +# documentation will contain a main index with vertical navigation menus that +# are dynamically created via JavaScript. If disabled, the navigation index will +# consists of multiple levels of tabs that are statically embedded in every HTML +# page. Disable this option to support browsers that do not have JavaScript, +# like the Qt help browser. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_DYNAMIC_MENUS = YES + # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. @@ -1215,13 +1439,14 @@ HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development -# environment (see: http://developer.apple.com/tools/xcode/), introduced with -# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a -# Makefile in the HTML output directory. Running make will produce the docset in -# that directory and running make install will install the docset in +# environment (see: +# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To +# create a documentation set, doxygen will generate a Makefile in the HTML +# output directory. Running make will produce the docset in that directory and +# running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at -# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html -# for more information. +# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy +# genXcode/_index.html for more information. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. @@ -1235,6 +1460,13 @@ GENERATE_DOCSET = NO DOCSET_FEEDNAME = "Doxygen generated docs" +# This tag determines the URL of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDURL = + # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. @@ -1260,8 +1492,12 @@ DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop -# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on -# Windows. +# on Windows. In the beginning of 2021 Microsoft took the original page, with +# a.o. the download links, offline the HTML help workshop was already many years +# in maintenance mode). You can download the HTML help workshop from the web +# archives at Installation executable (see: +# http://web.archive.org/web/20160201063255/http://download.microsoft.com/downlo +# ad/0/A/9/0A939EF6-E31C-430F-A3DF-DFAE7960D564/htmlhelp.exe). # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML @@ -1291,7 +1527,7 @@ CHM_FILE = HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated -# (YES) or that it should be included in the master .chm file (NO). +# (YES) or that it should be included in the main .chm file (NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. @@ -1336,7 +1572,8 @@ QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. For more information please see Qt Help Project / Namespace -# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). +# (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_QHP is set to YES. @@ -1344,8 +1581,8 @@ QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt # Help Project output. For more information please see Qt Help Project / Virtual -# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- -# folders). +# Folders (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders). # The default value is: doc. # This tag requires that the tag GENERATE_QHP is set to YES. @@ -1353,30 +1590,30 @@ QHP_VIRTUAL_FOLDER = doc # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom # filter to add. For more information please see Qt Help Project / Custom -# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- -# filters). +# Filters (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see Qt Help Project / Custom -# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- -# filters). +# Filters (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's filter section matches. Qt Help Project / Filter Attributes (see: -# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_SECT_FILTER_ATTRS = -# The QHG_LOCATION tag can be used to specify the location of Qt's -# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the -# generated .qhp file. +# The QHG_LOCATION tag can be used to specify the location (absolute path +# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to +# run qhelpgenerator on the generated .qhp file. # This tag requires that the tag GENERATE_QHP is set to YES. QHG_LOCATION = @@ -1419,16 +1656,28 @@ DISABLE_INDEX = NO # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can -# further fine-tune the look of the index. As an example, the default style -# sheet generated by doxygen has an example that shows how to put an image at -# the root of the tree instead of the PROJECT_NAME. Since the tree basically has -# the same information as the tab index, you could consider setting -# DISABLE_INDEX to YES when enabling this option. +# further fine tune the look of the index (see "Fine-tuning the output"). As an +# example, the default style sheet generated by doxygen has an example that +# shows how to put an image at the root of the tree instead of the PROJECT_NAME. +# Since the tree basically has the same information as the tab index, you could +# consider setting DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_TREEVIEW = NO +# When both GENERATE_TREEVIEW and DISABLE_INDEX are set to YES, then the +# FULL_SIDEBAR option determines if the side bar is limited to only the treeview +# area (value NO) or if it should extend to the full height of the window (value +# YES). Setting this to YES gives a layout similar to +# https://docs.readthedocs.io with more room for contents, but less room for the +# project logo, title, and description. If either GENERATE_TREEVIEW or +# DISABLE_INDEX is set to NO, this option has no effect. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FULL_SIDEBAR = NO + # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # @@ -1453,6 +1702,24 @@ TREEVIEW_WIDTH = 250 EXT_LINKS_IN_WINDOW = NO +# If the OBFUSCATE_EMAILS tag is set to YES, doxygen will obfuscate email +# addresses. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +OBFUSCATE_EMAILS = YES + +# If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg +# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see +# https://inkscape.org) to generate formulas as SVG images instead of PNGs for +# the HTML output. These images will generally look nicer at scaled resolutions. +# Possible values are: png (the default) and svg (looks nicer but requires the +# pdf2svg or inkscape tool). +# The default value is: png. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FORMULA_FORMAT = png + # Use this tag to change the font size of LaTeX formulas included as images in # the HTML documentation. When you change the font size after a successful # doxygen run you need to manually remove any form_*.png images from the HTML @@ -1462,19 +1729,14 @@ EXT_LINKS_IN_WINDOW = NO FORMULA_FONTSIZE = 10 -# Use the FORMULA_TRANPARENT tag to determine whether or not the images -# generated for formulas are transparent PNGs. Transparent PNGs are not -# supported properly for IE 6.0, but are supported on all modern browsers. -# -# Note that when changing this option you need to delete any form_*.png files in -# the HTML output directory before the changes have effect. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. +# The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands +# to create new LaTeX commands to be used in formulas as building blocks. See +# the section "Including formulas" for details. -FORMULA_TRANSPARENT = YES +FORMULA_MACROFILE = # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see -# http://www.mathjax.org) which uses client side Javascript for the rendering +# https://www.mathjax.org) which uses client side JavaScript for the rendering # instead of using pre-rendered bitmaps. Use this if you do not have LaTeX # installed or if you want to formulas look prettier in the HTML output. When # enabled you may also need to install MathJax separately and configure the path @@ -1484,11 +1746,29 @@ FORMULA_TRANSPARENT = YES USE_MATHJAX = NO +# With MATHJAX_VERSION it is possible to specify the MathJax version to be used. +# Note that the different versions of MathJax have different requirements with +# regards to the different settings, so it is possible that also other MathJax +# settings have to be changed when switching between the different MathJax +# versions. +# Possible values are: MathJax_2 and MathJax_3. +# The default value is: MathJax_2. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_VERSION = MathJax_2 + # When MathJax is enabled you can set the default output format to be used for -# the MathJax output. See the MathJax site (see: -# http://docs.mathjax.org/en/latest/output.html) for more details. +# the MathJax output. For more details about the output format see MathJax +# version 2 (see: +# http://docs.mathjax.org/en/v2.7-latest/output.html) and MathJax version 3 +# (see: +# http://docs.mathjax.org/en/latest/web/components/output.html). # Possible values are: HTML-CSS (which is slower, but has the best -# compatibility), NativeMML (i.e. MathML) and SVG. +# compatibility. This is the name for Mathjax version 2, for MathJax version 3 +# this will be translated into chtml), NativeMML (i.e. MathML. Only supported +# for NathJax 2. For MathJax version 3 chtml will be used instead.), chtml (This +# is the name for Mathjax version 3, for MathJax version 2 this will be +# translated into HTML-CSS) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. @@ -1501,22 +1781,29 @@ MATHJAX_FORMAT = HTML-CSS # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of -# MathJax from http://www.mathjax.org before deployment. -# The default value is: http://cdn.mathjax.org/mathjax/latest. +# MathJax from https://www.mathjax.org before deployment. The default value is: +# - in case of MathJax version 2: https://cdn.jsdelivr.net/npm/mathjax@2 +# - in case of MathJax version 3: https://cdn.jsdelivr.net/npm/mathjax@3 # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. For example +# for MathJax version 2 (see +# https://docs.mathjax.org/en/v2.7-latest/tex.html#tex-and-latex-extensions): # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# For example for MathJax version 3 (see +# http://docs.mathjax.org/en/latest/input/tex/extensions/index.html): +# MATHJAX_EXTENSIONS = ams # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site -# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an +# (see: +# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. @@ -1544,7 +1831,7 @@ MATHJAX_CODEFILE = SEARCHENGINE = YES # When the SERVER_BASED_SEARCH tag is enabled the search engine will be -# implemented using a web server instead of a web client using Javascript. There +# implemented using a web server instead of a web client using JavaScript. There # are two flavors of web server based searching depending on the EXTERNAL_SEARCH # setting. When disabled, doxygen will generate a PHP script for searching and # an index file used by the script. When EXTERNAL_SEARCH is enabled the indexing @@ -1563,7 +1850,8 @@ SERVER_BASED_SEARCH = NO # # Doxygen ships with an example indexer (doxyindexer) and search engine # (doxysearch.cgi) which are based on the open source search engine library -# Xapian (see: http://xapian.org/). +# Xapian (see: +# https://xapian.org/). # # See the section "External Indexing and Searching" for details. # The default value is: NO. @@ -1576,8 +1864,9 @@ EXTERNAL_SEARCH = NO # # Doxygen ships with an example indexer (doxyindexer) and search engine # (doxysearch.cgi) which are based on the open source search engine library -# Xapian (see: http://xapian.org/). See the section "External Indexing and -# Searching" for details. +# Xapian (see: +# https://xapian.org/). See the section "External Indexing and Searching" for +# details. # This tag requires that the tag SEARCHENGINE is set to YES. SEARCHENGINE_URL = @@ -1628,21 +1917,35 @@ LATEX_OUTPUT = latex # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be # invoked. # -# Note that when enabling USE_PDFLATEX this option is only used for generating -# bitmaps for formulas in the HTML output, but not in the Makefile that is -# written to the output directory. -# The default file is: latex. +# Note that when not enabling USE_PDFLATEX the default is latex when enabling +# USE_PDFLATEX the default is pdflatex and when in the later case latex is +# chosen this is overwritten by pdflatex. For specific output languages the +# default can have been set differently, this depends on the implementation of +# the output language. # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_CMD_NAME = latex # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate # index for LaTeX. +# Note: This tag is used in the Makefile / make.bat. +# See also: LATEX_MAKEINDEX_CMD for the part in the generated output file +# (.tex). # The default file is: makeindex. # This tag requires that the tag GENERATE_LATEX is set to YES. MAKEINDEX_CMD_NAME = makeindex +# The LATEX_MAKEINDEX_CMD tag can be used to specify the command name to +# generate index for LaTeX. In case there is no backslash (\) as first character +# it will be automatically added in the LaTeX code. +# Note: This tag is used in the generated output file (.tex). +# See also: MAKEINDEX_CMD_NAME for the part in the Makefile / make.bat. +# The default value is: makeindex. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_MAKEINDEX_CMD = makeindex + # If the COMPACT_LATEX tag is set to YES, doxygen generates more compact LaTeX # documents. This may be useful for small projects and may help to save some # trees in general. @@ -1672,29 +1975,31 @@ PAPER_TYPE = a4 EXTRA_PACKAGES = -# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the -# generated LaTeX document. The header should contain everything until the first -# chapter. If it is left blank doxygen will generate a standard header. See -# section "Doxygen usage" for information on how to let doxygen write the -# default header to a separate file. +# The LATEX_HEADER tag can be used to specify a user-defined LaTeX header for +# the generated LaTeX document. The header should contain everything until the +# first chapter. If it is left blank doxygen will generate a standard header. It +# is highly recommended to start with a default header using +# doxygen -w latex new_header.tex new_footer.tex new_stylesheet.sty +# and then modify the file new_header.tex. See also section "Doxygen usage" for +# information on how to generate the default header that doxygen normally uses. # -# Note: Only use a user-defined header if you know what you are doing! The -# following commands have a special meaning inside the header: $title, -# $datetime, $date, $doxygenversion, $projectname, $projectnumber, -# $projectbrief, $projectlogo. Doxygen will replace $title with the empty -# string, for the replacement values of the other commands the user is referred -# to HTML_HEADER. +# Note: Only use a user-defined header if you know what you are doing! +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. The following +# commands have a special meaning inside the header (and footer): For a +# description of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_HEADER = -# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the -# generated LaTeX document. The footer should contain everything after the last -# chapter. If it is left blank doxygen will generate a standard footer. See +# The LATEX_FOOTER tag can be used to specify a user-defined LaTeX footer for +# the generated LaTeX document. The footer should contain everything after the +# last chapter. If it is left blank doxygen will generate a standard footer. See # LATEX_HEADER for more information on how to generate a default footer and what -# special commands can be used inside the footer. -# -# Note: Only use a user-defined footer if you know what you are doing! +# special commands can be used inside the footer. See also section "Doxygen +# usage" for information on how to generate the default footer that doxygen +# normally uses. Note: Only use a user-defined footer if you know what you are +# doing! # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_FOOTER = @@ -1727,9 +2032,11 @@ LATEX_EXTRA_FILES = PDF_HYPERLINKS = YES -# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate -# the PDF file directly from the LaTeX files. Set this option to YES, to get a -# higher quality PDF documentation. +# If the USE_PDFLATEX tag is set to YES, doxygen will use the engine as +# specified with LATEX_CMD_NAME to generate the PDF file directly from the LaTeX +# files. Set this option to YES, to get a higher quality PDF documentation. +# +# See also section LATEX_CMD_NAME for selecting the engine. # The default value is: YES. # This tag requires that the tag GENERATE_LATEX is set to YES. @@ -1737,8 +2044,7 @@ USE_PDFLATEX = YES # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode # command to the generated LaTeX files. This will instruct LaTeX to keep running -# if errors occur, instead of asking the user for help. This option is also used -# when generating formulas in HTML. +# if errors occur, instead of asking the user for help. # The default value is: NO. # This tag requires that the tag GENERATE_LATEX is set to YES. @@ -1751,19 +2057,9 @@ LATEX_BATCHMODE = NO LATEX_HIDE_INDICES = NO -# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source -# code with syntax highlighting in the LaTeX output. -# -# Note that which sources are shown also depends on other settings such as -# SOURCE_BROWSER. -# The default value is: NO. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -LATEX_SOURCE_CODE = NO - # The LATEX_BIB_STYLE tag can be used to specify the style to use for the # bibliography, e.g. plainnat, or ieeetr. See -# http://en.wikipedia.org/wiki/BibTeX and \cite for more info. +# https://en.wikipedia.org/wiki/BibTeX and \cite for more info. # The default value is: plain. # This tag requires that the tag GENERATE_LATEX is set to YES. @@ -1777,6 +2073,14 @@ LATEX_BIB_STYLE = plain LATEX_TIMESTAMP = NO +# The LATEX_EMOJI_DIRECTORY tag is used to specify the (relative or absolute) +# path from which the emoji images will be read. If a relative path is entered, +# it will be relative to the LATEX_OUTPUT directory. If left blank the +# LATEX_OUTPUT directory will be used. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_EMOJI_DIRECTORY = + #--------------------------------------------------------------------------- # Configuration options related to the RTF output #--------------------------------------------------------------------------- @@ -1816,9 +2120,9 @@ COMPACT_RTF = NO RTF_HYPERLINKS = NO -# Load stylesheet definitions from file. Syntax is similar to doxygen's config -# file, i.e. a series of assignments. You only have to provide replacements, -# missing definitions are set to their default value. +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# configuration file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. # # See also section "Doxygen usage" for information on how to generate the # default style sheet that doxygen normally uses. @@ -1827,22 +2131,12 @@ RTF_HYPERLINKS = NO RTF_STYLESHEET_FILE = # Set optional variables used in the generation of an RTF document. Syntax is -# similar to doxygen's config file. A template extensions file can be generated -# using doxygen -e rtf extensionFile. +# similar to doxygen's configuration file. A template extensions file can be +# generated using doxygen -e rtf extensionFile. # This tag requires that the tag GENERATE_RTF is set to YES. RTF_EXTENSIONS_FILE = -# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code -# with syntax highlighting in the RTF output. -# -# Note that which sources are shown also depends on other settings such as -# SOURCE_BROWSER. -# The default value is: NO. -# This tag requires that the tag GENERATE_RTF is set to YES. - -RTF_SOURCE_CODE = NO - #--------------------------------------------------------------------------- # Configuration options related to the man page output #--------------------------------------------------------------------------- @@ -1914,6 +2208,13 @@ XML_OUTPUT = xml XML_PROGRAMLISTING = YES +# If the XML_NS_MEMB_FILE_SCOPE tag is set to YES, doxygen will include +# namespace members in file scope as well, matching the HTML output. +# The default value is: NO. +# This tag requires that the tag GENERATE_XML is set to YES. + +XML_NS_MEMB_FILE_SCOPE = NO + #--------------------------------------------------------------------------- # Configuration options related to the DOCBOOK output #--------------------------------------------------------------------------- @@ -1932,23 +2233,14 @@ GENERATE_DOCBOOK = NO DOCBOOK_OUTPUT = docbook -# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the -# program listings (including syntax highlighting and cross-referencing -# information) to the DOCBOOK output. Note that enabling this will significantly -# increase the size of the DOCBOOK output. -# The default value is: NO. -# This tag requires that the tag GENERATE_DOCBOOK is set to YES. - -DOCBOOK_PROGRAMLISTING = NO - #--------------------------------------------------------------------------- # Configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- # If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an -# AutoGen Definitions (see http://autogen.sf.net) file that captures the -# structure of the code including all documentation. Note that this feature is -# still experimental and incomplete at the moment. +# AutoGen Definitions (see http://autogen.sourceforge.net/) file that captures +# the structure of the code including all documentation. Note that this feature +# is still experimental and incomplete at the moment. # The default value is: NO. GENERATE_AUTOGEN_DEF = NO @@ -1999,6 +2291,16 @@ PERLMOD_MAKEVAR_PREFIX = # C-preprocessor directives found in the sources and include files. # The default value is: YES. +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES, doxygen will expand all macro names +# in the source code. If set to NO, only conditional compilation will be +# performed. Macro expansion can be done in a controlled way by setting +# EXPAND_ONLY_PREDEF to YES. +# The default value is: NO. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. + +MACRO_EXPANSION = YES # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then # the macro expansion is limited to the macros specified with the PREDEFINED and @@ -2006,7 +2308,31 @@ PERLMOD_MAKEVAR_PREFIX = # The default value is: NO. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES, the include files in the +# INCLUDE_PATH will be searched if a #include is found. +# The default value is: YES. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by the +# preprocessor. Note that the INCLUDE_PATH is not recursive, so the setting of +# RECURSIVE has no effect here. +# This tag requires that the tag SEARCH_INCLUDES is set to YES. + +INCLUDE_PATH = include \ + include/mruby + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will be +# used. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. +INCLUDE_FILE_PATTERNS = *.h # The PREDEFINED tag can be used to specify one or more macro names that are # defined before the preprocessor is started (similar to the -D option of e.g. @@ -2016,6 +2342,7 @@ PERLMOD_MAKEVAR_PREFIX = # recursively expanded use the := operator instead of the = operator. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. +PREDEFINED = # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The @@ -2024,6 +2351,7 @@ PERLMOD_MAKEVAR_PREFIX = # definition found in the source code. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. +EXPAND_AS_DEFINED = # If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will # remove all references to function-like macros that are alone on a line, have @@ -2033,6 +2361,7 @@ PERLMOD_MAKEVAR_PREFIX = # The default value is: YES. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. +SKIP_FUNCTION_MACROS = NO #--------------------------------------------------------------------------- # Configuration options related to external references @@ -2080,34 +2409,10 @@ EXTERNAL_GROUPS = YES EXTERNAL_PAGES = YES -# The PERL_PATH should be the absolute path and name of the perl script -# interpreter (i.e. the result of 'which perl'). -# The default file (with absolute path) is: /usr/bin/perl. - -PERL_PATH = /usr/bin/perl - #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- -# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram -# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to -# NO turns the diagrams off. Note that this option also works with HAVE_DOT -# disabled, but it is recommended to install and use dot, since it yields more -# powerful graphs. -# The default value is: YES. - -CLASS_DIAGRAMS = YES - -# You can define message sequence charts within doxygen comments using the \msc -# command. Doxygen will then run the mscgen tool (see: -# http://www.mcternan.me.uk/mscgen/)) to produce the chart and insert it in the -# documentation. The MSCGEN_PATH tag allows you to specify the directory where -# the mscgen tool resides. If left empty the tool is assumed to be found in the -# default search path. - -MSCGEN_PATH = - # You can include diagrams made with dia in doxygen documentation. Doxygen will # then run dia to produce the diagram and insert it in the documentation. The # DIA_PATH tag allows you to specify the directory where the dia binary resides. @@ -2126,7 +2431,7 @@ HIDE_UNDOC_RELATIONS = YES # http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent # Bell Labs. The other options in this section have no effect if this option is # set to NO -# The default value is: YES. +# The default value is: NO. HAVE_DOT = YES @@ -2140,35 +2445,50 @@ HAVE_DOT = YES DOT_NUM_THREADS = 0 -# When you want a differently looking font in the dot files that doxygen -# generates you can specify the font name using DOT_FONTNAME. You need to make -# sure dot is able to find the font, which can be done by putting it in a -# standard location or by setting the DOTFONTPATH environment variable or by -# setting DOT_FONTPATH to the directory containing the font. -# The default value is: Helvetica. +# DOT_COMMON_ATTR is common attributes for nodes, edges and labels of +# subgraphs. When you want a differently looking font in the dot files that +# doxygen generates you can specify fontname, fontcolor and fontsize attributes. +# For details please see Node, +# Edge and Graph Attributes specification You need to make sure dot is able +# to find the font, which can be done by putting it in a standard location or by +# setting the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the +# directory containing the font. Default graphviz fontsize is 14. +# The default value is: fontname=Helvetica,fontsize=10. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_COMMON_ATTR = "fontname=Helvetica,fontsize=10" + +# DOT_EDGE_ATTR is concatenated with DOT_COMMON_ATTR. For elegant style you can +# add 'arrowhead=open, arrowtail=open, arrowsize=0.5'. Complete documentation about +# arrows shapes. +# The default value is: labelfontname=Helvetica,labelfontsize=10. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_FONTNAME = Helvetica +DOT_EDGE_ATTR = "labelfontname=Helvetica,labelfontsize=10" -# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of -# dot graphs. -# Minimum value: 4, maximum value: 24, default value: 10. +# DOT_NODE_ATTR is concatenated with DOT_COMMON_ATTR. For view without boxes +# around nodes set 'shape=plain' or 'shape=plaintext' Shapes specification +# The default value is: shape=box,height=0.2,width=0.4. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_FONTSIZE = 10 +DOT_NODE_ATTR = "shape=box,height=0.2,width=0.4" -# By default doxygen will tell dot to use the default font as specified with -# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set -# the path where dot can find it using this tag. +# You can set the path where dot can find font specified with fontname in +# DOT_COMMON_ATTR and others dot attributes. # This tag requires that the tag HAVE_DOT is set to YES. DOT_FONTPATH = -# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for -# each documented class showing the direct and indirect inheritance relations. -# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO. +# If the CLASS_GRAPH tag is set to YES (or GRAPH) then doxygen will generate a +# graph for each documented class showing the direct and indirect inheritance +# relations. In case HAVE_DOT is set as well dot will be used to draw the graph, +# otherwise the built-in generator will be used. If the CLASS_GRAPH tag is set +# to TEXT the direct and indirect inheritance relations will be shown as texts / +# links. +# Possible values are: NO, YES, TEXT and GRAPH. # The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. CLASS_GRAPH = YES @@ -2182,7 +2502,8 @@ CLASS_GRAPH = YES COLLABORATION_GRAPH = YES # If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for -# groups, showing the direct groups dependencies. +# groups, showing the direct groups dependencies. See also the chapter Grouping +# in the manual. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. @@ -2205,10 +2526,32 @@ UML_LOOK = NO # but if the number exceeds 15, the total amount of fields shown is limited to # 10. # Minimum value: 0, maximum value: 100, default value: 10. -# This tag requires that the tag HAVE_DOT is set to YES. +# This tag requires that the tag UML_LOOK is set to YES. UML_LIMIT_NUM_FIELDS = 10 +# If the DOT_UML_DETAILS tag is set to NO, doxygen will show attributes and +# methods without types and arguments in the UML graphs. If the DOT_UML_DETAILS +# tag is set to YES, doxygen will add type and arguments for attributes and +# methods in the UML graphs. If the DOT_UML_DETAILS tag is set to NONE, doxygen +# will not generate fields with class member information in the UML graphs. The +# class diagrams will look similar to the default class diagrams but using UML +# notation for the relationships. +# Possible values are: NO, YES and NONE. +# The default value is: NO. +# This tag requires that the tag UML_LOOK is set to YES. + +DOT_UML_DETAILS = NO + +# The DOT_WRAP_THRESHOLD tag can be used to set the maximum number of characters +# to display on a single line. If the actual line length exceeds this threshold +# significantly it will wrapped across multiple lines. Some heuristics are apply +# to avoid ugly line breaks. +# Minimum value: 0, maximum value: 1000, default value: 17. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_WRAP_THRESHOLD = 17 + # If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and # collaboration graphs will show the relations between templates and their # instances. @@ -2275,6 +2618,13 @@ GRAPHICAL_HIERARCHY = YES DIRECTORY_GRAPH = YES +# The DIR_GRAPH_MAX_DEPTH tag can be used to limit the maximum number of levels +# of child directories generated in directory dependency graphs by dot. +# Minimum value: 1, maximum value: 25, default value: 1. +# This tag requires that the tag DIRECTORY_GRAPH is set to YES. + +DIR_GRAPH_MAX_DEPTH = 1 + # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. For an explanation of the image formats see the section # output formats in the documentation of the dot tool (Graphviz (see: @@ -2282,9 +2632,7 @@ DIRECTORY_GRAPH = YES # Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order # to make the SVG files visible in IE 9+ (other browsers do not have this # requirement). -# Possible values are: png, png:cairo, png:cairo:cairo, png:cairo:gd, png:gd, -# png:gd:gd, jpg, jpg:cairo, jpg:cairo:gd, jpg:gd, jpg:gd:gd, gif, gif:cairo, -# gif:cairo:gd, gif:gd, gif:gd:gd, svg, png:gd, png:gd:gd, png:cairo, +# Possible values are: png, jpg, gif, svg, png:gd, png:gd:gd, png:cairo, # png:cairo:gd, png:cairo:cairo, png:cairo:gdiplus, png:gdiplus and # png:gdiplus:gdiplus. # The default value is: png. @@ -2330,10 +2678,10 @@ MSCFILE_DIRS = DIAFILE_DIRS = # When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the -# path where java can find the plantuml.jar file. If left blank, it is assumed -# PlantUML is not used or called during a preprocessing step. Doxygen will -# generate a warning when it encounters a \startuml command in this case and -# will not generate output for the diagram. +# path where java can find the plantuml.jar file or to the filename of jar file +# to be used. If left blank, it is assumed PlantUML is not used or called during +# a preprocessing step. Doxygen will generate a warning when it encounters a +# \startuml command in this case and will not generate output for the diagram. PLANTUML_JAR_PATH = @@ -2371,18 +2719,6 @@ DOT_GRAPH_MAX_NODES = 50 MAX_DOT_GRAPH_DEPTH = 0 -# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent -# background. This is disabled by default, because dot on Windows does not seem -# to support this out of the box. -# -# Warning: Depending on the platform used, enabling this option may lead to -# badly anti-aliased labels on the edges of a graph (i.e. they become hard to -# read). -# The default value is: NO. -# This tag requires that the tag HAVE_DOT is set to YES. - -DOT_TRANSPARENT = NO - # Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). This # makes dot run faster, but since only newer versions of dot (>1.8.10) support @@ -2395,14 +2731,18 @@ DOT_MULTI_TARGETS = NO # If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page # explaining the meaning of the various boxes and arrows in the dot generated # graphs. +# Note: This tag requires that UML_LOOK isn't set, i.e. the doxygen internal +# graphical representation for inheritance and collaboration diagrams is used. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. GENERATE_LEGEND = YES -# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot +# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate # files that are used to generate the various graphs. +# +# Note: This setting is not only used for dot files but also for msc temporary +# files. # The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. DOT_CLEANUP = YES diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000000..497e42ffc2 --- /dev/null +++ b/Gemfile @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +source 'https://rubygems.org' + +gem 'rake' +gem 'yard' +gem 'yard-coderay' +gem 'yard-mruby' diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000000..6943833262 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,25 @@ +GEM + remote: https://rubygems.org/ + specs: + coderay (1.1.3) + rake (13.4.2) + yard (0.9.44) + yard-coderay (0.1.0) + coderay + yard + yard-mruby (0.3.0) + yard (~> 0.9.0) + +PLATFORMS + ruby + x86_64-darwin-21 + x86_64-linux + +DEPENDENCIES + rake + yard + yard-coderay + yard-mruby + +BUNDLED WITH + 2.4.10 diff --git a/LEGAL b/LEGAL index ecddf6b958..9697d5747e 100644 --- a/LEGAL +++ b/LEGAL @@ -3,3 +3,60 @@ LEGAL NOTICE INFORMATION All the files in this distribution are covered under the MIT license (see the file LICENSE) except some files mentioned below: + +- src/string.c (memsearch_swar): 2 clause BSD license code by Wojciech Muła (@WojciechMula) +- src/fmt_fp.c: public domain by Dave Hylands (@dhylands) +- mrbgems/mruby-dir/src/Win/dirent.c: MIT-like license by Kevlin Henney + +[src/string.c] +The implementation of mrb_memsearch_ss() is taken from +https://github.com/WojciechMula/sse4-strstr.git + +Copyright (c) 2008-2016, Wojciech Muła +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +[src/fmt_fp.c] + +The code in this function was inspired from Fred Bayer's pdouble.c. +Since pdouble.c was released as Public Domain, I'm releasing this +code as public domain as well. + +Dave Hylands + +The original code can be found in https://github.com/dhylands/format-float + +[mrbgems/mruby-dir/src/Win/dirent.c] used only for Windows platform + +Copyright Kevlin Henney, 1997, 2003, 2012. All rights reserved. + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose is hereby granted without fee, provided +that this copyright and permissions notice appear in all copies and +derivatives. + +This software is supplied "as is" without express or implied warranty. + +But that said, if there are any problems please get in touch. diff --git a/LICENSE b/LICENSE index 985dd36656..b21dbf39e0 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2010-2021 mruby developers +Copyright (c) 2010- mruby developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), diff --git a/Makefile b/Makefile index 0884794986..65a5231dcb 100644 --- a/Makefile +++ b/Makefile @@ -1,19 +1,63 @@ # mruby is using Rake (https://ruby.github.io/rake/) as a build tool. RAKE = rake +DOCKER_COMPOSE = docker-compose +PRE_COMMIT = prek -all : +define check_command + @command -v $(1) >/dev/null 2>&1 || { \ + echo "Error: $(1) is not installed or not in PATH."; \ + exit 1; \ + } +endef + +# For colors +ifneq ($(shell tty -s),) + CYAN := $(shell tput setaf 6) + RESET := $(shell tput sgr0) +else + CYAN := + RESET := +endif + +.PHONY: all test clean check checkinstall checkupdate composecheck composetest check_rake check_docker_compose check_pre_commit help +.DEFAULT_GOAL := all + +all: check_rake ## build all targets, install (locally) in-repo $(RAKE) -.PHONY : all -test : all +test: check_rake all ## build and run all mruby tests $(RAKE) test -.PHONY : test -clean : +clean: check_rake ## clean all built and in-repo installed artifacts $(RAKE) clean -.PHONY : clean -check : - pre-commit run --all-files -.PHONY : check +check: check_pre_commit ## run all prek hooks against all files + $(PRE_COMMIT) run --all-files + +checkinstall: check_pre_commit ## install the prek hooks + $(PRE_COMMIT) install + +checkupdate: check_pre_commit ## check the prek hooks for updates + $(PRE_COMMIT) autoupdate + +composecheck: check_docker_compose check_pre_commit ## run all prek hooks against all files with docker-compose + $(DOCKER_COMPOSE) -p mruby run test $(PRE_COMMIT) run --all-files + +composetest: check_docker_compose ## build and run all mruby tests with docker-compose + $(DOCKER_COMPOSE) -p mruby run test + +check_rake: ## check if Rake is installed + $(call check_command, $(RAKE)) + +check_docker_compose: ## check if docker-compose is installed + $(call check_command, $(DOCKER_COMPOSE)) + +check_pre_commit: ## check if prek is installed + $(call check_command, $(PRE_COMMIT)) + +help: ## display this help message + @echo "Usage: make " + @echo + @echo "Available targets:" + @grep -E '^[a-z_-]+:.*##' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*## *"}; {printf " $(CYAN)%-20s$(RESET) %s\n", $$1, $$2}' diff --git a/NEWS b/NEWS deleted file mode 100644 index 8e8c1e41a0..0000000000 --- a/NEWS +++ /dev/null @@ -1,11 +0,0 @@ -* NEWS - -This document is a list of user visible feature changes made between -releases except for bug fixes. - -Note that each entry is kept so brief that no reason behind or -reference information is supplied with. For a full list of changes -with all sufficient information, see the ChangeLog file. - - -** Information about first release v1.0.0 diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000000..200b58b9e3 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,367 @@ +# User visible changes in `mruby4.0` from `mruby3.4` + +"**_NOTE_**:" are changes to be aware of. + +# The language + +## Pattern Matching + +mruby now supports pattern matching (case/in) syntax: + +- Basic pattern matching with `case`/`in` syntax ([dadfac6](https://github.com/mruby/mruby/commit/dadfac6)) +- Array pattern matching ([ec67fd9](https://github.com/mruby/mruby/commit/ec67fd9)) +- Hash pattern matching ([2147263](https://github.com/mruby/mruby/commit/2147263)) +- Find pattern matching (`[*pre, target, *post]`) ([6c4d98b](https://github.com/mruby/mruby/commit/6c4d98b)) +- Pin operator (`^variable`) ([1de6340](https://github.com/mruby/mruby/commit/1de6340)) +- Guard clauses (`if`/`unless` conditions) ([07ac110](https://github.com/mruby/mruby/commit/07ac110)) +- One-line pattern matching (`expr in pattern`) ([e76ce24](https://github.com/mruby/mruby/commit/e76ce24)) +- Brace-less hash pattern support ([e8096bf](https://github.com/mruby/mruby/commit/e8096bf)) + +## Other Language Changes + +- `&nil` in formal parameters to explicitly opt out of block arguments ([b07518e](https://github.com/mruby/mruby/commit/b07518e)) +- Trailing comma in method definition parameters: `def foo(a, b,)` ([f78334b](https://github.com/mruby/mruby/commit/f78334b)) +- Array/Hash/String subclasses can now override `[]` and `[]=` methods ([#6675](https://github.com/mruby/mruby/pull/6675)) +- `OP_SETIDX` optimization for Array and Hash ([ddd8fe1](https://github.com/mruby/mruby/commit/ddd8fe1)) +- `case`/`in` without `else` now raises `NoMatchingPatternError` ([d8de35b](https://github.com/mruby/mruby/commit/d8de35b)) +- Allow compound statement in parenthesized argument context ([919cbd8](https://github.com/mruby/mruby/commit/919cbd8)) + +# Changes in C API + +- **_NOTE_**: `mrb_alloca()` renamed to `mrb_temp_alloc()` ([7fe5c2e](https://github.com/mruby/mruby/commit/7fe5c2e)) +- **_NOTE_**: `mruby/ext/io.h` renamed to `mruby/io.h` ([2813f79](https://github.com/mruby/mruby/commit/2813f79)) +- `mrb_gc_add_region()` for contiguous heap region support ([072855a](https://github.com/mruby/mruby/commit/072855a)) +- `mrb_class_outer()` to get the outer class/module ([3a1b771](https://github.com/mruby/mruby/commit/3a1b771)) +- `MRB_ENSURE()` macro for exception-safe cleanup ([3ac682b](https://github.com/mruby/mruby/commit/3ac682b)) +- `mrb_time_get_tm()` for accessing struct tm ([daaaafe](https://github.com/mruby/mruby/commit/daaaafe)) +- `MRB_OPEN_FAILURE()` macro for checking mrb_open result ([40b0cb9](https://github.com/mruby/mruby/commit/40b0cb9)) +- `mrb_print_error()` now handles NULL gracefully ([8e50a45](https://github.com/mruby/mruby/commit/8e50a45)) +- `mrb_open()` returns mrb_state with exc set on init failure ([05ffe0c](https://github.com/mruby/mruby/commit/05ffe0c)) +- `mrb_utf8_to_buf()` for UTF-8 encoding consolidation ([7e28e68](https://github.com/mruby/mruby/commit/7e28e68)) +- `kh_is_end()` macro for safe khash iteration ([893cc75](https://github.com/mruby/mruby/commit/893cc75)) +- `mrb_bigint_p()` always defined regardless of bigint gem presence ([6c4a8c0](https://github.com/mruby/mruby/commit/6c4a8c0)) +- `RInteger` and `RFloat` added to `RVALUE` union ([13dbca0](https://github.com/mruby/mruby/commit/13dbca0)) + +# ROM Method Tables + +All built-in classes and most extension gems now use read-only method +tables stored in `.rodata` instead of heap-allocated hash tables. Method +definitions no longer consume heap memory, significantly reducing memory +footprint for embedded use. + +Core classes converted: BasicObject, Object, Module, Class, Kernel, +String, Array, Hash, Numeric, Integer, Float, NilClass, TrueClass, +FalseClass, Range, Symbol, Exception, Proc. + +Extension gems converted: mruby-string-ext, mruby-array-ext, mruby-set, +mruby-struct, mruby-class-ext, mruby-numeric-ext, mruby-random, +mruby-kernel-ext, mruby-complex, mruby-rational, mruby-io, mruby-socket, +mruby-method, mruby-metaprog, mruby-time, mruby-hash-ext, mruby-proc-ext, +mruby-symbol-ext, mruby-range-ext, mruby-object-ext. + +# GC and Memory + +- **_NOTE_**: `MRB_NO_PRESYM` removed; presym is now always enabled ([81689045](https://github.com/mruby/mruby/commit/81689045)) +- Replace `gcnext` gray linked list with fixed-size gray stack, reducing per-object overhead ([31fea170](https://github.com/mruby/mruby/commit/31fea170)) +- `mrb_gc_add_region()` for providing contiguous memory buffers as GC heap pages ([072855a](https://github.com/mruby/mruby/commit/072855a)) +- Chunk-based pool for symbol string allocation ([e05bd8f](https://github.com/mruby/mruby/commit/e05bd8f)) +- Reduce `IV_INITIAL_SIZE` from 4 to 2 ([6bd1f51](https://github.com/mruby/mruby/commit/6bd1f51)) +- Lossless float encoding using rotation in word boxing ([b6148c8](https://github.com/mruby/mruby/commit/b6148c8)) +- Lossless rotation encoding for 32-bit float32 word boxing ([14a5cfb](https://github.com/mruby/mruby/commit/14a5cfb)) +- Consolidated irep allocation for .mrb loading ([74fb045](https://github.com/mruby/mruby/commit/74fb045)) +- Object shapes (hidden classes) for `MRB_TT_OBJECT` IV storage, sharing key layouts across objects with the same instance variable assignment order ([8d10056](https://github.com/mruby/mruby/commit/8d10056)) + +# Build & Configuration + +- **_NOTE_**: `MRB_WORDBOX_NO_FLOAT_TRUNCATE` renamed to `MRB_WORDBOX_NO_INLINE_FLOAT` (old name still works) ([59e1fe2](https://github.com/mruby/mruby/commit/59e1fe2)) +- **_NOTE_**: `MRB_INT64` on 32-bit now requires `MRB_NO_BOXING` (other boxing modes cannot guarantee alignment for heap-allocated 64-bit integers) ([eaaa66b](https://github.com/mruby/mruby/commit/eaaa66b)) +- Amalgamation support via `rake amalgam` task ([d995ca2](https://github.com/mruby/mruby/commit/d995ca2)) +- New Platform: Cosmopolitan Libc ([#6681](https://github.com/mruby/mruby/pull/6681)) +- Emscripten: use native WASM exception handling ([ca364e3](https://github.com/mruby/mruby/commit/ca364e3)) +- HAL (Hardware Abstraction Layer) for platform abstraction in mruby-io, mruby-socket, mruby-dir, mruby-task ([74ca22f](https://github.com/mruby/mruby/commit/74ca22f)) +- `MRUBY_MIRB_READLINE` environment variable to control readline library selection ([0aafb83](https://github.com/mruby/mruby/commit/0aafb83)) +- MSYS2 drive letter support in build script ([77f6ffe](https://github.com/mruby/mruby/commit/77f6ffe)) +- Inter-gem headers separated from external API headers ([#6671](https://github.com/mruby/mruby/pull/6671)) + +# Changes in mrbgems + +## New Gems + +- **mruby-task**: Cooperative multitasking with preemptive scheduling ([ae0d7a0](https://github.com/mruby/mruby/commit/ae0d7a0)) +- **mruby-benchmark**: Benchmarking gem ([2f40f3d](https://github.com/mruby/mruby/commit/2f40f3d)) +- **mruby-strftime**: Time#strftime implementation ([b31e22f](https://github.com/mruby/mruby/commit/b31e22f)) + +## mruby-bin-mirb Improvements + +- Custom multi-line editor replacing readline ([527018c](https://github.com/mruby/mruby/commit/527018c)) +- Syntax highlighting for keywords, strings, result values, hash key symbols ([624272b](https://github.com/mruby/mruby/commit/624272b), [1713d4a](https://github.com/mruby/mruby/commit/1713d4a)) +- Automatic light/dark theme detection via OSC 11 ([db4c8d9](https://github.com/mruby/mruby/commit/db4c8d9)) +- Tab completion support ([2f15282](https://github.com/mruby/mruby/commit/2f15282)) +- Colored output for prompts and errors ([b36e0b4](https://github.com/mruby/mruby/commit/b36e0b4)) +- Auto-indentation and auto-dedent ([d52f318](https://github.com/mruby/mruby/commit/d52f318), [e901b6d](https://github.com/mruby/mruby/commit/e901b6d)) +- Command history with Up/Down navigation ([5f85c1b](https://github.com/mruby/mruby/commit/5f85c1b)) +- Line numbers in multi-line prompts ([5a3f0e2](https://github.com/mruby/mruby/commit/5a3f0e2)) +- UTF-8 multibyte character support ([4a97da3](https://github.com/mruby/mruby/commit/4a97da3)) + +## mruby-bigint Improvements + +- Toom-3 multiplication for large numbers ([99620804](https://github.com/mruby/mruby/commit/99620804)) +- Karatsuba multiplication for medium-sized numbers ([85e81072](https://github.com/mruby/mruby/commit/85e81072)) +- Balance multiplication for asymmetric operands ([0220ec2b](https://github.com/mruby/mruby/commit/0220ec2b)) +- Divide-and-conquer optimization for `to_s` ([990ff90f](https://github.com/mruby/mruby/commit/990ff90f)) +- Consolidated mpn layer for low-level limb operations ([9ef3362f](https://github.com/mruby/mruby/commit/9ef3362f)) +- Always use 32-bit limbs by default ([c747c77f](https://github.com/mruby/mruby/commit/c747c77f)) + +## Other Gem Changes + +- **_NOTE_**: `Hash#deconstruct_keys` removed for CRuby compatibility ([34b9412](https://github.com/mruby/mruby/commit/34b9412)) +- **mruby-enum-lazy**: Fix `Lazy#flat_map` to handle non-enumerable block return values ([#6765](https://github.com/mruby/mruby/pull/6765)) +- **mruby-array-ext**: Add `Array#find` and `Array#rfind` methods +- **mruby-io**: Add `IO#putc` and `Kernel#putc` ([baff6e6](https://github.com/mruby/mruby/commit/baff6e6)) +- **mruby-random**: Replace xoshiro with PCG for better memory efficiency ([f1bab01](https://github.com/mruby/mruby/commit/f1bab01)) +- **mruby-compiler**: Variable-sized AST nodes for reduced memory usage +- **mruby-compiler**: `no_return_value` context flag for script optimization ([613b03a](https://github.com/mruby/mruby/commit/613b03a)) +- `initialize_copy` and `respond_to_missing?` defined as private ([#6708](https://github.com/mruby/mruby/pull/6708)) +- Struct keyword argument initialization ([#6574](https://github.com/mruby/mruby/pull/6574)) + +# Compiler Improvements + +- Variable-sized AST nodes for reduced memory consumption ([821b989](https://github.com/mruby/mruby/commit/821b989)) +- Pattern matching bytecode optimizations ([21d4135](https://github.com/mruby/mruby/commit/21d4135)) +- Optimized masgn to generate literals directly into target registers ([fb5d966](https://github.com/mruby/mruby/commit/fb5d966)) +- Optimized splat of literal arrays in args/literals ([1cb8d73](https://github.com/mruby/mruby/commit/1cb8d73)) +- Early termination after too many parse errors ([510ebd7](https://github.com/mruby/mruby/commit/510ebd7)) +- Chunk array literals at 64 elements to reduce register pressure ([f98d641](https://github.com/mruby/mruby/commit/f98d641)) +- Chunk `%w()` and `%i()` literals to reduce register pressure ([62cf0dc](https://github.com/mruby/mruby/commit/62cf0dc)) + +# VM Optimizations + +New super-instructions that fuse common opcode sequences to reduce bytecode size and improve performance: + +- `OP_SEND0`/`OP_SSEND0`: Zero-argument method call, avoiding argument count setup ([9123ef4](https://github.com/mruby/mruby/commit/9123ef4)) +- `OP_TDEF`/`OP_SDEF`: Fused method definition combining TCLASS/SCLASS+METHOD+DEF into single instruction, saving 4 bytes per method ([8d4f47e](https://github.com/mruby/mruby/commit/8d4f47e)) +- `OP_GETIDX0`: Fast path for `array[0]` and `Array#first` access ([680f7ec](https://github.com/mruby/mruby/commit/680f7ec)) +- `OP_ADDILV`/`OP_SUBILV`: Local variable increment/decrement fusion for `i += n` patterns ([43f64b9](https://github.com/mruby/mruby/commit/43f64b9)) +- `OP_RETSELF`: Single-byte instruction for `return self` pattern ([a71db8c](https://github.com/mruby/mruby/commit/a71db8c)) +- `OP_RETNIL`: Single-byte instruction for `return nil` pattern ([64e30bf](https://github.com/mruby/mruby/commit/64e30bf)) +- `OP_RETTRUE`/`OP_RETFALSE`: Single-byte instructions for `return true`/`return false` patterns ([0b15727](https://github.com/mruby/mruby/commit/0b15727)) +- `OP_MATCHERR`: Pattern matching error with conditional execution ([944168a](https://github.com/mruby/mruby/commit/944168a)) +- `OP_BLKCALL`: Direct block call for `yield`, bypassing method dispatch (13-17% faster) ([3aa2872](https://github.com/mruby/mruby/commit/3aa2872)) + +Other optimizations: + +- 1.5x stack growth instead of linear growth for reduced reallocations ([f7988c93](https://github.com/mruby/mruby/commit/f7988c93)) +- Skip keyword argument hash duplication ([5970e350](https://github.com/mruby/mruby/commit/5970e350)) + +# Fixed GitHub Issues + +- [#5531](https://github.com/mruby/mruby/issues/5531) Hash recursion detection +- [#6506](https://github.com/mruby/mruby/issues/6506) Constant lookup in singleton class +- [#6507](https://github.com/mruby/mruby/issues/6507) tally multi-values +- [#6508](https://github.com/mruby/mruby/issues/6508) Enumerable#sum index +- [#6509](https://github.com/mruby/mruby/issues/6509) scope_new nregs initialization +- [#6515](https://github.com/mruby/mruby/issues/6515) y.tab.c in repository +- [#6516](https://github.com/mruby/mruby/issues/6516) Private backquote +- [#6554](https://github.com/mruby/mruby/issues/6554) Socket private #initialize +- [#6570](https://github.com/mruby/mruby/issues/6570) instance_eval crash +- [#6613](https://github.com/mruby/mruby/issues/6613) const_added hook during bootstrapping +- [#6635](https://github.com/mruby/mruby/issues/6635), [#6636](https://github.com/mruby/mruby/issues/6636) Colon3 constant lookup +- [#6637](https://github.com/mruby/mruby/issues/6637) arm64 mingw64 builtin setjmp/longjmp +- [#6642](https://github.com/mruby/mruby/issues/6642) Task segfault when sleep called from C +- [#6645](https://github.com/mruby/mruby/issues/6645) Set memory leak from double initialization +- [#6646](https://github.com/mruby/mruby/issues/6646) IO#gets negative length +- [#6647](https://github.com/mruby/mruby/issues/6647) IO#ungetc buffer overflow +- [#6648](https://github.com/mruby/mruby/issues/6648) sprintf buffer overread +- [#6649](https://github.com/mruby/mruby/issues/6649) Array#sort! use-after-realloc +- [#6650](https://github.com/mruby/mruby/issues/6650) Array#fill validation +- [#6652](https://github.com/mruby/mruby/issues/6652) Array comparison use-after-realloc +- [#6657](https://github.com/mruby/mruby/issues/6657) Exception handling for ||= on class variables +- [#6659](https://github.com/mruby/mruby/issues/6659) Super with keyword arguments +- [#6660](https://github.com/mruby/mruby/issues/6660) Regression on struct/array/hash == override with super +- [#6662](https://github.com/mruby/mruby/issues/6662) Array set operations use-after-free +- [#6664](https://github.com/mruby/mruby/issues/6664) Set#flatten memory leak +- [#6666](https://github.com/mruby/mruby/issues/6666) Regexp literal with encoding +- [#6668](https://github.com/mruby/mruby/issues/6668) Method#== for aliased methods and comparison bug +- [#6671](https://github.com/mruby/mruby/issues/6671) Separate inter-gem headers from external API headers +- [#6674](https://github.com/mruby/mruby/issues/6674) Document pattern matching limitations +- [#6675](https://github.com/mruby/mruby/issues/6675) Allow Hash#[] to be aliased again +- [#6687](https://github.com/mruby/mruby/issues/6687) Expand MRB_SYM/MRB_GVSYM support for symbols with special characters +- [#6698](https://github.com/mruby/mruby/issues/6698) Bigint tests fail on architectures other than x86_64 and i386 +- [#6701](https://github.com/mruby/mruby/issues/6701) Heap-use-after-free in mrb_vm_exec involving mruby-rational / mruby-bigint +- [#6702](https://github.com/mruby/mruby/issues/6702) mruby-bigint doesn't compile in C++ project +- [#6704](https://github.com/mruby/mruby/issues/6704) Heap-buffer-overflow in mrb_vm_exec via malformed source code +- [#6705](https://github.com/mruby/mruby/issues/6705) Can't get outer class of an object in C +- [#6713](https://github.com/mruby/mruby/issues/6713) mruby-polarssl not work +- [#6720](https://github.com/mruby/mruby/issues/6720) Random float range: different behavior from CRuby +- [#6722](https://github.com/mruby/mruby/issues/6722) RBreak size overflow on 32-bit platforms with MRB_NO_BOXING +- [#6740](https://github.com/mruby/mruby/issues/6740) `%w()`/`%i()` register pressure with large literals +- [#6741](https://github.com/mruby/mruby/issues/6741) `case`/`in` without `else` should raise `NoMatchingPatternError` +- [#6760](https://github.com/mruby/mruby/issues/6760) `mrb_gc_unregister()` not removing all matching entries + +# Merged Pull Requests + +- [#6418](https://github.com/mruby/mruby/pull/6418) Add `ls-lint` with GitHub Actions +- [#6492](https://github.com/mruby/mruby/pull/6492) fix a typo, update specs +- [#6493](https://github.com/mruby/mruby/pull/6493) Fix TYPO in memory.md +- [#6495](https://github.com/mruby/mruby/pull/6495) Remove `MRB_ENDIAN_LOHI()` that is no longer in use +- [#6497](https://github.com/mruby/mruby/pull/6497) gha: update `build.yml` try `windows-2025` image +- [#6498](https://github.com/mruby/mruby/pull/6498) Clean up and standardize the pre-commit config +- [#6501](https://github.com/mruby/mruby/pull/6501) Update pre-commit Node.js version to `v22.14.0 LTS` +- [#6502](https://github.com/mruby/mruby/pull/6502) pre-commit: update prettier to the latest version +- [#6503](https://github.com/mruby/mruby/pull/6503) misc: fix typos +- [#6505](https://github.com/mruby/mruby/pull/6505) mrbgems: fix spelling +- [#6510](https://github.com/mruby/mruby/pull/6510) Fixed class method visibility via `module_function` +- [#6511](https://github.com/mruby/mruby/pull/6511) Exclude the external project "lrama" from pre-commit +- [#6513](https://github.com/mruby/mruby/pull/6513) mruby 3.4.0 released +- [#6517](https://github.com/mruby/mruby/pull/6517) core/codegen.c: remove unneeded duplicate semicolon +- [#6518](https://github.com/mruby/mruby/pull/6518) Change mrbc_args.flags bit width from 2 to 3 +- [#6519](https://github.com/mruby/mruby/pull/6519) Add `tools/lrama` to `.prettierignore` +- [#6520](https://github.com/mruby/mruby/pull/6520) pre-commit: autoupdate and update node LTS version +- [#6521](https://github.com/mruby/mruby/pull/6521) Add codespell config file `.codespellrc` +- [#6522](https://github.com/mruby/mruby/pull/6522) gha: label more files +- [#6523](https://github.com/mruby/mruby/pull/6523) add `rand(Range)` and unify implementations of `Random#rand` and `Kernel#rand` +- [#6524](https://github.com/mruby/mruby/pull/6524) Fix Kernel#p when no argument +- [#6525](https://github.com/mruby/mruby/pull/6525) Skip adding empty input to mirb history +- [#6526](https://github.com/mruby/mruby/pull/6526) Add build config for Luckfox Pico embedded SBC +- [#6528](https://github.com/mruby/mruby/pull/6528) misc: fix spelling +- [#6530](https://github.com/mruby/mruby/pull/6530) Revert "class.c (find_visibility_scope): when callinfo returns, \*ep == NULL; #6512" +- [#6531](https://github.com/mruby/mruby/pull/6531) Improve method table performance by rehashing at 75% load factor +- [#6532](https://github.com/mruby/mruby/pull/6532) Reverted method table optimizations to prioritize memory savings +- [#6533](https://github.com/mruby/mruby/pull/6533) Fix calling `extended` callback +- [#6534](https://github.com/mruby/mruby/pull/6534) Add descriptive comment to mrb_read_float function +- [#6535](https://github.com/mruby/mruby/pull/6535) Added descriptive comments for functions/macros in src/mempool.c +- [#6536](https://github.com/mruby/mruby/pull/6536) Add descriptive comments to public functions in src/debug.c +- [#6537](https://github.com/mruby/mruby/pull/6537) Updated comments in `cdump.c` to remove the `@brief` tag +- [#6539](https://github.com/mruby/mruby/pull/6539) Add descriptive comments for functions in src/load.c +- [#6540](https://github.com/mruby/mruby/pull/6540) Add descriptive comments to MRB_API functions in object.c +- [#6541](https://github.com/mruby/mruby/pull/6541) Add descriptive comments for MRB_API functions in src/array.c +- [#6542](https://github.com/mruby/mruby/pull/6542) Add descriptive comments to MRB_API functions in src/symbol.c +- [#6543](https://github.com/mruby/mruby/pull/6543) Add descriptive comments to several functions in src/dump.c +- [#6544](https://github.com/mruby/mruby/pull/6544) Fix build strings that must be mutable +- [#6545](https://github.com/mruby/mruby/pull/6545) Add descriptive comments for MRB_API functions in src/class.c +- [#6548](https://github.com/mruby/mruby/pull/6548) Add descriptive comments to MRB_API functions in src/etc.c +- [#6549](https://github.com/mruby/mruby/pull/6549) Add descriptive comments to kernel functions +- [#6550](https://github.com/mruby/mruby/pull/6550) Add descriptive comments for MRB_API functions in src/proc.c +- [#6551](https://github.com/mruby/mruby/pull/6551) Add descriptive comments for MRB_API functions in src/state.c +- [#6552](https://github.com/mruby/mruby/pull/6552) Fix: Correct placement of comments in src/variable.c +- [#6553](https://github.com/mruby/mruby/pull/6553) Add descriptive comments for MRB_API functions in src/vm.c +- [#6555](https://github.com/mruby/mruby/pull/6555) `mrb_mt_foreach()` needs to update the pointer at each loop +- [#6556](https://github.com/mruby/mruby/pull/6556) `iv_foreach()` needs to update the pointer at each loop +- [#6560](https://github.com/mruby/mruby/pull/6560) Refactor: Improve Set GC marking and freeing +- [#6561](https://github.com/mruby/mruby/pull/6561) pre-commit updates and fix prettier entrypoint +- [#6562](https://github.com/mruby/mruby/pull/6562) misc: fix spelling word case +- [#6563](https://github.com/mruby/mruby/pull/6563) pre-commit add rubocop with one rule spaces for indentation +- [#6564](https://github.com/mruby/mruby/pull/6564) Remove jumanjihouse pre-commit hooks no longer maintained +- [#6565](https://github.com/mruby/mruby/pull/6565) Rubocop: fix target Ruby version; add two more cops; fix lint error +- [#6566](https://github.com/mruby/mruby/pull/6566) Removed unreferenced variables in `CrossBuild#run_bintest` +- [#6567](https://github.com/mruby/mruby/pull/6567) Avoid array object creation in `cmd_bin` method in bintest +- [#6568](https://github.com/mruby/mruby/pull/6568) mruby-bin-debugger depends on mruby-bin-mrbc in bintest +- [#6569](https://github.com/mruby/mruby/pull/6569) sed s/Mruby/MRuby/g +- [#6571](https://github.com/mruby/mruby/pull/6571) Update limitations.md to add behavior on small hash +- [#6572](https://github.com/mruby/mruby/pull/6572) Add Claude Code GitHub Workflow +- [#6573](https://github.com/mruby/mruby/pull/6573) pre-commit fixes and updates +- [#6574](https://github.com/mruby/mruby/pull/6574) Support initializing structs via keyword arguments +- [#6575](https://github.com/mruby/mruby/pull/6575) Fix typo in file time methods +- [#6581](https://github.com/mruby/mruby/pull/6581) Merge `mrb_obj_iv_inspect()` into `mrb_obj_inspect()` +- [#6582](https://github.com/mruby/mruby/pull/6582) Stricter type tag in `mrb_obj_alloc()` +- [#6583](https://github.com/mruby/mruby/pull/6583) Add fallback to local build_config.rb before using default configuration +- [#6585](https://github.com/mruby/mruby/pull/6585) Fix typo in mruby3.2 docs +- [#6586](https://github.com/mruby/mruby/pull/6586) Makefile: refactor add docs and add command line `help` target +- [#6587](https://github.com/mruby/mruby/pull/6587) Add Set#hash tests +- [#6588](https://github.com/mruby/mruby/pull/6588) Add CodeQL Analysis for GitHub Actions +- [#6589](https://github.com/mruby/mruby/pull/6589) Add pre-commit hook `check-zip-file-is-not-committed` +- [#6591](https://github.com/mruby/mruby/pull/6591) mruby-eval fix license link in README +- [#6593](https://github.com/mruby/mruby/pull/6593) README: Add Contributors Avatars, Star History, Table of Contents +- [#6598](https://github.com/mruby/mruby/pull/6598) Fix heap buffer overflow in `#method_missing` +- [#6599](https://github.com/mruby/mruby/pull/6599) pre-commit: run `markdown-link-check`, `oxipng`, `prettier` manually +- [#6600](https://github.com/mruby/mruby/pull/6600) `dreamcast_shelf build config`: update to use KallistiOS wrappers +- [#6601](https://github.com/mruby/mruby/pull/6601) fix: skip local build_config.rb when working in MRUBY_ROOT +- [#6602](https://github.com/mruby/mruby/pull/6602) Improved iseq annotations for `new` and `!=` +- [#6604](https://github.com/mruby/mruby/pull/6604) pre-commit config updates +- [#6607](https://github.com/mruby/mruby/pull/6607) fix bigint on raspberry pi +- [#6610](https://github.com/mruby/mruby/pull/6610) Extract golden ratio prime into constant +- [#6614](https://github.com/mruby/mruby/pull/6614) Fix uninitialized variable in io_gets causing segmentation fault +- [#6617](https://github.com/mruby/mruby/pull/6617) Fix various minor problems and speed up build +- [#6618](https://github.com/mruby/mruby/pull/6618) Stop generating unnecessary C++ files in mruby-bin-mruby +- [#6621](https://github.com/mruby/mruby/pull/6621) Set up all GEMS before mruby core tasks definition +- [#6624](https://github.com/mruby/mruby/pull/6624) Fixed wrong `MRuby::Build.current` at the top level of `mrbgem.rake` +- [#6628](https://github.com/mruby/mruby/pull/6628) Revert `File.absolute_path` logic +- [#6629](https://github.com/mruby/mruby/pull/6629) pre-commit update +- [#6631](https://github.com/mruby/mruby/pull/6631) Revert "Rakefile: make the whole thing parallel unless SERIAL=1" +- [#6633](https://github.com/mruby/mruby/pull/6633) Fix a heap-buffer-overflow in str strip! methods +- [#6643](https://github.com/mruby/mruby/pull/6643) Fix crash caused by an incorrect node type check in `codegen_masgn` +- [#6651](https://github.com/mruby/mruby/pull/6651) Address stack-use-after-return in the mruby bigint implementation +- [#6653](https://github.com/mruby/mruby/pull/6653) Improve HAL-related components for MinGW +- [#6655](https://github.com/mruby/mruby/pull/6655) Preventing Memory Leaks in `Array#__combination_init` +- [#6656](https://github.com/mruby/mruby/pull/6656) Fix integer overflow in allocation size calculation +- [#6663](https://github.com/mruby/mruby/pull/6663) Added the `kh_is_end()` macro function +- [#6665](https://github.com/mruby/mruby/pull/6665) Fixed use-after-free with `Set#join` +- [#6670](https://github.com/mruby/mruby/pull/6670) Arranging VM dispatch macros +- [#6673](https://github.com/mruby/mruby/pull/6673) Adjust broken license links; clean up Markdown +- [#6677](https://github.com/mruby/mruby/pull/6677) gha: run pre-commit with `--color=always` +- [#6678](https://github.com/mruby/mruby/pull/6678) Put ls-lint and pre-commit in separate workflow files +- [#6679](https://github.com/mruby/mruby/pull/6679) pre-commit autoupdate; update node and prettier +- [#6681](https://github.com/mruby/mruby/pull/6681) Add Cosmopolitan Libc build configuration +- [#6689](https://github.com/mruby/mruby/pull/6689) docs: fix pre-commit manual hooks; fix link +- [#6694](https://github.com/mruby/mruby/pull/6694) Fix mirb build under Cosmopolitan +- [#6695](https://github.com/mruby/mruby/pull/6695) Dependabot: add a cooldown period for new releases +- [#6696](https://github.com/mruby/mruby/pull/6696) Fix parse error with required kwargs and omitted parens +- [#6699](https://github.com/mruby/mruby/pull/6699) Fix mruby-task for PicoRuby Integration +- [#6700](https://github.com/mruby/mruby/pull/6700) Fix float/double pack/unpack on s390x +- [#6706](https://github.com/mruby/mruby/pull/6706) Refactor task class to use symbol IDs +- [#6708](https://github.com/mruby/mruby/pull/6708) `initialize_copy` and `respond_to_missing?` defined as private +- [#6709](https://github.com/mruby/mruby/pull/6709) Add the `MRB_ENSURE()` macro +- [#6711](https://github.com/mruby/mruby/pull/6711) Fix out of bounds read and write in IO.select +- [#6714](https://github.com/mruby/mruby/pull/6714) Fix OP_DEBUG operand type and add NULL check for debug_op_hook +- [#6716](https://github.com/mruby/mruby/pull/6716) Fixes identity for proc object +- [#6717](https://github.com/mruby/mruby/pull/6717) Fix mruby-task: wrapping by critical section and setting initial task receiver +- [#6718](https://github.com/mruby/mruby/pull/6718) Add installation instructions for conda and Homebrew +- [#6723](https://github.com/mruby/mruby/pull/6723) Add `RInteger` and `RFloat` to `RVALUE` +- [#6727](https://github.com/mruby/mruby/pull/6727) Language documentation: update wording of "overloading" section +- [#6729](https://github.com/mruby/mruby/pull/6729) Simplifying dependency addition for gensym task +- [#6730](https://github.com/mruby/mruby/pull/6730) Simplifying presym file generation actions +- [#6733](https://github.com/mruby/mruby/pull/6733) Include `mruby/presym.h` for all source files +- [#6734](https://github.com/mruby/mruby/pull/6734) Chunk array literals at 64 elements to reduce register pressure +- [#6735](https://github.com/mruby/mruby/pull/6735) Prevent full recompilation without changes to presym file +- [#6739](https://github.com/mruby/mruby/pull/6739) Fix MSYS2 build error with drive letters +- [#6743](https://github.com/mruby/mruby/pull/6743) Chunk `%w()` and `%i()` literals to reduce register pressure +- [#6744](https://github.com/mruby/mruby/pull/6744) Raise `NoMatchingPatternError` in `case`/`in` without `else` +- [#6747](https://github.com/mruby/mruby/pull/6747) Correctly handle empty hash as default named argument +- [#6749](https://github.com/mruby/mruby/pull/6749) Fix microcontroller profile +- [#6750](https://github.com/mruby/mruby/pull/6750) Fix out-of-bounds read and divide-by-zero in `Array#product` +- [#6752](https://github.com/mruby/mruby/pull/6752) Fix `attr_reader`-generated methods accepting extra arguments +- [#6753](https://github.com/mruby/mruby/pull/6753) Further optimize `Array#product` +- [#6754](https://github.com/mruby/mruby/pull/6754) Mark `attr_reader` procs as noarg +- [#6755](https://github.com/mruby/mruby/pull/6755) Reload `ci` after `mrb_hash_delete_key()` in keyword argument handling +- [#6756](https://github.com/mruby/mruby/pull/6756) Avoid impact of object modifications caused by `mrb_vm_exec()` calls +- [#6758](https://github.com/mruby/mruby/pull/6758) Don't assign result of `mrb_funcall()` directly to `regs` +- [#6759](https://github.com/mruby/mruby/pull/6759) Define `mrb_bigint_p()` always +- [#6761](https://github.com/mruby/mruby/pull/6761) Fix `mrb_gc_unregister()` to remove all matching entries +- [#6762](https://github.com/mruby/mruby/pull/6762) Write generated test C files atomically to avoid build race condition +- [#6765](https://github.com/mruby/mruby/pull/6765) Fix `Lazy#flat_map` to handle non-enumerable block return values +- [#6767](https://github.com/mruby/mruby/pull/6767) Allow compound statement in parenthesized argument context +- [#6780](https://github.com/mruby/mruby/pull/6780) Fix `String#prepend` with self-referencing arguments +- [#6781](https://github.com/mruby/mruby/pull/6781) Protect `sprintf` format string from mutation during callbacks +- [#6783](https://github.com/mruby/mruby/pull/6783) Pin GitHub Actions workflows to commit hashes + +# Security Fixes + +- Buffer overflow in bigint uadd ([3f2611e](https://github.com/mruby/mruby/commit/3f2611e)) +- Stack buffer overflow in Montgomery reduction ([edce0a3](https://github.com/mruby/mruby/commit/edce0a3)) +- Buffer overflow in pack_uu encoding ([2993302](https://github.com/mruby/mruby/commit/2993302)) +- Buffer overflow in IO#ungetc ([01ab2ff](https://github.com/mruby/mruby/commit/01ab2ff)) +- Heap-buffer-overflow in pattern alternation codegen ([eea9e30](https://github.com/mruby/mruby/commit/eea9e30)) +- Out of bounds read and write in IO.select ([44831711](https://github.com/mruby/mruby/commit/44831711)) +- Off-by-one in bounds check for symbol names and pool strings in load.c ([b3b8c01](https://github.com/mruby/mruby/commit/b3b8c01)) +- Use-after-free in Set operations ([a6b55e7](https://github.com/mruby/mruby/commit/a6b55e7)) +- Use-after-free in Array set operations ([729b84c](https://github.com/mruby/mruby/commit/729b84c)) +- Use-after-free in Set#join ([0e653eb](https://github.com/mruby/mruby/commit/0e653eb)) +- Use-after-realloc in Array#sort! ([eb39897](https://github.com/mruby/mruby/commit/eb39897)) +- Heap-use-after-free in insertion_sort ([099d2c47](https://github.com/mruby/mruby/commit/099d2c47)) +- Integer overflow in str_check_length ([6afff1c3](https://github.com/mruby/mruby/commit/6afff1c3)) +- Integer overflow in Integer#lcm ([070bef24](https://github.com/mruby/mruby/commit/070bef24)) +- Heap buffer overflow in `#method_missing` ([550d10a](https://github.com/mruby/mruby/commit/550d10a)) +- Out-of-bounds read and divide-by-zero in `Array#product` ([8441eaf](https://github.com/mruby/mruby/commit/8441eaf)) +- Heap buffer overflow in `String#prepend` with self-referencing arguments ([18ba026](https://github.com/mruby/mruby/commit/18ba026)) +- Use-after-free in `sprintf` via `to_s` callback mutating format string ([48fc422](https://github.com/mruby/mruby/commit/48fc422)) +- Multiple memory leak fixes in bigint, Set, Array, and Task gems diff --git a/README.md b/README.md index 190fca3f40..90b674d720 100644 --- a/README.md +++ b/README.md @@ -1,41 +1,70 @@ -# mruby - -[![Build Status][build-status-img]][travis-ci] -[![GitHub Super-Linter](https://github.com/mruby/mruby/workflows/Lint%20Code%20Base/badge.svg)](https://github.com/marketplace/actions/super-linter) +
+

+ + The mruby programming language + +

+

mruby

+ + GitHub Super-Linter + +
+ +### Table of contents + +- [What is mruby](#what-is-mruby) +- [How to get mruby](#how-to-get-mruby) +- [mruby homepage](#mruby-homepage) +- [Mailing list](#mailing-list) +- [How to compile, test, and install (mruby and gems)](#how-to-compile-test-and-install-mruby-and-gems) +- [Amalgamation (single-file build)](#amalgamation-single-file-build) +- [Building documentation](#building-documentation) +- [How to customize mruby (mrbgems)](#how-to-customize-mruby-mrbgems) +- [Index of Document](#index-of-document) +- [License](#license) +- [Note for License](#note-for-license) +- [How to Contribute](#how-to-contribute) +- [Star History](#star-history) +- [Contributors](#contributors) ## What is mruby mruby is the lightweight implementation of the Ruby language complying to (part -of) the [ISO standard][ISO-standard]. Its syntax is Ruby 2.x compatible. +of) the [ISO standard][ISO-standard] with more recent features provided by Ruby 4.x. +Also, its syntax is Ruby 4.x compatible. -mruby can be linked and embedded within your application. We provide the -interpreter program "mruby", and the interactive mruby shell "mirb" as examples. -You can also compile Ruby programs into compiled byte code using the mruby -compiler "mrbc". All those tools reside in the "bin" directory. "mrbc" is -also able to generate compiled byte code in a C source file, see the "mrbtest" -program under the "test" directory for an example. +You can link and embed mruby within your application. The "mruby" interpreter +program and the interactive "mirb" shell are provided as examples. You can also +compile Ruby programs into compiled byte code using the "mrbc" compiler. All +these tools are located in the "bin" directory. "mrbc" can also generate +compiled byte code in a C source file. See the "mrbtest" program under the +"test" directory for an example. This achievement was sponsored by the Regional Innovation Creation R&D Programs of the Ministry of Economy, Trade and Industry of Japan. ## How to get mruby -The stable version 3.1.0 of mruby can be downloaded via the following URL: [https://github.com/mruby/mruby/archive/3.1.0.zip](https://github.com/mruby/mruby/archive/3.1.0.zip) +To get mruby, you can download the stable version 4.0.0 from the official mruby +GitHub repository or clone the trunk of the mruby source tree with the "git +clone" command. You can also install and compile mruby using [ruby-install](https://github.com/postmodern/ruby-install), [ruby-build](https://github.com/rbenv/ruby-build), [rvm](https://github.com/rvm/rvm), [conda](https://anaconda.org/channels/conda-forge/packages/mruby/overview) or [Homebrew](https://formulae.brew.sh/formula/mruby). + +The release candidate version 4.0.0 of mruby can be downloaded via the following URL: [https://github.com/mruby/mruby/archive/4.0.0-rc3.zip](https://github.com/mruby/mruby/archive/4.0.0-rc3.zip) The latest development version of mruby can be downloaded via the following URL: [https://github.com/mruby/mruby/zipball/master](https://github.com/mruby/mruby/zipball/master) The trunk of the mruby source tree can be checked out with the following command: -``` +```console $ git clone https://github.com/mruby/mruby.git ``` -You can also install and compile mruby using [ruby-install](https://github.com/postmodern/ruby-install), [ruby-build](https://github.com/rbenv/ruby-build) or [rvm](https://github.com/rvm/rvm). +## mruby homepage -## mruby home-page - -The URL of the mruby home-page is: . +The URL of the mruby homepage is: . ## Mailing list @@ -43,31 +72,81 @@ We don't have a mailing list, but you can use [GitHub issues](https://github.com ## How to compile, test, and install (mruby and gems) -See the [compile.md](doc/guides/compile.md) file. +For the simplest case, type + +```console +rake all test +``` + +See the [compile.md](doc/guides/compile.md) file for the detail. + +## Amalgamation (single-file build) + +mruby supports amalgamation, which combines all source files into a single +`mruby.c` and `mruby.h` for easy embedding (similar to SQLite). + +```console +rake amalgam +``` + +Output files are generated in `build/host/amalgam/`. To use: + +```console +gcc -I./build/host/amalgam your_app.c ./build/host/amalgam/mruby.c -o your_app -lm +``` ## Building documentation -There are two sets of documentation in mruby: the mruby API (generated by yard) and C API (Doxygen) +There are two sets of documentation in mruby: the mruby API (generated by YARD) and C API (Doxygen and Graphviz) To build both of them, simply go -``` +```console rake doc ``` You can also view them in your browser -``` +```console rake view_api rake view_capi ``` ## How to customize mruby (mrbgems) -mruby contains a package manager called *mrbgems*. To create extensions -in C and/or Ruby you should create a *GEM*. For a documentation of how to -use mrbgems consult the file [mrbgems.md](doc/guides/mrbgems.md). -For example code of how to use mrbgems look into the folder [examples/mrbgems/](examples/mrbgems). +mruby contains a package manager called "mrbgems" that you can use to create +extensions in C and/or Ruby. For a guide on how to use mrbgems, consult the +[mrbgems.md](doc/guides/mrbgems.md) file, and for example code, refer to the +[examples/mrbgems/](examples/mrbgems) folder. + +## Index of Document + + + + + +- [About the Limitations of mruby](doc/limitations.md) +- [About Amalgamation (Single-File Build)](doc/guides/amalgamation.md) +- [C API Reference](doc/guides/capi.md) +- [About the Compile](doc/guides/compile.md) +- [About the Debugger with the `mrdb` Command](doc/guides/debugger.md) +- [About GC Arena](doc/guides/gc-arena-howto.md) +- [Getting Started with mruby](doc/guides/getting-started.md) +- [About the mruby directory structure](doc/guides/hier.md) +- [About Linking with `libmruby`](doc/guides/link.md) +- [About Memory Allocator Customization and Heap Regions](doc/guides/memory.md) +- [About Build-time Configurations](doc/guides/mrbconf.md) +- [About the Build-time Library Manager](doc/guides/mrbgems.md) +- [ROM Method Tables for Memory-Efficient Method Registration](doc/guides/rom-method-table.md) +- [About the Symbols](doc/guides/symbol.md) +- [Internal Implementation / About mruby Architecture](doc/internal/architecture.md) +- [Internal Implementation / About Value Boxing](doc/internal/boxing.md) +- [Internal Implementation / About mruby Virtual Machine Instructions](doc/internal/opcode.md) + + ## License @@ -79,7 +158,7 @@ mruby has chosen a MIT License due to its permissive license allowing developers to target various environments such as embedded systems. However, the license requires the display of the copyright notice and license information in manuals for instance. Doing so for big projects can be -complicated or troublesome. This is why mruby has decided to display "mruby +complicated or troublesome. This is why mruby has decided to display "mruby developers" as the copyright name to make it simple conventionally. In the future, mruby might ask you to distribute your new code (that you will commit,) under the MIT License as a member of @@ -92,11 +171,16 @@ Please ask us if you want to distribute your code under another license. ## How to Contribute -See the [contribution guidelines][contribution-guidelines], and then send a pull -request to . We consider you have granted -non-exclusive right to your contributed code under MIT license. +To contribute to mruby, please refer to the [contribution guidelines][contribution-guidelines] and send a pull request to the [mruby GitHub repository](https://github.com/mruby/mruby). +By contributing, you grant non-exclusive rights to your code under the MIT License. + +## Star History + +[![mruby Star History](https://api.star-history.com/svg?repos=mruby/mruby&type=Date)](https://www.star-history.com/#mruby/mruby&Date) + +## Contributors + +[![mruby Contributors](https://contrib.rocks/image?repo=mruby/mruby&anon=1&max=500)](https://github.com/mruby/mruby/graphs/contributors) -[ISO-standard]: https://www.iso.org/iso/iso_catalogue/catalogue_tc/catalogue_detail.htm?csnumber=59579 -[build-status-img]: https://travis-ci.org/mruby/mruby.svg?branch=master -[contribution-guidelines]: https://github.com/mruby/mruby/blob/master/CONTRIBUTING.md -[travis-ci]: https://travis-ci.org/mruby/mruby +[ISO-standard]: https://www.iso.org/standard/59579.html +[contribution-guidelines]: CONTRIBUTING.md diff --git a/Rakefile b/Rakefile index 93b5f668d8..3bab7ad7cb 100644 --- a/Rakefile +++ b/Rakefile @@ -1,4 +1,3 @@ -# encoding: utf-8 # Build description. # basic build file for mruby MRUBY_ROOT = File.dirname(File.expand_path(__FILE__)) @@ -17,6 +16,11 @@ require "mruby/build" MRUBY_CONFIG = MRuby::Build.mruby_config_path load MRUBY_CONFIG +# set up all gems +MRuby.each_target do + gems.setup(self) if enable_gems? +end + # load basic rules MRuby.each_target do |build| build.define_rules @@ -32,6 +36,8 @@ load "#{MRUBY_ROOT}/tasks/presym.rake" load "#{MRUBY_ROOT}/tasks/test.rake" load "#{MRUBY_ROOT}/tasks/benchmark.rake" load "#{MRUBY_ROOT}/tasks/doc.rake" +load "#{MRUBY_ROOT}/tasks/install.rake" +load "#{MRUBY_ROOT}/tasks/amalgam.rake" ############################## # generic build targets, rules @@ -57,7 +63,7 @@ task :clean do rm_rf build.build_dir rm_f build.products end - puts "Cleaned up target build folder" + puts "Cleaned up target build directory" end desc "clean everything!" @@ -65,37 +71,32 @@ task :deep_clean => %w[clean doc:clean] do MRuby.each_target do |build| rm_rf build.gem_clone_dir end - puts "Cleaned up mrbgems build folder" + rm_rf "#{MRUBY_ROOT}/bin" + rm_rf "#{MRUBY_ROOT}/build" + puts "Cleaned up mrbgems build directory" end -PREFIX = ENV['PREFIX'] || ENV['INSTALL_PREFIX'] || '/usr/local' - -desc "install compiled products" -task :install => :install_bin do - if host = MRuby.targets['host'] - install_D host.libmruby_static, File.join(PREFIX, "lib", File.basename(host.libmruby_static)) - # install mruby.h and mrbconf.h - Dir.glob(File.join(MRUBY_ROOT, "include", "*.h")) do |src| - install_D src, File.join(PREFIX, "include", File.basename(src)) - end - Dir.glob(File.join(MRUBY_ROOT, "include", "mruby", "*.h")) do |src| - install_D src, File.join(PREFIX, "include", "mruby", File.basename(src)) - end - Dir.glob(File.join(File.join(MRUBY_ROOT, "build", "host", "include", "mruby", "presym", "*.h"))) do |src| - install_D src, File.join(PREFIX, "include", "mruby", "presym", File.basename(src)) - end - end +desc "run all pre-commit hooks against all files" +task :check do + sh "prek run --all-files" end -desc "install compiled executable (on host)" -task :install_bin => :all do - if host = MRuby.targets['host'] - Dir.glob(File.join(MRUBY_ROOT, "bin", "*")) do |src| - install_D src, File.join(PREFIX, "bin", File.basename(src)) - end - end +desc "install the pre-commit hooks" +task :checkinstall do + sh "prek install" end -task :check do - sh "pre-commit run --all-files" +desc "check the pre-commit hooks for updates" +task :checkupdate do + sh "prek autoupdate" +end + +desc "run all pre-commit hooks against all files with docker-compose" +task :composecheck do + sh "docker-compose -p mruby run test prek run --all-files" +end + +desc "build and run all mruby tests with docker-compose" +task :composetest do + sh "docker-compose -p mruby run test" end diff --git a/SECURITY.md b/SECURITY.md index 8f4ebf9fce..574749e40c 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -2,19 +2,53 @@ ## Reporting a Vulnerability -If you have any security concern, contact . +To report a security vulnerability, please email the mruby team at . We appreciate your efforts to disclose your findings responsibly. ## Scope -We consider the following issues as vulnerabilities: +mruby is an embeddable Ruby implementation. Its security model is designed for integration into a host application, which is responsible for sandboxing and resource management. This policy defines what we consider a security vulnerability within the mruby interpreter itself. -* Remote code execution -* Crash caused by a valid Ruby script +### High Priority Security Vulnerabilities -We *don't* consider the following issues as vulnerabilities: +We consider the following issues to be **high priority security vulnerabilities**: -* Runtime C undefined behavior (including integer overflow) -* Crash caused by misused API -* Crash caused by modified compiled binary -* ASAN/Valgrind warning for too big memory allocation - mruby assumes `malloc(3)` returns `NULL` for too big allocations +- **Remote Code Execution (RCE)**: The ability to execute arbitrary machine code or shell commands from within a Ruby script, beyond the intended execution scope of the script itself. + +### Lower Priority: Crashes (Preferably Report as Bugs) + +We **accept but deprioritize** the following issues. We recommend reporting them as **bug reports** on our issue tracker rather than security reports: + +- **VM Crash on Valid Ruby Code**: Segmentation faults, assertion failures, or other interpreter crashes triggered by syntactically and semantically valid Ruby scripts. + - _Recommendation_: Please report these as bugs on our issue tracker. + - _Rationale_: While we will fix these issues, they typically only result in denial of service (DoS), not arbitrary code execution. They are lower priority than RCE vulnerabilities. + - _Note_: This does not include standard Ruby exceptions like `TypeError` or `ZeroDivisionError`, which are expected behavior. + - _Example_: A segmentation fault when running `[1, 2, 3].map { |x| x * 2 }` is best reported as a bug. + +### Out of Scope: Not Considered Security Vulnerabilities + +We do **not** consider the following issues to be security vulnerabilities: + +- **Resource Exhaustion**: Infinite loops, excessive memory allocation, or high CPU usage originating from a Ruby script. + - _Rationale_: The host application is responsible for implementing resource limits, sandboxing, and execution timeouts. mruby provides the execution engine; the host provides the constraints. + - _Example_: `loop {}` or `"a" * (2**30)` are not vulnerabilities, even if they lead to memory or CPU exhaustion. + +- **Crashes from Malformed Bytecode**: Crashes resulting from loading or executing corrupted or intentionally malformed `.mrb` files. + - _Rationale_: mruby's bytecode format is not a security boundary. Applications should only execute bytecode from trusted sources. + - _Example_: A crash discovered by fuzzing `.mrb` files is not considered a vulnerability. + +- **Crashes from C API Misuse**: Crashes caused by incorrect usage of mruby's C API from the embedding application. + - _Rationale_: The C API is a trusted interface for developers. The caller is responsible for adhering to the API contract (e.g., not passing `NULL` pointers, managing object lifetimes correctly). + - _Example_: Calling `mrb_funcall()` with an invalid `mrb_state*` pointer is not a vulnerability. + +- **Theoretical Undefined Behavior (UB)**: Issues reported by tools like ASAN, UBSan, or Valgrind that do not lead to a demonstrable crash or exploitable behavior in practice. + - _Rationale_: While we strive for clean, well-defined code, our focus is on practical security impact. We prioritize fixing UB that is exploitable over issues that are purely theoretical. + - _Example_: An integer overflow in an intermediate calculation that gets handled correctly before affecting program output or control flow. + +- **Warnings on Large Memory Allocations**: Tooling warnings related to large memory allocations that do not result in a crash. + - _Rationale_: mruby is designed to handle `malloc(3)` returning `NULL` on large allocation requests. This is considered graceful error handling, not a vulnerability. + +### Summary + +- **High Priority Security Reports**: Remote code execution vulnerabilities. +- **Accepted (but preferably as bug reports)**: VM crashes from valid Ruby code. +- **Not Accepted as Security Issues**: Resource exhaustion, malformed bytecode, C API misuse, theoretical undefined behavior, or allocation warnings. diff --git a/TODO.md b/TODO.md index eea31df2d4..cac5adc269 100644 --- a/TODO.md +++ b/TODO.md @@ -1,13 +1,17 @@ # Things to Do in the future -# After mruby 3.1 +# After mruby 3.4 -* parser and code generator independent from `mrb_state` (picoruby?) -* variable sized AST node -* iv/hash entry cache -* more peephole optimization (if possible) +- parser and code generator independent from `mrb_state` (picoruby?) +- iv/hash entry cache +- method inline caching improvements (cache method lookup results) +- more peephole optimization (if possible) +- built-in profiler (method call tracing, stack profiling, detailed memory analysis) +- improved REPL (mirb) features (syntax highlighting) +- configurable memory pools (per-object-type, memory-constrained devices) +- suspend/resume VM state (serialize/deserialize for power cycling) +- CMake build support (better IDE integration, standard C tooling) # Things to do (Things we need to consider) -* `begin ... end while cond` to behave as CRuby -* special variables ($1,$2..) +- special variables ($1,$2..) diff --git a/appveyor.yml b/appveyor.yml index 8782f59aa4..369ec93c06 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -4,42 +4,44 @@ shallow_clone: true environment: matrix: - - job_name: Visual Studio 2019 64bit + - job_name: Visual Studio 2022 64-bit + visualcpp: C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvars64.bat + appveyor_build_worker_image: Visual Studio 2022 + + - job_name: Visual Studio 2019 64-bit visualcpp: C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat appveyor_build_worker_image: Visual Studio 2019 - - job_name: Visual Studio 2019 32bit + - job_name: Visual Studio 2019 32-bit visualcpp: C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars32.bat appveyor_build_worker_image: Visual Studio 2019 - - job_name: Visual Studio 2017 64bit + - job_name: Visual Studio 2017 64-bit visualcpp: C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat appveyor_build_worker_image: Visual Studio 2017 - - job_name: Visual Studio 2017 32bit + - job_name: Visual Studio 2017 32-bit visualcpp: C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars32.bat appveyor_build_worker_image: Visual Studio 2017 - - job_name: Visual Studio 2015 64bit + - job_name: Visual Studio 2015 64-bit visualcpp: C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat appveyor_build_worker_image: Visual Studio 2015 machine: x86_amd64 - - job_name: Visual Studio 2015 32bit + - job_name: Visual Studio 2015 32-bit visualcpp: C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat appveyor_build_worker_image: Visual Studio 2015 machine: x86 init: - call "%visualcpp%" %machine% - # For using RubyInstaller's Ruby 2.6 64bit + # For using RubyInstaller's Ruby 2.6 64-bit # 2.6 is the highest supported Ruby version across all historical # Visual Studio AppVeyor images. Ruby 2.7 is only on the 2019 image. - set PATH=C:\Ruby26-x64\bin;%PATH% - ruby --version - build_script: - set MRUBY_CONFIG=ci/msvc - - rake -m test:build - - rake test:run + - rake -m test:run:serial diff --git a/benchmark/bm_ao_render.rb b/benchmark/bm_ao_render.rb index 91e2d34192..d48f1e1940 100644 --- a/benchmark/bm_ao_render.rb +++ b/benchmark/bm_ao_render.rb @@ -253,6 +253,7 @@ def ambient_occlusion(isect) def render(w, h, nsubsamples) nsf = nsubsamples.to_f + nsfs = nsf * nsf h.times do |y| w.times do |x| rad = Vec.new(0.0, 0.0, 0.0) @@ -288,12 +289,10 @@ def render(w, h, nsubsamples) end end - r = rad.x / (nsf * nsf) - g = rad.y / (nsf * nsf) - b = rad.z / (nsf * nsf) - printf("%c", clamp(r)) - printf("%c", clamp(g)) - printf("%c", clamp(b)) + r = rad.x / nsfs + g = rad.y / nsfs + b = rad.z / nsfs + print([clamp(r), clamp(g), clamp(b)].pack("CCC")) end end end @@ -302,7 +301,7 @@ def render(w, h, nsubsamples) # File.open("ao.ppm", "w") do |fp| printf("P6\n") printf("%d %d\n", IMAGE_WIDTH, IMAGE_HEIGHT) - printf("255\n", IMAGE_WIDTH, IMAGE_HEIGHT) + printf("255\n") Scene.new.render(IMAGE_WIDTH, IMAGE_HEIGHT, NSUBSAMPLES) # Scene.new.render(256, 256, 2) # end diff --git a/benchmark/bm_fib.rb b/benchmark/bm_fib.rb index 4b395f9ccf..cb43b4a79d 100644 --- a/benchmark/bm_fib.rb +++ b/benchmark/bm_fib.rb @@ -1,4 +1,3 @@ - def fib n return n if n < 2 fib(n-2) + fib(n-1) diff --git a/benchmark/bm_mandel_term.rb b/benchmark/bm_mandel_term.rb new file mode 100644 index 0000000000..460c2a6b9f --- /dev/null +++ b/benchmark/bm_mandel_term.rb @@ -0,0 +1,34 @@ +def mandelbrot(c_r, c_i) + limit=95 + iterations=0 + cr = (c_r * 100).to_i + ci = (c_i * 100).to_i + zr = zi = 0 + # Avoid sqrt by squaring the threshold: sqrt(x) < 1000 => x < 1000000 + while iterations= 1000000 + zr, zi = (zr2-zi2)/100+cr, (zr*zi*2)/100+ci + iterations+=1 + end + return iterations +end + +def mandel_calc(min_r, min_i, max_r, max_i, res) + cur_i = min_i + while cur_i > max_i + putc "|" + cur_r = min_r + while cur_r < max_r + ch = 127 - mandelbrot(cur_r, cur_i) + putc ch # Use putc with integer - no string allocation! + cur_r += res + end + putc "|" + putc "\n" + cur_i -= res + end +end + +mandel_calc(-2, 1, 1, -1, 0.04) diff --git a/benchmark/bm_so_lists.rb b/benchmark/bm_so_lists.rb index f620297f43..3b9f291b86 100644 --- a/benchmark/bm_so_lists.rb +++ b/benchmark/bm_so_lists.rb @@ -5,7 +5,7 @@ NUM = 300 SIZE = 10000 -def test_lists() +def test_lists # create a list of integers (Li1) from 1 to SIZE li1 = (1..SIZE).to_a # copy the list to li2 (not by individual items) diff --git a/benchmark/bm_so_mandelbrot.rb b/benchmark/bm_so_mandelbrot.rb new file mode 100644 index 0000000000..b5299636c4 --- /dev/null +++ b/benchmark/bm_so_mandelbrot.rb @@ -0,0 +1,65 @@ +# The Computer Language Benchmarks Game +# http://shootout.alioth.debian.org/ +# +# contributed by Karl von Laudermann +# modified by Jeremy Echols +# optimized: while loops instead of for..in to avoid closure overhead + +size = 600 # ARGV[0].to_i + +puts "P4\n#{size} #{size}" + +# Cache constants in local variables to avoid repeated constant lookup +iter = 49 +limit_squared = 4.0 + +byte_acc = 0 +bit_num = 0 + +count_size = size - 1 + +# Use while loops instead of for..in to avoid closure/upvalue overhead +y = 0 +while y <= count_size + x = 0 + while x <= count_size + zr = 0.0 + zi = 0.0 + cr = (2.0*x/size)-1.5 + ci = (2.0*y/size)-1.0 + escape = false + + # Use while instead of for..in to avoid closure overhead + i = 0 + while i <= iter + tr = zr*zr - zi*zi + cr + ti = 2*zr*zi + ci + zr = tr + zi = ti + + if (zr*zr+zi*zi) > limit_squared + escape = true + break + end + i += 1 + end + + byte_acc = (byte_acc << 1) | (escape ? 0b0 : 0b1) + bit_num += 1 + + # Code is very similar for these cases, but using separate blocks + # ensures we skip the shifting when it's unnecessary, which is most cases. + if (bit_num == 8) + print byte_acc.chr + byte_acc = 0 + bit_num = 0 + elsif (x == count_size) + byte_acc <<= (8 - bit_num) + print byte_acc.chr + byte_acc = 0 + bit_num = 0 + end + x += 1 + end + y += 1 +end diff --git a/benchmark/vm_dispatch_bench.c b/benchmark/vm_dispatch_bench.c new file mode 100644 index 0000000000..1115bb04cd --- /dev/null +++ b/benchmark/vm_dispatch_bench.c @@ -0,0 +1,197 @@ +/* + * VM Dispatch Micro-benchmark + * + * This benchmark measures the raw dispatch overhead of the mruby VM + * by executing minimal bytecode sequences. + * + * Compile: + * cc -O2 -I include -I build/host/include \ + * benchmark/vm_dispatch_bench.c \ + * build/host/lib/libmruby.a -lm -o vm_dispatch_bench + * + * Run: + * ./vm_dispatch_bench + */ + +#include +#include +#include +#include +#include +#include + +#define ITERATIONS 10 + +static double +get_time_ms(void) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return ts.tv_sec * 1000.0 + ts.tv_nsec / 1000000.0; +} + +static void +run_benchmark(mrb_state *mrb, const char *name, const char *code, int iterations) +{ + double times[ITERATIONS]; + double total = 0.0; + double min_time = 1e9; + double max_time = 0.0; + + /* Compile once */ + mrbc_context *cxt = mrbc_context_new(mrb); + struct mrb_parser_state *p = mrb_parse_string(mrb, code, cxt); + if (!p || p->nerr > 0) { + fprintf(stderr, "Failed to parse: %s\n", name); + if (p) mrb_parser_free(p); + mrbc_context_free(mrb, cxt); + return; + } + struct RProc *proc = mrb_generate_code(mrb, p); + mrb_parser_free(p); + mrbc_context_free(mrb, cxt); + + if (!proc) { + fprintf(stderr, "Failed to compile: %s\n", name); + return; + } + + /* Warm up */ + for (int i = 0; i < 3; i++) { + mrb_top_run(mrb, proc, mrb_top_self(mrb), 0); + mrb->exc = NULL; + } + + /* Measure */ + for (int i = 0; i < iterations; i++) { + mrb_gc_arena_save(mrb); + mrb_full_gc(mrb); + + double t0 = get_time_ms(); + mrb_top_run(mrb, proc, mrb_top_self(mrb), 0); + double t1 = get_time_ms(); + + times[i] = t1 - t0; + total += times[i]; + if (times[i] < min_time) min_time = times[i]; + if (times[i] > max_time) max_time = times[i]; + + mrb->exc = NULL; + mrb_gc_arena_restore(mrb, 0); + } + + double avg = total / iterations; + printf("%-30s avg: %8.2f ms min: %8.2f ms max: %8.2f ms\n", + name, avg, min_time, max_time); +} + +int +main(int argc, char **argv) +{ + mrb_state *mrb = mrb_open(); + if (!mrb) { + fprintf(stderr, "Failed to create mrb_state\n"); + return 1; + } + + printf("========================================\n"); + printf("mruby VM Dispatch Micro-benchmarks\n"); + printf("========================================\n\n"); + + /* 1. Pure dispatch overhead */ + printf("--- Dispatch Overhead ---\n"); + + run_benchmark(mrb, "empty_loop_1M", + "i = 0; while i < 1000000; i += 1; end", ITERATIONS); + + run_benchmark(mrb, "empty_loop_10M", + "i = 0; while i < 10000000; i += 1; end", ITERATIONS); + + /* 2. Arithmetic operations */ + printf("\n--- Arithmetic ---\n"); + + run_benchmark(mrb, "int_add_1M", + "x = 0; i = 0; while i < 1000000; x = x + 1; i += 1; end", ITERATIONS); + + run_benchmark(mrb, "int_mul_1M", + "x = 1; i = 0; while i < 1000000; x = x * 1; i += 1; end", ITERATIONS); + + run_benchmark(mrb, "float_add_1M", + "x = 0.0; i = 0; while i < 1000000; x = x + 1.0; i += 1; end", ITERATIONS); + + /* 3. Method calls */ + printf("\n--- Method Calls ---\n"); + + run_benchmark(mrb, "empty_method_100K", + "class X; def m; end; end; " + "o = X.new; i = 0; while i < 100000; o.m; i += 1; end", ITERATIONS); + + run_benchmark(mrb, "method_1arg_100K", + "class Y; def m(a); a; end; end; " + "o = Y.new; i = 0; while i < 100000; o.m(1); i += 1; end", ITERATIONS); + + run_benchmark(mrb, "method_2arg_100K", + "class Z; def m(a,b); a+b; end; end; " + "o = Z.new; i = 0; while i < 100000; o.m(1,2); i += 1; end", ITERATIONS); + + /* 4. Array access */ + printf("\n--- Array/Hash ---\n"); + + run_benchmark(mrb, "array_read_1M", + "a = [0,1,2,3,4,5,6,7,8,9]; " + "i = 0; s = 0; while i < 1000000; s += a[i % 10]; i += 1; end", ITERATIONS); + + run_benchmark(mrb, "array_write_1M", + "a = [0,0,0,0,0,0,0,0,0,0]; " + "i = 0; while i < 1000000; a[i % 10] = i; i += 1; end", ITERATIONS); + + run_benchmark(mrb, "hash_read_100K", + "h = {0=>0,1=>1,2=>2,3=>3,4=>4,5=>5,6=>6,7=>7,8=>8,9=>9}; " + "i = 0; s = 0; while i < 100000; s += h[i % 10]; i += 1; end", ITERATIONS); + + /* 5. Comparison and branching */ + printf("\n--- Comparison/Branch ---\n"); + + run_benchmark(mrb, "lt_compare_1M", + "i = 0; c = 0; while i < 1000000; c += 1 if i < 500000; i += 1; end", ITERATIONS); + + run_benchmark(mrb, "eq_compare_1M", + "i = 0; c = 0; while i < 1000000; c += 1 if i == 500000; i += 1; end", ITERATIONS); + + /* 6. Block calls */ + printf("\n--- Blocks ---\n"); + + run_benchmark(mrb, "times_100K", + "s = 0; 100000.times { |i| s += i }", ITERATIONS); + + run_benchmark(mrb, "each_100K", + "a = (0...1000).to_a; s = 0; 100.times { a.each { |x| s += x } }", ITERATIONS); + + /* 7. Recursion */ + printf("\n--- Recursion ---\n"); + + run_benchmark(mrb, "fib_25", + "def fib(n); n < 2 ? n : fib(n-1) + fib(n-2); end; fib(25)", ITERATIONS); + + run_benchmark(mrb, "fib_30", + "def fib(n); n < 2 ? n : fib(n-1) + fib(n-2); end; fib(30)", ITERATIONS); + + /* 8. Local variable access */ + printf("\n--- Local Variables ---\n"); + + run_benchmark(mrb, "few_vars_1M", + "i = 0; a = 0; b = 0; " + "while i < 1000000; a += 1; b += 1; i += 1; end", ITERATIONS); + + run_benchmark(mrb, "many_vars_1M", + "i = 0; a = 0; b = 0; c = 0; d = 0; e = 0; f = 0; g = 0; h = 0; " + "while i < 1000000; a += 1; b += 1; c += 1; d += 1; e += 1; " + "f += 1; g += 1; h += 1; i += 1; end", ITERATIONS); + + printf("\n========================================\n"); + printf("Benchmark complete\n"); + printf("========================================\n"); + + mrb_close(mrb); + return 0; +} diff --git a/benchmark/vm_optimization_bench.rb b/benchmark/vm_optimization_bench.rb new file mode 100644 index 0000000000..a3277e5171 --- /dev/null +++ b/benchmark/vm_optimization_bench.rb @@ -0,0 +1,513 @@ +# VM Optimization Benchmarks for mruby +# Usage: ./bin/mruby benchmark/vm_optimization_bench.rb +# +# Each benchmark is designed to isolate specific VM behaviors: +# - Dispatch overhead +# - Arithmetic operations +# - Method calls +# - Array/Hash access +# - Loop performance + +# Benchmark infrastructure +def measure(name, iterations = 1) + # Warm up + 3.times { yield } + + # Force GC before measurement + GC.start + + t0 = Time.now + iterations.times { yield } + elapsed = Time.now - t0 + + puts "#{name}: #{elapsed * 1000 / iterations} ms" + elapsed +end + +N = 1_000_000 +M = 100_000 + +puts "=" * 60 +puts "mruby VM Optimization Benchmarks" +puts "=" * 60 +puts + +#============================================================================= +# 1. DISPATCH OVERHEAD BENCHMARKS +# Target: Tail-call threading, computed goto efficiency +#============================================================================= +puts "--- Dispatch Overhead ---" + +# 1a. Empty loop (pure dispatch cost) +measure("empty_loop", 10) do + i = 0 + while i < N + i += 1 + end +end + +# 1b. NOP-heavy (many instructions, minimal work) +measure("nop_sequence", 10) do + i = 0 + while i < M + a = 1; b = 2; c = 3; d = 4; e = 5 + a = 1; b = 2; c = 3; d = 4; e = 5 + a = 1; b = 2; c = 3; d = 4; e = 5 + a = 1; b = 2; c = 3; d = 4; e = 5 + i += 1 + end +end + +#============================================================================= +# 2. ARITHMETIC BENCHMARKS +# Target: Type specialization, register variables, ADDI fusion +#============================================================================= +puts +puts "--- Arithmetic Operations ---" + +# 2a. Integer addition (tests OP_ADD fast path) +measure("int_add", 10) do + x = 0 + i = 0 + while i < N + x = x + 1 + i += 1 + end + x +end + +# 2b. Integer increment (tests potential OP_INCI fusion) +measure("int_increment", 10) do + x = 0 + i = 0 + while i < N + x += 1 + i += 1 + end + x +end + +# 2c. Mixed arithmetic (tests type checking overhead) +measure("mixed_arith", 10) do + x = 0 + y = 1.5 + i = 0 + while i < M + x = x + 1 + y = y + 0.5 + i += 1 + end + x +end + +# 2d. Comparison in loop (tests OP_LT + JMPNOT fusion potential) +measure("comparison_loop", 10) do + x = 0 + while x < N + x += 1 + end + x +end + +# 2e. Multiple comparisons (branch prediction) +measure("multi_compare", 10) do + i = 0 + count = 0 + while i < M + count += 1 if i > 100 + count += 1 if i < 50000 + count += 1 if i == 25000 + i += 1 + end + count +end + +#============================================================================= +# 3. METHOD CALL BENCHMARKS +# Target: Inline caching, method dispatch optimization +#============================================================================= +puts +puts "--- Method Calls ---" + +class BenchClass + def empty_method + end + + def simple_add(a, b) + a + b + end + + def self.class_method + end +end + +$obj = BenchClass.new + +# 3a. Empty method call (pure dispatch overhead) +measure("empty_method_call", 10) do + obj = $obj + i = 0 + while i < M + obj.empty_method + i += 1 + end +end + +# 3b. Method with arguments +measure("method_with_args", 10) do + obj = $obj + i = 0 + while i < M + obj.simple_add(1, 2) + i += 1 + end +end + +# 3c. Self method call (tests OP_SENDSELF potential) +class SelfCallBench + def run + i = 0 + while i < M + helper + i += 1 + end + end + + def helper + end +end + +measure("self_method_call", 10) do + SelfCallBench.new.run +end + +# 3d. Polymorphic call site (tests inline cache invalidation) +class Duck1 + def quack; 1; end +end +class Duck2 + def quack; 2; end +end + +$duck1 = Duck1.new +$duck2 = Duck2.new + +measure("polymorphic_call", 10) do + d1, d2 = $duck1, $duck2 + i = 0 + sum = 0 + while i < M + sum += d1.quack + sum += d2.quack + i += 1 + end + sum +end + +#============================================================================= +# 4. ARRAY/HASH BENCHMARKS +# Target: GETIDX/SETIDX fast path, bounds checking +#============================================================================= +puts +puts "--- Array/Hash Access ---" + +$ary = Array.new(1000) { |i| i } +$hash = {} +1000.times { |i| $hash[i] = i } + +# 4a. Array read (sequential) +measure("array_read_seq", 10) do + ary = $ary + i = 0 + sum = 0 + while i < M + sum += ary[i % 1000] + i += 1 + end + sum +end + +# 4b. Array read (constant index - tests constant propagation) +measure("array_read_const", 10) do + ary = $ary + i = 0 + sum = 0 + while i < M + sum += ary[500] + i += 1 + end + sum +end + +# 4c. Array write +measure("array_write", 10) do + ary = Array.new(1000, 0) + i = 0 + while i < M + ary[i % 1000] = i + i += 1 + end +end + +# 4d. Hash read +measure("hash_read", 10) do + h = $hash + i = 0 + sum = 0 + while i < M + sum += h[i % 1000] + i += 1 + end + sum +end + +#============================================================================= +# 5. LOOP PATTERN BENCHMARKS +# Target: Loop optimization, branch prediction +#============================================================================= +puts +puts "--- Loop Patterns ---" + +# 5a. Simple while loop +measure("while_loop", 10) do + i = 0 + while i < N + i += 1 + end +end + +# 5b. times iterator (block overhead) +measure("times_iterator", 10) do + sum = 0 + M.times do |i| + sum += i + end + sum +end + +# 5c. each iterator on array +$small_ary = (0...1000).to_a +measure("each_iterator", 10) do + ary = $small_ary + total = 0 + 1000.times do + ary.each { |x| total += x } + end + total +end + +# 5d. Nested loops +measure("nested_loop", 10) do + sum = 0 + i = 0 + while i < 1000 + j = 0 + while j < 1000 + sum += 1 + j += 1 + end + i += 1 + end + sum +end + +#============================================================================= +# 6. CONSTANT LOADING BENCHMARKS +# Target: Constant pre-computation, pool access +#============================================================================= +puts +puts "--- Constant Loading ---" + +# 6a. Integer literals (tests LOADI optimization) +measure("int_literals", 10) do + i = 0 + sum = 0 + while i < M + sum += 1 + sum += 2 + sum += 3 + sum += 42 + sum += 100 + i += 1 + end + sum +end + +# 6b. Large integer literals (tests LOADL from pool) +measure("large_int_literals", 10) do + i = 0 + sum = 0 + while i < M + sum += 1000000 + sum += 2000000 + sum += 3000000 + i += 1 + end + sum +end + +# 6c. Float literals +measure("float_literals", 10) do + i = 0 + sum = 0.0 + while i < M + sum += 1.5 + sum += 2.5 + sum += 3.5 + i += 1 + end + sum +end + +# 6d. String literals (allocation vs interning) +measure("string_literals", 5) do + i = 0 + while i < 100000 + s = "hello" + s = "world" + s = "test" + i += 1 + end +end + +#============================================================================= +# 7. BRANCH PREDICTION BENCHMARKS +# Target: mrb_likely/mrb_unlikely effectiveness +#============================================================================= +puts +puts "--- Branch Prediction ---" + +# 7a. Predictable branch (always true) +measure("predictable_true", 10) do + i = 0 + count = 0 + while i < N + count += 1 if true + i += 1 + end + count +end + +# 7b. Predictable branch (always false) +measure("predictable_false", 10) do + i = 0 + count = 0 + while i < N + count += 1 if false + i += 1 + end + count +end + +# 7c. Unpredictable branch (50/50) +measure("unpredictable_50", 10) do + i = 0 + count = 0 + while i < M + count += 1 if i & 1 == 0 + i += 1 + end + count +end + +# 7d. Rare branch (error path simulation) +measure("rare_branch", 10) do + i = 0 + count = 0 + while i < N + count += 1 if i == -1 # Never true + i += 1 + end + count +end + +#============================================================================= +# 8. REGISTER PRESSURE BENCHMARKS +# Target: Register variable optimization +#============================================================================= +puts +puts "--- Register Pressure ---" + +# 8a. Few local variables (should fit in registers) +measure("few_locals", 10) do + i = 0 + a = 0 + while i < N + a += 1 + i += 1 + end + a +end + +# 8b. Many local variables (register spilling) +measure("many_locals", 10) do + i = 0 + a = 0; b = 0; c = 0; d = 0; e = 0 + f = 0; g = 0; h = 0; j = 0; k = 0 + l = 0; m = 0; n = 0; o = 0; p = 0 + while i < M + a += 1; b += 1; c += 1; d += 1; e += 1 + f += 1; g += 1; h += 1; j += 1; k += 1 + l += 1; m += 1; n += 1; o += 1; p += 1 + i += 1 + end + a + b + c + d + e + f + g + h + j + k + l + m + n + o + p +end + +#============================================================================= +# 9. COMPOSITE BENCHMARKS (Real-world-ish) +#============================================================================= +puts +puts "--- Composite Benchmarks ---" + +# 9a. Fibonacci (recursion + arithmetic) +def fib(n) + return n if n < 2 + fib(n - 1) + fib(n - 2) +end + +measure("fibonacci_30", 3) do + fib(30) +end + +# 9b. Tak function (heavy recursion) +def tak(x, y, z) + if y < x + tak(tak(x - 1, y, z), tak(y - 1, z, x), tak(z - 1, x, y)) + else + z + end +end + +measure("tak_18_12_6", 3) do + tak(18, 12, 6) +end + +# 9c. Array manipulation +measure("array_manipulation", 5) do + ary = [] + 10000.times { |i| ary << i } + ary.map! { |x| x * 2 } + ary.select { |x| x % 3 == 0 }.size +end + +# 9d. String operations +measure("string_ops", 5) do + s = "" + 10000.times { |i| s = s + i.to_s } + s.size +end + +# 9e. Hash operations +measure("hash_ops", 5) do + h = {} + 50000.times { |i| h[i.to_s] = i } + sum = 0 + h.each { |k, v| sum += v } + sum +end + +puts +puts "=" * 60 +puts "Benchmark complete" +puts "=" * 60 diff --git a/build_config/ArduinoDue.rb b/build_config/ArduinoDue.rb index 9baf1ef926..94fdac0344 100644 --- a/build_config/ArduinoDue.rb +++ b/build_config/ArduinoDue.rb @@ -30,7 +30,7 @@ #configuration for low memory environment cc.defines << %w(MRB_HEAP_PAGE_SIZE=64) - cc.defines << %w(KHASH_DEFAULT_SIZE=8) + cc.defines << %w(KHASH_INITIAL_SIZE=8) cc.defines << %w(MRB_GC_STRESS) #cc.defines << %w(MRB_NO_STDIO) #if you don't need stdio. #cc.defines << %w(POOL_PAGE_SIZE=1000) #effective only for use with mruby-eval @@ -60,7 +60,6 @@ conf.disable_cxx_exception #gems from core - conf.gem :core => "mruby-print" conf.gem :core => "mruby-math" conf.gem :core => "mruby-enum-ext" diff --git a/build_config/IntelGalileo.rb b/build_config/IntelGalileo.rb index f1f39e5dc5..85c706fb4e 100644 --- a/build_config/IntelGalileo.rb +++ b/build_config/IntelGalileo.rb @@ -12,7 +12,7 @@ #ARDUINO_GALILEO_PATH = '/opt/arduino' GALILEO_BIN_PATH = "#{GALILEO_ARDUINO_PATH}/hardware/tools/x86/i386-pokysdk-darwin/usr/bin/i586-poky-linux-uclibc" - GALILEO_SYSROOT = "#{GALILEO_ARDUINO_PATH}/hardware/tools/x86/i586-poky-linux-uclibc" + GALILEO_SYSROOT = "#{GALILEO_ARDUINO_PATH}/hardware/tools/x86/i586-poky-linux-uclibc" GALILEO_X86_PATH = "#{GALILEO_ARDUINO_PATH}/hardware/arduino/x86" @@ -56,7 +56,6 @@ #official mrbgems conf.gem :core => "mruby-sprintf" - conf.gem :core => "mruby-print" conf.gem :core => "mruby-math" conf.gem :core => "mruby-time" conf.gem :core => "mruby-struct" diff --git a/build_config/RX630.rb b/build_config/RX630.rb index 7c3cfc7922..0f5f6fd963 100644 --- a/build_config/RX630.rb +++ b/build_config/RX630.rb @@ -16,7 +16,7 @@ #configuration for low memory environment cc.defines << %w(MRB_USE_FLOAT32) cc.defines << %w(MRB_HEAP_PAGE_SIZE=64) - cc.defines << %w(KHASH_DEFAULT_SIZE=8) + cc.defines << %w(KHASH_INITIAL_SIZE=8) cc.defines << %w(MRB_GC_STRESS) cc.defines << %w(MRB_NO_STDIO) #if you don't need stdio. #cc.defines << %w(POOL_PAGE_SIZE=1000) #effective only for use with mruby-eval @@ -50,7 +50,6 @@ #gems from core conf.gem :core => "mruby-sprintf" - conf.gem :core => "mruby-print" conf.gem :core => "mruby-math" conf.gem :core => "mruby-enum-ext" conf.gem :core => "mruby-numeric-ext" diff --git a/build_config/android_arm64_v8a.rb b/build_config/android_arm64_v8a.rb index 8763b00842..6dda612dde 100644 --- a/build_config/android_arm64_v8a.rb +++ b/build_config/android_arm64_v8a.rb @@ -2,8 +2,8 @@ MRuby::CrossBuild.new('android-arm64-v8a') do |conf| params = { :arch => 'arm64-v8a', - :sdk_version => 26, - :toolchain => :clang, + :sdk_version => 33, + :toolchain => :clang } toolchain :android, params diff --git a/build_config/android_armeabi.rb b/build_config/android_armeabi.rb deleted file mode 100644 index cef0e3adca..0000000000 --- a/build_config/android_armeabi.rb +++ /dev/null @@ -1,11 +0,0 @@ -# Requires Android NDK r13 or later. -MRuby::CrossBuild.new('android-armeabi') do |conf| - params = { - :arch => 'armeabi', - :sdk_version => 26, - :toolchain => :clang, - } - toolchain :android, params - - conf.gembox 'default' -end diff --git a/build_config/android_armeabi_v7a_neon_hard.rb b/build_config/android_armeabi_v7a_neon_hard.rb index 150d1d2043..3dae241851 100644 --- a/build_config/android_armeabi_v7a_neon_hard.rb +++ b/build_config/android_armeabi_v7a_neon_hard.rb @@ -4,8 +4,8 @@ :arch => 'armeabi-v7a', :mfpu => 'neon', :mfloat_abi => 'hard', - :sdk_version => 26, - :toolchain => :clang, + :sdk_version => 33, + :toolchain => :clang } toolchain :android, params diff --git a/build_config/chipKITMax32.rb b/build_config/chipKITMax32.rb index cb31926109..cde3edf36e 100644 --- a/build_config/chipKITMax32.rb +++ b/build_config/chipKITMax32.rb @@ -27,7 +27,7 @@ #configuration for low memory environment cc.defines << %w(MRB_HEAP_PAGE_SIZE=64) - cc.defines << %w(KHASH_DEFAULT_SIZE=8) + cc.defines << %w(KHASH_INITIAL_SIZE=8) cc.defines << %w(MRB_GC_STRESS) #cc.defines << %w(MRB_NO_STDIO) #if you don't need stdio. #cc.defines << %w(POOL_PAGE_SIZE=1000) #effective only for use with mruby-eval @@ -56,7 +56,6 @@ conf.disable_cxx_exception #gems from core - conf.gem :core => "mruby-print" conf.gem :core => "mruby-math" conf.gem :core => "mruby-enum-ext" diff --git a/build_config/ci/gcc-clang.rb b/build_config/ci/gcc-clang.rb index eeaab5d0ff..f6303e49e4 100644 --- a/build_config/ci/gcc-clang.rb +++ b/build_config/ci/gcc-clang.rb @@ -6,7 +6,7 @@ # include all core GEMs conf.gembox 'full-core' - conf.cc.defines += %w(MRB_GC_STRESS MRB_USE_DEBUG_HOOK MRB_UTF8_STRING) + conf.cc.defines += %w(MRB_GC_STRESS MRB_USE_DEBUG_HOOK) conf.enable_test end @@ -18,7 +18,7 @@ conf.gembox 'full-core' conf.gem :core => 'mruby-bin-debugger' conf.compilers.each do |c| - c.defines += %w(MRB_GC_FIXED_ARENA MRB_UTF8_STRING) + c.defines += %w(MRB_GC_FIXED_ARENA) end conf.enable_bintest conf.enable_test @@ -30,7 +30,7 @@ conf.gembox 'full-core' conf.cc.flags += %w(-fpermissive -std=gnu++03) conf.compilers.each do |c| - c.defines += %w(MRB_GC_FIXED_ARENA MRB_UTF8_STRING) + c.defines += %w(MRB_GC_FIXED_ARENA) end conf.enable_test diff --git a/build_config/cosmopolitan.rb b/build_config/cosmopolitan.rb new file mode 100644 index 0000000000..c7f46b4326 --- /dev/null +++ b/build_config/cosmopolitan.rb @@ -0,0 +1,84 @@ +# Cosmopolitan Libc build configuration for mruby +# https://github.com/jart/cosmopolitan +# +# Produces Actually Portable Executables (APE) - single binaries that run on: +# - Linux (x86_64, ARM64) +# - macOS (x86_64, ARM64) +# - Windows (x86_64) +# - FreeBSD (x86_64) +# - OpenBSD (x86_64) +# - NetBSD (x86_64) +# +# Requirements: +# Download cosmocc toolchain from https://cosmo.zip/pub/cosmocc/ +# +# Usage: +# COSMO_ROOT=/path/to/cosmocc rake MRUBY_CONFIG=cosmopolitan +# +# The resulting binaries in bin/ will have .com extension and run on all +# supported platforms without recompilation. + +COSMO_ROOT = ENV['COSMO_ROOT'] + +unless COSMO_ROOT && File.directory?(COSMO_ROOT) + msg = <<~MSG + Cosmopolitan toolchain not found. + + Please set COSMO_ROOT environment variable to the cosmocc directory: + + mkdir -p ~/cosmo && cd ~/cosmo + wget https://cosmo.zip/pub/cosmocc/cosmocc.zip + unzip cosmocc.zip + export COSMO_ROOT=~/cosmo + + Then run: + COSMO_ROOT=~/cosmo rake MRUBY_CONFIG=cosmopolitan + MSG + raise msg +end + +MRuby::Build.new do |conf| + # C compiler + conf.cc do |cc| + cc.command = "#{COSMO_ROOT}/bin/cosmocc" + cc.flags = %w[-Os -fno-omit-frame-pointer] + end + + # C++ compiler + conf.cxx do |cxx| + cxx.command = "#{COSMO_ROOT}/bin/cosmoc++" + cxx.flags = conf.cc.flags.dup + end + + # Linker + conf.linker do |linker| + linker.command = "#{COSMO_ROOT}/bin/cosmocc" + linker.flags = %w[-static] + end + + # Archiver + conf.archiver do |archiver| + archiver.command = "#{COSMO_ROOT}/bin/cosmoar" + end + + # APE binaries use .com extension + conf.exts.executable = '.com' + + # Cosmopolitan provides POSIX compatibility + conf.ports :posix + + # Standard library + conf.gembox 'stdlib' + conf.gembox 'stdlib-ext' + conf.gembox 'stdlib-io' # Includes mruby-io, mruby-socket, mruby-dir + conf.gembox 'math' + conf.gembox 'metaprog' + + # Binary tools + # Note: mruby-bin-config is a shell script and incompatible with .com extension + conf.gem core: 'mruby-bin-mrbc' + conf.gem core: 'mruby-bin-mruby' + conf.gem core: 'mruby-bin-strip' + conf.gem core: 'mruby-bin-mirb' + conf.gem core: 'mruby-bin-debugger' +end diff --git a/build_config/cross-mingw-winetest.rb b/build_config/cross-mingw-winetest.rb index fad06b2657..94fa67d177 100644 --- a/build_config/cross-mingw-winetest.rb +++ b/build_config/cross-mingw-winetest.rb @@ -1,4 +1,3 @@ - # Cross-compile using MinGW and test using Wine. # # Steps: @@ -16,11 +15,11 @@ # # 4. Confirm that drive 'z:' is mapped to your root filesystem. # (This is supposed to be a default but it helps to -# double-check.) To confirm, run: +# double-check.) To confirm, run: # # wine cmd /c dir 'z:\\' # -# This should give you a DOS-style equivalent of 'ls /'. If not, +# This should give you a DOS-style equivalent of 'ls /'. If not, # you'll need to fix that with winecfg or by adding a symlink to # '~/.wine/dosdevices'. # @@ -40,8 +39,8 @@ # # 1. This works by using a helper script that rewrites test output # to make it look *nix-like and then handing it back to the test -# cases. Some of the existing tests were (slightly) modified to -# make this easier but only for the 'full-core' gembox. Other +# cases. Some of the existing tests were (slightly) modified to +# make this easier but only for the 'full-core' gembox. Other # gems' bintests may or may not work with the helper script and # may or may not be fixable by extending the script. # @@ -50,11 +49,11 @@ # # 3. This script assumes you are running it on a *nix-style OS. # -# 4. I recommend building 64-bit targets only. Building a 32-bit +# 4. I recommend building 64-bit targets only. Building a 32-bit # Windows binary with i686-w64-mingw32 seems to work (at least, # it did for me) but the resulting executable failed a number of -# unit tests due to small errors in some floating point -# operations. It's unclear if this indicates more serious problems. +# unit tests due to small errors in some floating-point +# operations. It's unclear if this indicates more serious problems. # diff --git a/build_config/default.rb b/build_config/default.rb index f5e2cbb711..21795b4536 100644 --- a/build_config/default.rb +++ b/build_config/default.rb @@ -60,7 +60,7 @@ # gperf settings # conf.gperf do |gperf| # gperf.command = 'gperf' - # gperf.compile_options = %q[-L ANSI-C -C -p -j1 -i 1 -g -o -t -N mrb_reserved_word -k"1,3,$" "%{infile}" > "%{outfile}"] + # gperf.compile_options = %q[-L ANSI-C -C -j1 -i 1 -o -t -N mrb_reserved_word -k"1,3,$" "%{infile}" > "%{outfile}"] # end # file extensions @@ -73,6 +73,9 @@ # file separator # conf.file_separator = '/' + # change library directory name from the default "lib" if necessary + # conf.libdir_name = 'lib64' + # Turn on `enable_debug` for better debugging # conf.enable_debug conf.enable_bintest diff --git a/build_config/dreamcast_shelf.rb b/build_config/dreamcast_shelf.rb index 092fde0d02..e65465f48e 100644 --- a/build_config/dreamcast_shelf.rb +++ b/build_config/dreamcast_shelf.rb @@ -4,114 +4,78 @@ # Requires KallistiOS (KOS) # http://gamedev.allusion.net/softprj/kos/ # -# Tested on GNU/Linux, macOS and Windows (through MinGW-w64/MSYS2, Cygwin and DreamSDK). -# DreamSDK is based on MinGW/MSYS: https://dreamsdk.org/ +# This configuration has been improved to be used as KallistiOS Port (kos-ports) +# Updated: 2025-07-31 # -# Input this command on the directory where mruby is installed: -# make MRUBY_CONFIG=dreamcast_shelf +# Tested on GNU/Linux, macOS and Windows (MinGW-w64/MSYS2, Cygwin, DreamSDK) +# DreamSDK is based on both MinGW/MSYS and MinGW-w64/MSYS2: https://dreamsdk.org/ +# +# Install mruby for Sega Dreamcast using the "mruby" kos-port. +# See: https://github.com/kallistios/kallistios +# +# If you want to see examples, check the /examples/dreamcast/mruby directory +# in the KallistiOS repository. # MRuby::CrossBuild.new("dreamcast") do |conf| toolchain :gcc - # Support for DreamSDK (based on MinGW/MSYS) - # To compile mruby with DreamSDK, RubyInstaller for Windows should be installed - DREAMSDK_HOME = ENV["DREAMSDK_HOME"] - MSYS_ROOT = !(DREAMSDK_HOME.nil? || DREAMSDK_HOME.empty?) ? "#{DREAMSDK_HOME}/msys/1.0" : "" + # Getting critical environment variables + KOS_BASE = ENV["KOS_BASE"] + KOS_CC_BASE = ENV["KOS_CC_BASE"] - # Setting paths - DREAMCAST_PATH = "#{MSYS_ROOT}/opt/toolchains/dc" - KOS_PATH = "#{DREAMCAST_PATH}/kos" - BIN_PATH = "#{DREAMCAST_PATH}/sh-elf/bin" + # Check environment variables + if KOS_BASE.to_s.empty? + raise "Error: KallistiOS is required; KOS_BASE need to be declared; Stop." + end + + # Root directory for KallistiOS wrappers + # This will handle specific DreamSDK wrappers if needed + KOS_WRAPPERS_BASE = if ENV["ENVIRONMENT_NAME"] == "DreamSDK" && ENV["RAKE_AVAILABLE"] == "0" + "#{KOS_CC_BASE}/bin" + else + "#{KOS_BASE}/utils/build_wrappers" + end # C compiler - # Flags were extracted from KallistiOS environment files conf.cc do |cc| - cc.command = "#{BIN_PATH}/sh-elf-gcc" - cc.include_paths << ["#{KOS_PATH}/include", "#{KOS_PATH}/kernel/arch/dreamcast/include", "#{KOS_PATH}/addons/include", "#{KOS_PATH}/../kos-ports/include"] - cc.flags << ["-O2", "-fomit-frame-pointer", "-ml", "-m4-single-only", "-ffunction-sections", "-fdata-sections", "-Wall", "-g", "-fno-builtin", "-ml", "-m4-single-only", "-Wl,-Ttext=0x8c010000", "-Wl,--gc-sections", "-T#{KOS_PATH}/utils/ldscripts/shlelf.xc", "-nodefaultlibs"] - cc.compile_options = %Q[%{flags} -o "%{outfile}" -c "%{infile}"] - cc.defines << %w(_arch_dreamcast) - cc.defines << %w(_arch_sub_pristine) + cc.command = "#{KOS_WRAPPERS_BASE}/kos-cc" end # C++ compiler conf.cxx do |cxx| - cxx.command = conf.cc.command.dup - cxx.include_paths = conf.cc.include_paths.dup - cxx.flags = conf.cc.flags.dup - cxx.flags << %w(-fno-rtti -fno-exceptions) - cxx.defines = conf.cc.defines.dup - cxx.compile_options = conf.cc.compile_options.dup + cxx.command = "#{KOS_WRAPPERS_BASE}/kos-c++" end # Linker - # There is an issue when making the mruby library with KallistiOS: - # 'newlib_kill.o' and 'newlib_getpid.o' aren't found so they are explicitly - # specified here at least for now. conf.linker do |linker| - linker.command="#{BIN_PATH}/sh-elf-gcc" - linker.flags << ["#{MSYS_ROOT}/opt/toolchains/dc/kos/kernel/build/newlib_kill.o", "#{MSYS_ROOT}/opt/toolchains/dc/kos/kernel/build/newlib_getpid.o", "-Wl,--start-group -lkallisti -lc -lgcc -Wl,--end-group"] - linker.library_paths << ["#{KOS_PATH}/lib/dreamcast", "#{KOS_PATH}/addons/lib/dreamcast", "#{KOS_PATH}/../kos-ports/lib"] + linker.command = "#{KOS_WRAPPERS_BASE}/kos-ld" end # Archiver conf.archiver do |archiver| - archiver.command = "#{BIN_PATH}/sh-elf-ar" - archiver.archive_options = 'rcs "%{outfile}" %{objs}' + archiver.command = "#{KOS_WRAPPERS_BASE}/kos-ar" end - # No executables + # No executables needed for KallistiOS conf.bins = [] - # Do not build executable test + # Do not build test binaries conf.build_mrbtest_lib_only - # Disable C++ exception - conf.disable_cxx_exception - - # Gems from core - # Some Gems are incompatible and were disabled. + # Gemboxes + conf.gembox "default-no-stdio" + conf.gembox "stdlib-ext" + conf.gembox "metaprog" - conf.gem :core => "mruby-array-ext" + # Additional Gems + # Currently unsupported on KallistiOS: "mruby-io", "mruby-dir", "mruby-socket" conf.gem :core => "mruby-binding" conf.gem :core => "mruby-catch" - conf.gem :core => "mruby-class-ext" - conf.gem :core => "mruby-cmath" - conf.gem :core => "mruby-compar-ext" - conf.gem :core => "mruby-compiler" - conf.gem :core => "mruby-complex" conf.gem :core => "mruby-enum-chain" - conf.gem :core => "mruby-enum-ext" - conf.gem :core => "mruby-enum-lazy" - conf.gem :core => "mruby-enumerator" + conf.gem :core => "mruby-errno" conf.gem :core => "mruby-error" - conf.gem :core => "mruby-eval" conf.gem :core => "mruby-exit" - conf.gem :core => "mruby-fiber" - conf.gem :core => "mruby-hash-ext" -# conf.gem :core => "mruby-io" - conf.gem :core => "mruby-kernel-ext" - conf.gem :core => "mruby-math" - conf.gem :core => "mruby-metaprog" - conf.gem :core => "mruby-method" - conf.gem :core => "mruby-numeric-ext" - conf.gem :core => "mruby-object-ext" - conf.gem :core => "mruby-objectspace" conf.gem :core => "mruby-os-memsize" - conf.gem :core => "mruby-pack" - conf.gem :core => "mruby-print" conf.gem :core => "mruby-proc-binding" - conf.gem :core => "mruby-proc-ext" - conf.gem :core => "mruby-random" - conf.gem :core => "mruby-range-ext" - conf.gem :core => "mruby-rational" conf.gem :core => "mruby-sleep" -# conf.gem :core => "mruby-socket" - conf.gem :core => "mruby-sprintf" - conf.gem :core => "mruby-string-ext" - conf.gem :core => "mruby-struct" - conf.gem :core => "mruby-symbol-ext" -# conf.gem :core => "mruby-test" -# conf.gem :core => "mruby-time" - conf.gem :core => "mruby-toplevel-ext" end diff --git a/build_config/emscripten-cxx.rb b/build_config/emscripten-cxx.rb new file mode 100644 index 0000000000..3f2d14f9b3 --- /dev/null +++ b/build_config/emscripten-cxx.rb @@ -0,0 +1,12 @@ +# Make sure to add these compile options: +# build/emscripten-cxx/host-bin/mruby-config --cxxflags +# +# Make sure to add these link options: +# build/emscripten-cxx/host-bin/mruby-config --ldflags --libs +MRuby::CrossBuild.new('emscripten-cxx') do |conf| + conf.toolchain :emscripten + + conf.gembox 'default' + + conf.enable_cxx_abi +end diff --git a/build_config/emscripten.rb b/build_config/emscripten.rb new file mode 100644 index 0000000000..4933179257 --- /dev/null +++ b/build_config/emscripten.rb @@ -0,0 +1,10 @@ +# Make sure to add these compile options: +# build/emscripten/host-bin/mruby-config --cflags +# +# Make sure to add these link options: +# build/emscripten/host-bin/mruby-config --ldflags --libs +MRuby::CrossBuild.new('emscripten') do |conf| + conf.toolchain :emscripten + + conf.gembox 'default' +end diff --git a/build_config/gameboyadvance.rb b/build_config/gameboyadvance.rb index 964dc7af1e..256e92dc73 100644 --- a/build_config/gameboyadvance.rb +++ b/build_config/gameboyadvance.rb @@ -47,7 +47,6 @@ conf.gem core: "mruby-metaprog" conf.gem core: "mruby-pack" conf.gem core: "mruby-sprintf" - conf.gem core: "mruby-print" conf.gem core: "mruby-math" conf.gem core: "mruby-time" conf.gem core: "mruby-struct" diff --git a/build_config/glib_hal_test.rb b/build_config/glib_hal_test.rb new file mode 100644 index 0000000000..c90dd47fed --- /dev/null +++ b/build_config/glib_hal_test.rb @@ -0,0 +1,88 @@ +MRuby::Build.new do |conf| + # load specific toolchain settings + conf.toolchain + + # Use mrbgems + # conf.gem 'examples/mrbgems/ruby_extension_example' + # conf.gem 'examples/mrbgems/c_extension_example' do |g| + # g.cc.flags << '-g' # append cflags in this gem + # end + # conf.gem 'examples/mrbgems/c_and_ruby_extension_example' + # conf.gem :core => 'mruby-eval' + # conf.gem :mgem => 'mruby-onig-regexp' + # conf.gem :github => 'mattn/mruby-onig-regexp' + # conf.gem :git => 'git@github.com:mattn/mruby-onig-regexp.git', :branch => 'master', :options => '-v' + + # include the GEM box + #conf.gembox 'default' + + # C compiler settings + # conf.cc do |cc| + # cc.command = ENV['CC'] || 'gcc' + # cc.flags = [ENV['CFLAGS'] || %w()] + # cc.include_paths = ["#{root}/include"] + # cc.defines = %w() + # cc.option_include_path = %q[-I"%s"] + # cc.option_define = '-D%s' + # cc.compile_options = %Q[%{flags} -MMD -o "%{outfile}" -c "%{infile}"] + # end + + # mrbc settings + # conf.mrbc do |mrbc| + # mrbc.compile_options = "-g -B%{funcname} -o-" # The -g option is required for line numbers + # end + + # Linker settings + # conf.linker do |linker| + # linker.command = ENV['LD'] || 'gcc' + # linker.flags = [ENV['LDFLAGS'] || []] + # linker.flags_before_libraries = [] + # linker.libraries = %w() + # linker.flags_after_libraries = [] + # linker.library_paths = [] + # linker.option_library = '-l%s' + # linker.option_library_path = '-L%s' + # linker.link_options = %Q[%{flags} -o "%{outfile}" %{objs} %{libs}] + # end + + # Archiver settings + # conf.archiver do |archiver| + # archiver.command = ENV['AR'] || 'ar' + # archiver.archive_options = 'rs "%{outfile}" %{objs}' + # end + + # Parser generator settings + # conf.yacc do |yacc| + # yacc.command = ENV['YACC'] || 'bison' + # yacc.compile_options = %q[-o "%{outfile}" "%{infile}"] + # end + + # gperf settings + # conf.gperf do |gperf| + # gperf.command = 'gperf' + # gperf.compile_options = %q[-L ANSI-C -C -j1 -i 1 -o -t -N mrb_reserved_word -k"1,3,$" "%{infile}" > "%{outfile}"] + # end + + # file extensions + # conf.exts do |exts| + # exts.object = '.o' + # exts.executable = '' # '.exe' if Windows + # exts.library = '.a' + # end + + # file separator + # conf.file_separator = '/' + + # change library directory name from the default "lib" if necessary + # conf.libdir_name = 'lib64' + + # Turn on `enable_debug` for better debugging + conf.enable_sanitizer 'address,undefined' + conf.enable_debug + conf.enable_bintest + conf.enable_test + conf.ports :glib + conf.cc.defines << 'MRB_TASK_BUILD_DEMO' + conf.gem core: 'mruby-task' + conf.gem core: 'mruby-compiler' +end diff --git a/build_config/helpers/wine_runner.rb b/build_config/helpers/wine_runner.rb index 9a7eb46b08..67da288e98 100755 --- a/build_config/helpers/wine_runner.rb +++ b/build_config/helpers/wine_runner.rb @@ -35,7 +35,7 @@ def clean(output, stderr = false) result_text = results.join("\n") result_text += "\n" if ends_with_newline - return result_text + result_text end @@ -46,7 +46,7 @@ def main end # For simplicity, just read all of stdin into memory and pass that - # as an argument when invoking wine. (Skipped if STDIN was not + # as an argument when invoking wine. (Skipped if STDIN was not # redirected.) if !STDIN.tty? input = STDIN.read diff --git a/build_config/host-cxx.rb b/build_config/host-cxx.rb index 15cb0c3476..5dc37edd09 100644 --- a/build_config/host-cxx.rb +++ b/build_config/host-cxx.rb @@ -1,4 +1,4 @@ -MRuby::Build.new do |conf| +MRuby::Build.new('host-cxx') do |conf| conf.toolchain # include the default GEMs diff --git a/build_config/host-debug.rb b/build_config/host-debug.rb index 66e622ad64..8565b77890 100644 --- a/build_config/host-debug.rb +++ b/build_config/host-debug.rb @@ -8,11 +8,13 @@ conf.gembox 'full-core' # C compiler settings - conf.cc.defines = %w(MRB_USE_DEBUG_HOOK MRB_NO_BOXING MRB_UTF8_STRING) + conf.cc.defines = %w(MRB_USE_DEBUG_HOOK MRB_NO_BOXING) # Generate mruby debugger command (require mruby-eval) conf.gem :core => "mruby-bin-debugger" + # Regexp is included via stdlib.gembox + # test conf.enable_test # bintest diff --git a/build_config/host-m32-f32.rb b/build_config/host-m32-f32.rb new file mode 100644 index 0000000000..9d14f322eb --- /dev/null +++ b/build_config/host-m32-f32.rb @@ -0,0 +1,16 @@ +MRuby::Build.new do |conf| + # load specific toolchain settings + toolchain :gcc + + # include the GEM box + conf.gembox 'full-core' + + conf.cc.flags << '-m32' + conf.cc.defines << 'MRB_USE_FLOAT32' + conf.linker.flags << '-m32' + + # Turn on `enable_debug` for better debugging + conf.enable_debug + conf.enable_test + conf.enable_bintest +end diff --git a/build_config/host-shared.rb b/build_config/host-shared.rb index 38cab8145b..9eaef55f24 100644 --- a/build_config/host-shared.rb +++ b/build_config/host-shared.rb @@ -1,36 +1,94 @@ -MRuby::Build.new do |conf| - # load specific toolchain settings +# Build mruby with a shared libmruby.so (in addition to the usual +# libmruby.a / executables). +# +# Produces (in build/host/lib/): +# libmruby.a the static archive (as in the default build) +# libmruby.so the shared library, with SONAME=libmruby.so.. +# libmruby.so.. symlink to libmruby.so (matches SONAME) +# libmruby.map linker version script (MRUBY_) +# +# Also produces the matching libmruby_core.so + symlink for completeness. +# +# Symbol versioning ties to MRUBY_RELEASE_NO (e.g. MRUBY_40000 for 4.0.0). +# mruby has historically had ABI breaks between TEENY versions, so the +# version tag uses the full release number rather than just MAJOR.MINOR. +# +# The shared library is built FROM the static archive via +# `-Wl,--whole-archive`, so the existing static-build pipeline (including +# the test infrastructure) is unaffected. Executables in build/host/bin +# remain statically linked; distros that want dynamically-linked +# executables can rebuild them against the .so. +# +# NOTE: gcc/clang only — VisualC++ support requires a separate config. - # Gets set by the VS command prompts. - if ENV['VisualStudioVersion'] || ENV['VSINSTALLDIR'] - toolchain :visualcpp - else - toolchain :gcc - end +require "mruby/source" + +MRuby::Build.new do |conf| + conf.toolchain # include the GEM box conf.gembox 'default' - # C compiler settings - conf.cc do |cc| - cc.flags = '-fPIC' + # -fPIC so the static archive's contents can be linked into the .so. + conf.compilers.each do |cc| + cc.flags << '-fPIC' end - conf.archiver do |archiver| - archiver.command = 'gcc' - archiver.archive_options = '-shared -o %{outfile} %{objs}' - end - - # file extensions - conf.exts do |exts| - exts.library = '.so' - end - - # file separator - # conf.file_separator = '/' - # Turn on `enable_debug` for better debugging conf.enable_debug conf.enable_bintest conf.enable_test end + +# Add the shared-library targets as a post-build pass, so the default +# static-build pipeline remains untouched. +MRuby.each_target do + next unless name == "host" + + libdir = File.join(build_dir, libdir_name) + vermap = File.join(build_dir, "libmruby.map") + vertag = "MRUBY_#{MRuby::Source::MRUBY_RELEASE_NO}" + + # Generate the version script eagerly — it has no .o dependencies and is + # tiny enough that lazy generation isn't worth the rake plumbing. + mkdir_p File.dirname(vermap) + File.write(vermap, <<~MAP) + #{vertag} { + global: *; + local: *; + }; + MAP + + major = MRuby::Source::MRUBY_RELEASE_MAJOR + minor = MRuby::Source::MRUBY_RELEASE_MINOR + + [ + [libmruby_static, "libmruby"], + [libmruby_core_static, "libmruby_core"], + ].each do |archive, basename| + so = File.join(libdir, "#{basename}.so") + symlink = "#{so}.#{major}.#{minor}" + soname = "#{basename}.so.#{major}.#{minor}" + + # Build .so from the static archive via --whole-archive. + file so => [archive, vermap] do |t| + _pp "LD", so.relative_path + sh "#{cc.command} -shared -fPIC -o #{so}" \ + " -Wl,-soname,#{soname}" \ + " -Wl,--version-script=#{vermap}" \ + " -Wl,--whole-archive #{archive} -Wl,--no-whole-archive" \ + " -lm" + end + products << so + + # SONAME-matching symlink: needed so executables linked with -lmruby + # (which embeds the SONAME as DT_NEEDED) can find the library at + # runtime via standard search paths. + file symlink => so do |t| + _pp "LN", "#{symlink.relative_path} -> #{File.basename(so)}" + rm_f symlink + File.symlink(File.basename(so), symlink) + end + products << symlink + end +end diff --git a/build_config/i586-pc-msdosdjgpp.rb b/build_config/i586-pc-msdosdjgpp.rb new file mode 100644 index 0000000000..c1ea5167f8 --- /dev/null +++ b/build_config/i586-pc-msdosdjgpp.rb @@ -0,0 +1,76 @@ +# Cross Compiling configuration for MS-DOS +# +# Requires DJGPP cross-compiler, see +# https://github.com/andrewwutw/build-djgpp/releases + +MRuby::CrossBuild.new("i586-pc-msdosdjgpp") do |conf| + toolchain :gcc + + # If DJGPP is not in the PATH, set this to the bin directory + DJGPP_PATH = nil + + GCC = 'i586-pc-msdosdjgpp-gcc' + GXX = 'i586-pc-msdosdjgpp-g++' + AR = 'i586-pc-msdosdjgpp-ar' + + conf.cc do |cc| + cc.command = DJGPP_PATH ? File.join(DJGPP_PATH, GCC) : GCC + cc.defines << 'MRB_NO_IO_PREAD_PWRITE' + cc.defines << 'MRB_UTF8_STRING' + end + + conf.cxx do |cxx| + cxx.command = DJGPP_PATH ? File.join(DJGPP_PATH, GXX) : GXX + cxx.defines << 'MRB_NO_IO_PREAD_PWRITE' + cxx.defines << 'MRB_UTF8_STRING' + end + + conf.linker do |linker| + linker.command = DJGPP_PATH ? File.join(DJGPP_PATH, GXX) : GXX + linker.libraries = %w(m) + end + + conf.archiver do |archiver| + archiver.command = DJGPP_PATH ? File.join(DJGPP_PATH, AR) : AR + end + + # All provided gems that can be reasonably made to compile: + # default.gembox, minus mruby-socket and replacing mruby-cmath with mruby-cmath-alt + conf.gembox "stdlib" + conf.gembox "stdlib-ext" + + conf.gem :core => 'mruby-io' # stdlib-io.gembox <- default.gembox +# No socket support in DJGPP +# conf.gem :core => 'mruby-socket' # stdlib-io.gembox <- default.gembox + conf.gem :core => 'mruby-errno' # stdlib-io.gembox <- default.gembox + conf.gem :core => 'mruby-dir' # stdlib-io.gembox <- default.gembox + + conf.gem :core => 'mruby-bigint' # math.gembox <- default.gembox + conf.gem :core => 'mruby-complex' # math.gembox <- default.gembox + conf.gem :core => 'mruby-math' # math.gembox <- default.gembox + conf.gem :core => 'mruby-rational' # math.gembox <- default.gembox + # Alternative implementation of cmath, not requiring +# conf.gem :github => 'chasonr/mruby-cmath-alt' # math.gembox <- default.gembox + + conf.gembox "metaprog" + + conf.gem :core => 'mruby-bin-mrbc' # default.gembox + conf.gem :core => 'mruby-bin-debugger' # default.gembox + conf.gem :core => 'mruby-bin-mirb' # default.gembox + conf.gem :core => 'mruby-bin-mruby' # default.gembox + conf.gem :core => 'mruby-bin-strip' # default.gembox + conf.gem :core => 'mruby-bin-config' # default.gembox + + # Other compilable gems + conf.gem :core => 'mruby-binding' + conf.gem :core => 'mruby-catch' + conf.gem :core => 'mruby-enum-chain' + conf.gem :core => 'mruby-error' + conf.gem :core => 'mruby-exit' + conf.gem :core => 'mruby-os-memsize' + conf.gem :core => 'mruby-proc-binding' + conf.gem :core => 'mruby-sleep' + + # For Onigmo regular expression support + conf.gem :github => 'mattn/mruby-onig-regexp' +end diff --git a/build_config/luckfox_pico.rb b/build_config/luckfox_pico.rb new file mode 100644 index 0000000000..5a31050b10 --- /dev/null +++ b/build_config/luckfox_pico.rb @@ -0,0 +1,106 @@ +# Cross Compiling configuration for Luckfox Pico embedded SBC. +# +# To build on Ubuntu x86_64: rake MRUBY_CONFIG=build_config/luckfox_pico.rb +# Uses Buildroot SDK for this board. Binaries run on the corresponding Linux image. +# Requires: https://github.com/LuckfoxTECH/luckfox-pico +# +# NOTE: default config includes all standard mrbgems, EXCEPT: mruby-cmath +# +MRuby::CrossBuild.new("luckfox_pico") do |conf| + # Clone the luckfox-pico repo above next to (same directory level) as mruby. + SDK_BASE = File.realpath(File.expand_path("../../../", File.expand_path(__FILE__)) + "/luckfox-pico") + TOOLCHAIN_BASE = "#{SDK_BASE}/tools/linux/toolchain/arm-rockchip830-linux-uclibcgnueabihf" + SYSROOT = "#{TOOLCHAIN_BASE}/arm-rockchip830-linux-uclibcgnueabihf/sysroot" + + toolchain :gcc + + # C compiler settings + conf.cc do |cc| + cc.command = "#{TOOLCHAIN_BASE}/bin/arm-rockchip830-linux-uclibcgnueabihf-gcc" + + cc.include_paths << "#{TOOLCHAIN_BASE}/lib/gcc/arm-rockchip830-linux-uclibcgnueabihf/8.3.0/include" + cc.include_paths << "#{TOOLCHAIN_BASE}/lib/gcc/arm-rockchip830-linux-uclibcgnueabihf/8.3.0/include-fixed" + cc.include_paths << "#{TOOLCHAIN_BASE}/arm-rockchip830-linux-uclibcgnueabihf/include/c++/8.3.0/arm-rockchip830-linux-uclibcgnueabihf" + cc.include_paths << "#{SYSROOT}/usr/include" + + # Flags taken from the SDK's Makefile + cc.flags << ["-march=armv7-a", "-mfpu=neon", "-mfloat-abi=hard"] + cc.flags << ["-D_LARGEFILE_SOURCE", "-D_LARGEFILE64_SOURCE", "-D_FILE_OFFSET_BITS=64", "-ffunction-sections", "-fdata-sections"] + cc.flags << ["-O2", "-fPIC"] + cc.flags << ["-Wl,--copy-dt-needed-entries", "-Wl,-lc,-lgcc_s"] + end + + # Linker settings + conf.linker do |linker| + linker.command = cc.command + linker.library_paths << "#{TOOLCHAIN_BASE}/arm-rockchip830-linux-uclibcgnueabihf/lib" + linker.flags = cc.flags + end + + # Archiver settings + conf.archiver do |archiver| + archiver.command = "#{TOOLCHAIN_BASE}/bin/arm-rockchip830-linux-uclibcgnueabihf-ar" + end + + # Do not build executable test + conf.build_mrbtest_lib_only + + # Disable C++ exception + conf.disable_cxx_exception + + # All standard gems. + conf.gem 'mrbgems/mruby-array-ext/' + conf.gem 'mrbgems/mruby-bigint/' + conf.gem 'mrbgems/mruby-bin-config/' + conf.gem 'mrbgems/mruby-bin-debugger/' + conf.gem 'mrbgems/mruby-bin-mirb/' + conf.gem 'mrbgems/mruby-bin-mrbc/' + conf.gem 'mrbgems/mruby-bin-mruby/' + conf.gem 'mrbgems/mruby-bin-strip/' + conf.gem 'mrbgems/mruby-binding/' + conf.gem 'mrbgems/mruby-catch/' + conf.gem 'mrbgems/mruby-class-ext/' + # SDK doesn't include complex math for uClibc + # conf.gem 'mrbgems/mruby-cmath/' + conf.gem 'mrbgems/mruby-compar-ext/' + conf.gem 'mrbgems/mruby-compiler/' + conf.gem 'mrbgems/mruby-complex/' + conf.gem 'mrbgems/mruby-data/' + conf.gem 'mrbgems/mruby-dir/' + conf.gem 'mrbgems/mruby-enum-chain/' + conf.gem 'mrbgems/mruby-enum-ext/' + conf.gem 'mrbgems/mruby-enum-lazy/' + conf.gem 'mrbgems/mruby-enumerator/' + conf.gem 'mrbgems/mruby-errno/' + conf.gem 'mrbgems/mruby-error/' + conf.gem 'mrbgems/mruby-eval/' + conf.gem 'mrbgems/mruby-exit/' + conf.gem 'mrbgems/mruby-fiber/' + conf.gem 'mrbgems/mruby-hash-ext/' + conf.gem 'mrbgems/mruby-io/' + conf.gem 'mrbgems/mruby-kernel-ext/' + conf.gem 'mrbgems/mruby-math/' + conf.gem 'mrbgems/mruby-metaprog/' + conf.gem 'mrbgems/mruby-method/' + conf.gem 'mrbgems/mruby-numeric-ext/' + conf.gem 'mrbgems/mruby-object-ext/' + conf.gem 'mrbgems/mruby-objectspace/' + conf.gem 'mrbgems/mruby-os-memsize/' + conf.gem 'mrbgems/mruby-pack/' + conf.gem 'mrbgems/mruby-proc-binding/' + conf.gem 'mrbgems/mruby-proc-ext/' + conf.gem 'mrbgems/mruby-random/' + conf.gem 'mrbgems/mruby-range-ext/' + conf.gem 'mrbgems/mruby-rational/' + conf.gem 'mrbgems/mruby-set/' + conf.gem 'mrbgems/mruby-sleep/' + conf.gem 'mrbgems/mruby-socket/' + conf.gem 'mrbgems/mruby-sprintf/' + conf.gem 'mrbgems/mruby-string-ext/' + conf.gem 'mrbgems/mruby-struct/' + conf.gem 'mrbgems/mruby-symbol-ext/' + # conf.gem 'mrbgems/mruby-test-inline-struct/' + # conf.gem 'mrbgems/mruby-test/' + conf.gem 'mrbgems/mruby-time/' + conf.gem 'mrbgems/mruby-toplevel-ext/' +end diff --git a/build_config/milkv_duo.rb b/build_config/milkv_duo.rb new file mode 100644 index 0000000000..3961e15712 --- /dev/null +++ b/build_config/milkv_duo.rb @@ -0,0 +1,106 @@ +# Cross Compiling configuration for Milk-V Duo. +# To build (on Ubuntu 24.04): rake MRUBY_CONFIG=build_config/milkv_duo.rb +# +# Requires: https://github.com/milkv-duo/duo-sdk +# +MRuby::CrossBuild.new("milkv_duo") do |conf| + # Set this string to match your board: milkv_duo, milkv_duo256m, milkv_duos + MILKV_DUO_VARIANT = "milkv_duo256m" + + # Expect duo-sdk directory is same level as (next to) mruby top-level directory. + SDK_BASE = File.expand_path("../../../", File.expand_path(__FILE__)) + "/duo-sdk" + TOOLCHAIN_BASE = "#{SDK_BASE}/riscv64-linux-musl-x86_64" + SYSROOT = "#{SDK_BASE}/rootfs" + + toolchain :gcc + + # C compiler settings + conf.cc do |cc| + cc.command = "#{TOOLCHAIN_BASE}/bin/riscv64-unknown-linux-musl-gcc" + cc.include_paths << "#{TOOLCHAIN_BASE}/lib/gcc/riscv64-unknown-linux/musl/10.2.0/include-fixed" + cc.include_paths << "#{TOOLCHAIN_BASE}/lib/gcc/riscv64-unknown-linux/musl/10.2.0/include" + cc.include_paths << "#{TOOLCHAIN_BASE}/riscv64-unknown-linux/include" + cc.include_paths << "#{TOOLCHAIN_BASE}/include" + cc.include_paths << "#{SYSROOT}/usr/include" + cc.flags << ["-mcpu=c906fdv", "-march=rv64imafdcv0p7xthead", "-mcmodel=medany", "-mabi=lp64d"] + cc.flags << ["-D_LARGEFILE_SOURCE", "-D_LARGEFILE64_SOURCE", "-D_FILE_OFFSET_BITS=64"] + cc.flags << ["-Wl,--copy-dt-needed-entries", "-Wl,-lc,-lgcc_s,-lwiringx"] + cc.defines << "MILKV_DUO_VARIANT=_#{MILKV_DUO_VARIANT}" + end + + # Linker settings + conf.linker do |linker| + linker.command = cc.command + linker.library_paths << ["#{SYSROOT}/lib", "#{SYSROOT}/usr/lib"] + linker.flags = cc.flags + end + + # Archiver settings + conf.archiver do |archiver| + archiver.command = "#{TOOLCHAIN_BASE}/bin/riscv64-unknown-linux-musl-gcc-ar" + end + + # Do not build executable test + conf.build_mrbtest_lib_only + + # Disable C++ exception + conf.disable_cxx_exception + + # All standard gems. + conf.gem 'mrbgems/mruby-array-ext/' + conf.gem 'mrbgems/mruby-bigint/' + conf.gem 'mrbgems/mruby-bin-config/' + conf.gem 'mrbgems/mruby-bin-debugger/' + conf.gem 'mrbgems/mruby-bin-mirb/' + conf.gem 'mrbgems/mruby-bin-mrbc/' + conf.gem 'mrbgems/mruby-bin-mruby/' + conf.gem 'mrbgems/mruby-bin-strip/' + conf.gem 'mrbgems/mruby-binding/' + conf.gem 'mrbgems/mruby-catch/' + conf.gem 'mrbgems/mruby-class-ext/' + conf.gem 'mrbgems/mruby-cmath/' + conf.gem 'mrbgems/mruby-compar-ext/' + conf.gem 'mrbgems/mruby-compiler/' + conf.gem 'mrbgems/mruby-complex/' + conf.gem 'mrbgems/mruby-data/' + conf.gem 'mrbgems/mruby-dir/' + conf.gem 'mrbgems/mruby-enum-chain/' + conf.gem 'mrbgems/mruby-enum-ext/' + conf.gem 'mrbgems/mruby-enum-lazy/' + conf.gem 'mrbgems/mruby-enumerator/' + conf.gem 'mrbgems/mruby-errno/' + conf.gem 'mrbgems/mruby-error/' + conf.gem 'mrbgems/mruby-eval/' + conf.gem 'mrbgems/mruby-exit/' + conf.gem 'mrbgems/mruby-fiber/' + conf.gem 'mrbgems/mruby-hash-ext/' + conf.gem 'mrbgems/mruby-io/' + conf.gem 'mrbgems/mruby-kernel-ext/' + conf.gem 'mrbgems/mruby-math/' + conf.gem 'mrbgems/mruby-metaprog/' + conf.gem 'mrbgems/mruby-method/' + conf.gem 'mrbgems/mruby-numeric-ext/' + conf.gem 'mrbgems/mruby-object-ext/' + conf.gem 'mrbgems/mruby-objectspace/' + conf.gem 'mrbgems/mruby-os-memsize/' + conf.gem 'mrbgems/mruby-pack/' + conf.gem 'mrbgems/mruby-proc-binding/' + conf.gem 'mrbgems/mruby-proc-ext/' + conf.gem 'mrbgems/mruby-random/' + conf.gem 'mrbgems/mruby-range-ext/' + conf.gem 'mrbgems/mruby-rational/' + conf.gem 'mrbgems/mruby-set/' + conf.gem 'mrbgems/mruby-sleep/' + conf.gem 'mrbgems/mruby-socket/' + conf.gem 'mrbgems/mruby-sprintf/' + conf.gem 'mrbgems/mruby-string-ext/' + conf.gem 'mrbgems/mruby-struct/' + conf.gem 'mrbgems/mruby-symbol-ext/' + # conf.gem 'mrbgems/mruby-test-inline-struct/' + # conf.gem 'mrbgems/mruby-test/' + conf.gem 'mrbgems/mruby-time/' + conf.gem 'mrbgems/mruby-toplevel-ext/' + + # GPIO, ADC, PWM, I2C and SPI support for Milk-V Duo + # conf.gem :github => 'denko-rb/mruby-milkv-duo' +end diff --git a/build_config/mrbc.rb b/build_config/mrbc.rb index 95444b936f..df2a51173b 100644 --- a/build_config/mrbc.rb +++ b/build_config/mrbc.rb @@ -7,5 +7,4 @@ conf.build_mrbc_exec conf.disable_libmruby - conf.disable_presym end diff --git a/build_config/nintendo_wii.rb b/build_config/nintendo_wii.rb new file mode 100644 index 0000000000..62758ab132 --- /dev/null +++ b/build_config/nintendo_wii.rb @@ -0,0 +1,95 @@ +# Cross Compiling configuration for the Nintendo Wii +# This configuration requires devkitPPC +# https://devkitpro.org/wiki/Getting_Started +# +# +MRuby::CrossBuild.new("wii") do |conf| + toolchain :gcc + + DEVKITPRO_PATH = "/opt/devkitpro" + BIN_PATH = "#{DEVKITPRO_PATH}/devkitPPC/bin" + + # C compiler + conf.cc do |cc| + cc.command = "#{BIN_PATH}/powerpc-eabi-gcc" + cc.compile_options = %(%{flags} -o "%{outfile}" -c "%{infile}") + end + + # C++ compiler + conf.cxx do |cxx| + cxx.command = "#{BIN_PATH}/powerpc-eabi-g++" + cxx.include_paths = conf.cc.include_paths.dup + cxx.flags = conf.cc.flags.dup + cxx.defines = conf.cc.defines.dup + cxx.compile_options = conf.cc.compile_options.dup + end + + # Linker + conf.linker do |linker| + linker.command = "#{BIN_PATH}/powerpc-eabi-gcc" + end + + # No executables + conf.bins = [] + + # Do not build executable test + conf.build_mrbtest_lib_only + + # Disable C++ exception + conf.disable_cxx_exception + + # All current core gems with ones with build issues commented out + conf.gem 'mrbgems/mruby-array-ext/' + conf.gem 'mrbgems/mruby-bigint/' + conf.gem 'mrbgems/mruby-bin-config/' + conf.gem 'mrbgems/mruby-bin-debugger/' + conf.gem 'mrbgems/mruby-bin-mirb/' + conf.gem 'mrbgems/mruby-bin-mrbc/' + conf.gem 'mrbgems/mruby-bin-mruby/' + conf.gem 'mrbgems/mruby-bin-strip/' + conf.gem 'mrbgems/mruby-binding/' + conf.gem 'mrbgems/mruby-catch/' + conf.gem 'mrbgems/mruby-class-ext/' + conf.gem 'mrbgems/mruby-cmath/' + conf.gem 'mrbgems/mruby-compar-ext/' + conf.gem 'mrbgems/mruby-compiler/' + conf.gem 'mrbgems/mruby-complex/' + conf.gem 'mrbgems/mruby-data/' + #conf.gem 'mrbgems/mruby-dir/' + conf.gem 'mrbgems/mruby-enum-chain/' + conf.gem 'mrbgems/mruby-enum-ext/' + conf.gem 'mrbgems/mruby-enum-lazy/' + conf.gem 'mrbgems/mruby-enumerator/' + conf.gem 'mrbgems/mruby-errno/' + conf.gem 'mrbgems/mruby-error/' + conf.gem 'mrbgems/mruby-eval/' + conf.gem 'mrbgems/mruby-exit/' + conf.gem 'mrbgems/mruby-fiber/' + conf.gem 'mrbgems/mruby-hash-ext/' + #conf.gem 'mrbgems/mruby-io/' + conf.gem 'mrbgems/mruby-kernel-ext/' + conf.gem 'mrbgems/mruby-math/' + conf.gem 'mrbgems/mruby-metaprog/' + conf.gem 'mrbgems/mruby-method/' + conf.gem 'mrbgems/mruby-numeric-ext/' + conf.gem 'mrbgems/mruby-object-ext/' + conf.gem 'mrbgems/mruby-objectspace/' + conf.gem 'mrbgems/mruby-os-memsize/' + conf.gem 'mrbgems/mruby-pack/' + conf.gem 'mrbgems/mruby-proc-binding/' + conf.gem 'mrbgems/mruby-proc-ext/' + conf.gem 'mrbgems/mruby-random/' + conf.gem 'mrbgems/mruby-range-ext/' + conf.gem 'mrbgems/mruby-rational/' + conf.gem 'mrbgems/mruby-set/' + conf.gem 'mrbgems/mruby-sleep/' + #conf.gem 'mrbgems/mruby-socket/' + conf.gem 'mrbgems/mruby-sprintf/' + conf.gem 'mrbgems/mruby-string-ext/' + conf.gem 'mrbgems/mruby-struct/' + conf.gem 'mrbgems/mruby-symbol-ext/' + conf.gem 'mrbgems/mruby-test-inline-struct/' + #conf.gem 'mrbgems/mruby-test/' + conf.gem 'mrbgems/mruby-time/' + conf.gem 'mrbgems/mruby-toplevel-ext/' +end diff --git a/build_config/no-float.rb b/build_config/no-float.rb new file mode 100644 index 0000000000..e0738cac4c --- /dev/null +++ b/build_config/no-float.rb @@ -0,0 +1,17 @@ +# Define cross build settings +MRuby::CrossBuild.new('no-float') do |conf| + conf.toolchain + + # Add configuration + conf.compilers.each do |c| + c.defines << "MRB_NO_FLOAT" + end + + conf.gem :core => "mruby-bin-mruby" + + conf.test_runner.command = 'env' + + conf.enable_debug +# conf.enable_bintest + conf.enable_test +end diff --git a/build_config/playstationportable.rb b/build_config/playstationportable.rb new file mode 100644 index 0000000000..453082517e --- /dev/null +++ b/build_config/playstationportable.rb @@ -0,0 +1,80 @@ +# Cross Compiling configuration for the Sony PlayStation Portable. +# This configuration requires toolchain from https://github.com/pspdev + +MRuby::CrossBuild.new("playstationportable") do |conf| + toolchain :gcc + + PSPDEV_PATH = "#{ENV['PSPDEV']}" + BIN_PATH = "#{PSPDEV_PATH}/bin" + + # C compiler + conf.cc do |cc| + cc.command = "#{BIN_PATH}/psp-gcc" + cc.flags << ["-O2", "-D_PSP_FW_VERSION=600"] + cc.include_paths << ["#{PSPDEV_PATH}/psp/include", "#{PSPDEV_PATH}/psp/sdk/include"] + cc.compile_options = %(%{flags} -o "%{outfile}" -c "%{infile}") + end + + # C++ compiler + conf.cxx do |cxx| + cxx.command = "#{BIN_PATH}/psp-g++" + cxx.include_paths = conf.cc.include_paths.dup + cxx.flags = conf.cc.flags.dup + cxx.flags << %w[-fno-rtti -fno-exceptions] + cxx.defines = conf.cc.defines.dup + cxx.compile_options = conf.cc.compile_options.dup + end + + # Linker + conf.linker do |linker| + linker.command = "#{BIN_PATH}/psp-gcc" + linker.flags << ["-Wl,-zmax-page-size=128"] + end + + # No executables + conf.bins = [] + + # Do not build executable test + conf.build_mrbtest_lib_only + + # Gems from core + conf.gem :core => "mruby-metaprog" + conf.gem :core => "mruby-pack" + conf.gem :core => "mruby-sprintf" + conf.gem :core => "mruby-math" + conf.gem :core => "mruby-time" + conf.gem :core => "mruby-struct" + conf.gem :core => "mruby-compar-ext" + conf.gem :core => "mruby-enum-ext" + conf.gem :core => "mruby-string-ext" + conf.gem :core => "mruby-numeric-ext" + conf.gem :core => "mruby-array-ext" + conf.gem :core => "mruby-hash-ext" + conf.gem :core => "mruby-range-ext" + conf.gem :core => "mruby-proc-ext" + conf.gem :core => "mruby-symbol-ext" + conf.gem :core => "mruby-random" + conf.gem :core => "mruby-object-ext" + conf.gem :core => "mruby-objectspace" + conf.gem :core => "mruby-fiber" + conf.gem :core => "mruby-enumerator" + conf.gem :core => "mruby-enum-lazy" + conf.gem :core => "mruby-toplevel-ext" + conf.gem :core => "mruby-kernel-ext" + conf.gem :core => "mruby-class-ext" + conf.gem :core => "mruby-compiler" + conf.gem :core => "mruby-binding" + conf.gem :core => "mruby-catch" + conf.gem :core => "mruby-enum-chain" + conf.gem :core => "mruby-errno" + conf.gem :core => "mruby-error" + conf.gem :core => "mruby-exit" + conf.gem :core => "mruby-os-memsize" + conf.gem :core => "mruby-proc-binding" + conf.gem :core => "mruby-sleep" + # Disabled until PSP-specific HALs are available; the POSIX HALs depend on + # APIs that the PSP SDK does not fully provide. + # conf.gem :core => "mruby-io" + # conf.gem :core => "mruby-dir" + # conf.gem :core => "mruby-socket" +end diff --git a/doc/README.md b/doc/README.md new file mode 100644 index 0000000000..0d6f6e6173 --- /dev/null +++ b/doc/README.md @@ -0,0 +1,67 @@ +# mruby Documentation + +## Getting Started + +New to mruby? Start here: + +| Document | Description | +| -------------------------------------------- | -------------------------------------- | +| [Getting Started](guides/getting-started.md) | Build mruby and run your first program | +| [Language Features](guides/language.md) | Ruby subset supported by mruby | +| [Limitations](limitations.md) | Behavioral differences from CRuby | + +## Guides (for embedders and gem authors) + +### Embedding mruby in C + +| Document | Description | +| --------------------------------------- | ------------------------------------------------ | +| [C API Reference](guides/capi.md) | Values, classes, methods, error handling, fibers | +| [GC Arena](guides/gc-arena-howto.md) | Managing temporary objects in C extensions | +| [Linking](guides/link.md) | Linking with `libmruby` | +| [Amalgamation](guides/amalgamation.md) | Single-file build for easy integration | +| [Precompiled Symbols](guides/symbol.md) | Compile-time symbol allocation | + +### Building and Configuring + +| Document | Description | +| ---------------------------------------- | ------------------------------------------- | +| [Compilation](guides/compile.md) | Build system, cross-compilation, toolchains | +| [Build Configuration](guides/mrbconf.md) | Compile-time macros (`MRB_*` flags) | +| [mrbgems](guides/mrbgems.md) | Creating and managing gems | +| [Memory](guides/memory.md) | Allocator customization and heap regions | + +### Tools + +| Document | Description | +| ----------------------------------------------- | ----------------------------------------------- | +| [Debugger](guides/debugger.md) | Using `mrdb` for debugging | +| [ROM Method Tables](guides/rom-method-table.md) | Read-only method tables for constrained devices | + +### Reference + +| Document | Description | +| ------------------------------------- | ------------------ | +| [Directory Structure](guides/hier.md) | Source tree layout | + +## Internals (for mruby contributors) + +Start with [Architecture](internal/architecture.md) for an overview, +then dive into the subsystem you need: + +| Document | Description | +| ----------------------------------------- | -------------------------------------------------- | +| [Architecture](internal/architecture.md) | Overview of object model, VM, GC, compiler | +| [Virtual Machine](internal/vm.md) | Dispatch loop, call frames, method lookup, fibers | +| [Garbage Collector](internal/gc.md) | Tri-color marking, write barriers, generational GC | +| [Compiler Pipeline](internal/compiler.md) | Parser, code generator, IRep, binary format | +| [Opcodes](internal/opcode.md) | VM instruction set reference | +| [Value Boxing](internal/boxing.md) | How `mrb_value` encodes types | + +## Release Notes + +- [mruby 3.4](mruby3.4.md) +- [mruby 3.3](mruby3.3.md) +- [mruby 3.2](mruby3.2.md) +- [mruby 3.1](mruby3.1.md) +- [mruby 3.0](mruby3.0.md) diff --git a/doc/guides/amalgamation.md b/doc/guides/amalgamation.md new file mode 100644 index 0000000000..aa9db3dcf0 --- /dev/null +++ b/doc/guides/amalgamation.md @@ -0,0 +1,153 @@ + + +# Amalgamation + +Amalgamation combines all mruby source files into a single `mruby.c` and +`mruby.h` for easy embedding, similar to SQLite's distribution model. + +## Benefits + +- **Simple integration**: Just two files to add to your project +- **Single compilation unit**: Enables better compiler optimization +- **No build system required**: Compile directly with any C compiler +- **Portable**: No external dependencies beyond standard C library + (but see [Platform-Dependent Gems](#platform-dependent-gems) below) + +## Generating Amalgamation + +```console +rake amalgam +``` + +Output files are generated in `build//amalgam/`: + +- `mruby.h` - All headers concatenated in dependency order +- `mruby.c` - All sources concatenated (core + gems + mrblib) + +### With Custom Configuration + +The amalgamation includes gems specified in your build configuration: + +```console +MRUBY_CONFIG=build_config/minimal.rb rake amalgam +``` + +## Using the Amalgamation + +### Basic Usage + +```c +#include "mruby.h" + +int main(void) { + mrb_state *mrb = mrb_open(); + mrb_load_string(mrb, "puts 'Hello from mruby!'"); + mrb_close(mrb); + return 0; +} +``` + +### Compiling + +```console +gcc -I./build/host/amalgam your_app.c ./build/host/amalgam/mruby.c -o your_app -lm +``` + +For optimized builds: + +```console +gcc -O2 -DNDEBUG -I./build/host/amalgam your_app.c ./build/host/amalgam/mruby.c -o your_app -lm +``` + +## Gem Compatibility + +### Known Working Gems + +The following gems work with amalgamation: + +- `mruby-compiler` - Required for `mrb_load_string` +- `mruby-eval` - `eval`, `Binding` +- `mruby-array-ext`, `mruby-string-ext`, `mruby-hash-ext` +- `mruby-numeric-ext`, `mruby-range-ext`, `mruby-symbol-ext` +- `mruby-proc-ext`, `mruby-kernel-ext`, `mruby-object-ext`, `mruby-class-ext` +- `mruby-enum-ext`, `mruby-compar-ext` +- `mruby-error`, `mruby-math`, `mruby-struct` +- `mruby-bigint`, `mruby-rational`, `mruby-complex` +- `mruby-io` (with the active `ports//` HAL) +- `mruby-task` (with the active `ports//` HAL) + +### Platform-Dependent Gems + +Gems that use a HAL (Hardware Abstraction Layer) include +platform-specific code in the amalgamation. For example, if +`mruby-io` selects its POSIX port on Linux, the generated `mruby.c` +contains POSIX-specific code and cannot be compiled on Windows. + +If you need amalgamated files for multiple platforms, generate them +separately for each target platform (or cross-build configuration). + +### Excluded Gems + +Binary gems (`mruby-bin-*`) are automatically excluded as they contain +their own `main()` function. The amalgamation produces a library, not +an executable. + +## Example Configuration + +A minimal configuration for amalgamation: + +```ruby +# build_config/amalgam.rb +MRuby::Build.new do |conf| + conf.toolchain :gcc + + conf.gem core: 'mruby-compiler' + conf.gem core: 'mruby-error' + conf.gem core: 'mruby-eval' + conf.gem core: 'mruby-array-ext' + conf.gem core: 'mruby-string-ext' + conf.gem core: 'mruby-hash-ext' + conf.gem core: 'mruby-io' +end +``` + +Generate with: + +```console +MRUBY_CONFIG=build_config/amalgam.rb rake amalgam +``` + +## Output Sizes + +Typical sizes depend on included gems: + +- `mruby.h`: 200-500 KB +- `mruby.c`: 2-4 MB + +## Technical Details + +### Header Processing + +- Include guards are stripped to allow concatenation +- Headers are ordered by dependency (foundation types first) +- Internal includes are commented out (already in `mruby.h`) + +### Source Processing + +- Sources are concatenated in proper initialization order +- X-macro headers (like `mruby/ops.h`) are inlined at each use +- Local includes (`.cstub` files) are automatically inlined +- Generated files (`mrblib.c`, `gem_init.c`) are included + +### Gem Defines + +Gems that add preprocessor defines affecting core structures are +automatically detected and included at the top of `mruby.h`. +Supported patterns: `MRB_USE_*`, `MRB_UTF8_*`, `HAVE_MRUBY_*`. + +### Build Order + +1. Core sources (`src/*.c`) +2. Gem sources (`mrbgems/*/src/*.c` or `core/*.c`) +3. Generated mrblib (`build/*/mrblib/mrblib.c`) +4. Gem initialization (`build/*/mrbgems/gem_init.c`) diff --git a/doc/guides/capi.md b/doc/guides/capi.md new file mode 100644 index 0000000000..bc91d9c478 --- /dev/null +++ b/doc/guides/capi.md @@ -0,0 +1,863 @@ + + +# C API Reference + +This document covers the mruby C API for embedding and extending mruby. + +**Contents:** +[Headers](#headers) | +[State Management](#state-management) | +[Values](#values) | +[Defining Classes and Modules](#defining-classes-and-modules) | +[Defining Methods](#defining-methods) | +[Parsing Arguments](#parsing-arguments) | +[Calling Ruby Methods from C](#calling-ruby-methods-from-c) | +[String Operations](#string-operations) | +[Array Operations](#array-operations) | +[Hash Operations](#hash-operations) | +[Wrapping C Structures](#wrapping-c-structures) | +[Exception Handling](#exception-handling) | +[Method Visibility](#method-visibility) | +[Proc and Block Handling](#proc-and-block-handling) | +[Fiber API](#fiber-api) | +[Compilation Contexts](#compilation-contexts) | +[Precompiled Bytecode](#precompiled-bytecode) | +[GC Arena](#gc-arena) | +[Memory Allocation](#memory-allocation) + +## Headers + +```c +#include /* core types, state, class/method definition */ +#include /* mrb_load_string, mrb_load_file */ +#include /* string operations */ +#include /* array operations */ +#include /* hash operations */ +#include /* wrapping C structs */ +#include /* class inspection */ +#include /* value type macros */ +#include /* loading precompiled bytecode */ +#include /* error handling (mrb_protect etc.) */ +#include /* instance/class/global variables */ +``` + +## State Management + +```c +mrb_state *mrb = mrb_open(); /* create state with all gems */ +mrb_state *mrb = mrb_open_core(); /* create state without gems */ +mrb_close(mrb); /* close and free state */ +``` + +`mrb_open()` returns `NULL` on allocation failure. Always check the +return value. + +## Values + +All Ruby values are represented as `mrb_value` in C. + +### Creating Values + +```c +mrb_nil_value() /* nil */ +mrb_true_value() /* true */ +mrb_false_value() /* false */ +mrb_bool_value(mrb_bool b) /* true or false */ +mrb_fixnum_value(mrb_int i) /* Integer */ +mrb_float_value(mrb_state *mrb, mrb_float f) /* Float */ +mrb_symbol_value(mrb_sym sym) /* Symbol */ +mrb_obj_value(void *p) /* object pointer to value */ +mrb_cptr_value(mrb_state *mrb, void *p) /* C pointer */ +``` + +### Type Checking + +```c +mrb_type(v) /* returns enum mrb_vtype */ +mrb_nil_p(v) /* true if nil */ +mrb_integer_p(v) /* true if Integer */ +mrb_float_p(v) /* true if Float */ +mrb_symbol_p(v) /* true if Symbol */ +mrb_string_p(v) /* true if String */ +mrb_array_p(v) /* true if Array */ +mrb_hash_p(v) /* true if Hash */ +mrb_true_p(v) /* true if true */ +mrb_false_p(v) /* true if false */ +mrb_undef_p(v) /* true if undefined */ +mrb_immediate_p(v) /* true if not a heap object */ +``` + +### Extracting C Values + +```c +mrb_integer(v) /* mrb_int from Integer value */ +mrb_float(v) /* mrb_float from Float value */ +mrb_symbol(v) /* mrb_sym from Symbol value */ +mrb_ptr(v) /* void* from object value */ +mrb_str_to_cstr(mrb, v) /* const char* from String value */ +``` + +### Value Types + +| `mrb_vtype` | Ruby Class | Notes | +| ------------------ | ------------------- | ------------------------ | +| `MRB_TT_FALSE` | FalseClass/NilClass | `nil` has `MRB_TT_FALSE` | +| `MRB_TT_TRUE` | TrueClass | | +| `MRB_TT_INTEGER` | Integer | Immediate value | +| `MRB_TT_FLOAT` | Float | May be immediate | +| `MRB_TT_SYMBOL` | Symbol | Immediate value | +| `MRB_TT_STRING` | String | Heap object | +| `MRB_TT_ARRAY` | Array | Heap object | +| `MRB_TT_HASH` | Hash | Heap object | +| `MRB_TT_OBJECT` | Object | User-defined classes | +| `MRB_TT_CLASS` | Class | | +| `MRB_TT_MODULE` | Module | | +| `MRB_TT_PROC` | Proc | | +| `MRB_TT_CDATA` | (C data) | Wrapped C structs | +| `MRB_TT_EXCEPTION` | Exception | | +| `MRB_TT_FIBER` | Fiber | | + +## Defining Classes and Modules + +```c +/* Define a class under Object */ +struct RClass *my_class = mrb_define_class(mrb, "MyClass", mrb->object_class); + +/* Define a class under another class/module */ +struct RClass *inner = mrb_define_class_under(mrb, outer, "Inner", mrb->object_class); + +/* Define a module */ +struct RClass *my_mod = mrb_define_module(mrb, "MyModule"); +struct RClass *inner_mod = mrb_define_module_under(mrb, outer, "InnerMod"); + +/* Include/prepend a module */ +mrb_include_module(mrb, my_class, my_mod); +mrb_prepend_module(mrb, my_class, my_mod); + +/* Look up existing class/module */ +struct RClass *c = mrb_class_get(mrb, "String"); +struct RClass *m = mrb_module_get(mrb, "Kernel"); + +/* Define a constant */ +mrb_define_const(mrb, my_class, "VERSION", mrb_str_new_lit(mrb, "1.0")); +``` + +## Defining Methods + +All C methods have the same signature: + +```c +static mrb_value +my_method(mrb_state *mrb, mrb_value self) +{ + /* self is the receiver */ + return mrb_nil_value(); +} +``` + +Register with: + +```c +mrb_define_method(mrb, klass, "name", my_method, MRB_ARGS_NONE()); +mrb_define_class_method(mrb, klass, "name", my_method, MRB_ARGS_REQ(1)); +mrb_define_module_function(mrb, mod, "name", my_method, MRB_ARGS_ANY()); +``` + +### Argument Specifiers + +| Macro | Meaning | +| ---------------------- | ------------------------------------- | +| `MRB_ARGS_NONE()` | No arguments | +| `MRB_ARGS_REQ(n)` | `n` required arguments | +| `MRB_ARGS_OPT(n)` | `n` optional arguments | +| `MRB_ARGS_ARG(r,o)` | `r` required + `o` optional | +| `MRB_ARGS_REST()` | Splat (`*args`) | +| `MRB_ARGS_BLOCK()` | Block (`&block`) | +| `MRB_ARGS_ANY()` | Any number (same as REST) | +| `MRB_ARGS_KEY(n,rest)` | `n` keyword args, `rest`=1 for `**kw` | + +These can be combined with `|`: + +```c +MRB_ARGS_REQ(1) | MRB_ARGS_OPT(2) | MRB_ARGS_BLOCK() +``` + +## Parsing Arguments + +`mrb_get_args()` extracts arguments from the Ruby call stack: + +```c +mrb_int mrb_get_args(mrb_state *mrb, const char *format, ...); +``` + +### Format Specifiers + +| Spec | Ruby Type | C Type(s) | Notes | +| ---- | ------------- | --------------------------- | -------------------------------- | +| `o` | any | `mrb_value` | No type check | +| `i` | Numeric | `mrb_int` | Coerces to integer | +| `f` | Numeric | `mrb_float` | Coerces to float | +| `b` | any | `mrb_bool` | Truthiness | +| `n` | String/Symbol | `mrb_sym` | Converts to symbol | +| `s` | String | `const char*, mrb_int` | Pointer + length | +| `z` | String | `const char*` | Null-terminated | +| `S` | String | `mrb_value` | String value | +| `A` | Array | `mrb_value` | Array value | +| `H` | Hash | `mrb_value` | Hash value | +| `C` | Class | `mrb_value` | Class/Module value | +| `c` | Class | `struct RClass*` | Class pointer | +| `a` | Array | `const mrb_value*, mrb_int` | Array pointer + length | +| `d` | C Data | `void*` | Requires `mrb_data_type*` | +| `&` | Block | `mrb_value` | Block argument | +| `*` | rest | `const mrb_value*, mrb_int` | Rest arguments | +| `\|` | — | — | Following args are optional | +| `?` | — | `mrb_bool` | Was previous optional arg given? | +| `:` | keywords | `mrb_kwargs` | Keyword arguments | + +Adding `!` to `S`, `A`, `H`, `C`, `c`, `s`, `z`, `a`, `d` allows `nil` +(returns NULL/zero for nil). + +### Examples + +```c +/* def method(name, count) */ +const char *name; mrb_int len, count; +mrb_get_args(mrb, "si", &name, &len, &count); + +/* def method(required, optional=nil) */ +mrb_value req, opt = mrb_nil_value(); +mrb_get_args(mrb, "o|o", &req, &opt); + +/* def method(*args) */ +const mrb_value *args; mrb_int argc; +mrb_get_args(mrb, "*", &args, &argc); + +/* def method(&block) */ +mrb_value block; +mrb_get_args(mrb, "&", &block); + +/* def method(name:, age: 0) */ +mrb_sym kw_names[] = { mrb_intern_lit(mrb, "name"), mrb_intern_lit(mrb, "age") }; +mrb_value kw_values[2]; +mrb_kwargs kw = { 2, 1, kw_names, kw_values, NULL }; +mrb_get_args(mrb, ":", &kw); +/* kw_values[0] = name (required), kw_values[1] = age (optional, undef if not given) */ +``` + +## Calling Ruby Methods from C + +```c +/* Call obj.method(arg1, arg2) */ +mrb_funcall(mrb, obj, "method", 2, arg1, arg2); + +/* Call with symbol (faster, no string lookup) */ +mrb_funcall_id(mrb, obj, mrb_intern_lit(mrb, "method"), 2, arg1, arg2); + +/* Call with argv array */ +mrb_value argv[] = { arg1, arg2 }; +mrb_funcall_argv(mrb, obj, mrb_intern_lit(mrb, "method"), 2, argv); + +/* Call with block */ +mrb_funcall_with_block(mrb, obj, mid, argc, argv, block); + +/* Yield to block */ +mrb_yield(mrb, block, arg); +mrb_yield_argv(mrb, block, argc, argv); +``` + +## String Operations + +```c +/* Creation */ +mrb_str_new_lit(mrb, "hello") /* from string literal */ +mrb_str_new(mrb, ptr, len) /* from pointer + length */ +mrb_str_new_cstr(mrb, cstr) /* from null-terminated C string */ +mrb_str_new_static(mrb, ptr, len) /* from static data (no copy) */ + +/* Access */ +RSTRING_PTR(str) /* char* pointer */ +RSTRING_LEN(str) /* length */ +mrb_str_to_cstr(mrb, str) /* null-terminated (may copy) */ + +/* Modification */ +mrb_str_cat(mrb, str, ptr, len) /* append bytes */ +mrb_str_cat_cstr(mrb, str, cstr) /* append C string */ +mrb_str_cat_str(mrb, str, str2) /* append String */ + +/* Comparison */ +mrb_str_equal(mrb, str1, str2) /* equality */ +mrb_str_cmp(mrb, str1, str2) /* comparison (-1, 0, 1) */ +``` + +## Array Operations + +```c +/* Creation */ +mrb_ary_new(mrb) /* empty array */ +mrb_ary_new_capa(mrb, capa) /* preallocated */ +mrb_ary_new_from_values(mrb, n, vals) /* from C array */ + +/* Access */ +RARRAY_PTR(ary) /* mrb_value* pointer */ +RARRAY_LEN(ary) /* length */ +mrb_ary_entry(ary, idx) /* get element (no mrb needed) */ + +/* Modification */ +mrb_ary_push(mrb, ary, val) /* append */ +mrb_ary_pop(mrb, ary) /* remove last */ +mrb_ary_shift(mrb, ary) /* remove first */ +mrb_ary_unshift(mrb, ary, val) /* prepend */ +mrb_ary_set(mrb, ary, idx, val) /* set element */ +mrb_ary_splice(mrb, ary, pos, len, rpl) /* splice */ +mrb_ary_concat(mrb, ary, other) /* extend */ +``` + +## Hash Operations + +```c +/* Creation */ +mrb_hash_new(mrb) /* empty hash */ + +/* Access */ +mrb_hash_get(mrb, hash, key) /* get value */ +mrb_hash_fetch(mrb, hash, key, def) /* get with default */ +mrb_hash_key_p(mrb, hash, key) /* key exists? */ +mrb_hash_empty_p(mrb, hash) /* empty? */ +mrb_hash_size(mrb, hash) /* number of entries */ + +/* Modification */ +mrb_hash_set(mrb, hash, key, val) /* set key-value */ +mrb_hash_delete_key(mrb, hash, key) /* delete key */ +mrb_hash_merge(mrb, hash1, hash2) /* merge hash2 into hash1 */ + +/* Iteration */ +mrb_hash_keys(mrb, hash) /* Array of keys */ +mrb_hash_values(mrb, hash) /* Array of values */ +``` + +## Wrapping C Structures + +To expose a C struct to Ruby, use `mrb_data_type` and `Data_Wrap_Struct`: + +```c +/* 1. Define the data type with a name and destructor */ +static void point_free(mrb_state *mrb, void *p) { + mrb_free(mrb, p); +} + +static const mrb_data_type point_type = { + "Point", point_free +}; + +/* 2. Allocate and initialize */ +static mrb_value +point_init(mrb_state *mrb, mrb_value self) +{ + mrb_float x, y; + mrb_get_args(mrb, "ff", &x, &y); + + double *data = (double*)mrb_malloc(mrb, sizeof(double) * 2); + data[0] = x; + data[1] = y; + + DATA_PTR(self) = data; + DATA_TYPE(self) = &point_type; + + return self; +} + +/* 3. Access the wrapped data */ +static mrb_value +point_x(mrb_state *mrb, mrb_value self) +{ + double *data = (double*)mrb_data_get_ptr(mrb, self, &point_type); + return mrb_float_value(mrb, data[0]); +} + +/* 4. Register the class */ +struct RClass *point = mrb_define_class(mrb, "Point", mrb->object_class); +MRB_SET_INSTANCE_TT(point, MRB_TT_CDATA); +mrb_define_method(mrb, point, "initialize", point_init, MRB_ARGS_REQ(2)); +mrb_define_method(mrb, point, "x", point_x, MRB_ARGS_NONE()); +``` + +**Do not call into mruby from a `dfree` handler.** The handler runs +from inside GC sweep; allocating Ruby objects, calling +`mrb_funcall`, `mrb_yield`, raising exceptions, or otherwise +re-entering the VM can trigger a recursive GC that revisits the +same object and causes double-free. Keep `dfree` to `mrb_free` / +plain C cleanup of the wrapped data only. + +## Exception Handling + +### Raising Exceptions + +```c +mrb_raise(mrb, E_RUNTIME_ERROR, "something went wrong"); +mrb_raisef(mrb, E_ARGUMENT_ERROR, "expected %d, got %d", expected, actual); +mrb_raise(mrb, E_TYPE_ERROR, "wrong type"); +``` + +Common exception classes: `E_RUNTIME_ERROR`, `E_TYPE_ERROR`, +`E_ARGUMENT_ERROR`, `E_RANGE_ERROR`, `E_NAME_ERROR`, +`E_NOMETHOD_ERROR`, `E_NOTIMP_ERROR`, `E_KEY_ERROR`. + +### Catching Exceptions + +```c +/* Check after mrb_load_string or mrb_funcall */ +mrb_value result = mrb_load_string(mrb, code); +if (mrb->exc) { + mrb_print_error(mrb); + mrb->exc = NULL; /* clear exception */ +} +``` + +### Protected Execution + +`mrb_protect()` executes a function under protection. If an +exception is raised, it is captured as a return value instead of +propagating: + +```c +static mrb_value +safe_operation(mrb_state *mrb, mrb_value data) +{ + /* This function might raise an exception */ + return mrb_funcall(mrb, data, "do_something", 0); +} + +mrb_bool error; +mrb_value result = mrb_protect(mrb, safe_operation, data, &error); +if (error) { + /* result contains the exception object; mrb->exc is cleared */ + mrb_value inspect = mrb_inspect(mrb, result); + fprintf(stderr, "Error: %s\n", mrb_str_to_cstr(mrb, inspect)); +} +``` + +**Note:** `mrb_protect` clears `mrb->exc` after catching the +exception. The exception is returned as `result`. Do not use +`mrb_print_error()` after `mrb_protect` — it reads `mrb->exc` +which is already `NULL`. + +For lower-level protection with a `void*` callback: + +```c +static mrb_value +body(mrb_state *mrb, void *userdata) +{ + /* ... */ +} + +mrb_bool error; +mrb_value result = mrb_protect_error(mrb, body, userdata, &error); +``` + +### Rescue + +`mrb_rescue()` catches `StandardError` (like Ruby's `rescue`): + +```c +static mrb_value +body_func(mrb_state *mrb, mrb_value body_data) +{ + return mrb_funcall(mrb, body_data, "risky_method", 0); +} + +static mrb_value +rescue_func(mrb_state *mrb, mrb_value rescue_data) +{ + /* handle error, rescue_data is the data passed in */ + return mrb_nil_value(); +} + +mrb_value result = mrb_rescue(mrb, body_func, body_data, + rescue_func, rescue_data); +``` + +To rescue specific exception classes: + +```c +struct RClass *classes[] = { + E_ARGUMENT_ERROR, + mrb_class_get(mrb, "IOError") +}; +mrb_value result = mrb_rescue_exceptions(mrb, body_func, body_data, + rescue_func, rescue_data, + 2, classes); +``` + +### Ensure + +`mrb_ensure()` guarantees cleanup runs regardless of exceptions +(like Ruby's `ensure`): + +```c +static mrb_value +body_func(mrb_state *mrb, mrb_value data) +{ + return mrb_funcall(mrb, data, "process", 0); +} + +static mrb_value +cleanup_func(mrb_state *mrb, mrb_value data) +{ + mrb_funcall(mrb, data, "close", 0); + return mrb_nil_value(); +} + +mrb_value result = mrb_ensure(mrb, body_func, body_data, + cleanup_func, cleanup_data); +``` + +The ensure function always executes. If the body raises an +exception, the ensure runs and then the exception is re-raised. + +### Error State Management + +```c +mrb_bool mrb_check_error(mrb_state *mrb); /* check and clear mrb->exc */ +void mrb_clear_error(mrb_state *mrb); /* clear mrb->exc */ +``` + +## Method Visibility + +```c +/* Public (default) */ +mrb_define_method(mrb, klass, "name", func, MRB_ARGS_NONE()); + +/* Private - only callable without explicit receiver */ +mrb_define_private_method(mrb, klass, "name", func, MRB_ARGS_NONE()); + +/* Class method (singleton method on the class object) */ +mrb_define_class_method(mrb, klass, "name", func, MRB_ARGS_NONE()); + +/* Module function (both module method and private instance method) */ +mrb_define_module_function(mrb, mod, "name", func, MRB_ARGS_NONE()); + +/* Singleton method on a specific object */ +mrb_define_singleton_method(mrb, obj, "name", func, MRB_ARGS_NONE()); + +/* Method alias: alias new_name old_name */ +mrb_define_alias(mrb, klass, "new_name", "old_name"); + +/* Remove a method */ +mrb_undef_method(mrb, klass, "name"); +mrb_undef_class_method(mrb, klass, "name"); +``` + +All `_method` variants have `_id` counterparts that accept +`mrb_sym` instead of `const char*` for better performance. + +## Proc and Block Handling + +### Creating Procs from C Functions + +```c +/* Simple C function proc */ +struct RProc *proc = mrb_proc_new_cfunc(mrb, my_func); + +/* C closure with captured local variables */ +struct RProc *proc = mrb_closure_new_cfunc(mrb, my_func, nlocals); +``` + +### C Functions with Environment (requires mruby-proc-ext) + +Store values in a proc's environment, accessible from the C +function: + +```c +mrb_value env_values[] = { mrb_fixnum_value(42) }; +struct RProc *proc = mrb_proc_new_cfunc_with_env(mrb, my_func, 1, env_values); + +/* Inside my_func, retrieve environment values */ +static mrb_value my_func(mrb_state *mrb, mrb_value self) +{ + mrb_value val = mrb_proc_cfunc_env_get(mrb, 0); /* index 0 */ + return val; +} +``` + +## Fiber API + +```c +#include /* fiber types and functions */ +``` + +### Creating and Using Fibers + +```c +/* Create a fiber from a proc */ +mrb_value fiber = mrb_fiber_new(mrb, proc); + +/* Resume the fiber with arguments */ +mrb_value args[] = { mrb_fixnum_value(1) }; +mrb_value result = mrb_fiber_resume(mrb, fiber, 1, args); + +/* Check if fiber is still alive */ +mrb_bool alive = mrb_test(mrb_fiber_alive_p(mrb, fiber)); +``` + +### Yielding from C + +`mrb_fiber_yield()` can only be used as the return value of a C +function — no code may execute after it: + +```c +static mrb_value +my_yield_method(mrb_state *mrb, mrb_value self) +{ + mrb_value yield_args[] = { mrb_str_new_lit(mrb, "yielded") }; + return mrb_fiber_yield(mrb, 1, yield_args); /* must be returned directly */ +} +``` + +### Fiber States + +| State | Meaning | +| ----------------------- | -------------------------------- | +| `MRB_FIBER_CREATED` | Created but not yet resumed | +| `MRB_FIBER_RUNNING` | Currently executing | +| `MRB_FIBER_RESUMED` | Resumed another fiber | +| `MRB_FIBER_SUSPENDED` | Yielded, waiting to resume | +| `MRB_FIBER_TRANSFERRED` | Transferred via `Fiber#transfer` | +| `MRB_FIBER_TERMINATED` | Finished execution | + +**Limitation:** fibers cannot yield across C function boundaries. +You cannot call `mrb_fiber_yield` from within a C-implemented +method, except via `mrb_fiber_yield` at function return. + +## Compilation Contexts + +For advanced compilation control, use `mrb_ccontext`: + +```c +#include + +mrb_ccontext *cxt = mrb_ccontext_new(mrb); + +/* Set source filename for error messages and debug info */ +mrb_ccontext_filename(mrb, cxt, "my_script.rb"); + +/* Compile and execute with context */ +mrb_value result = mrb_load_string_cxt(mrb, "1 + 2", cxt); + +/* Clean up */ +mrb_ccontext_free(mrb, cxt); +``` + +### Context Options + +The `mrb_ccontext` structure provides several flags: + +| Field | Purpose | +| ---------------- | --------------------------------------- | +| `capture_errors` | Collect parse errors instead of raising | +| `no_exec` | Compile without executing (get RProc) | +| `no_optimize` | Disable peephole optimizations | +| `no_ext_ops` | Disable extended operand instructions | +| `keep_lv` | Preserve local variables across loads | + +### Loading with Context + +```c +mrb_load_string_cxt(mrb, code, cxt); /* string + context */ +mrb_load_nstring_cxt(mrb, code, len, cxt); /* with explicit length */ +mrb_load_file_cxt(mrb, fp, cxt); /* file + context */ +mrb_load_detect_file_cxt(mrb, fp, cxt); /* auto-detect .mrb or .rb */ +``` + +## Precompiled Bytecode + +Load `.mrb` files compiled by `mrbc`: + +```c +#include + +/* From byte array (generated by mrbc -B) */ +mrb_value result = mrb_load_irep(mrb, bytecode); + +/* From buffer with explicit size (safer, bounds-checked) */ +mrb_value result = mrb_load_irep_buf(mrb, buf, size); + +/* From file */ +FILE *fp = fopen("script.mrb", "rb"); +mrb_value result = mrb_load_irep_file(mrb, fp); +fclose(fp); + +/* Load without executing (returns irep for inspection) */ +mrb_irep *irep = mrb_read_irep(mrb, bytecode); +``` + +All `_irep` loading functions have `_cxt` variants that accept +a compilation context. + +### Deployment Pattern + +Ahead-of-time compilation eliminates the need for the compiler gem +at runtime: + +```shell +# Compile to C array +mrbc -Bscript_bytecode script.rb + +# This generates a C header with: +# const uint8_t script_bytecode[]; +``` + +```c +#include "script.mrb.h" + +mrb_state *mrb = mrb_open_core(); /* no compiler needed */ +mrb_load_irep(mrb, script_bytecode); +``` + +**Important:** wrap bytecode loading in arena save/restore when +loading multiple scripts: + +```c +int ai = mrb_gc_arena_save(mrb); +mrb_load_irep(mrb, script1); +mrb_gc_arena_restore(mrb, ai); + +ai = mrb_gc_arena_save(mrb); +mrb_load_irep(mrb, script2); +mrb_gc_arena_restore(mrb, ai); +``` + +## Symbols + +```c +/* Create symbol from string */ +mrb_sym sym = mrb_intern_lit(mrb, "name"); /* from literal */ +mrb_sym sym = mrb_intern_cstr(mrb, cstr); /* from C string */ +mrb_sym sym = mrb_intern(mrb, ptr, len); /* from pointer + length */ + +/* Symbol to string */ +const char *name = mrb_sym_name(mrb, sym); +mrb_int len; +const char *name = mrb_sym_name_len(mrb, sym, &len); +``` + +## Instance Variables + +```c +/* Get/set instance variables on an object */ +mrb_iv_get(mrb, obj, mrb_intern_lit(mrb, "@x")); +mrb_iv_set(mrb, obj, mrb_intern_lit(mrb, "@x"), val); +mrb_iv_defined(mrb, obj, mrb_intern_lit(mrb, "@x")); +mrb_iv_remove(mrb, obj, mrb_intern_lit(mrb, "@x")); +``` + +## Global Variables + +```c +mrb_gv_get(mrb, mrb_intern_lit(mrb, "$verbose")); +mrb_gv_set(mrb, mrb_intern_lit(mrb, "$verbose"), mrb_true_value()); +``` + +## Class Variables + +```c +mrb_cv_get(mrb, klass, mrb_intern_lit(mrb, "@@count")); +mrb_cv_set(mrb, klass, mrb_intern_lit(mrb, "@@count"), mrb_fixnum_value(0)); +``` + +## Loading and Executing Code + +```c +/* Load and execute a string (requires mruby-compiler gem) */ +mrb_value result = mrb_load_string(mrb, "1 + 2"); + +/* Load and execute a file */ +FILE *f = fopen("script.rb", "r"); +mrb_value result = mrb_load_file(mrb, f); +fclose(f); + +/* Load precompiled bytecode (no compiler needed) */ +mrb_value result = mrb_load_irep(mrb, bytecode_array); +``` + +## GC Arena + +When creating many temporary Ruby objects in C, use the GC arena to +prevent them from being collected prematurely: + +```c +int ai = mrb_gc_arena_save(mrb); +/* create temporary objects here */ +mrb_gc_arena_restore(mrb, ai); +``` + +See [gc-arena-howto.md](gc-arena-howto.md) for details. + +## Memory Allocation + +```c +void *p = mrb_malloc(mrb, size); /* raises on failure */ +void *p = mrb_calloc(mrb, nmemb, size); /* zero-initialized */ +void *p = mrb_realloc(mrb, ptr, size); /* resize */ +mrb_free(mrb, p); /* free */ + +/* NULL-returning variants (for custom error handling) */ +void *p = mrb_malloc_simple(mrb, size); +void *p = mrb_realloc_simple(mrb, ptr, size); +``` + +## Type Conversion + +```c +mrb_obj_as_string(mrb, val) /* to_s */ +mrb_inspect(mrb, val) /* inspect */ +mrb_any_to_s(mrb, val) /* default to_s */ +mrb_str_to_integer(mrb, str, base, badcheck) /* String to Integer */ +mrb_str_to_dbl(mrb, str, badcheck) /* String to Float */ +mrb_ensure_float_type(mrb, val) /* ensure Float */ +``` + +## Object Comparison + +```c +mrb_equal(mrb, a, b) /* Ruby == */ +mrb_eql(mrb, a, b) /* Ruby eql? */ +mrb_obj_eq(mrb, a, b) /* Ruby equal? (identity) */ +mrb_cmp(mrb, a, b) /* Ruby <=> (returns mrb_int) */ +``` + +## Object Inspection + +```c +mrb_obj_classname(mrb, obj) /* class name as C string */ +mrb_obj_class(mrb, obj) /* class as RClass* */ +mrb_obj_is_kind_of(mrb, obj, klass) /* is_a? / kind_of? */ +mrb_obj_respond_to(mrb, klass, mid) /* respond_to? */ +mrb_obj_id(obj) /* object_id */ +mrb_obj_freeze(mrb, obj) /* freeze */ +mrb_obj_dup(mrb, obj) /* dup */ +``` + +## Compile-Time Flags + +When compiling C code that uses mruby, you must use the same flags as +the library was built with. Use `mruby-config` to get them: + +```console +$ build/host/bin/mruby-config --cflags # compiler flags +$ build/host/bin/mruby-config --ldflags # linker flags +$ build/host/bin/mruby-config --libs # libraries +``` + +Key macros that affect ABI: + +| Macro | Effect | +| ----------------- | ---------------------------------------- | +| `MRB_NO_BOXING` | Struct-based values (larger, debuggable) | +| `MRB_WORD_BOXING` | Single-word values (fast, 32-bit safe) | +| `MRB_NAN_BOXING` | NaN-tagged values (default on 32-bit) | +| `MRB_NO_FLOAT` | Disable Float support | +| `MRB_INT64` | 64-bit integers | +| `MRB_USE_FLOAT32` | 32-bit floats | + +Mismatching these between library and application causes silent +data corruption. diff --git a/doc/guides/compile.md b/doc/guides/compile.md index 63f3849f4e..ae3abbf1ff 100644 --- a/doc/guides/compile.md +++ b/doc/guides/compile.md @@ -1,3 +1,5 @@ + + # Compile mruby uses Rake to compile and cross-compile all libraries and @@ -7,17 +9,17 @@ binaries. To compile mruby out of the source code you need the following tools: -* C Compiler (e.g. `gcc` or `clang`) -* Linker (e.g. `gcc` or `clang`) -* Archive utility (e.g. `ar`) -* Ruby 2.5 or later (e.g. `ruby` or `jruby`) +- C Compiler (e.g. `gcc` or `clang`) +- Linker (e.g. `gcc` or `clang`) +- Archive utility (e.g. `ar`) +- Ruby 2.5 or later (e.g. `ruby` or `jruby`) Optional: -* Git (to update mruby source and integrate mrbgems easier) -* C++ compiler (to use mrbgems which include `*.cpp`, `*.cxx`, `*.cc`) -* Bison (to compile `mrbgems/mruby-compiler/core/parse.y`) -* gperf (to compile `mrbgems/mruby-compiler/core/keywords`) +- Git (to update mruby source and integrate mrbgems easier) +- C++ compiler (to use mrbgems which include `*.cpp`, `*.cxx`, `*.cc`) +- Bison (to compile `mrbgems/mruby-compiler/core/parse.y`) +- gperf (to compile `mrbgems/mruby-compiler/core/keywords`) Note that `bison` bundled with macOS is too old to compile `mruby`. Try `brew install bison` and follow the instruction shown to update @@ -80,7 +82,7 @@ conf.toolchain :clang #### Visual Studio 2010, 2012 and 2013 Toolchain configuration for Visual Studio on Windows. If you use the -[Visual Studio Command Prompt](https://msdn.microsoft.com/en-us/library/ms229859\(v=vs.110\).aspx), +[Visual Studio Command Prompt](), you normally do not have to specify this manually, since it gets automatically detected by our build process. ```ruby @@ -103,8 +105,8 @@ in `ANDROID_STANDALONE_TOOLCHAIN`. It is possible to select which tools should be compiled during the compilation process. For example, -* `mruby` -* `mirb` +- `mruby` +- `mirb` The configuration are done via `mrbgems`. See `Mrbgems` section. @@ -117,6 +119,36 @@ set the character via `conf.file_separator`. conf.file_separator = '/' ``` +### Name of library directory + +In some environments, the `libmruby.a` file requires a different directory name than `lib`. +You can be changed to any name by the `conf.libdir_name` accessor. + +```ruby +conf.libdir_name = 'lib64' +``` + +Alternatively, it can be changed via the environment variable `MRUBY_SYSTEM_LIBDIR_NAME` when +the `rake` command is run. + +```console +$ export MRUBY_SYSTEM_LIBDIR_NAME=lib64 +$ rake clean all +``` + +NOTES: + +- This environment variable `MRUBY_SYSTEM_LIBDIR_NAME` does not affect `MRuby::CrossBuild`. + In other words, if you want to change it for `MRuby::CrossBuild`, you must set it with `MRuby::CrossBuild#libdir_name=`. +- If you want to switch this environment variable `MRUBY_SYSTEM_LIBDIR_NAME`, you must do `rake clean`. + + A bad usage example is shown below. + + ```console + $ rake clean all + $ rake MRUBY_SYSTEM_LIBDIR_NAME=lib64 install + ``` + ### C Compiler Configuration of the C compiler binary, flags and include paths. @@ -144,7 +176,7 @@ If you need an include path of header file use `search_header_path`: fail 'iconv.h not found' unless conf.cc.search_header_path 'iconv.h' ``` -If you need a full file name of header file use `search_header`: +If you need a full filename of header file use `search_header`: ```ruby # Searches `iconv.h`. @@ -177,7 +209,7 @@ conf.linker do |linker| linker.flags_before_libraries = ... linker.libraries = ... linker.flags_after_libraries = ... - linker.library_paths = .... + linker.library_paths = ... linker.option_library = ... linker.option_library_path = ... linker.link_options = ... @@ -229,12 +261,9 @@ end ### Preallocated Symbols -By far, preallocated symbols are highly compatible with the previous versions, so -we expect you won't see any problem with them. But just in case you face any -issue, you can disable preallocated symbols by specifying `conf.disable_presym`. - -In the build process, `mrbc` under cross compiling environment will be compiled -with this configuration. +Preallocated symbols are always enabled. Symbol IDs used in C source code +(via `MRB_SYM()` etc.) are resolved to compile-time constants during the +build process. ### Mrbgems @@ -265,7 +294,7 @@ There is a `RubyGem` (gem for CRuby) named `mgem` that help you to manage `mrbgems`. Try `gem install mgem`. `mgem` can show you the list of registered `mrbgems`. -See doc/mrbgems/README.md for more option about mrbgems. +See [doc/guides/mrbgems.md](mrbgems.md) for more option about mrbgems. ### Mrbtest @@ -332,11 +361,11 @@ conf.enable_debug When debugging mode is enabled -* Macro `MRB_DEBUG` would be defined. - * Which means `mrb_assert()` macro is enabled. -* Debug information of irep would be generated by `mrbc`. - * Because `-g` flag would be added to `mrbc` runner. - * You can have better backtrace of mruby scripts with this. +- Macro `MRB_DEBUG` would be defined. + - Which means `mrb_assert()` macro is enabled. +- Debug information of irep would be generated by `mrbc`. + - Because `-g` flag would be added to `mrbc` runner. + - You can have better backtrace of mruby scripts with this. ## Cross-Compilation @@ -376,7 +405,7 @@ end ## Build process -During the build process the directory `build` will be created in the +During the build process the `build` directory will be created in the root directory. The structure of this directory will look like this: ``` @@ -401,19 +430,19 @@ root directory. The structure of this directory will look like this: The compilation workflow will look like this: -* compile minimal `mrbc` from `src` and `mrblib` sources - * compile all files under `src` (object files will be stored in `build/host/mrbc/src`) - * compile `mruby-compiler` gem - * create `build/host/mrbc/lib/libmruby_core.a` out of all object files (C only) - * create `build/host/mrbc/bin/mrbc` via `mruby-bin-mrbc` gem -* compile all files under `src` and store result in `build/host/src` -* create `build/host/mrblib/mrblib.c` by compiling all `*.rb` files under `mrblib` with `build/host/mrbc/bin/mrbc` -* compile `build/host/mrblib/mrblib.c` to `build/host/mrblib/mrblib.o` -* create `build/host/lib/libmruby.a` out of all object files (C and Ruby) -* compile (normal) mrbgems specified in the configuration file -* create `build/host/lib/libmruby.a` from object files from gems and `libmruby_core.a` -* create binary commands according to binary gems (e.g. `mirb` and `mruby`) -* copy binaries under `build/host/bin` to `bin` directory +- compile minimal `mrbc` from `src` and `mrblib` sources + - compile all files under `src` (object files will be stored in `build/host/mrbc/src`) + - compile `mruby-compiler` gem + - create `build/host/mrbc/lib/libmruby_core.a` out of all object files (C only) + - create `build/host/mrbc/bin/mrbc` via `mruby-bin-mrbc` gem +- compile all files under `src` and store result in `build/host/src` +- create `build/host/mrblib/mrblib.c` by compiling all `*.rb` files under `mrblib` with `build/host/mrbc/bin/mrbc` +- compile `build/host/mrblib/mrblib.c` to `build/host/mrblib/mrblib.o` +- create `build/host/lib/libmruby.a` out of all object files (C and Ruby) +- compile (normal) mrbgems specified in the configuration file +- create `build/host/lib/libmruby.a` from object files from gems and `libmruby_core.a` +- create binary commands according to binary gems (e.g. `mirb` and `mruby`) +- copy binaries under `build/host/bin` to `bin` directory ``` _____ _____ ______ ____ ____ _____ _____ ____ @@ -460,18 +489,18 @@ compile for `i386` a directory called `i386` is created under the build directory. The cross compilation workflow starts in the same way as the normal -compilation by compiling all *native* libraries and binaries, except +compilation by compiling all _native_ libraries and binaries, except for we don't have `host/mrbc` directory (`host` directory itself works as placeholder for `mrbc`). Afterwards the cross compilation process proceeds like this: -* cross-compile all files under `src` and store result in `build/i386/src` -* create `build/i386/lib/libmruby_core.a` out of C object files -* create `build/i386/mrblib/mrblib.c` by compiling all `*.rb` files under `mrblib` with native `build/host/bin/mrbc` -* cross-compile `build/i386/mrblib/mrblib.c` to `build/i386/mrblib/mrblib.o` -* create `build/i386/lib/libmruby.a` from object files from gems and `libmruby_core.a` -* create binary commands according to binary gems (e.g. `mirb` and `mruby`) -* copy binaries under `build/host/bin` to `bin` directory +- cross-compile all files under `src` and store result in `build/i386/src` +- create `build/i386/lib/libmruby_core.a` out of C object files +- create `build/i386/mrblib/mrblib.c` by compiling all `*.rb` files under `mrblib` with native `build/host/bin/mrbc` +- cross-compile `build/i386/mrblib/mrblib.c` to `build/i386/mrblib/mrblib.o` +- create `build/i386/lib/libmruby.a` from object files from gems and `libmruby_core.a` +- create binary commands according to binary gems (e.g. `mirb` and `mruby`) +- copy binaries under `build/host/bin` to `bin` directory ``` _______________________________________________________________ @@ -527,7 +556,7 @@ After the build, you will get `libmruby.a`. You can link it to your application. For compiler options and library path, you can use `mruby-config` command for convenience. `mruby-config` command prints the configuration used for `libmruby.a`. -``` +```console $ mruby-config --help Usage: mruby-config [switches] switches: @@ -545,7 +574,7 @@ For example, when you have a C source file (`c.c`) and try to compile and link it with `libmruby.a`, you can run the following command, ``` -gcc `mruby-config --cflags` c.c `mruby-config --ldflags` `mruby-config --libs` +`mruby-config --cc --cflags` c.c `mruby-config --ldflags --libs` ``` When you use `make`, add following lines in `Makefile` @@ -557,6 +586,83 @@ LDFLAGS = `$(MRB_CONFIG) --ldflags` LIBS = `$(MRB_CONFIG) --libs` ``` +## Install + +To install the files in the `bin`, `include` and `lib` directories generated by the "host" build target into a system directory, do the following: + +```console +$ rake install +``` + +If there are multiple build targets in the build configuration file, to install the products of all build targets, do the following: + +```console +$ rake install:full +``` + +To install only one of several build targets, e.g., the "its-mine" build target, do the following: + +```console +$ rake install:full:its-mine +``` + +To install only the executable files, do the following: + +```console +$ rake install_bin # only "host" build target +$ rake install:bin # all build targets +$ rake install:bin:its-mine # only "its-mine" build target +``` + +### Installation Directory + +The installation directory is `/usr/local` for the "host" build target and `/usr/local/mruby/` for the others. +To change them, you can set the environment variable `PREFIX` or use `MRuby::Build#install_prefix = dir` in your build configuration file. + +The `PREFIX` environment variable affects all build targets and changes the `/usr/local` part. + +The `MRuby::Build#install_prefix` can be set for each individual build target. +In this case, the environment variable `PREFIX` is ignored. + +Also, if the environment variable `DESTDIR` is set, it will prepend to the path obtained by `install_prefix` to determine the final write directory. +This is intended for temporary file expansion by the user's package work. + +--- + +To summarize: + +- The default value of the environment variable `PREFIX` is `/usr/local`. +- For the "host" build target, the default value of `MRuby::Build#install_prefix` is ``. +- For a build target other than "host", the default value of `MRuby::Build#install_prefix` is `/mruby/`. +- If the environment variable `DESTDIR` is set, the actual write directory is `/`. + +### Excluded files + +In some cases there are files that you do not want to install. +In such cases, add a file path filter to the array object `MRuby::Build#install_excludes` to exclude them. + +The following is an object that can be defined as a file path filter. +The `path` variable that appears is a relative path based on `MRuby::Build#build_dir`. + +- string objects: files matched by `string.match?(path)` are excluded. +- regexp object: files matched by `regexp.match?(path)` are excluded. +- proc object: files which return true with `proc.call(path)` are excluded. + +```ruby +# exclude bin/mrbc +conf.install_excludes << exefile("bin/mrbc") + +# exclude all files under lib/ directory +conf.install_excludes << %r(^lib/) + +# exclude bin/mrbtest, but in this case it is recommended to use string instead of proc +conf.install_excludes << proc { |path| + path == exefile("bin/mrbtest") +} +``` + +By default, it contains only a proc object to exclude `libmruby_core`. + ## Tips -* If you see compilation troubles, try `rake clean` first. +- If you see compilation troubles, try `rake clean` first. diff --git a/doc/guides/debugger.md b/doc/guides/debugger.md index 18673c47b0..b2c948b1dc 100644 --- a/doc/guides/debugger.md +++ b/doc/guides/debugger.md @@ -1,3 +1,5 @@ + + # How to Use the mruby Debugger copyright (c) 2014 Specified Non-Profit Corporation mruby Forum @@ -38,7 +40,7 @@ To confirm mrdb was installed properly, run mrdb with the `--version` option: ```bash $ mrdb --version -mruby 3.1.0 (2022-05-12) +mruby 4.0.0 (2026-04-20) ``` ## 2.2 Basic Operation @@ -50,7 +52,7 @@ To invoke the mruby debugger, just type `mrdb`. To specify the script file: ```bash -$ mrdb [option] file name +$ mrdb [option] filename ``` For example: Debugging sample.rb @@ -61,20 +63,20 @@ $ mrdb sample.rb You can execute the shell commands listed below: -|command|description| -|:-:|:--| -|run|execute programs| -|step|execute stepping| -|continue|execute continuing program| -|break|configure the breaking point| -|delete|deleting the breaking points| -|disable|disabling the breaking points| -|enable|enabling the breaking points| -|info breakpoints|showing list of the breaking points| -|print|evaluating and printing the values of the mruby expressions in the script| -|list|displaying the source cords| -|help|showing help| -|quit|terminating the mruby debugger| +| command | description | +| :--------------: | :------------------------------------------------------------------------ | +| run | execute programs | +| step | execute stepping | +| continue | execute continuing program | +| break | configure the breaking point | +| delete | deleting the breaking points | +| disable | disabling the breaking points | +| enable | enabling the breaking points | +| info breakpoints | showing list of the breaking points | +| print | evaluating and printing the values of the mruby expressions in the script | +| list | displaying the source cords | +| help | showing help | +| quit | terminating the mruby debugger | ### 2.2.2 Debugging mruby Binary Files (mrb file) with mrdb @@ -82,8 +84,8 @@ You can debug the mruby binary files. #### 2.2.2.1 Debugging the binary files -* notice -To debug mruby binary files, you need to compile mruby files with option `-g`. +- notice + To debug mruby binary files, you need to compile mruby files with option `-g`. ```bash $ mrbc -g sample.rb @@ -119,7 +121,7 @@ Be aware that the breakpoint command will not check the validity of the class na You can get the current breakpoint information by the following options. -breakpoint breakpoint number : file name. line number +breakpoint breakpoint number : filename. line number breakpoint breakpoint number : [class name,] method name @@ -265,7 +267,7 @@ Example: ``` (sample.rb:1) info breakpoints Num Type Enb What -1 breakpoint y at sample.rb:3 -> file name,line number +1 breakpoint y at sample.rb:3 -> filename,line number 2 breakpoint n in Sample_class:sample_class_method -> [class:]method name 3 breakpoint y in sample_global_method ``` @@ -299,11 +301,11 @@ When you do not specify both the `first` and `last` options, you will receive th Example: ``` -Specifying file name and first row number +Specifying filename and first row number sample.rb:1) list sample2.rb:5 ``` -Specifying the file name and the first and last row number: +Specifying the filename and the first and last row number: ``` (sample.rb:1) list sample2.rb:6,7 diff --git a/doc/guides/gc-arena-howto.md b/doc/guides/gc-arena-howto.md index 1fff624df6..505e5a2d44 100644 --- a/doc/guides/gc-arena-howto.md +++ b/doc/guides/gc-arena-howto.md @@ -1,25 +1,28 @@ + + # How to use `mrb_gc_arena_save()`/`mrb_gc_arena_restore()`/`mrb_gc_protect()` _This is an English translation of [Matz's blog post][matz blog post] written in Japanese._ _Some parts are updated to reflect recent changes._ -[matz blog post]: + +[matz blog post]: https://www.rubyist.net/~matz/20130731.html When you are extending mruby using C language, you may encounter mysterious "arena overflow error" or memory leak or very slow -execution speed. This is an error indicating overflow of "GC arena" +execution speed. This is an error indicating overflow of "GC arena" implementing "conservative GC". GC (garbage collector) must ensure that object is "alive", in other -words, that it is referenced by somewhere from the program. This can be +words, that it is referenced by somewhere from the program. This can be determined by checking if the object can be directly or indirectly -referenced by root. The local variables, global variables and +referenced by root. The local variables, global variables and constants etc. are root. If program execution is performed inside mruby VM, there is nothing to worry about because GC can access all roots owned by the VM. -The problem arises when executing C functions. The object referenced +The problem arises when executing C functions. The object referenced by C variable is also "alive", but mruby GC cannot aware of this, so it might mistakenly recognize the objects referenced by only C variables as dead. @@ -27,15 +30,15 @@ variables as dead. This can be a fatal bug if the GC tries to collect a live object. In CRuby, we scan C stack area, and use C variable as root to check -whether object is alive or not. Of course, because we are accessing C +whether object is alive or not. Of course, because we are accessing C stack just as memory region, we never know it is an integer or a -pointer. We work around this by assuming that if it looks like a -pointer, then assume it as a pointer. We call it "conservative". +pointer. We work around this by assuming that if it looks like a +pointer, then assume it as a pointer. We call it "conservative". By the way, CRuby's "conservative GC" has some problems. The biggest problem is we have no way to access to the stack area in -portable way. Therefore, we cannot use this method if we'd like to +portable way. Therefore, we cannot use this method if we'd like to implement highly portable runtime, like mruby. So we came up with another plan to implement "conservative GC" in mruby. @@ -62,14 +65,14 @@ memory leak. As of this writing, mruby automatically extend arena to remember objects (See `MRB_GC_FIXED_ARENA` and `MRB_GC_ARENA_SIZE` in -doc/guides/mrbconf.md). +[doc/guides/mrbconf.md](mrbconf.md)). If you create many objects in C functions, memory usage will increase, since -GC never kicks in. This memory usage may look like memory leaks, but will also +GC never kicks in. This memory usage may look like memory leaks, but will also make execution slower as more memory will need to be allocated. With the build time configuration, you can limit the maximum size of -arena (e.g., 100). Then if you create many objects, arena overflows, +arena (e.g., 100). Then if you create many objects, arena overflows, thus you will get an "arena overflow error". To work around these problems, we have `mrb_gc_arena_save()` and @@ -93,77 +96,54 @@ In mruby, C function calls are surrounded by this save/restore, but we can further optimize memory usage by surrounding save/restore, and can avoid creating arena overflow bugs. -Let's take a real example. Here is the source code of `Array#inspect`: +Let's take a real example. Here is the source code of `Array#inspect` +(from `src/array.c`): ```c static mrb_value -inspect_ary(mrb_state *mrb, mrb_value ary, mrb_value list) +mrb_ary_to_s(mrb_state *mrb, mrb_value self) { - mrb_int i; - mrb_value s, arystr; - char head[] = { '[' }; - char sep[] = { ',', ' ' }; - char tail[] = { ']' }; - - /* check recursive */ - for(i=0; ic->ci->mid = MRB_SYM(inspect); + mrb_value ret = mrb_str_new_lit(mrb, "["); + int ai = mrb_gc_arena_save(mrb); + if (MRB_RECURSIVE_UNARY_P(mrb, MRB_SYM(inspect), self)) { + mrb_str_cat_lit(mrb, ret, "...]"); + return ret; } - - mrb_ary_push(mrb, list, ary); - - arystr = mrb_str_new_capa(mrb, 64); - mrb_str_cat(mrb, arystr, head, sizeof(head)); - - for(i=0; i 0) { - mrb_str_cat(mrb, arystr, sep, sizeof(sep)); - } - if (mrb_array_p(RARRAY_PTR(ary)[i])) { - s = inspect_ary(mrb, RARRAY_PTR(ary)[i], list); - } - else { - s = mrb_inspect(mrb, RARRAY_PTR(ary)[i]); - } - mrb_str_cat(mrb, arystr, RSTRING_PTR(s), RSTRING_LEN(s)); + for (mrb_int i=0; i0) mrb_str_cat_lit(mrb, ret, ", "); + mrb_str_cat_str(mrb, ret, mrb_inspect(mrb, RARRAY_PTR(self)[i])); mrb_gc_arena_restore(mrb, ai); } + mrb_str_cat_lit(mrb, ret, "]"); - mrb_str_cat(mrb, arystr, tail, sizeof(tail)); - mrb_ary_pop(mrb, list); - - return arystr; + return ret; } ``` -This is a real example, so slightly complicated, but bear with me. The essence of `Array#inspect` is that after stringifying each element of array using `inspect` method, we join them together so that we can get `inspect` representation of the entire array. After the `inspect` representation is created, we no longer require the -individual string representation. This means that we don't have to register +individual string representation. This means that we don't have to register these temporal objects into GC arena. -Therefore, in order to keep the arena size small; the `ary_inspect()` function +Therefore, in order to keep the arena size small; the function will do the following: -* save the position of the stack top using `mrb_gc_arena_save()`. -* get `inspect` representation of each element. -* append it to the constructing entire `inspect` representation of array. -* restore stack top position using `mrb_gc_arena_restore()`. +- save the position of the stack top using `mrb_gc_arena_save()`. +- get `inspect` representation of each element. +- append it to the constructing entire `inspect` representation of array. +- restore stack top position using `mrb_gc_arena_restore()`. Please note that the final `inspect` representation of entire array -was created before the call of `mrb_gc_arena_restore()`. Otherwise, +was created before the call of `mrb_gc_arena_restore()`. Otherwise, required temporal object may be deleted by GC. We may have an usecase where after creating many temporal objects, we'd -like to keep some of them. In this case, we cannot use the same idea -in `ary_inspect()` like appending objects to existing one. +like to keep some of them. In this case, we cannot use the same idea +in `mrb_ary_to_s()` like appending objects to existing one. Instead, after `mrb_gc_arena_restore()`, we must re-register the objects we want to keep in the arena using `mrb_gc_protect(mrb, obj)`. Use `mrb_gc_protect()` with caution because it could also lead to an "arena diff --git a/doc/guides/getting-started.md b/doc/guides/getting-started.md new file mode 100644 index 0000000000..432471bf30 --- /dev/null +++ b/doc/guides/getting-started.md @@ -0,0 +1,283 @@ + + +# Getting Started with mruby + +This guide walks you through building mruby, running your first Ruby program, +and embedding mruby in a C application. + +## Prerequisites + +You need: + +- C compiler (`gcc` or `clang`) +- Ruby 2.5 or later (for the build system) +- `rake` (bundled with Ruby) +- `git` (optional, for cloning the source) + +## Building mruby + +Clone the repository and build: + +```console +$ git clone https://github.com/mruby/mruby.git +$ cd mruby +$ rake +``` + +This compiles the default configuration and produces: + +- `bin/mruby` — Ruby script interpreter +- `bin/mirb` — interactive Ruby shell +- `bin/mrbc` — bytecode compiler +- `build/host/lib/libmruby.a` — library for embedding + +## Running Ruby Code + +### Interactive shell + +```console +$ bin/mirb +mirb - Pair interactive mruby +> puts "Hello, mruby!" +Hello, mruby! + => nil +> 1 + 2 + => 3 +``` + +### Running a script file + +Create `hello.rb`: + +```ruby +puts "Hello from mruby!" +``` + +Run it: + +```console +$ bin/mruby hello.rb +Hello from mruby! +``` + +### One-liner + +```console +$ bin/mruby -e 'puts "Hello!"' +Hello! +``` + +## Compiling to Bytecode + +mruby can compile Ruby scripts to bytecode (`.mrb` files) for faster +loading and deployment without source code: + +```console +$ bin/mrbc hello.rb # produces hello.mrb +$ bin/mruby -b hello.mrb # run bytecode +Hello from mruby! +``` + +You can also generate C source from Ruby scripts: + +```console +$ bin/mrbc -Bhello_code hello.rb # produces hello.c with byte array +``` + +This generates a C file with a `const uint8_t hello_code[]` array that +can be loaded with `mrb_load_irep()` in your C application. + +## Embedding mruby in C + +The primary use case of mruby is embedding in C/C++ applications. + +### Minimal example + +Create `embed.c`: + +```c +#include +#include + +int main(void) +{ + mrb_state *mrb = mrb_open(); + if (!mrb) return 1; + + mrb_load_string(mrb, "puts 'Hello from embedded mruby!'"); + if (mrb->exc) { + mrb_print_error(mrb); + } + + mrb_close(mrb); + return 0; +} +``` + +### Compile and link + +Use `mruby-config` to get the correct compiler and linker flags: + +```console +$ gcc -I include `build/host/bin/mruby-config --cflags` embed.c \ + `build/host/bin/mruby-config --ldflags --libs` -o embed +$ ./embed +Hello from embedded mruby! +``` + +**Important**: Always use `mruby-config --cflags` when compiling code +that uses mruby. The build configuration may define macros (such as +`MRB_NO_BOXING` or `MRB_USE_BIGINT`) that change the internal data +layout. Compiling without these flags causes silent data corruption. + +### Calling Ruby from C + +```c +#include +#include +#include +#include + +int main(void) +{ + mrb_state *mrb = mrb_open(); + + /* Define a Ruby method */ + mrb_load_string(mrb, "def greet(name) \"Hello, #{name}!\" end"); + + /* Call it from C */ + mrb_value result = mrb_funcall(mrb, mrb_top_self(mrb), + "greet", 1, mrb_str_new_lit(mrb, "World")); + printf("%s\n", mrb_str_to_cstr(mrb, result)); + + mrb_close(mrb); + return 0; +} +``` + +### Defining C functions callable from Ruby + +```c +#include +#include + +static mrb_value +my_add(mrb_state *mrb, mrb_value self) +{ + mrb_int a, b; + mrb_get_args(mrb, "ii", &a, &b); + return mrb_fixnum_value(a + b); +} + +int main(void) +{ + mrb_state *mrb = mrb_open(); + + /* Define method on Kernel (available everywhere) */ + mrb_define_method(mrb, mrb->kernel_module, "my_add", + my_add, MRB_ARGS_REQ(2)); + + mrb_load_string(mrb, "puts my_add(3, 4)"); /* prints 7 */ + + mrb_close(mrb); + return 0; +} +``` + +## Loading Precompiled Bytecode + +For deployment without the compiler gem, precompile your Ruby code: + +```console +$ bin/mrbc -Bruby_code app.rb +``` + +Then load in C: + +```c +#include +#include +#include "app.c" /* contains ruby_code[] */ + +int main(void) +{ + mrb_state *mrb = mrb_open(); + mrb_load_irep(mrb, ruby_code); + if (mrb->exc) { + mrb_print_error(mrb); + } + mrb_close(mrb); + return 0; +} +``` + +This approach does not require the `mruby-compiler` gem, resulting in +a smaller binary. + +## Customizing the Build + +mruby's functionality is controlled by the build configuration file. +The default is `build_config/default.rb`. + +### Using a custom configuration + +```console +$ MRUBY_CONFIG=build_config/minimal.rb rake +``` + +### Selecting gems + +Gems add features to mruby. A minimal configuration: + +```ruby +MRuby::Build.new do |conf| + conf.toolchain :gcc + + # Core language extensions + conf.gem core: 'mruby-array-ext' + conf.gem core: 'mruby-string-ext' + conf.gem core: 'mruby-hash-ext' + + # Tools + conf.gem core: 'mruby-bin-mruby' # mruby command + conf.gem core: 'mruby-bin-mirb' # interactive shell + conf.gem core: 'mruby-bin-mrbc' # bytecode compiler + + # Compiler (needed for mrb_load_string) + conf.gem core: 'mruby-compiler' +end +``` + +### Using a gembox + +Gemboxes are predefined collections of gems: + +```ruby +MRuby::Build.new do |conf| + conf.toolchain :gcc + conf.gembox 'default' # standard set of gems +end +``` + +## Amalgamation (Single-File Build) + +For the simplest integration, use amalgamation to combine all mruby +source into a single `mruby.c` and `mruby.h`: + +```console +$ rake amalgam +$ gcc -I build/host/amalgam your_app.c build/host/amalgam/mruby.c -o your_app -lm +``` + +See [amalgamation.md](amalgamation.md) for details. + +## What's Next + +- [Language Features](language.md) — Ruby subset supported by mruby +- [C API Reference](capi.md) — values, classes, methods, error handling +- [Compile](compile.md) — full build system reference +- [mrbgems](mrbgems.md) — creating and using gems +- [Linking](link.md) — linking `libmruby` to applications +- [Build-time Configurations](mrbconf.md) — compile-time options +- [GC Arena](gc-arena-howto.md) — managing GC arena in C extensions +- [Limitations](../limitations.md) — differences from CRuby diff --git a/doc/guides/hier.md b/doc/guides/hier.md new file mode 100644 index 0000000000..02b9af14d9 --- /dev/null +++ b/doc/guides/hier.md @@ -0,0 +1,62 @@ + + +# The mruby directory structure + +```text ++- 💎 mruby/ The top directory of mruby. + | + +- 📁 .github/ GitHub configuration files for mruby project management. + | + +- 📁 benchmark/ Benchmarking files for mruby. + | + +- 📁 bin/ Links to temporary executables after build. Auto-created. + | + +- 📁 build/ Default build output destination for mruby. Auto-created. + | | + | +- 📁 repos/ The git clone destination directory for GEMs that depend on the build configuration. + | | + | +- 📁 host/ The "host" build output directory. + | + +- 📁 build_config/ Build configuration files for various environments. + | + +- 📁 doc/ Documentation for mruby. + | | + | +- 📁 guides/ Documentation for general users. + | | + | +- 📁 internal/ Documentation for internal implementations for developers. + | + +- 📁 examples/ Examples of mruby usages. + | | + | +- 📁 mrbgems/ Examples for creating custom GEM for mruby. + | + +- 📁 include/ C header files required when using mruby. + | + +- 📁 lib/ Ruby scripts used for building mruby. + | + +- 📁 mrbgems/ A library collection of features not provided by mruby core only. + | | See doc/guides/mrbgems.md file + | | + | +- 📁 mruby-*/ The directory of each GEMs. + | | + | +- 📃 *.gembox A collection of GEMs grouped by features and purposes. + | + +- 📁 mrblib/ The core Ruby scripts that makes up the main body of mruby. + | + +- 📁 oss-fuzz/ Source code for The fuzzing-test. + | See https://github.com/google/oss-fuzz + | + +- 📁 src/ The core C source code that makes up the main body of mruby. + | + +- 📁 tasks/ Rake tasks at build-time. + | | + | +- 📁 toolchains/ Definitions for the compiler, linker, archiver, etc. for each toolchain. + | + +- 📁 test/ Ruby scripts needed for testing mruby. + | | + | +- 📁 t/ mruby test cases. + | + +- 📁 tools/ External programs used for the mruby project. + | + +- 📁 lrama/ LALR parser generator as an alternative to bison. + Import from https://github.com/ruby/lrama +``` diff --git a/doc/guides/language.md b/doc/guides/language.md new file mode 100644 index 0000000000..31c9b074d8 --- /dev/null +++ b/doc/guides/language.md @@ -0,0 +1,430 @@ + + +# mruby Language Features + +This guide describes the Ruby language features supported by mruby 4.0. +mruby implements a subset of the Ruby language, optimized for embedded use. +For a list of specific behavioral differences, see +[limitations.md](../limitations.md). + +**If you are coming from CRuby**, note these major differences upfront: + +- No `require` or `load` — all code is linked at build time +- No `defined?` keyword — use `respond_to?`, `const_defined?`, etc. +- No refinements (`refine`, `using`) +- No `Encoding` class — UTF-8 opt-in via `MRB_UTF8_STRING` +- Fibers cannot yield across C function boundaries +- Integer size varies by platform and boxing mode +- Operators cannot be overridden by user code + +See [Key Differences from CRuby](#key-differences-from-cruby) for +the full list. + +## Syntax + +### Keywords + +mruby supports the following keywords: + +`BEGIN`, `END`, `alias`, `and`, `begin`, `break`, `case`, `class`, +`def`, `do`, `else`, `elsif`, `end`, `ensure`, `false`, `for`, `if`, +`in`, `module`, `next`, `nil`, `not`, `or`, `redo`, `rescue`, +`retry`, `return`, `self`, `super`, `then`, `true`, `undef`, +`unless`, `until`, `when`, `while`, `yield` + +Magic variables: `__FILE__`, `__LINE__`, `__ENCODING__`, `__method__` + +**Not supported:** `defined?` (use `respond_to?`, `const_defined?`, +etc. instead), `refinements` (`using`, `refine`). + +### Classes and Modules + +```ruby +class Animal + attr_accessor :name + + def initialize(name) + @name = name + end + + def speak + "..." + end +end + +class Dog < Animal + def speak + "Woof!" + end +end + +module Greetable + def greet + "Hello, I'm #{name}" + end +end + +class Dog + include Greetable +end +``` + +All standard class and module features are supported: inheritance, +`include`, `prepend`, `extend`, `attr_reader`/`attr_writer`/ +`attr_accessor`, `public`/`private`/`protected` visibility, class +variables (`@@var`), class methods, and `super`. + +### Methods + +```ruby +# Required, optional, rest, post-rest, keyword, and block arguments +def example(a, b = 1, *rest, last, key:, opt_key: nil, **kwargs, &block) +end + +# Endless method definition +def double(x) = x * 2 +``` + +### Blocks and Procs + +```ruby +[1, 2, 3].each { |n| puts n } + +[1, 2, 3].each do |n| + puts n +end + +square = Proc.new { |x| x * x } +square = proc { |x| x * x } +double = lambda { |x| x * 2 } +double = ->(x) { x * 2 } +``` + +### Control Flow + +```ruby +# if/unless (both statement and modifier forms) +if condition + # ... +elsif other + # ... +else + # ... +end + +result = value if condition +result = value unless condition + +# case/when +case obj +when String then "string" +when Integer then "integer" +else "other" +end + +# Loops +while condition + # ... +end + +until condition + # ... +end + +for item in collection + # ... +end + +# Loop control +break # exit loop +next # skip to next iteration +redo # restart current iteration +retry # restart begin/rescue block +``` + +### Exception Handling + +```ruby +begin + risky_operation +rescue ArgumentError => e + handle_arg_error(e) +rescue StandardError => e + handle_error(e) +ensure + cleanup +end + +raise "something went wrong" +raise ArgumentError, "bad argument" +``` + +**Note:** `raise` without arguments in a `rescue` clause does not +re-raise the current exception. Capture and re-raise explicitly: + +```ruby +begin + risky_operation +rescue => e + log(e) + raise e # explicit re-raise required +end +``` + +### Strings + +```ruby +"double-quoted with #{interpolation}" +'single-quoted literal' +heredoc = <<~HEREDOC + indented heredoc + with #{interpolation} +HEREDOC +``` + +### Regular Expressions + +Regular expressions require an external gem such as `mruby-regexp-pcre` +or `mruby-onig-regexp`. Without a regexp gem, `Regexp` literals +(`/pattern/`) are not available. + +### Pattern Matching (Limited) + +Only rightward assignment with simple variable binding is supported: + +```ruby +expr => var # assigns expr to var +``` + +`case/in` syntax, array/hash patterns, guard clauses, pin operator, +find patterns, and alternative patterns are **not** supported. + +## Numeric Types + +mruby's numeric type sizes depend on the boxing mode and platform. + +### Integer + +| Configuration | Range | +| -------------------------------------- | ---------------- | +| 64-bit word boxing (default on 64-bit) | roughly +/- 2^62 | +| 32-bit word boxing (default on 32-bit) | roughly +/- 2^30 | +| NaN boxing (64-bit only) | -2^31 to 2^31-1 | + +Integer overflow raises a `RangeError` unless the `mruby-bigint` gem +is included, in which case integers automatically promote to +arbitrary precision. + +### Float + +By default, `Float` uses 64-bit `double`. Compile-time options: + +- `MRB_USE_FLOAT32`: use 32-bit `float` instead +- `MRB_NO_FLOAT`: disable floating-point entirely + +With word boxing on 64-bit, many float values are stored inline +(without heap allocation) using a rotation encoding. + +### Additional Numeric Types (via gems) + +- **Rational** (`mruby-rational`): exact rational arithmetic +- **Complex** (`mruby-complex`): complex number support +- **Bigint** (`mruby-bigint`): arbitrary-precision integers + +## Core Classes + +These classes are always available in mruby (no gem required): + +| Class | Notes | +| ------------- | -------------------------------------- | +| Object | Base class for all objects | +| Module | Module definition and mixin | +| Class | Class definition and instantiation | +| NilClass | Singleton `nil` | +| TrueClass | Singleton `true` | +| FalseClass | Singleton `false` | +| Integer | Fixed-precision integer | +| Float | Floating-point (unless `MRB_NO_FLOAT`) | +| Symbol | Interned identifier | +| String | Mutable byte string | +| Array | Ordered collection | +| Hash | Key-value mapping | +| Range | Interval representation | +| Proc | Closure / callable object | +| Exception | Exception hierarchy root | +| StandardError | Common error base | + +### Core Modules + +| Module | Notes | +| ---------- | ----------------------------------------- | +| Kernel | Core methods (`puts`, `p`, `raise`, etc.) | +| Comparable | Comparison operators via `<=>` | +| Enumerable | Collection iteration methods | + +## Standard Library (via gemboxes) + +mruby's standard library is organized into gemboxes. The `default` +gembox includes all of the below. Use this table to find which +gembox provides the class or feature you need: + +### Classes and Modules + +| Class/Module | Gembox | Gem | +| --------------------- | ---------- | ----------------- | +| Fiber | stdlib | mruby-fiber | +| Enumerator | stdlib | mruby-enumerator | +| Enumerator::Lazy | stdlib | mruby-enum-lazy | +| Set | stdlib | mruby-set | +| ObjectSpace | stdlib | mruby-objectspace | +| Time | stdlib-ext | mruby-time | +| Struct | stdlib-ext | mruby-struct | +| Data | stdlib-ext | mruby-data | +| Random | stdlib-ext | mruby-random | +| IO, File | stdlib-io | mruby-io | +| Socket | stdlib-io | mruby-socket | +| Dir | stdlib-io | mruby-dir | +| Errno | stdlib-io | mruby-errno | +| Math | math | mruby-math | +| Rational | math | mruby-rational | +| Complex | math | mruby-complex | +| Bigint | math | mruby-bigint | +| Method, UnboundMethod | metaprog | mruby-method | + +### Methods and Features + +| Feature | Gembox | Gem | +| ----------------------------- | ---------- | ------------------ | +| `catch`/`throw` | stdlib | mruby-catch | +| `Kernel#sprintf`, `String#%` | stdlib-ext | mruby-sprintf | +| `Array#pack`, `String#unpack` | stdlib-ext | mruby-pack | +| `Kernel#rand` | stdlib-ext | mruby-random | +| `Kernel#eval` | metaprog | mruby-eval | +| `Kernel#binding` | metaprog | mruby-binding | +| `Proc#binding` | metaprog | mruby-proc-binding | +| Runtime compiler | metaprog | mruby-compiler | + +### Core Class Extensions + +The `stdlib` gembox also extends built-in classes with additional +methods. These are included by default: + +| Extension | Examples | +| ----------------------- | ---------------------------------------------- | +| Array extensions | `#dig`, `#union`, `#difference` | +| Hash extensions | `#dig`, `#transform_keys`, `#transform_values` | +| String extensions | `#encode`, `#bytes`, `#chars` | +| Numeric extensions | `Integer#digits`, `Integer#pow` | +| Comparable extensions | `#clamp` | +| Enumerable extensions | `#sort_by`, `#min_by`, `#max_by`, `#tally` | +| Range extensions | `#size`, `#cover?` | +| Proc extensions | `#<<`, `#>>` (composition) | +| Symbol extensions | `#to_proc` | +| Object extensions | `#then`, `#yield_self` | +| Kernel extensions | `#__method__` | +| Class/Module extensions | `Module#name` | + +### Gembox Summary + +| Gembox | Contents | Notes | +| ------------ | --------------------------------------------- | -------------------------------------------- | +| `stdlib` | Core class extensions, Fiber, Enumerator, Set | Works with `MRB_NO_STDIO` and `MRB_NO_FLOAT` | +| `stdlib-ext` | Time, Struct, Data, Random, sprintf, pack | Works with `MRB_NO_STDIO` and `MRB_NO_FLOAT` | +| `stdlib-io` | IO, File, Dir, Socket, Errno | Requires stdio | +| `math` | Math, Rational, Complex, Bigint | Works with `MRB_NO_STDIO` | +| `metaprog` | eval, binding, Method, compiler | Works with `MRB_NO_STDIO` and `MRB_NO_FLOAT` | +| `default` | All of the above + CLI tools | Full installation | + +## Key Differences from CRuby + +### No Runtime Loading + +mruby has no `require` or `load`. All code (gems, libraries) is +linked at build time. To add functionality, include the appropriate +gem in your build configuration: + +```ruby +MRuby::Build.new do |conf| + conf.gem :core => "mruby-time" +end +``` + +### No `defined?` Keyword + +The `defined?` keyword raises `NameError` instead of returning a +type string or `nil`. Use alternatives: + +```ruby +# Instead of: defined?(Foo) +Object.const_defined?(:Foo) + +# Instead of: defined?(@var) +instance_variable_defined?(:@var) + +# Instead of: defined?(method_name) +respond_to?(:method_name) +``` + +### Fiber Limitations + +Fibers cannot cross C function boundaries. You cannot yield from a +fiber inside a C-implemented method. Only `mrb_fiber_yield` at +function return is supported. + +### Array and String Subclasses + +`Array` and `String` do not support instance variables to reduce memory. +This means subclassing `Array` or `String` and adding `@fields` will raise an error. + +### Operator Overriding + +Operators of primitive classes cannot be overridden by user code. +Redefining `String#+` has no effect on the behavior of the `+` operator. + +### Module Loading Hooks + +`include`/`prepend`/`extend` do not call `append_features`/ +`prepend_features`/`extend_object` hooks. The module is included +directly. + +### Small Hash Optimization + +For small hashes, `#hash` is not called on keys. Custom `#hash` +methods may not execute for small hash tables. + +### No Refinements + +Module refinements (`refine`, `using`) are not supported. + +### No Encoding Class + +There is no `Encoding` class. String encoding is either pure bytes +or UTF-8 (opt-in via `MRB_UTF8_STRING` compile flag). + +### `nil?` in Conditionals + +Redefining `nil?` has no effect on conditional expressions. +The VM uses direct nil checks for performance. + +### Integer Precision + +Integer size varies by boxing mode (see [Numeric Types](#numeric-types) +above). Code relying on 64-bit integer precision may behave +differently on 32-bit or NaN boxing configurations. + +## Build-Time Configuration + +Key compile-time macros that affect language behavior: + +| Macro | Effect | +| -------------------- | ---------------------------------- | +| `MRB_NO_FLOAT` | Remove all float support | +| `MRB_USE_FLOAT32` | Use 32-bit float instead of double | +| `MRB_UTF8_STRING` | Enable UTF-8 string handling | +| `MRB_INT32` | Force 32-bit integer | +| `MRB_INT64` | Force 64-bit integer | +| `MRB_STR_LENGTH_MAX` | Max string length (default 1MB) | +| `MRB_ARY_LENGTH_MAX` | Max array length (default 2^17) | + +See [mrbconf.md](mrbconf.md) for the complete list of configuration +macros. diff --git a/doc/guides/link.md b/doc/guides/link.md index 82d8ad7679..978aa62594 100644 --- a/doc/guides/link.md +++ b/doc/guides/link.md @@ -1,14 +1,16 @@ + + # Linking `libmruby` to your application You have two ways to link `libmruby` to your application. -* using executable gem. -* using normal compilation process +- using executable gem. +- using normal compilation process ## Executable Gems If your application is relatively small, `mrbgem` is an easier way to -create the executable. By tradition, the gem name start with +create the executable. By tradition, the gem name start with `mruby-bin-`, e.g. `mruby-bin-debugger`. ### `mrbgem.rake` file @@ -28,7 +30,7 @@ end ### Source tree structure The source file for the gem executable should be in -`/tools/`. Currently, we support C or C++ source code +`/tools/`. Currently, we support C or C++ source code (`.c`, `.cpp`, `.cxx`, `.cc`) for the executable. Ruby source files are not supported. Put the functionality in the different gem and specify dependency to it in `mrbgem.rake`. @@ -45,16 +47,16 @@ your build script (e.g. `Makefile`). You need to specify compiler options that are compatible to mruby configuration, for example: -* `-I` to specify the place for mruby header files -* `-D` to specify mruby configuration macros +- `-I` to specify the place for mruby header files +- `-D` to specify mruby configuration macros To retrieve compiler options used to build `mruby`, you can use `mruby-config` command with following options: -* `--cc` compiler name -* `--cflags` options passed to compiler +- `--cc` compiler name +- `--cflags` options passed to compiler -``` +```console $ mruby-config --cflags -std=gnu99 -g -O3 -Wall -DMRB_GC_FIXED_ARENA -I/home/matz/work/mruby/include -I/home/matz/work/mruby/build/host/include ``` @@ -66,12 +68,12 @@ compatible to mruby configuration. To retrieve linker options, you can use `mruby-config` with following options: -* `--ld` linker name -* `--ldflags` options passed to linker -* `--ldflags-before-libs` options passed to linker before linked libraries -* `--libs` linked libraries +- `--ld` linker name +- `--ldflags` options passed to linker +- `--ldflags-before-libs` options passed to linker before linked libraries +- `--libs` linked libraries -``` +```console $ mruby-config --ldflags -L/home/matz/work/mruby/build/host/lib diff --git a/doc/guides/memory.md b/doc/guides/memory.md new file mode 100644 index 0000000000..5f36528509 --- /dev/null +++ b/doc/guides/memory.md @@ -0,0 +1,211 @@ + + +# Memory Allocation + +In mruby, you can customize how memory is allocated in two ways: + +1. **Provide your own `malloc()`/`realloc()`/`free()`** +2. **Override `mrb_basic_alloc_func()`** + +--- + +## 1. Provide your own `malloc()`/`realloc()`/`free()` + +On platforms without a full C standard library —such as many microcontrollers— you may need to supply your own implementations of `malloc()`, `realloc()`, and `free()`. mruby’s allocator calls directly into these functions, so replacing them lets you control **every** allocation and deallocation performed by your entire program, including any third‑party libraries you link against. + +Keep in mind: + +- Calling `realloc(NULL, size)` must behave like `malloc(size)`. +- Calling `free(NULL)` must be a no‑op. + +Simply define these three functions in your code (or link against a library that provides them), and mruby — along with all other code in your process — will use your versions automatically. + +## 2. Override `mrb_basic_alloc_func()` + +Inside mruby, all of its own memory allocations go through a single function called mrb_basic_alloc_func() (formerly mrb_default_allocf()). By defining this function in your application before linking, you can intercept and handle **only** the memory operations initiated by mruby itself without affecting other libraries or parts of your program. + +```c +// Example signature: +// void* mrb_basic_alloc_func(void* ptr, size_t size); +``` + +Implement mrb_basic_alloc_func() in your code, and mruby will invoke it for every internal allocation, reallocation, and free request. + +### Expected behavior + +- `mrb_basic_alloc_func(NULL, size)` should allocate `size` bytes, just like `malloc(size)`. +- `mrb_basic_alloc_func(ptr, size)` should resize the existing block at `ptr` to `size` bytes, just like `realloc(ptr, size)`. +- `mrb_basic_alloc_func(ptr, 0)` should free the block at `ptr`, just like `free(ptr)`. + +--- + +## Summary of effects: + +- **Custom `malloc`/`realloc`/`free`**: replaces allocation behavior globally (mruby + all other code and third‑party libraries). + +- **Custom `mrb_basic_alloc_func()`**: replaces allocation behavior only for mruby’s internal use, leaving other libraries’ allocations untouched. + +## Migration note + +If you are moving from the old API: + +1. **Removal of `mrb_open_allocf()`** + - \_Old: + + ```c + mrb_state *mrb = mrb_open_allocf(my_allocf, ud); + ``` + + - \_New: + + ```c + // No allocf parameter; set up your hook via mrb_basic_alloc_func definition. + mrb_state *mrb = mrb_open_core(); + ``` + +2. **`mrb_open_core()` takes no arguments** + - Simply drop any allocf or user-data arguments, and redefine `mrb_basic_alloc_func` as you need. + +3. **No more `mrb_allocf` type** + - Definitions using the `mrb_allocf` typedef can be removed; implement `mrb_basic_alloc_func()` with the signature below: + + ```c + void* mrb_basic_alloc_func(void *ptr, size_t size); + ``` + +4. **`mrb_basic_alloc_func` signature change** + - _Old:_ + + ```c + void* mrb_default_allocf(mrb_state *mrb, void *ptr, size_t size, void *ud); + ``` + + - _New:_ + + ```c + void* mrb_basic_alloc_func(void *ptr, size_t size); + ``` + +--- + +### Code examples + +- **Old style**: + + ```c + static void* + my_allocf(mrb_state *mrb, void *ud, void *ptr, size_t size) + { + // ...custom logic... + } + + mrb_state *mrb = mrb_open_allocf(my_allocf, some_ud); + ``` + +- **New style**: + + ```c + // Define your hook before creating the state: + void* + mrb_basic_alloc_func(void *ptr, size_t size) + { + // ...custom logic... + } + + mrb_state *mrb = mrb_open_core(); + ``` + +--- + +## 3. Heap Regions: Contiguous Memory for GC + +By default, mruby allocates GC heap pages individually via `malloc()`. +On embedded targets with multiple memory banks (e.g., STM32 CCM+SRAM, +ESP32 PSRAM+IRAM), you may want to place heap pages in a specific +memory region. `mrb_gc_add_region()` lets you provide a contiguous +buffer that mruby carves into heap pages. + +### API + +```c +#include + +int mrb_gc_add_region(mrb_state *mrb, void *start, size_t size); +``` + +- **`start`**: pointer to a contiguous memory buffer. +- **`size`**: size of the buffer in bytes. +- **Returns**: number of heap pages carved from the buffer, or 0 if + the buffer is too small. + +The buffer is aligned internally to pointer size. Each page is +approximately 40 KB on 64-bit systems (24 KB on 32-bit). The caller +retains ownership of the buffer and must keep it valid for the +lifetime of the `mrb_state`. + +### Example: Static buffer + +```c +#include +#include + +/* 256 KB static buffer -- about 6 pages on 64-bit */ +static char heap_buf[256 * 1024]; + +int main(void) +{ + mrb_state *mrb = mrb_open(); + int pages = mrb_gc_add_region(mrb, heap_buf, sizeof(heap_buf)); + /* pages are immediately available for object allocation */ + + /* ... use mrb ... */ + + mrb_close(mrb); /* region pages are cleaned up; buffer is not freed */ + return 0; +} +``` + +### Example: MCU with multiple RAM banks + +```c +/* STM32 with 64 KB CCM and 128 KB SRAM */ +extern char __ccm_start[], __ccm_end[]; /* linker symbols */ +extern char __sram_start[], __sram_end[]; + +mrb_state *mrb = mrb_open(); +mrb_gc_add_region(mrb, __ccm_start, __ccm_end - __ccm_start); +mrb_gc_add_region(mrb, __sram_start, __sram_end - __sram_start); +``` + +### How it works + +When `mrb_gc_add_region()` is called, mruby: + +1. Aligns the buffer start to pointer size. +2. Divides the buffer into `mrb_heap_page`-sized chunks. +3. Initializes each page's freelist and links it into the GC heap. +4. Records the region in a descriptor for O(1) pointer-to-page mapping. + +Region pages participate in the normal GC cycle (mark-and-sweep) like +any other heap page. The only differences are: + +- **Never freed**: the GC will not call `free()` on region pages, even + if all objects on a page are dead. The page stays in the heap with an + empty freelist, ready for reuse. +- **Fallback**: when all region pages are full, mruby falls back to + `malloc()` for new pages as usual. +- **Cleanup**: `mrb_close()` frees the internal region descriptor but + does not free the buffer itself. + +### Sizing + +The page size is controlled by `MRB_HEAP_PAGE_SIZE` (default: 1024 slots). +Each page occupies: + +| Platform | Slot size | Page size (approx) | +| -------- | --------- | ------------------ | +| 64-bit | 40 bytes | ~41 KB | +| 32-bit | 24 bytes | ~25 KB | + +To estimate pages for a given buffer: `pages = buffer_size / sizeof(mrb_heap_page)`. +Each page provides `MRB_HEAP_PAGE_SIZE` object slots. diff --git a/doc/guides/mrbconf.md b/doc/guides/mrbconf.md index 989f8cc735..cc438653e3 100644 --- a/doc/guides/mrbconf.md +++ b/doc/guides/mrbconf.md @@ -1,234 +1,292 @@ + + # mruby configuration macros +## The configuration file + +You can do the build configuration in the build configuration file. The default +configuration file is `build_config/default.rb`. + +You can specify your own configuration file by the `MRUBY_CONFIG` environment +variable (you can use `CONFIG` for shorthand for `MRUBY_CONFIG`). If the path +doesn't exist, `build_config/${MRUBY_CONFIG}.rb` is used. + ## How to use these macros -You can use mrbconfs with following ways: +Just add the configuration value to the `MRuby::Build#defines` attribute. +This is the same for `MRuby::CrossBuild`. + +```ruby +# build_config.rb + +MRuby::Build.new do |conf| + ... + conf.defines << 'MRB_GC_FIXED_ARENA' + conf.defines << 'MRB_NO_METHOD_CACHE' + ... +end +``` -* Write them in `mrbconf.h`. - * Using compiler flags is preferred when building a cross binaries or multiple mruby binaries - since it's easier to use different mrbconf per each `MRuby::Build`. - * Most flags can be enabled by just commenting in. -* Pass them as compiler flags. - * Make sure you pass the same flags to all compilers since some mrbconf(e.g., `MRB_GC_FIXED_ARENA`) - changes `struct` layout and cause memory access error when C and other language(e.g., C++) is mixed. +**_NOTE_** + +- Use common definitions (`conf.defines`) instead of per-compiler definitions (e.g., `conf.cc.defines`) unless there is a special reason not to. +- It is now deprecated to edit the `include/mruby/mrbconf.h` file or give it directly as a compiler flag, as was the case before. ## stdio setting `MRB_NO_STDIO` -* When defined `` functions won't be used. -* Some features will be disabled when this is enabled: - * `mrb_irep` load/dump from/to file. - * Compiling mruby script from a file. - * Printing features in **src/print.c**. +- When defined `` functions won't be used. +- Some features will be disabled when this is enabled: + - `mrb_irep` load/dump from/to file. + - Compiling mruby script from a file. + - Printing features in **src/print.c**. ## Debug macros `MRB_USE_DEBUG_HOOK` -* When defined code fetch hook and debug OP hook will be enabled. -* When using any of the hook set function pointer `code_fetch_hook` and/or `debug_op_hook` of `mrb_state`. -* Fetch hook will be called before any OP. -* Debug OP hook will be called when dispatching `OP_DEBUG`. +- When defined code fetch hook and debug OP hook will be enabled. +- When using any of the hook set function pointer `code_fetch_hook` and/or `debug_op_hook` of `mrb_state`. +- Fetch hook will be called before any OP. +- Debug OP hook will be called when dispatching `OP_DEBUG`. `MRB_DEBUG` -* When defined `mrb_assert*` macro will be defined with macros from ``. -* Could be enabled via `enable_debug` method of `MRuby::Build`. +- When defined `mrb_assert*` macro will be defined with macros from ``. +- Could be enabled via `enable_debug` method of `MRuby::Build`. ## Stack configuration `MRB_STACK_EXTEND_DOUBLING` -* If defined doubles the stack size when extending it. -* Otherwise extends stack with `MRB_STACK_GROWTH`. +- If defined doubles the stack size when extending it. +- Otherwise extends stack with 1.5x growth (minimum `MRB_STACK_GROWTH`). `MRB_STACK_GROWTH` -* Default value is `128`. -* Used in stack extending. -* Ignored when `MRB_STACK_EXTEND_DOUBLING` is defined. +- Default value is `128`. +- Minimum stack growth size when extending. +- Ignored when `MRB_STACK_EXTEND_DOUBLING` is defined. `MRB_STACK_MAX` -* Default value is `0x40000 - MRB_STACK_GROWTH`. -* Raises `RuntimeError` when stack size exceeds this value. +- Default value is `0x40000 - MRB_STACK_GROWTH`. +- Raises `RuntimeError` when stack size exceeds this value. ## Primitive type configuration `MRB_USE_FLOAT32` -* When defined single precision floating-point type(C type `float`) is used as `mrb_float`. -* Otherwise, double precision floating-point type(C type `double`) is used as `mrb_float`. +- When defined single-precision floating-point type(C type `float`) is used as `mrb_float`. +- Otherwise, double-precision floating-point type(C type `double`) is used as `mrb_float`. `MRB_NO_FLOAT` -* When defined removes floating-point numbers from mruby. -* It makes mruby easier to handle in "Micro-controller without FPU" and "Kernel Space". +- When defined removes floating-point numbers from mruby. +- It makes mruby easier to handle in "Micro-controller without FPU" and "Kernel Space". `MRB_INT32` -* When defined, or `MRB_INT64` are not defined on 32-bit CPU mode, `mrb_int` will be defined as `int32_t`. -* Conflicts with `MRB_INT64`. +- When defined, or `MRB_INT64` are not defined on 32-bit CPU mode, `mrb_int` will be defined as `int32_t`. +- Conflicts with `MRB_INT64`. `MRB_INT64` -* When defined, or `MRB_INT32` are not defined on 64-bit CPU mode, `mrb_int` will be defined as `int64_t`. -* Conflicts with `MRB_INT32`. +- When defined, or `MRB_INT32` are not defined on 64-bit CPU mode, `mrb_int` will be defined as `int64_t`. +- Conflicts with `MRB_INT32`. +- On 32-bit platforms, `MRB_INT64` requires `MRB_NO_BOXING` because heap-allocated `RInteger` needs 8-byte alignment that the GC heap may not guarantee with word or NaN boxing. ## Garbage collector configuration `MRB_GC_STRESS` -* When defined full GC is emitted per each `RBasic` allocation. -* Mainly used in memory manager debugging. -* If defined at the same time as `MRB_DEBUG`, full GC is emitted also per each heap allocation (`mrb_malloc()` or etc.). +- When defined full GC is emitted per each `RBasic` allocation. +- Mainly used in memory manager debugging. +- If defined at the same time as `MRB_DEBUG`, full GC is emitted also per each heap allocation (`mrb_malloc()` or etc.). This configuration slows down mruby execution by a factor of 2 to 3 or even more. `MRB_GC_TURN_OFF_GENERATIONAL` -* When defined turns generational GC by default. +- When defined turns generational GC off by default. `MRB_GC_FIXED_ARENA` -* When defined used fixed size GC arena. -* Raises `RuntimeError` when this is defined and GC arena size exceeds `MRB_GC_ARENA_SIZE`. -* Useful tracking unnecessary mruby object allocation. +- When defined used fixed size GC arena. +- Raises `RuntimeError` when this is defined and GC arena size exceeds `MRB_GC_ARENA_SIZE`. +- Useful tracking unnecessary mruby object allocation. `MRB_GC_ARENA_SIZE` -* Default value is `100`. -* Ignored when `MRB_GC_FIXED_ARENA` isn't defined. -* Defines fixed GC arena size. +- Default value is `100`. +- Ignored when `MRB_GC_FIXED_ARENA` isn't defined. +- Defines fixed GC arena size. `MRB_HEAP_PAGE_SIZE` -* Defines value is `1024`. -* Specifies number of `RBasic` per each heap page. +- Default value is `1024`. +- Specifies number of `RBasic` per each heap page. ## Memory pool configuration `POOL_ALIGNMENT` -* Default value is `4`. -* If you're allocating data types that requires alignment more than default value define the -largest value of required alignment. +- Default value is `4`. +- If you're allocating data types that requires alignment more than default value define the + largest value of required alignment. `POOL_PAGE_SIZE` -* Default value is `16000`. -* Specifies page size of pool page. -* Smaller the value is increases memory overhead. +- Default value is `16000`. +- Specifies page size of pool page. +- Smaller the value is increases memory overhead. ## State atexit configuration `MRB_FIXED_STATE_ATEXIT_STACK` -* If defined enables fixed size `mrb_state` atexit stack. -* Raises `RuntimeError` when `mrb_state_atexit` call count to same `mrb_state` exceeds -`MRB_FIXED_STATE_ATEXIT_STACK_SIZE`'s value. +- If defined enables fixed size `mrb_state` atexit stack. +- Raises `RuntimeError` when `mrb_state_atexit` call count to same `mrb_state` exceeds + `MRB_FIXED_STATE_ATEXIT_STACK_SIZE`'s value. `MRB_FIXED_STATE_ATEXIT_STACK_SIZE` -* Default value is `5`. -* If `MRB_FIXED_STATE_ATEXIT_STACK` isn't defined this macro is ignored. +- Default value is `5`. +- If `MRB_FIXED_STATE_ATEXIT_STACK` isn't defined this macro is ignored. ## `mrb_value` configuration `MRB_ENDIAN_BIG` -* If defined compiles mruby for big endian machines. -* Used in `MRB_NAN_BOXING`. -* Some mrbgem use this mrbconf. +- If defined compiles mruby for big endian machines. +- Used in `MRB_NAN_BOXING`. +- Some mrbgem use this mrbconf. `MRB_NAN_BOXING` -* If defined represent `mrb_value` in boxed `double`. -* Conflicts with `MRB_USE_FLOAT32` and `MRB_NO_FLOAT`. +- If defined represent `mrb_value` in boxed `double`. +- Conflicts with `MRB_USE_FLOAT32` and `MRB_NO_FLOAT`. `MRB_WORD_BOXING` -* If defined represent `mrb_value` as a word. -* If defined `Float` will be a mruby object with `RBasic`. +- If defined represent `mrb_value` as a word (natural unit of data for the processor). +- Default boxing mode when none is specified. +- On 64-bit platforms, floats are inlined using rotation encoding. +- On 32-bit platforms, floats are heap-allocated as `RFloat` objects. + +`MRB_NO_BOXING` + +- If defined represent `mrb_value` as a C struct (occupies 2 words). +- Most portable but least memory-efficient representation. +- Required for `MRB_INT64` on 32-bit platforms. +- Default for `host-debug` configuration. + +`MRB_WORDBOX_NO_INLINE_FLOAT` + +- If defined disables inline float values in word boxing. +- All floats are heap-allocated as `RFloat` objects. +- Automatically defined on 32-bit platforms (64-bit `double` cannot fit in a 32-bit word). +- Only meaningful with `MRB_WORD_BOXING`. ## Reduce heap memory configuration `MRB_USE_ETEXT_RO_DATA_P` -* Use `etext` and `edata` section addresses defined by the linker to detect read-only data. -* Those addresses are widely available, but not portable, nor standardized. -* This macro is defined by default on User-mode Linux. +- Use `etext` and `edata` section addresses defined by the linker to detect read-only data. +- Those addresses are widely available, but not portable, nor standardized. +- Defined by default on User-mode Linux. `MRB_NO_DEFAULT_RO_DATA_P` -* Define this macro when the default `mrb_ro_data_p()` does not work for any reason. +- Define this macro when the default `mrb_ro_data_p()` does not work for any reason. `MRB_USE_CUSTOM_RO_DATA_P` -* Please try if `MRB_USE_LINK_TIME_RO_DATA_P` is not available. -* The `mrb_ro_data_p()` function is implemented by the user in an arbitrary file. -* The prototype declaration is `mrb_bool mrb_ro_data_p(const char *ptr)`. -* Return `TRUE` if `ptr` is in the read-only section, otherwise return `FALSE`. +- Define to provide your own `mrb_ro_data_p()` implementation. +- The prototype declaration is `mrb_bool mrb_ro_data_p(const char *ptr)`. +- Return `TRUE` if `ptr` is in the read-only section, otherwise return `FALSE`. ## Other configuration -`MRB_MALLOC_TRIM` +`MRB_USE_MALLOC_TRIM` -* call malloc_trim(0) for each mrb_full_gc() call +- Call `malloc_trim(0)` for each `mrb_full_gc()` call. `MRB_UTF8_STRING` -* Adds UTF-8 encoding support to character-oriented String instance methods. -* If it isn't defined, they only support the US-ASCII encoding. +- Adds UTF-8 encoding support to character-oriented String instance methods. +- If it isn't defined, they only support the US-ASCII encoding. `MRB_STR_LENGTH_MAX` -* The maximum length of strings (default 1MB) -* set this value to zero to skip the check +- The maximum length of strings (default 1048576). +- Set this value to zero to skip the check. `MRB_ARY_LENGTH_MAX` -* The maximum length of strings (default 1MB) -* set this value to zero to skip the check +- The maximum length of arrays (default 131072). +- Set this value to zero to skip the check. `MRB_FUNCALL_ARGC_MAX` -* Default value is `16`. -* Specifies 4th argument(`argc`) max value of `mrb_funcall`. -* Raises `ArgumentError` when the `argc` argument is bigger then this value `mrb_funcall`. +- Default value is `16`. +- Specifies 4th argument(`argc`) max value of `mrb_funcall`. +- Raises `ArgumentError` when the `argc` argument is bigger then this value `mrb_funcall`. -`KHASH_DEFAULT_SIZE` +`KHASH_INITIAL_SIZE` -* Default value is `32`. -* Specifies default size of khash table bucket. -* Used in `kh_init_ ## name` function. +- Default value is `32`. +- Specifies initial size of khash table bucket. +- Used in `kh_init_ ## name` function. `MRB_NO_METHOD_CACHE` -* Disable method cache to save memory. +- Disable method cache to save memory. `MRB_METHOD_CACHE_SIZE` -* Default value is `256`. -* Ignored if `MRB_NO_METHOD_CACHE` is defined. -* Need to be the power of 2. +- Default value is `256`. +- Ignored if `MRB_NO_METHOD_CACHE` is defined. +- Need to be the power of 2. -`MRB_USE_METHOD_T_STRUCT` +`MRB_USE_ALL_SYMBOLS` -* Use C struct to represent `mrb_method_t` -* No `MRB_USE_METHOD_T_STRUCT` requires highest 2 bits of function pointers to be zero -* Define this macro on machines that use higher bits of pointers +- Make it available `Symbol.all_symbols` in `mrbgems/mruby-symbol-ext` +- Increase heap memory usage. -`MRB_USE_ALL_SYMBOLS` +`MRB_USE_VM_SWITCH_DISPATCH` -* Make it available `Symbol.all_symbols` in `mrbgems/mruby-symbol-ext` -* Increase heap memory usage. +- Turn on switch dispatch in VM loop. +- Otherwise, computed goto (direct threading) is used when supported by the compiler. -`MRB_USE_ALL_SYMBOLS` +`MRB_SYMBOL_LINEAR_THRESHOLD` + +- Default value is `256`. +- Threshold for switching symbol table from linear search to hash table. + +## Tuning profiles + +Predefined profiles adjust several macros together for specific +deployment targets. Define one of the following: + +`MRB_CONSTRAINED_BASELINE_PROFILE` + +- For micro controllers. +- Enables `MRB_NO_METHOD_CACHE`, reduces `KHASH_INITIAL_SIZE` to `16`, + and `MRB_HEAP_PAGE_SIZE` to `256`. + +`MRB_BASELINE_PROFILE` + +- Default mruby profile. No additional changes. + +`MRB_MAIN_PROFILE` -* Make it available `Symbol.all_symbols` in `mrbgems/mruby-symbol-ext` -* Increase heap memory usage. +- For desktop computers or workstations. +- Increases `MRB_METHOD_CACHE_SIZE` to `1024` and `MRB_HEAP_PAGE_SIZE` + to `4096`. -`MRB_NO_DIRECT_THREADING` +`MRB_HIGH_PROFILE` -* Turn off direct threading optimization in VM loop +- For long-lived server processes. +- Increases `MRB_METHOD_CACHE_SIZE` to `4096` and `MRB_HEAP_PAGE_SIZE` + to `4096`. diff --git a/doc/guides/mrbgems.md b/doc/guides/mrbgems.md index 9cd28c5fad..07f59f7f73 100644 --- a/doc/guides/mrbgems.md +++ b/doc/guides/mrbgems.md @@ -1,12 +1,14 @@ + + # mrbgems -mrbgems is a library manager to integrate C and Ruby extension in an easy and +mrbgems is a library manager to integrate C and Ruby extensions in an easy and standardised way into mruby. Conventionally, each mrbgem name is prefixed by `mruby-`, e.g. `mruby-time` for a gem that provides `Time` class functionality. ## Usage -You have to activate mrbgems explicitly in your build configuration. To add +You have to activate mrbgems explicitly in your build configuration. To add a gem, add the following line to your build configuration file, for example: ```ruby @@ -21,9 +23,9 @@ conf.gem 'examples/mrbgems/ruby_extension_example' In that case, -* if your build configuration file is in the `build_config` directory, it's +- if your build configuration file is in the `build_config` directory, it's relative from `MRUBY_ROOT`. -* otherwise, it is relative from the directory where your build configuration is. +- otherwise, it is relative from the directory where your build configuration is. A remote GIT repository location for a GEM is also supported: @@ -61,7 +63,7 @@ mrbgem from the core or mgem-list. Note that if more than one git-based gem has the same base name (i.e. the default checkout directory name), it is (now) an error **UNLESS** they have the same repository URL, branch name and -commit-id (i.e. checksum hash). You can bypass this by explicitly +commit-id (i.e. checksum hash). You can bypass this by explicitly importing your preferred version **first** and setting the `canonical:` option to `true`: @@ -72,19 +74,49 @@ conf.gem github: 'me/mruby-yaml', branch: 'my-hacked-branch', canonical: true If you do this, the system will (mostly) silently ignore other attempts to clone a gem with this name. -Note that this only affects cloning the gem from git. It does not -resolve version conflicts. If the version as specified in the gem's +Note that this only affects cloning the gem from git. It does not +resolve version conflicts. If the version as specified in the gem's rakefile is incompatible with a dependency, your build will still fail. +### Tweak the gem in your build configuration file + +You can give blocks in the `conf.gem` call to make adjustments for +environments where the original gem does not expect them: + +```ruby +conf.gem core: "mruby-bin-mirb" do |g| + # For cross build to NetBSD + g.linker.libraries = %w(edit termcap) +end +``` + +However, it should be used with caution, as it may deviate from the intent +of the gem's author. + +### Gem Testing + +If you enable unit tests in your build with `enable_test`, tests will be +generated for all gems and their dependencies by default. If necessary, it is +possible to suppress tests for a specific gem like so: + +```ruby +conf.gem 'mruby-noisygem' do |g| + g.skip_test = true +end +``` + +However, it is considered best practice to leave all tests enabled whenever +possible. A warning message will be generated for each gem with disabled tests. + ## GemBox There are instances when you wish to add a collection of mrbgems into mruby at once, or be able to substitute mrbgems based on configuration, without having to -add each gem to your build configuration file. A packaged collection of mrbgems -is called a GemBox. A GemBox is a file that contains a list of mrbgems to load +add each gem to your build configuration file. A packaged collection of mrbgems +is called a GemBox. A GemBox is a file that contains a list of mrbgems to load into mruby, in the same format as if you were adding them to the build config -via `config.gem`, but wrapped in an `MRuby::GemBox` object. GemBoxes are +via `config.gem`, but wrapped in an `MRuby::GemBox` object. GemBoxes are loaded into mruby via `config.gembox 'boxname'`. Below we have created a GemBox containing `mruby-time` and `mrbgems-example`: @@ -96,7 +128,7 @@ MRuby::GemBox.new do |conf| end ``` -As mentioned, the GemBox uses the same conventions as `MRuby::Build`. The GemBox +As mentioned, the GemBox uses the same conventions as `MRuby::Build`. The GemBox must be saved with a `.gembox` extension inside the `mrbgems` directory to be picked up by mruby. @@ -140,16 +172,20 @@ The maximal GEM structure looks like this: | +- src/ <- Source for C extension | + +- ports// <- Platform-specific C sources (see Platform Ports) + | +- tools/ <- Source for Executable (in C) | +- test/ <- Test code (Ruby) ``` -The folder `mrblib` contains pure Ruby files to extend mruby. The folder `src` -contains C/C++ files to extend mruby. The folder `include` contains C/C++ header -files. The folder `test` contains C/C++ and pure Ruby files for testing purposes +The `mrblib` directory contains pure Ruby files to extend mruby. The `src` directory +contains C/C++ files to extend mruby. The `include` directory contains C/C++ header +files. The `test` directory contains C/C++ and pure Ruby files for testing purposes which will be used by `mrbtest`. `mrbgem.rake` contains the specification to compile C and Ruby files. `README.md` is a short description of your GEM. +The optional `ports//` directories hold platform-specific C sources +selected at build time; see [Platform Ports](#platform-ports-ports) below. ## Build process @@ -171,15 +207,15 @@ the GEM functionality to tools like `mruby` and `mirb`. The following properties can be set inside your `MRuby::Gem::Specification` for information purpose: -* `spec.license` or `spec.licenses` (A single license or a list of them under which this GEM is licensed) -* `spec.author` or `spec.authors` (Developer name or a list of them) -* `spec.version` (Current version) -* `spec.description` (Detailed description) -* `spec.summary` - * One line short description of mrbgem. - * Printed in build summary of rake when set. -* `spec.homepage` (Homepage) -* `spec.requirements` (External requirements as information for user) +- `spec.license` or `spec.licenses` (A single license or a list of them under which this GEM is licensed) +- `spec.author` or `spec.authors` (Developer name or a list of them) +- `spec.version` (Current version) +- `spec.description` (Detailed description) +- `spec.summary` + - One line short description of mrbgem. + - Printed in build summary of rake when set. +- `spec.homepage` (Homepage) +- `spec.requirements` (External requirements as information for user) The `license` and `author` properties are required in every GEM! @@ -210,29 +246,29 @@ The version requirements and default gem information are optional. Version requirement supports following operators: -* '=': is equal -* '!=': is not equal -* '>': is greater -* '<': is lesser -* '>=': is equal or greater -* '<=': is equal or lesser -* '~>': is equal or greater and is lesser than the next major version - * example 1: '~> 2.2.2' means '>= 2.2.2' and '< 2.3.0' - * example 2: '~> 2.2' means '>= 2.2.0' and '< 3.0.0' +- '=': is equal +- '!=': is not equal +- '>': is greater +- '<': is lesser +- '>=': is equal or greater +- '<=': is equal or lesser +- '~>': is equal or greater and is lesser than the next major version + - example 1: '~> 2.2.2' means '>= 2.2.2' and '< 2.3.0' + - example 2: '~> 2.2' means '>= 2.2.0' and '< 3.0.0' When more than one version requirements is passed, the dependency must satisfy all of it. You can have default gem to use as dependency when it's not defined in your build configuration. When the last argument of `add_dependency` call is `Hash`, it will be treated as default gem information. -Its format is same as argument of method `MRuby::Build#gem`, expect that it can't be treated as path gem location. +Its format is same as argument of method `MRuby::Build#gem`, except that it can't be treated as path gem location. When a special version of dependency is required, use `MRuby::Build#gem` in the build configuration to override default gem. If you have conflicting GEMs use the following method: -* `spec.add_conflict(gem, *requirements)` - * The `requirements` argument is same as in `add_dependency` method. +- `spec.add_conflict(gem, *requirements)` + - The `requirements` argument is same as in `add_dependency` method. like following code: @@ -251,18 +287,18 @@ end In case your GEM has more complex build requirements you can use the following options additionally inside your GEM specification: -* `spec.cc.flags` (C compiler flags) -* `spec.cc.defines` (C compiler defines) -* `spec.cc.include_paths` (C compiler include paths) -* `spec.linker.flags` (Linker flags) -* `spec.linker.libraries` (Linker libraries) -* `spec.linker.library_paths` (Linker additional library path) -* `spec.bins` (Generate binary file) -* `spec.rbfiles` (Ruby files to compile) -* `spec.objs` (Object files to compile) -* `spec.test_rbfiles` (Ruby test files for integration into mrbtest) -* `spec.test_objs` (Object test files for integration into mrbtest) -* `spec.test_preload` (Initialization files for mrbtest) +- `spec.cc.flags` (C compiler flags) +- `spec.cc.defines` (C compiler defines) +- `spec.cc.include_paths` (C compiler include paths) +- `spec.linker.flags` (Linker flags) +- `spec.linker.libraries` (Linker libraries) +- `spec.linker.library_paths` (Linker additional library path) +- `spec.bins` (Generate binary file) +- `spec.rbfiles` (Ruby files to compile) +- `spec.objs` (Object files to compile) +- `spec.test_rbfiles` (Ruby test files for integration into mrbtest) +- `spec.test_objs` (Object test files for integration into mrbtest) +- `spec.test_preload` (Initialization files for mrbtest) You also can use `spec.mruby.cc` and `spec.mruby.linker` to add extra global parameters for the compiler and linker. @@ -272,12 +308,92 @@ Your GEM can export include paths to another GEMs that depends on your GEM. By default, `/...absolute path.../{GEM_NAME}/include` will be exported. So it is recommended not to put GEM's local header files on include/. -These exports are retroactive. +These exports are transitive. For example: when B depends on C and A depends on B, A will get include paths exported by C. -Exported include_paths are automatically appended to GEM local include_paths by rake. +Exported `include_paths` are automatically appended to GEM local `include_paths` by rake. You can use `spec.export_include_paths` accessor if you want more complex build. +### Settings for GEM build commands/tasks + +When the block argument passed to `MRuby::Gem::Specification.new` is executed, +the GEM build commands/tasks for the `MRuby::Build` instance may not yet be finalized. +In most cases, modifying the GEM build commands/tasks within the block passed to +`MRuby::Gem::Specification.new` is not a problem. + +However, you may need to perform GEM build commands/tasks after the GEM build +commands/tasks for the `MRuby::Build` instance have been finalized. +In such cases, you can achieve this by passing a block argument to +`MRuby::Gem::Specification#build_settings` within the block passed to +`MRuby::Gem::Specification.new`. + +```ruby +spec.build_settings do + spec.cc.flags << "-any_flags" +end +``` + +**NOTE**: Using the `build_settings` method will cause GEM's all build command settings +directly written in the block passed to `MRuby::Gem::Specification.new` to be ignored. + +## Platform Ports (ports/) + +A gem may ship platform-specific C sources under `ports//` +subdirectories. The build configuration selects which port name(s) +are active via `conf.ports`, and each gem compiles the sources of +the first matching `ports//` it ships: + +```ruby +MRuby::Build.new do |conf| + conf.toolchain + conf.ports :posix # selects ports/posix/ across all gems +end +``` + +`conf.ports` accepts multiple names as a fallback chain. Each gem +picks the first directory in the list that exists on its side: + +```ruby +conf.ports :rp2040, :posix # try rp2040 per-gem, else posix +``` + +Host builds auto-detect `:posix` or `:win` when `conf.ports` is +not set. Sources outside `ports/` (i.e. `src/`) are always +compiled regardless of the port selection. + +### External HAL Provider Gems + +A third-party gem may replace another gem's bundled port at build +time. A gem whose name matches `hal--` is recognized +as the external HAL provider for the target gem whose name's last +`-`-separated segment is ``. For example, `hal-task-glib` +overrides the HAL of `mruby-task`; `hal-io-uring` would override +`mruby-io`. The HAL provider must depend on its target so it can +`#include` the target's HAL header: + +```ruby +MRuby::Gem::Specification.new('hal-task-glib') do |spec| + spec.license = 'MIT' + spec.author = 'Your Name' + spec.summary = 'GLib HAL for mruby-task' + spec.add_dependency 'mruby-task', core: 'mruby-task' + # src/ contains the HAL implementation +end +``` + +When a matching HAL provider gem is present in the build, the +target gem's `ports//` sources are dropped from the +build automatically. The HAL provider's own sources supply the +implementation instead, avoiding duplicate symbol errors at link +time. Loading two gems that match the same `hal--*` +prefix is a build error. + +The naming convention is the only signal -- no spec attribute, +no `add_dependency` flag is required. A gem author who wants to +contribute an additional bundled port upstream sends a PR adding +`/ports//`; a gem author who prefers to ship +out of tree publishes a `hal--` gem instead. + ## C Extension mruby can be extended with C. This is possible by using the C API to @@ -334,9 +450,9 @@ mrb_c_extension_example_gem_final(mrb_state* mrb) { mruby can be extended with pure Ruby. It is possible to override existing classes or add new ones in this way. Put all Ruby files into the `mrblib` -folder. +directory. -### Pre-Conditions +### Preconditions none @@ -362,13 +478,13 @@ none mruby can be extended with C and Ruby at the same time. It is possible to override existing classes or add new ones in this way. Put all Ruby files -into the `mrblib` folder and all C files into the `src` folder. +into the `mrblib` directory and all C files into the `src` directory. mruby codes under `mrblib` directory would be executed after gem init C -function is called. Make sure *mruby script* depends on *C code* and -*C code* doesn't depend on *mruby script*. +function is called. Make sure _mruby script_ depends on _C code_ and +_C code_ doesn't depend on _mruby script_. -### Pre-Conditions +### Preconditions See C and Ruby example. @@ -397,7 +513,7 @@ See C and Ruby example. ## Binary gems Some gems can generate executables under `bin` directory. Those gems are called -binary gems. Names of binary gems are conventionally prefixed by `mruby-bin`, +binary gems. Names of binary gems are conventionally prefixed by `mruby-bin`, e.g. `mruby-bin-mirb` and `mruby-bin-strip`. To specify the name of executable, you need to specify `spec.bins` in the diff --git a/doc/guides/rom-method-table.md b/doc/guides/rom-method-table.md new file mode 100644 index 0000000000..f6470fc35c --- /dev/null +++ b/doc/guides/rom-method-table.md @@ -0,0 +1,416 @@ + + +# ROM Method Tables + +ROM method tables allow C methods to be registered using static data +stored in ROM (read-only memory) rather than heap-allocated RAM. This +saves significant memory on embedded systems where RAM is scarce. + +## Motivation + +In a default mruby build, `mrb_open()` builds ~40 classes with ~700+ +method entries at startup. Each method entry is heap-allocated via +individual `mrb_define_method_id()` calls. On a constrained MCU, this +consumes ~14KB of RAM for method table metadata alone. + +ROM method tables eliminate this cost by placing method metadata in +static `const` data at compile time. Only runtime mutations (e.g., +reopening a class to add methods) trigger heap allocation. + +## Architecture + +### Chained Layers + +Each class has a method table (`mt`) pointer to a linked list of +`mrb_mt_tbl` layers: + +``` +String.mt -> [mutable layer] -> [string_ext ROM] -> [string_core ROM] -> NULL +``` + +**Lookup** walks the chain front-to-back, returning the first match. +The method cache makes repeated lookups O(1), so the chain walk +only occurs on cache misses. + +**Mutation** uses copy-on-write (COW): if the top layer is read-only, +a new mutable layer is created in front of it. The ROM data is never +modified. + +``` +Before: String.mt -> [string_ext ROM] -> [string_core ROM] -> NULL + +After String.define_method(:foo): + String.mt -> [mutable: foo] -> [string_ext ROM] -> [string_core ROM] -> NULL +``` + +### Memory Layout + +Each `mrb_mt_tbl` stores method entries as an array of `mrb_mt_entry` +structs, each combining a function pointer, a symbol key, and flags: + +``` +ptr -> [ entry[0] | entry[1] | ... | entry[N-1] ] + |<-- mrb_mt_entry: { val, key, flags } -->| +``` + +Values are `union mrb_mt_ptr` (function pointer or proc pointer). Keys +are pure `mrb_sym` (no flag encoding). Flags are a separate `uint32_t` +field that stores visibility, func/proc type, and argument spec. + +Entries are searched linearly, so source code order does not matter. +The method cache makes repeated lookups O(1), so the linear scan +only occurs on cache misses. + +### Per-State Wrappers + +The `const mrb_mt_entry[]` arrays are truly static and shared across +the process. However, the `mrb_mt_tbl` wrapper (which carries the +`next` pointer for chaining) is heap-allocated per `mrb_state` by +`MRB_MT_INIT_ROM()`. This allows multiple `mrb_state` instances in +the same process to each have independent method table chains, even +when linking to the same const entries. + +## How to Define a ROM Method Table + +### Step 1: Define the Static Data + +Include `` (which provides `mrb_mt_entry`, +`MRB_MT_ENTRY()`, and flag constants) and define the ROM entries: + +```c +#include + +static const mrb_mt_entry my_rom_entries[] = { + MRB_MT_ENTRY(my_method_a, MRB_SYM(method_a), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(my_method_b, MRB_SYM(method_b), MRB_ARGS_NONE()), + MRB_MT_ENTRY(my_method_eq, MRB_OPSYM(eq), MRB_ARGS_REQ(1)), +}; +``` + +### Step 2: Register in the Init Function + +Replace `mrb_define_method_id()` calls with a single +`MRB_MT_INIT_ROM()` call: + +```c +void +mrb_mruby_mygem_gem_init(mrb_state *mrb) +{ + struct RClass *c = mrb_define_class_id(mrb, MRB_SYM(MyClass), mrb->object_class); + MRB_MT_INIT_ROM(mrb, c, my_rom_entries); +} +``` + +`MRB_MT_INIT_ROM()` allocates a per-state wrapper and pushes the ROM +layer onto the class's method table chain. + +### Step 3: Verify + +Build and run the test suite. ROM tables are semantically transparent +to Ruby code. + +## Reference + +### Data Types + +Defined in `include/mruby/class.h`: + +```c +union mrb_mt_ptr { + mrb_func_t func; /* first member: see MRB_MT_ENTRY note */ + const struct RProc *proc; +}; + +typedef struct mrb_mt_entry { + union mrb_mt_ptr val; + mrb_sym key; /* pure symbol ID (no flags packed) */ + uint32_t flags; /* method flags + aspec */ +} mrb_mt_entry; + +typedef struct mrb_mt_tbl { + int size; + int alloc; /* bit 30: MRB_MT_READONLY_BIT */ + mrb_mt_entry *ptr; + struct mrb_mt_tbl *next; /* next (lower-priority) layer, or NULL */ +} mrb_mt_tbl; +``` + +### Macros + +```c +/* ROM table entry: 3rd param is MRB_ARGS_*() optionally OR'd with + MRB_MT_PRIVATE. The macro OR's in MRB_MT_FUNC automatically. + `func` must be the first member of `union mrb_mt_ptr` so that + positional initialization works on legacy C++ compilers that do + not accept C99 designated initializers. */ +#define MRB_MT_ENTRY(fn, sym, flags) \ + { { (fn) }, (sym), (flags) | MRB_MT_FUNC } + +/* Extract aspec from combined flags */ +#define MRB_MT_ASPEC(flags) ((mrb_aspec)((flags) & 0xffffff)) + +/* Allocate a per-state ROM wrapper and push onto class method chain */ +#define MRB_MT_INIT_ROM(mrb, cls, entries) \ + mrb_mt_init_rom(mrb, cls, entries, \ + (int)(sizeof(entries)/sizeof(entries[0]))) +``` + +### Flags + +| Flag | Value | Description | +| ---------------- | ------- | ----------------------------------- | +| `MRB_MT_FUNC` | (1<<24) | C function (auto-set by macro) | +| `MRB_MT_PUBLIC` | 0 | Public visibility (default) | +| `MRB_MT_PRIVATE` | (1<<25) | Private visibility (in entry param) | + +The third parameter to `MRB_MT_ENTRY()` is an `MRB_ARGS_*()` +expression optionally OR'd with `MRB_MT_PRIVATE`. The aspec value +occupies bits 0-23 and the visibility flag occupies bit 25; these +ranges do not overlap, so the values are simply OR'd together. +`MRB_MT_FUNC` is set automatically. The no-arg optimization is +derived at runtime from `aspec == 0` (`MRB_ARGS_NONE()`). + +**How to write entries:** + +- **`MRB_MT_ENTRY(fn, sym, MRB_ARGS_*(...))`**: Public method. +- **`MRB_MT_ENTRY(fn, sym, MRB_ARGS_*(...) | MRB_MT_PRIVATE)`**: + Private method. +- Use the same `MRB_ARGS_*()` macros as `mrb_define_method_id()`. + +### Symbol Macros + +Use the presym macros for keys. See `doc/guides/symbol.md` for the +full list: + +```c +MRB_SYM(size) /* size */ +MRB_SYM_B(chomp) /* chomp! */ +MRB_SYM_Q(frozen) /* frozen? */ +MRB_SYM_E(name) /* name= */ +MRB_OPSYM(add) /* + */ +MRB_OPSYM(eq) /* == */ +MRB_OPSYM(aref) /* [] */ +MRB_OPSYM(aset) /* []= */ +MRB_OPSYM(cmp) /* <=> */ +MRB_IVSYM(name) /* @name */ +``` + +### API + +```c +void mrb_mt_init_rom(mrb_state *mrb, struct RClass *c, + const mrb_mt_entry *entries, int size); +``` + +Allocates a per-state `mrb_mt_tbl` wrapper for the const entries and +pushes it onto the class's method table chain. The wrapper is tracked +in `mrb->rom_mt` and freed at `mrb_close()`. Use the `MRB_MT_INIT_ROM` +macro to auto-compute the size. Multiple calls push additional layers, +which is how extension gems add methods to core classes. + +## Entry Correspondence + +Each `MRB_MT_ENTRY()` bundles a function pointer with its method name +and flags in a single line. Their order in the source code does not +matter, but keeping related methods +together improves readability. + +**Method aliases** (two names for the same function) are expressed as +separate entries sharing the same function pointer: + +```c +static const mrb_mt_entry str_rom_entries[] = { + MRB_MT_ENTRY(mrb_str_size, MRB_SYM(size), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_str_size, MRB_SYM(length), MRB_ARGS_NONE()), +}; +``` + +## Conditional Methods + +Methods that depend on build configuration (e.g., `MRB_NO_FLOAT`) can +use `#ifdef` directly inside the ROM entries array. The `sizeof` in +`MRB_MT_INIT_ROM()` automatically adjusts to the number of entries +that survive preprocessing: + +```c +static const mrb_mt_entry integer_rom_entries[] = { + MRB_MT_ENTRY(int_to_s, MRB_SYM(to_s), MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(int_add, MRB_OPSYM(add), MRB_ARGS_REQ(1)), +#ifndef MRB_NO_FLOAT + MRB_MT_ENTRY(int_to_f, MRB_SYM(to_f), MRB_ARGS_NONE()), +#endif +}; +``` + +For conditional methods on a **different class**, use a separate ROM +table wrapped in the `#ifdef`: + +```c +#ifndef MRB_NO_FLOAT +static const mrb_mt_entry float_rom_entries[] = { ... }; +#endif + +void mrb_init_numeric(mrb_state *mrb) { + MRB_MT_INIT_ROM(mrb, integer, integer_rom_entries); +#ifndef MRB_NO_FLOAT + MRB_MT_INIT_ROM(mrb, fl, float_rom_entries); +#endif +} +``` + +## Extension Gems + +Extension gems use exactly the same pattern. Since gems are +initialized after core, calling `MRB_MT_INIT_ROM()` pushes the gem's +ROM layer in front of the core ROM layer: + +```c +/* mrbgems/mruby-string-ext/src/string.c */ + +static const mrb_mt_entry string_ext_rom_entries[] = { ... }; + +void mrb_mruby_string_ext_gem_init(mrb_state *mrb) +{ + struct RClass *s = mrb->string_class; + MRB_MT_INIT_ROM(mrb, s, string_ext_rom_entries); +} +``` + +After initialization, String's method table chain looks like: + +``` +String.mt -> [string_ext ROM, 53 methods] + -> [string_core ROM, 46 methods] + -> NULL +``` + +A gem may also define ROM tables for multiple classes: + +```c +void mrb_mruby_mygem_gem_init(mrb_state *mrb) +{ + MRB_MT_INIT_ROM(mrb, mrb->string_class, string_mygem_rom_entries); + MRB_MT_INIT_ROM(mrb, mrb->integer_class, integer_mygem_rom_entries); +} +``` + +## Methods That Cannot Use ROM Tables + +Some methods must remain as `mrb_define_method_id()` calls: + +- **Class methods** (`mrb_define_class_method_id()`): ROM tables + register instance methods only. +- **Module functions** (`mrb_define_module_function_id()`): Same + reason. +- **Methods requiring `mrb_state*` at definition time**: For example, + methods that create frozen RProc objects during init. +- **Methods on dynamically created classes**: Classes created at + init time (not stored in `mrb->xxx_class`) that require + `mrb_define_class()` to obtain the class pointer. +- **Cross-class methods** (methods on a class the gem does not own): + Each ROM table adds a 16-byte `mrb_mt_tbl` layer to the target + class's chain. For 1-2 methods, this overhead exceeds the savings. + Use `mrb_define_method_id()` instead -- cross-class methods share + the target class's existing mutable layer. + +These methods are added after `MRB_MT_INIT_ROM()` and go into the +mutable layer that sits in front of the ROM chain. + +## Runtime Behavior + +### Open Classes (COW) + +Ruby's open classes work transparently. When a Ruby program or C code +adds a method to a class with a ROM table, the COW mechanism creates a +mutable layer: + +```ruby +class String + def my_custom_method + 42 + end +end +"hello".my_custom_method #=> 42 +"hello".size #=> 5 (still found in ROM layer) +``` + +### Method Removal + +`remove_method` works on ROM methods using a tombstone marker. When a +method in a ROM layer is removed, a special entry (`MRB_MT_FUNC` flag with +`func=NULL`) is inserted into the mutable layer. The `mt_get()` lookup +treats this marker as "not found" and stops searching the chain, +effectively hiding the ROM entry. Unlike `undef_method` (which blocks +superclass lookup), `remove_method`'s tombstone allows the superclass +method to be found. + +`undef_method` uses a different tombstone (`proc=NULL` without +`MRB_MT_FUNC`), which is returned by `mt_get()` so the caller raises +NoMethodError without searching the superclass. + +### Class Duplication + +`Class.dup` shares the ROM chain. The duplicated class gets an empty +mutable layer pointing to the same ROM layers as the original. No ROM +data is copied. + +### Garbage Collection + +ROM layers are skipped during GC mark and sweep phases. Only mutable +layers are scanned for live RProc references and freed when the class +is collected. ROM wrappers are freed at `mrb_close()` via the +`mrb->rom_mt` tracking list. + +### Memory Measurement + +`mrb_class_mt_memsize()` reports only mutable layer memory. ROM +wrappers are tracked separately and not counted per-class. + +## Converting Existing Code + +To convert existing `mrb_define_method_id()` calls to a ROM table: + +1. **Count** the number of method definitions that can be converted. + +2. **Create** the ROM entries array using `MRB_MT_ENTRY()`. + +3. **Move** each `mrb_define_method_id()` call into the entries: + - `MRB_MT_ENTRY(func, sym, aspec)` where: + - `func` is the function pointer + - `sym` is the symbol macro (e.g., `MRB_SYM(name)`) + - `aspec` is the original `MRB_ARGS_*()` macro + - For private methods, OR `MRB_MT_PRIVATE` into the aspec: + `MRB_MT_ENTRY(func, sym, aspec | MRB_MT_PRIVATE)` + +4. **Replace** the calls with `MRB_MT_INIT_ROM(mrb, c, entries)`. + +5. **Keep** any methods that cannot be converted (see above) as + individual `mrb_define_method_id()` calls after the ROM init. + +6. **Build and test**: `rake CONFIG=host-debug -j24 all test:run:serial` + +### Before + +```c +void mrb_mruby_foo_gem_init(mrb_state *mrb) { + struct RClass *foo = mrb_define_class_id(mrb, MRB_SYM(Foo), mrb->object_class); + mrb_define_method_id(mrb, foo, MRB_SYM(bar), foo_bar, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, foo, MRB_SYM(baz), foo_baz, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, foo, MRB_OPSYM(eq), foo_eq, MRB_ARGS_REQ(1)); +} +``` + +### After + +```c +static const mrb_mt_entry foo_rom_entries[] = { + MRB_MT_ENTRY(foo_bar, MRB_SYM(bar), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(foo_baz, MRB_SYM(baz), MRB_ARGS_NONE()), + MRB_MT_ENTRY(foo_eq, MRB_OPSYM(eq), MRB_ARGS_REQ(1)), +}; + +void mrb_mruby_foo_gem_init(mrb_state *mrb) { + struct RClass *foo = mrb_define_class_id(mrb, MRB_SYM(Foo), mrb->object_class); + MRB_MT_INIT_ROM(mrb, foo, foo_rom_entries); +} +``` diff --git a/doc/guides/symbol.md b/doc/guides/symbol.md index 8f90ce59fd..8c34f75b2a 100644 --- a/doc/guides/symbol.md +++ b/doc/guides/symbol.md @@ -1,7 +1,9 @@ + + # Symbols Symbols in `mruby` C source code is represented by `mrb_sym` which is alias of -`uint32_t`. Lower 30 bits are used for symbols so that higher 2 bits can be +`uint32_t`. Lower 30 bits are used for symbols so that higher 2 bits can be used as flags, e.g. `struct mt_elem` in `class.c`. ```c @@ -57,27 +59,21 @@ To save RAM, `mruby` can use compile-time allocation of some symbols. You can use following macros to get preallocated symbols by including `mruby/presym.h` header. -* `MRB_SYM(xor)` //=> xor (Word characters) -* `MRB_SYM_B(xor)` //=> xor! (Method with Bang) -* `MRB_SYM_Q(xor)` //=> xor? (Method with Question mark) -* `MRB_SYM_E(xor)` //=> xor= (Method with Equal) -* `MRB_CVSYM(xor)` //=> @@xor (Class Variable) -* `MRB_IVSYM(xor)` //=> @xor (Instance Variable) -* `MRB_OPSYM(xor)` //=> ^ (Operator) +- `MRB_SYM(xor)` //=> xor (Word characters) +- `MRB_SYM_B(xor)` //=> xor! (Method with Bang) +- `MRB_SYM_Q(xor)` //=> xor? (Method with Question mark) +- `MRB_SYM_E(xor)` //=> xor= (Method with Equal) +- `MRB_GVSYM(xor)` //=> $xor (Global Variable) +- `MRB_CVSYM(xor)` //=> @@xor (Class Variable) +- `MRB_IVSYM(xor)` //=> @xor (Instance Variable) +- `MRB_OPSYM(xor)` //=> ^ (Operator) For `MRB_OPSYM()`, specify the names corresponding to operators (see `MRuby::Presym::OPERATORS` in `lib/mruby/presym.rb` for the names that can be specified for it). Other than that, describe only word characters -excluding leading and ending punctuations. - -These macros are converted to static symbol IDs at compile time, unless -preallocate symbols are disabled by `conf.disable_presym`. In that case, -these macros are expanded to `mrb_intern_lit` calls, therefore the mruby state -variable is required. The above macros assume the variable name is `mrb`. If -its name is not `mrb`, you need to use macros with `_2` suffix, such as -`MRB_SYM_2` to specify `mrb_state*` variable. - -### Disabling Preallocated Symbols +excluding leading and ending punctuation. -You can disable preallocated symbols by specifying `conf.disable_presym` in the -configuration file. +These macros are converted to static symbol IDs at compile time. +The `_2` suffix variants (e.g., `MRB_SYM_2`) are kept for backward +compatibility only; they accept an explicit `mrb_state*` parameter +but ignore it. New code should use the standard macros above. diff --git a/doc/internal/architecture.md b/doc/internal/architecture.md new file mode 100644 index 0000000000..438dc823d2 --- /dev/null +++ b/doc/internal/architecture.md @@ -0,0 +1,182 @@ + + +# mruby Architecture + +This document provides a map of mruby's internals for developers who +want to understand, debug, or contribute to the codebase. + +## Overview + +mruby's execution pipeline: + +```text +Ruby source → Parser → AST → Code Generator → Bytecode (irep) + ↓ + VM → Result +``` + +The design priority is **memory > performance > readability**. + +## Object Model + +All heap-allocated Ruby objects share a common header (`MRB_OBJECT_HEADER`): + +```text +struct RBasic (8 bytes on 64-bit) +┌──────────────┬─────┬──────────┬────────┬───────┐ +│ RClass *c │ tt │ gc_color │ frozen │ flags │ +│ (class ptr) │ 8b │ 3b │ 1b │ 20b │ +└──────────────┴─────┴──────────┴────────┴───────┘ +``` + +All object structs embed this header via `MRB_OBJECT_HEADER`: + +| Struct | Ruby Type | Extra Fields | +| ------------ | ---------------- | ---------------------------------- | +| `RObject` | Object instances | `iv` (instance variables) | +| `RClass` | Class/Module | `iv`, `mt` (method table), `super` | +| `RString` | String | embedded or heap buffer, length | +| `RArray` | Array | embedded or heap buffer, length | +| `RHash` | Hash | hash table or k-v array | +| `RProc` | Proc/Lambda | `irep` or C function, environment | +| `RData` | C data wrapper | `void *data`, `mrb_data_type` | +| `RFiber` | Fiber | `mrb_context` | +| `RException` | Exception | `iv` | + +Immediate values (Integer, Symbol, `true`, `false`, `nil`) are encoded +directly in `mrb_value` without heap allocation. The encoding depends on +the boxing mode (see [boxing.md](boxing.md)). + +Objects must fit within 5 words (`mrb_static_assert_object_size`). + +## Virtual Machine + +The VM is register-based, using two stacks: a **value stack** for +registers (locals, temporaries, arguments) and a **call info stack** +for tracking method/block call frames. Each method call pushes a +`mrb_callinfo` frame with the method symbol, proc, PC, and argument +counts. + +The dispatch loop in `mrb_vm_run()` decodes opcodes and operates on +registers. Method dispatch looks up the receiver's class method table +(with a per-state method cache), then either calls a C function +directly or pushes a new call frame for Ruby methods. + +Exception handling uses `setjmp`/`longjmp` (or C++ exceptions if +configured). Rescue/ensure handler tables are stored in each irep +and searched during stack unwinding. + +See [vm.md](vm.md) for detailed VM internals, [opcode.md](opcode.md) +for the full instruction set. + +## Garbage Collector + +The GC uses **tri-color incremental mark-and-sweep** with an optional +**generational mode**. Objects are colored white (unmarked), gray +(marked, children pending), black (fully marked), or red (static/ROM). + +The three-phase cycle (root scan, incremental marking, sweep) runs +in small steps between VM instructions to avoid long pauses. Write +barriers (`mrb_field_write_barrier`, `mrb_write_barrier`) maintain +correctness during incremental marking. + +The GC arena protects newly created objects in C code. Heap regions +(`mrb_gc_add_region`) support embedded systems with fixed memory banks. + +See [gc.md](gc.md) for detailed GC internals, +[../guides/gc-arena-howto.md](../guides/gc-arena-howto.md) for arena +usage patterns, [../guides/memory.md](../guides/memory.md) for memory +management. + +## Compiler Pipeline + +The compiler transforms Ruby source code through three stages: + +1. **Parser** (`parse.y`): Lrama/Bison grammar produces an AST of + `mrb_ast_node` structures, tracking lexer state and local scopes. +2. **Code Generator** (`codegen.c`): walks the AST and emits bytecode + into `mrb_irep` structures (instruction sequence, literal pool, + symbol table, child ireps). +3. **Execution**: the irep is wrapped in an `RProc` and executed by + the VM, or serialized to `.mrb` binary format. + +Alternative loading paths include `mrb_load_string()` (compile and +run), `mrb_load_irep()` (load precompiled bytecode), and `mrbc` +(ahead-of-time compilation). + +See [compiler.md](compiler.md) for detailed compiler internals, +[opcode.md](opcode.md) for the instruction set. + +## Source File Map + +### Core (`src/`) + +| File | Responsibility | +| ------------- | ---------------------------------------------- | +| `vm.c` | Bytecode dispatch loop, method invocation | +| `state.c` | `mrb_state` init/close, irep management | +| `gc.c` | Garbage collector (mark-sweep, incremental) | +| `class.c` | Class/module definition, method tables | +| `object.c` | Core object operations | +| `variable.c` | Instance/class/global variables, object shapes | +| `proc.c` | Proc/Lambda/closure handling | +| `array.c` | Array implementation | +| `string.c` | String implementation (embedded, shared, heap) | +| `hash.c` | Hash implementation (open addressing) | +| `numeric.c` | Integer/Float arithmetic | +| `symbol.c` | Symbol table and interning | +| `range.c` | Range implementation | +| `error.c` | Exception creation, raise, backtrace | +| `kernel.c` | Kernel module methods | +| `load.c` | `.mrb` bytecode loading | +| `dump.c` | Bytecode serialization (write `.mrb`) | +| `print.c` | Print/puts/p output | +| `backtrace.c` | Stack trace generation | + +### Compiler (`mrbgems/mruby-compiler/core/`) + +| File | Responsibility | +| ----------- | ------------------------------- | +| `parse.y` | Yacc grammar → AST | +| `y.tab.c` | Generated parser (from parse.y) | +| `codegen.c` | AST → bytecode (irep) | +| `node.h` | AST node type definitions | + +### Key Headers (`include/mruby/`) + +| Header | Contents | +| ------------ | ------------------------------------- | +| `mruby.h` | `mrb_state`, core API declarations | +| `value.h` | `mrb_value`, type enums, value macros | +| `object.h` | `RBasic`, `RObject`, object header | +| `class.h` | `RClass`, method table types | +| `string.h` | `RString`, string macros | +| `array.h` | `RArray`, array macros | +| `hash.h` | `RHash`, hash API | +| `data.h` | `RData`, C data wrapping | +| `irep.h` | `mrb_irep`, bytecode structures | +| `compile.h` | Compiler context, `mrb_load_string` | +| `boxing_*.h` | Value boxing implementations | + +## mrbgems System + +Gems are the module system for mruby. Each gem lives in +`mrbgems/mruby-*/` and contains: + +```text +mruby-example/ +├── mrbgem.rake gem specification (name, deps, bins) +├── src/ C source files +├── mrblib/ Ruby source files (compiled to bytecode) +├── include/ C headers +├── test/ mrbtest test files +└── bintest/ binary test files (CRuby) +``` + +At build time, gem Ruby files are compiled with `mrbc` and linked into +`libmruby.a`. Gem initialization runs in dependency order via +`gem_init.c` (auto-generated). + +GemBoxes (`mrbgems/*.gembox`) define named collections of gems +(e.g., `default.gembox` includes `stdlib`, `stdlib-ext`, `stdlib-io`, +`math`, `metaprog`, and binary tools). diff --git a/doc/internal/boxing.md b/doc/internal/boxing.md index e62294fec2..653d8c3a13 100644 --- a/doc/internal/boxing.md +++ b/doc/internal/boxing.md @@ -1,10 +1,12 @@ + + # Boxing The mruby objects and data are represented by C data type `mrb_value`. There are three options how to pack the data values in the `mrb_value`. -* Word Boxing -* NaN Boxing -* No Boxing +- Word Boxing +- NaN Boxing +- No Boxing ## Word Boxing @@ -14,39 +16,81 @@ Some values (called immediate values, e.g. integers, booleans, symbols, etc.) ar The Word boxing packing bit patterns are like following: -| Types | Bit Pattern | -|--------|-------------------------------------| -| object | xxxxxxxx xxxxxxxx xxxxxxxx xxxxx000 | -| fixnum | xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxx1 | -| nil | 00000000 00000000 00000000 00000000 | -| true | 00000000 00000000 00000000 00001100 | -| false | 00000000 00000000 00000000 00000100 | -| undef | 00000000 00000000 00000000 00010100 | -| symbol | xxxxxxxx xxxxxxxx xxxxxxxx xxxxxx10 | +| Types | Bit Pattern | +| ------ | ------------------------------------- | +| object | `xxxxxxxx xxxxxxxx xxxxxxxx xxxxx000` | +| fixnum | `xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxx1` | +| nil | `00000000 00000000 00000000 00000000` | +| true | `00000000 00000000 00000000 00001100` | +| false | `00000000 00000000 00000000 00000100` | +| undef | `00000000 00000000 00000000 00010100` | +| symbol | `xxxxxxxx xxxxxxxx xxxxxxxx xxxxxx10` | + +### Inline Float (64-bit) + +On 64-bit platforms, `double` values are packed into the word using +rotation encoding. The IEEE 754 exponent field is rotated so that +common exponent values (those not colliding with the pointer/tag +patterns above) fit directly in a word. This encoding is lossless +for most float values; only a small set of exotic exponents require +heap allocation as `RFloat`. + +To disable inline float and heap-allocate all floats, define +`MRB_WORDBOX_NO_INLINE_FLOAT`. + +### 32-bit Considerations -On 64 bit platform (unless `MRB_WORDBOX_NO_FLOAT_TRUNCATE`), float values are also packed in the `mrb_value`. In that case, we drop least significant 2 bits from mantissa. -If you need full precision for floating point numbers, define `MRB_WORDBOX_NO_FLOAT_TRUNCATE`. +On 32-bit platforms with 64-bit `double` (the common case), +`MRB_WORDBOX_NO_INLINE_FLOAT` is automatically defined because a +64-bit double cannot fit in a 32-bit word. All floats are +heap-allocated as `RFloat` objects. + +The `RFloat` struct uses a `char[]` buffer instead of a `double` +field to avoid alignment issues, since GC heap slots (RVALUE) on +32-bit have only 4-byte alignment but `double` requires 8-byte. +Accessor functions `mrb_rfloat_value()` and `mrb_rfloat_set()` use +`memcpy` for safe access. ## NaN Boxing -NaN boxing packs the Ruby data in a floating point numbers, which represent NaN (Not a Number) values. Under IEEE753 definitions every value that exponent is all set are considered as NaN. That means NaN can represent `2^51` values. NaN boxing is a teaching to pack the values in those NaN representation. In theory, 64 bits pointers are too big to fit in NaN, but practically most OS uses only 48 bits at most for pointers (except for some OS e.g. Solaris). +NaN boxing packs the Ruby data in a floating-point numbers, which represent NaN (Not a Number) values. Under IEEE753 definitions every value that exponent is all set are considered as NaN. That means NaN can represent `2^51` values. NaN boxing is a teaching to pack the values in those NaN representation. In theory, 64-bit pointers are too big to fit in NaN, but practically most OS use only 48 bits at most for pointers (except for some OS e.g. Solaris). The NaN boxing packing bit patterns are like following: -| Types | Bit Pattern | -|--------|-------------------------------------------------------------------------| -| float | SEEEEEEE EEEEFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF | -| +/-inf | S1111111 11110000 00000000 00000000 00000000 00000000 00000000 00000000 | -| nan | 01111111 11111000 00000000 00000000 00000000 00000000 00000000 00000000 | -| fixnum | 01111111 11111001 00000000 00000000 IIIIIIII IIIIIIII IIIIIIII IIIIIIII | -| symbol | 01111111 11111110 00000000 00000000 SSSSSSSS SSSSSSSS SSSSSSSS SSSSSSSS | -| misc | 01111111 11111111 00000000 00000000 00000000 00000000 00TTTTTT 0000MMMM | -| object | 01111111 11111100 PPPPPPPP PPPPPPPP PPPPPPPP PPPPPPPP PPPPPPPP PPPPPP00 | -| ptr | 01111111 11111100 PPPPPPPP PPPPPPPP PPPPPPPP PPPPPPPP PPPPPPPP PPPPPP01 | -| nil | 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 | +| Types | Bit Pattern | +| ------ | ------------------------------------------------------------------------- | +| float | `SEEEEEEE EEEEFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF` | +| +/-inf | `S1111111 11110000 00000000 00000000 00000000 00000000 00000000 00000000` | +| nan | `01111111 11111000 00000000 00000000 00000000 00000000 00000000 00000000` | +| fixnum | `01111111 11111001 00000000 00000000 IIIIIIII IIIIIIII IIIIIIII IIIIIIII` | +| symbol | `01111111 11111110 00000000 00000000 SSSSSSSS SSSSSSSS SSSSSSSS SSSSSSSS` | +| misc | `01111111 11111111 00000000 00000000 00000000 00000000 00TTTTTT 0000MMMM` | +| object | `01111111 11111100 PPPPPPPP PPPPPPPP PPPPPPPP PPPPPPPP PPPPPPPP PPPPPP00` | +| ptr | `01111111 11111100 PPPPPPPP PPPPPPPP PPPPPPPP PPPPPPPP PPPPPPPP PPPPPP01` | +| nil | `00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000` | -The object values appear far more frequently than floating point numbers, so we offset the value so that object pointers are unchanged. This technique is called "favor pointer"". +The object values appear far more frequently than floating-point numbers, so we offset the value so that object pointers are unchanged. This technique is called "favor pointer". ## No Boxing No boxing represents `mrb_value` by the C struct with `type` and the value union. This is the most portable (but inefficient) representation. No boxing can be specified by `MRB_NO_BOXING`, and it's default for debugging configuration (e.g. `host-debug`). + +## Comparison + +| Property | Word Boxing | NaN Boxing | No Boxing | +| ---------------------- | ----------------- | ---------------- | -------------------- | +| `mrb_value` size | 1 word (4/8 byte) | 8 bytes | 2 words (8/16 bytes) | +| Default on | most platforms | (manual opt-in) | `host-debug` | +| Macro | `MRB_WORD_BOXING` | `MRB_NAN_BOXING` | `MRB_NO_BOXING` | +| Inline integers | yes (31/63 bit) | yes (32 bit) | yes (full width) | +| Inline floats (64-bit) | yes (rotation) | yes (native) | yes (struct field) | +| Inline floats (32-bit) | no (heap RFloat) | yes (native) | yes (struct field) | +| Pointer size limit | none | 48 bits | none | +| Debugger friendly | no | no | yes | + +## ABI Compatibility + +The boxing mode changes the layout of `mrb_value`. Code compiled with +one boxing mode **cannot** be linked against a library built with a +different mode. Always use `mruby-config --cflags` to get the correct +compiler flags. diff --git a/doc/internal/compiler.md b/doc/internal/compiler.md new file mode 100644 index 0000000000..7f83ab16ff --- /dev/null +++ b/doc/internal/compiler.md @@ -0,0 +1,323 @@ + + +# Compiler Pipeline Internals + +This document describes mruby's compilation pipeline for developers +working on the parser, code generator, or bytecode format. + +**Read this if you are:** adding new syntax or modifying the parser, +debugging codegen issues (wrong registers, missing opcodes), +working with the `.mrb` binary format, or understanding how Ruby +constructs map to bytecode. + +## Pipeline Overview + +```text +Ruby source + | + v + Lexer/Parser (parse.y) + | + v + AST (mrb_ast_node) + | + v + Code Generator (codegen.c) + | + v + Bytecode (mrb_irep) + | + v + VM execution -or- .mrb binary file +``` + +## Stage 1: Lexer and Parser + +The lexer and parser are combined in a single Lrama/Bison grammar +file: `mrbgems/mruby-compiler/core/parse.y`. + +### Parser State + +The parser maintains extensive state in `mrb_parser_state`: + +- **lstate**: current lexer state (EXPR_BEG, EXPR_END, EXPR_ARG, + EXPR_DOT, EXPR_FNAME, etc.). Controls how tokens like `+`/`-` + are interpreted (sign vs operator) and whether newlines are + significant. +- **locals**: stack of local variable lists (one per scope), stored + as cons-lists of symbols. +- **lex_strterm**: string/heredoc parsing state for handling nested + interpolation. +- **cond_stack**, **cmdarg_stack**: bit stacks tracking + conditional and command argument contexts. +- **tree**: root AST node after successful parse. +- **error_buffer**: accumulated parse errors. + +### AST Nodes + +The parser produces an AST using two node types: + +- **Cons-list nodes**: traditional binary tree pairs (car/cdr) +- **Variable-sized nodes**: have a header with `node_type`, `lineno`, + and `filename_index` + +Key node types include `NODE_SCOPE` (new variable scope), +`NODE_STMTS` (statement sequence), `NODE_IF`, `NODE_WHILE`, +`NODE_CALL` (method call), `NODE_DEF` (method definition), +`NODE_CLASS`, `NODE_RESCUE`, `NODE_ENSURE`, etc. See +`mrbgems/mruby-compiler/core/node.h` for the complete list. + +### Local Variable Tracking + +Local variables are tracked per-scope during parsing: + +- `local_add(sym)`: register a new local variable in current scope +- `local_var_p(sym)`: check if a symbol is a local variable (affects + whether an identifier is parsed as a method call or variable + reference) + +## Stage 2: Code Generator + +The code generator (`mrbgems/mruby-compiler/core/codegen.c`) walks +the AST and emits bytecode into `mrb_irep` structures. + +### Codegen Scope + +Each lexical scope (method, block, class body) has its own +`codegen_scope`: + +```text +codegen_scope ++-- sp current register index (stack pointer) ++-- pc current instruction count ++-- nlocals number of local variables ++-- nregs maximum register index used ++-- lv local variable list ++-- iseq[] instruction sequence (grows dynamically) ++-- pool[] literal pool (strings, numbers) ++-- syms[] symbol table (method/variable names) ++-- reps[] child ireps (nested methods/blocks) ++-- catch_table[] exception handler entries ++-- loop current loop context stack ++-- prev parent scope ++-- mscope true if method/module/class scope +``` + +Scopes nest for blocks, method definitions, and class/module bodies. +Each scope produces one `mrb_irep`. + +### Register Allocation + +The code generator uses a simple stack-based register allocator: + +- Register 0 is always `self` +- Registers 1..nlocals-1 are local variables (in declaration order) +- Registers nlocals..nregs-1 are temporaries + +`push()` increments `sp` and tracks the high-water mark in `nregs`. +`pop()` decrements `sp`. The allocator is linear - it does not +reuse temporaries within an expression. + +### Instruction Emission + +Instructions are emitted via helper functions: + +- `genop_0(opcode)`: no operands +- `genop_1(opcode, a)`: one operand (auto-extends with OP_EXT1 + if a > 255) +- `genop_2(opcode, a, b)`: two operands (auto-extends with + OP_EXT1/2/3 as needed) +- `genop_3(opcode, a, b, c)`: three operands +- `genop_W(opcode, a)`: 24-bit operand +- `genop_2S(opcode, a, b)`: one 8-bit + one 16-bit operand + +### Peephole Optimization + +The code generator performs limited peephole optimizations, such as +removing redundant `OP_MOVE` instructions and combining consecutive +literal loads. Optimization is disabled at jump targets and when +`no_optimize` is set in the compilation context. + +### Loop Context + +Loop constructs (`while`, `until`, `for`, blocks) push a +`loopinfo` structure that tracks jump destinations: + +- `pc0`: destination for `next` +- `pc1`: destination for `redo` +- `pc2`: destination for `break` + +Loop types (`LOOP_NORMAL`, `LOOP_BLOCK`, `LOOP_FOR`, `LOOP_BEGIN`, +`LOOP_RESCUE`) determine how `break`/`next`/`redo` behave. + +## IRep Structure + +The compiled bytecode is stored in `mrb_irep` (Instruction +REPresentation): + +```text +mrb_irep ++-- iseq[] instruction sequence (mrb_code array) ++-- pool[] literal pool (mrb_irep_pool entries) ++-- syms[] symbol table (mrb_sym array) ++-- reps[] child ireps (nested scopes) ++-- lv[] local variable names (for debugging) ++-- nlocals local variable count ++-- nregs register count (locals + temporaries) ++-- ilen instruction count ++-- plen pool entry count ++-- slen symbol count ++-- rlen child irep count ++-- clen catch handler count ++-- debug_info source file/line mapping +``` + +### Literal Pool + +Pool entries store constants referenced by instructions: + +| Type | Tag | Description | +| ---------------- | --- | ------------------------------- | +| `IREP_TT_STR` | 0 | Dynamic string (heap allocated) | +| `IREP_TT_SSTR` | 2 | Static string (read-only) | +| `IREP_TT_INT32` | 1 | 32-bit integer | +| `IREP_TT_INT64` | 3 | 64-bit integer | +| `IREP_TT_FLOAT` | 5 | Floating-point number | +| `IREP_TT_BIGINT` | 7 | Arbitrary-precision integer | + +The code generator deduplicates pool entries: identical strings +and equal numeric values share the same pool index. + +### Catch Handler Table + +Exception handler entries are appended after the instruction +sequence in memory: + +```text +mrb_irep_catch_handler ++-- type MRB_CATCH_RESCUE (0) or MRB_CATCH_ENSURE (1) ++-- begin[4] start PC of protected range ++-- end[4] end PC of protected range ++-- target[4] jump target when handler fires +``` + +During exception unwinding, handlers are searched in reverse order +(last to first) for the current PC. + +## Operand Encoding + +Standard instructions use 8-bit operands. When a value exceeds +255, extension prefixes widen operands to 16 bits: + +| Prefix | Effect | +| --------- | --------------------------------- | +| `OP_EXT1` | First operand (a) becomes 16-bit | +| `OP_EXT2` | Second operand (b) becomes 16-bit | +| `OP_EXT3` | Both a and b become 16-bit | + +Instruction formats: + +| Format | Layout | Size | +| ------ | ----------------------------- | ------- | +| Z | opcode only | 1 byte | +| B | opcode + a(8) | 2 bytes | +| BB | opcode + a(8) + b(8) | 3 bytes | +| BBB | opcode + a(8) + b(8) + c(8) | 4 bytes | +| BS | opcode + a(8) + b(16) | 4 bytes | +| BSS | opcode + a(8) + b(16) + c(16) | 6 bytes | +| S | opcode + a(16) | 3 bytes | +| W | opcode + a(24) | 4 bytes | + +See [opcode.md](opcode.md) for the full instruction table. + +## OP_ENTER: Argument Specification + +`OP_ENTER` encodes a method's argument layout in a 24-bit value +(W format). The bit fields are defined by the `MRB_ARGS_*` macros: + +```text +Bits 23 no-block flag +Bits 18-22 required argument count (5 bits, 0-31) +Bits 13-17 optional argument count (5 bits, 0-31) +Bit 12 rest argument flag (*args) +Bits 7-11 post-rest argument count (5 bits, 0-31) +Bits 2-6 keyword argument count (5 bits, 0-31) +Bit 1 keyword rest flag (**kwargs) +Bit 0 block argument flag (&block) +``` + +Example: `def foo(a, b=1, *rest, &block)` produces an aspec with +1 required, 1 optional, rest flag set, and block flag set. + +## Presym: Compile-Time Symbols + +The presym system pre-allocates symbol IDs at build time for +frequently used method names and operators. This avoids runtime +string interning for common symbols. + +Generated by `lib/mruby/presym.rb`, the presym table maps symbol +names to compile-time constants: + +| Macro | Example | Symbol | +| ----------------- | --------------------- | ------------- | +| `MRB_SYM(name)` | `MRB_SYM(initialize)` | `:initialize` | +| `MRB_SYM_B(name)` | `MRB_SYM_B(map)` | `:map!` | +| `MRB_SYM_Q(name)` | `MRB_SYM_Q(nil)` | `:nil?` | +| `MRB_SYM_E(name)` | `MRB_SYM_E(name)` | `:name=` | +| `MRB_OPSYM(op)` | `MRB_OPSYM(add)` | `:+` | +| `MRB_IVSYM(name)` | `MRB_IVSYM(name)` | `:@name` | +| `MRB_CVSYM(name)` | `MRB_CVSYM(count)` | `:@@count` | +| `MRB_GVSYM(name)` | `MRB_GVSYM(stdout)` | `:$stdout` | + +## Binary Format (.mrb) + +Precompiled bytecode is stored in the RITE binary format: + +```text +Header: "RITE" magic + version ("0400") + CRC + size +Section IREP: instruction sequences, pools, symbols +Section DBG: debug info (optional, filename/line mapping) +Section LVAR: local variable names (optional) +Footer: "END\0" +``` + +Loading functions: + +- `mrb_load_irep(mrb, bin)`: load and execute from byte array +- `mrb_load_irep_buf(mrb, buf, len)`: load with explicit size + (safer) +- `mrb_read_irep(mrb, bin)`: load without executing (returns + `mrb_irep*`) +- `mrb_load_irep_file(mrb, fp)`: load from file + +The `mrbc` command-line tool performs ahead-of-time compilation: + +```shell +mrbc -o output.mrb source.rb # binary format +mrbc -Boutput source.rb # C array format +``` + +## Compilation Limits + +| Limit | Value | +| ---------------------- | ----------------------------- | +| Max nesting depth | 256 (`MRB_CODEGEN_LEVEL_MAX`) | +| Max local variables | 255 (uint16 `nlocals`) | +| Max symbols per irep | 65535 | +| Max operand (standard) | 255 (8-bit) | +| Max operand (extended) | 65535 (16-bit) | + +## Source Files + +| File | Contents | +| --------------------------------------- | ------------------------- | +| `mrbgems/mruby-compiler/core/parse.y` | Lrama/Bison grammar | +| `mrbgems/mruby-compiler/core/y.tab.c` | Generated parser | +| `mrbgems/mruby-compiler/core/codegen.c` | Code generator | +| `mrbgems/mruby-compiler/core/node.h` | AST node types | +| `include/mruby/irep.h` | IRep structure definition | +| `include/mruby/compile.h` | Compiler context API | +| `include/mruby/ops.h` | Opcode definitions | +| `src/load.c` | Binary format loader | +| `src/dump.c` | Binary format writer | +| `lib/mruby/presym.rb` | Presym table generator | diff --git a/doc/internal/gc.md b/doc/internal/gc.md new file mode 100644 index 0000000000..2e4ba8e0ef --- /dev/null +++ b/doc/internal/gc.md @@ -0,0 +1,487 @@ + + +# Garbage Collector Internals + +This document describes the internals of mruby's garbage collector +for developers working on `src/gc.c` and related code. + +**Read this if you are:** modifying core data structures that hold +object references (and need to add write barriers), debugging +memory leaks or GC-related crashes, tuning GC performance for an +embedded target, or working on the GC code itself. + +**For user-facing GC docs**, see +[gc-arena-howto.md](../guides/gc-arena-howto.md) (arena usage in C +extensions) and [memory.md](../guides/memory.md) (heap regions). + +## Overview + +mruby uses a **tri-color incremental mark-and-sweep** garbage collector +with an optional **generational mode**. The collector runs in small +incremental steps between VM instruction execution, avoiding long +pauses. + +## Tri-Color Model + +Every heap-allocated object has a color stored in +`RBasic::gc_color` (3 bits): + +| Color | Value | Meaning | +| -------------- | ------ | ------------------------------------ | +| White (A or B) | 1 or 2 | Unmarked, candidate for collection | +| Gray | 0 | Marked, but children not yet scanned | +| Black | 4 | Fully marked and scanned | +| Red | 7 | Static/ROM object, never collected | + +The GC uses two white types (A and B) in a flip-flop scheme. At the +start of each GC cycle, the meaning of "current white" is flipped by +XORing the white bits. This avoids recoloring all live objects at +cycle boundaries, which is an O(1) operation instead of O(n). + +```c +#define is_dead(s, o) \ + (((o)->gc_color & other_white_part(s) & GC_WHITES) || \ + (o)->tt == MRB_TT_FREE) +``` + +An object is dead if it still carries the previous cycle's white color. + +## Heap Structure + +### Heap Pages + +Objects are allocated from fixed-size heap pages: + +```text +mrb_heap_page ++-- freelist linked list of free slots ++-- next next page in heap list ++-- free_next next page with free slots ++-- old old generation flag (generational mode) ++-- region true if carved from a contiguous region ++-- objects[MRB_HEAP_PAGE_SIZE] RVALUE array (default 1024) +``` + +Each page holds `MRB_HEAP_PAGE_SIZE` objects (default 1024). On +64-bit systems, a page is approximately 40 KB (40 bytes per slot). + +### RVALUE Union + +All mruby object types share the same slot size via a C union: + +```text +RVALUE = union of { + RBasic, RObject, RClass, RString, RArray, RHash, + RRange, RData, RProc, REnv, RFiber, RException, ... + struct { RBasic header; RVALUE *next; } (free slot) +} +``` + +Free slots use the union space for a freelist pointer. + +### Freelist + +Each page maintains a singly-linked freelist of available slots. +Allocation pops from the freelist; deallocation during sweep +prepends to the freelist. The GC tracks pages with free slots in +`gc->free_heaps` for fast allocation. + +### Heap Regions + +For embedded systems with fixed memory banks, `mrb_gc_add_region()` +carves heap pages from a user-provided contiguous buffer: + +```c +static uint8_t heap_buf[64 * 1024]; +mrb_gc_add_region(mrb, heap_buf, sizeof(heap_buf)); +``` + +Region pages are never freed by the GC (even if all objects die). +When region pages are exhausted, allocation falls back to `malloc()`. + +## GC Phases + +The GC operates as a three-state machine: + +```text +GC_STATE_ROOT --> GC_STATE_MARK --> GC_STATE_SWEEP --> GC_STATE_ROOT +``` + +### Root Scan (GC_STATE_ROOT) + +Marks objects directly reachable from the VM: + +1. Global variables (`mrb_gc_mark_gv`) +2. GC arena (`gc->arena[0..arena_idx-1]`) +3. All built-in classes (Object, Class, Module, etc.) +4. Top-level self (`mrb->top_self`) +5. Current exception (`mrb->exc`) +6. Execution contexts (VM stacks, call info stacks, active fibers) +7. Task queues (if `MRB_USE_TASK_SCHEDULER` is defined) + +After root scanning, the white color is flipped. + +### Incremental Marking (GC_STATE_MARK) + +Gray objects are popped from the gray stack and their children +marked. Each step processes a limited number of objects: + +```text +limit = (GC_STEP_SIZE / 100) * step_ratio +``` + +With default `step_ratio = 200` and `GC_STEP_SIZE = 1024`, the +limit is 2048 objects per step. After each step, `gc_debt` is +decremented by the actual number of objects processed, so larger +steps repay more debt. + +When the gray stack is exhausted, the final marking phase re-marks +the arena and global variables to catch objects created during +marking, then transitions to sweep. + +### Sweep (GC_STATE_SWEEP) + +Iterates through heap pages. For each object: + +- If dead (previous cycle's white): call `obj_free()`, return + slot to freelist +- If alive: paint with current white for the next cycle + +Sweep is also incremental: `gc->sweeps` tracks the current page +position between steps. + +## Gray Stack + +The gray stack is a fixed-size array of object pointers: + +```c +struct RBasic *gray_stack[MRB_GRAY_STACK_SIZE]; /* default 1024 */ +size_t gray_stack_top; +mrb_bool gray_overflow; +``` + +When the stack overflows, `gray_overflow` is set to `TRUE`. During +marking, `gc_gray_rescan()` scans the entire heap to find any gray +objects that could not be pushed. This guarantees correctness at the +cost of a full heap scan. + +## Write Barriers + +During incremental marking, a black (fully marked) object storing +a reference to a white (unmarked) object creates a dangerous edge +that could lead to premature collection. Write barriers prevent this. + +### Field Write Barrier + +Used when assigning a specific field: + +```c +mrb_field_write_barrier(mrb, parent, child); +``` + +If `parent` is black and `child` is white: + +- During marking or generational mode: paint `child` gray (add to + gray stack for scanning) +- During sweep: paint `parent` with current white (demote it for + next cycle) + +### General Write Barrier + +Used when an object has been modified but the specific child is +not known: + +```c +mrb_write_barrier(mrb, obj); +``` + +Paints `obj` gray and pushes it onto the gray stack for re-scanning. + +## GC Arena + +The arena protects newly created objects from collection before +they are stored in a reachable location. Every `mrb_obj_alloc()` +automatically pushes the new object onto the arena. + +C extensions must save and restore the arena index when creating +many temporary objects to prevent arena overflow: + +```c +int ai = mrb_gc_arena_save(mrb); +/* create temporary objects */ +mrb_gc_arena_restore(mrb, ai); +``` + +### Fixed vs Dynamic Arena + +- **Dynamic** (default): arena grows by 1.5x when full. Risk of + unbounded growth if arena is not properly managed. +- **Fixed** (`MRB_GC_FIXED_ARENA`): raises an exception on overflow. + Arena size is `MRB_GC_ARENA_SIZE` (default 100). + +### Permanent Registration + +For long-lived C objects that must survive indefinitely: + +```c +mrb_gc_register(mrb, obj); /* add to permanent root */ +mrb_gc_unregister(mrb, obj); /* remove from root */ +``` + +These store objects in a global array that is always marked as +part of the root set. + +See [gc-arena-howto.md](../guides/gc-arena-howto.md) for detailed +usage patterns. + +## Generational Mode + +When enabled (default, unless `MRB_GC_TURN_OFF_GENERATIONAL` is +defined), the GC classifies objects into young and old generations. + +### Minor GC + +Only processes young objects. Pages where all objects are old are +marked with `page->old = TRUE` and skipped entirely during sweep. +Minor GC always runs to completion in a single step. + +### Major GC + +A full mark-and-sweep cycle that processes all objects. Triggered +when `gc->live > gc->oldgen_threshold`. Major GC runs +incrementally, like the non-generational collector. + +After a major GC completes, the collector reverts to minor GC mode. +The old-generation threshold is recalculated: + +```text +oldgen_threshold = live_after_mark * MAJOR_GC_INC_RATIO / 100 +``` + +With `MAJOR_GC_INC_RATIO = 120`, a major GC triggers when live +objects exceed 120% of the last major GC's survivors. + +### Transitioning Between Modes + +```c +mrb_gc_generational_mode_set(mrb, TRUE); /* enable */ +mrb_gc_generational_mode_set(mrb, FALSE); /* disable */ +``` + +From Ruby: `GC.generational_mode = true/false`. + +## Object Allocation + +`mrb_obj_alloc()` is the core allocation function: + +1. If `MRB_GC_STRESS` is defined, run a full GC +2. Increment `gc->gc_debt`; if positive, run `mrb_incremental_gc()` +3. Ensure arena has space (`gc_arena_keep`) +4. Pop an object from the freelist of `gc->free_heaps` +5. If no free pages, allocate a new page (`add_heap`) +6. Initialize the object (zero fill, set type and class) +7. Paint with current white color +8. Push onto arena (`gc_protect`) +9. Increment `gc->live` + +## Object Freeing + +`obj_free()` performs type-specific cleanup: + +- **Objects/Exceptions**: free instance variable tables +- **Classes**: free method tables and instance variable tables +- **Arrays**: free heap buffer (if not embedded/shared) +- **Hashes**: free hash table +- **Strings**: free heap buffer (if not embedded/shared) +- **Data objects**: call user-provided `dfree` callback +- **Procs**: decrement irep reference count +- **Fibers**: free context (stacks) + +The object's type is set to `MRB_TT_FREE` after freeing. + +## Triggering GC + +### Debt Model + +GC uses a **debt-based feedback model** to balance allocation +rate against collection work. The key field is `gc->gc_debt` +(signed integer): + +- **Negative** = credit (GC is ahead, no collection needed) +- **Zero** = balanced +- **Positive** = debt (allocation outpacing collection, GC runs) + +Each object allocation increments `gc_debt` by 1. When debt +goes positive, `mrb_incremental_gc()` runs. Each incremental +step decrements debt by `GC_STEP_SIZE` (1024), giving credit +for many future allocations. + +When a GC cycle completes, credit is calculated from +`interval_ratio`: + +```text +credit = (live_after_mark / 100) * interval_ratio - live_after_mark +minimum: GC_STEP_SIZE (1024) +gc_debt = -credit +``` + +With default `interval_ratio = 200` and 1000 live objects: +`credit = (1000/100)*200 - 1000 = 1000`, so approximately 1000 +allocations can occur before the next GC cycle begins. + +### Malloc Pressure + +When `gc->malloc_threshold` is set (non-zero), the GC also +tracks bytes allocated through `mrb_realloc_simple()` in +`gc->malloc_increase`. When `malloc_increase` exceeds +`malloc_threshold`, the counter resets and an incremental GC +step runs. This captures memory pressure from large buffers +(e.g., long strings) that would otherwise be invisible to the +object-count-based debt model. + +### Manual + +```c +mrb_full_gc(mrb); /* force complete GC cycle */ +mrb_garbage_collect(mrb); /* public API wrapper */ +``` + +From Ruby: `GC.start`. + +## Configuration + +### Compile-Time + +| Macro | Default | Description | +| ------------------------------ | ------- | --------------------------------------- | +| `MRB_HEAP_PAGE_SIZE` | 1024 | Objects per heap page | +| `MRB_GRAY_STACK_SIZE` | 1024 | Gray stack capacity | +| `MRB_GC_ARENA_SIZE` | 100 | Arena size (fixed mode) or initial size | +| `MRB_GC_FIXED_ARENA` | off | Use fixed-size arena | +| `MRB_GC_TURN_OFF_GENERATIONAL` | off | Disable generational mode | +| `MRB_GC_STRESS` | off | Full GC on every allocation (debug) | +| `MRB_GC_STATS` | off | Enable GC statistics counters | +| `MRB_USE_MALLOC_TRIM` | off | Call `malloc_trim()` after full GC | + +### Runtime + +From Ruby code: + +```ruby +GC.interval_ratio = 200 # controls debt credit after GC cycle +GC.step_ratio = 200 # objects per incremental step +GC.step_limit = 0 # 0=unlimited, >0=absolute step cap +GC.malloc_threshold = 0 # 0=disabled, >0=bytes to trigger GC +GC.generational_mode = true +GC.start # force full GC +GC.enable # re-enable GC +GC.disable # disable GC +``` + +### GC Statistics + +`GC.stat` returns a Hash with GC state and statistics: + +```ruby +GC.stat +# => { +# :live => 5432, # live object count +# :debt => -1024, # GC debt (negative=credit, positive=behind) +# :state => 0, # 0=root, 1=marking, 2=sweeping +# :generational => true, # generational mode enabled +# :full => false, # major GC in progress +# :step_limit => 0, # current step limit setting +# :malloc_increase => 8192, # malloc bytes since last cycle +# :malloc_threshold => 0, # current malloc threshold setting +# } +``` + +With `MRB_GC_STATS` enabled, additional keys are available: + +```ruby +# :total => 15, # total GC invocations +# :minor => 12, # minor GC count +# :major => 3, # major GC count +``` + +### Tuning Guide + +**`interval_ratio`** (default 200): Controls how many allocations +occur between GC cycles. Higher values reduce GC frequency but +increase peak memory. The debt credit after each cycle is +`(live_after_mark / 100) * interval_ratio - live_after_mark`. + +**`step_ratio`** (default 200): Controls how much work each +incremental step performs. Higher values make each step larger, +reducing total GC overhead but increasing individual pause times. + +**`step_limit`** (default 0, unlimited): Caps the maximum work +per incremental step regardless of `step_ratio`. Useful for +real-time applications that need bounded pause times. The +effective step size is `min(step_ratio calculation, step_limit)`. + +**`malloc_threshold`** (default 0, disabled): Triggers GC when +cumulative `malloc`/`realloc` bytes exceed this threshold. Useful +when applications allocate large buffers (strings, data objects) +that create memory pressure without proportional object count +increase. + +### Practical Tuning Examples + +**Allocation-heavy workloads** (many short-lived Procs, closures, +blocks): GC sweep dominates because of high object churn. Increase +`interval_ratio` to reduce GC frequency: + +```ruby +GC.interval_ratio = 400 # ~12% faster than default (200) +``` + +Higher values (400-600) reduce sweep overhead at the cost of more +dead objects accumulating before collection. Values above 600 show +diminishing returns. Peak memory usage increases temporarily, but +live object count after GC remains the same. + +**CPU-intensive workloads** (numeric computation, recursive methods +with no object allocation): GC parameters have negligible impact +because GC rarely runs. No tuning needed. + +**Real-time or latency-sensitive** applications: Use `step_limit` +to bound pause times: + +```ruby +GC.step_limit = 256 # cap incremental step to 256 objects +``` + +This makes GC pauses more predictable but increases total GC +overhead (more steps needed per cycle). + +**Large buffer workloads** (reading files, building long strings): +Set `malloc_threshold` to trigger GC when buffer allocations +accumulate, even if object count is low: + +```ruby +GC.malloc_threshold = 1024 * 1024 # trigger GC per ~1MB allocated +``` + +### Diagnosing GC Overhead + +Use `GC.stat` to monitor GC behavior at runtime: + +```ruby +s = GC.stat +puts "live objects: #{s[:live]}" +puts "GC debt: #{s[:debt]}" # positive = GC is behind +puts "GC state: #{s[:state]}" # 0=idle, 1=marking, 2=sweeping +``` + +If `debt` is frequently positive during performance-critical +sections, increase `interval_ratio`. If memory usage is too high, +decrease it. + +## Source Files + +| File | Contents | +| -------------------- | --------------------------------- | +| `src/gc.c` | GC implementation | +| `include/mruby/gc.h` | `mrb_gc` structure, public GC API | +| `include/mruby.h` | Arena save/restore macros | diff --git a/doc/internal/opcode.md b/doc/internal/opcode.md index 230848cb5d..e54ec1fa2f 100644 --- a/doc/internal/opcode.md +++ b/doc/internal/opcode.md @@ -1,128 +1,206 @@ -# The new bytecode - -We will reimplement the VM to use 8bit instruction code. By -bytecode, we mean real byte code. The whole purpose is -reducing the memory consumption of mruby VM. - -# Instructions - -Instructions are bytes. There can be 256 instructions. Currently, we -have 94 instructions. Instructions can take 0 to 3 operands. - -## operands - -The size of operands can be either 8bits, 16bits or 24bits. -In the table.1 below, the second field describes the size (and -sign) of operands. - -* B: 8bit -* S: 16bit -* sS: signed 16bit -* W: 24bit - -## table.1 Instruction Table - -| Instruction Name | Operand type | Semantics | -|--------------------|----------------|------------------------------------------------------------| -| `OP_NOP` | `-` | `no operation` | -| `OP_MOVE` | `BB` | `R(a) = R(b)` | -| `OP_LOADL` | `BB` | `R(a) = Pool(b)` | -| `OP_LOADI` | `BB` | `R(a) = mrb_int(b)` | -| `OP_LOADINEG` | `BB` | `R(a) = mrb_int(-b)` | -| `OP_LOADI__1` | `B` | `R(a) = mrb_int(-1)` | -| `OP_LOADI_0` | `B` | `R(a) = mrb_int(0)` | -| `OP_LOADI_1` | `B` | `R(a) = mrb_int(1)` | -| `OP_LOADI_2` | `B` | `R(a) = mrb_int(2)` | -| `OP_LOADI_3` | `B` | `R(a) = mrb_int(3)` | -| `OP_LOADI_4` | `B` | `R(a) = mrb_int(4)` | -| `OP_LOADI_5` | `B` | `R(a) = mrb_int(5)` | -| `OP_LOADI_6` | `B` | `R(a) = mrb_int(6)` | -| `OP_LOADI_7` | `B` | `R(a) = mrb_int(7)` | -| `OP_LOADI16` | `BS` | `R(a) = mrb_int(b)` | -| `OP_LOADI32` | `BSS` | `R(a) = mrb_int((b<<16)+c)` | -| `OP_LOADSYM` | `BB` | `R(a) = Syms(b)` | -| `OP_LOADNIL` | `B` | `R(a) = nil` | -| `OP_LOADSELF` | `B` | `R(a) = self` | -| `OP_LOADT` | `B` | `R(a) = true` | -| `OP_LOADF` | `B` | `R(a) = false` | -| `OP_GETGV` | `BB` | `R(a) = getglobal(Syms(b))` | -| `OP_SETGV` | `BB` | `setglobal(Syms(b), R(a))` | -| `OP_GETSV` | `BB` | `R(a) = Special[Syms(b)]` | -| `OP_SETSV` | `BB` | `Special[Syms(b)] = R(a)` | -| `OP_GETIV` | `BB` | `R(a) = ivget(Syms(b))` | -| `OP_SETIV` | `BB` | `ivset(Syms(b),R(a))` | -| `OP_GETCV` | `BB` | `R(a) = cvget(Syms(b))` | -| `OP_SETCV` | `BB` | `cvset(Syms(b),R(a))` | -| `OP_GETCONST` | `BB` | `R(a) = constget(Syms(b))` | -| `OP_SETCONST` | `BB` | `constset(Syms(b),R(a))` | -| `OP_GETMCNST` | `BB` | `R(a) = R(a)::Syms(b)` | -| `OP_SETMCNST` | `BB` | `R(a+1)::Syms(b) = R(a)` | -| `OP_GETUPVAR` | `BBB` | `R(a) = uvget(b,c)` | -| `OP_SETUPVAR` | `BBB` | `uvset(b,c,R(a))` | -| `OP_JMP` | `S` | `pc+=a` | -| `OP_JMPIF` | `BS` | `if R(a) pc+=b` | -| `OP_JMPNOT` | `BS` | `if !R(a) pc+=b` | -| `OP_JMPNIL` | `BS` | `if R(a)==nil pc+=b` | -| `OP_JMPUW` | `S` | `unwind_and_jump_to(a)` | -| `OP_EXCEPT` | `B` | `R(a) = exc` | -| `OP_RESCUE` | `BB` | `R(b) = R(a).isa?(R(b))` | -| `OP_RAISEIF` | `B` | `raise(R(a)) if R(a)` | -| `OP_SEND` | `BBB` | `R(a) = call(R(a),Syms(b),R(a+1),...,R(a+c))` | -| `OP_SENDB` | `BBB` | `R(a) = call(R(a),Syms(b),R(a+1),...,R(a+c),&R(a+c+1))` | -| `OP_CALL` | `-` | `R(0) = self.call(frame.argc, frame.argv)` | -| `OP_SUPER` | `BB` | `R(a) = super(R(a+1),... ,R(a+b+1))` | -| `OP_ARGARY` | `BS` | `R(a) = argument array (16=m5:r1:m5:d1:lv4)` | -| `OP_ENTER` | `W` | `arg setup according to flags (23=m5:o5:r1:m5:k5:d1:b1)` | -| `OP_KEY_P` | `BB` | `R(a) = kdict.key?(Syms(b))` | -| `OP_KEYEND` | `-` | `raise unless kdict.empty?` | -| `OP_KARG` | `BB` | `R(a) = kdict[Syms(b)]; kdict.delete(Syms(b))` | -| `OP_RETURN` | `B` | `return R(a) (normal)` | -| `OP_RETURN_BLK` | `B` | `return R(a) (in-block return)` | -| `OP_BREAK` | `B` | `break R(a)` | -| `OP_BLKPUSH` | `BS` | `R(a) = block (16=m5:r1:m5:d1:lv4)` | -| `OP_ADD` | `B` | `R(a) = R(a)+R(a+1)` | -| `OP_ADDI` | `BB` | `R(a) = R(a)+mrb_int(b)` | -| `OP_SUB` | `B` | `R(a) = R(a)-R(a+1)` | -| `OP_SUBI` | `BB` | `R(a) = R(a)-mrb_int(b)` | -| `OP_MUL` | `B` | `R(a) = R(a)*R(a+1)` | -| `OP_DIV` | `B` | `R(a) = R(a)/R(a+1)` | -| `OP_EQ` | `B` | `R(a) = R(a)==R(a+1)` | -| `OP_LT` | `B` | `R(a) = R(a)R(a+1)` | -| `OP_GE` | `B` | `R(a) = R(a)>=R(a+1)` | -| `OP_ARRAY` | `BB` | `R(a) = ary_new(R(a),R(a+1)..R(a+b))` | -| `OP_ARRAY2` | `BBB` | `R(a) = ary_new(R(b),R(b+1)..R(b+c))` | -| `OP_ARYCAT` | `B` | `ary_cat(R(a),R(a+1))` | -| `OP_ARYPUSH` | `BB` | `ary_push(R(a),R(a+1)..R(a+b))` | -| `OP_ARYDUP` | `B` | `R(a) = ary_dup(R(a))` | -| `OP_AREF` | `BBB` | `R(a) = R(b)[c]` | -| `OP_ASET` | `BBB` | `R(b)[c] = R(a)` | -| `OP_APOST` | `BBB` | `*R(a),R(a+1)..R(a+c) = R(a)[b..]` | -| `OP_INTERN` | `B` | `R(a) = intern(R(a))` | -| `OP_STRING` | `BB` | `R(a) = str_dup(Pool(b))` | -| `OP_STRCAT` | `B` | `str_cat(R(a),R(a+1))` | -| `OP_HASH` | `BB` | `R(a) = hash_new(R(a),R(a+1)..R(a+b*2-1))` | -| `OP_HASHADD` | `BB` | `hash_push(R(a),R(a+1)..R(a+b*2))` | -| `OP_HASHCAT` | `B` | `R(a) = hash_cat(R(a),R(a+1))` | -| `OP_LAMBDA` | `BB` | `R(a) = lambda(Irep(b),OP_L_LAMBDA)` | -| `OP_BLOCK` | `BB` | `R(a) = lambda(Irep(b),OP_L_BLOCK)` | -| `OP_METHOD` | `BB` | `R(a) = lambda(Irep(b),OP_L_METHOD)` | -| `OP_RANGE_INC` | `B` | `R(a) = range_new(R(a),R(a+1),FALSE)` | -| `OP_RANGE_EXC` | `B` | `R(a) = range_new(R(a),R(a+1),TRUE)` | -| `OP_OCLASS` | `B` | `R(a) = ::Object` | -| `OP_CLASS` | `BB` | `R(a) = newclass(R(a),Syms(b),R(a+1))` | -| `OP_MODULE` | `BB` | `R(a) = newmodule(R(a),Syms(b))` | -| `OP_EXEC` | `BB` | `R(a) = blockexec(R(a),Irep[b])` | -| `OP_DEF` | `BB` | `R(a).newmethod(Syms(b),R(a+1)); R(a) = Syms(b)` | -| `OP_ALIAS` | `BB` | `alias_method(target_class,Syms(a),Syms(b))` | -| `OP_UNDEF` | `B` | `undef_method(target_class,Syms(a))` | -| `OP_SCLASS` | `B` | `R(a) = R(a).singleton_class` | -| `OP_TCLASS` | `B` | `R(a) = target_class` | -| `OP_DEBUG` | `BBB` | `print a,b,c` | -| `OP_ERR` | `B` | `raise(LocalJumpError, Pool(a))` | -| `OP_EXT1` | `-` | `make 1st operand 16bit` | -| `OP_EXT2` | `-` | `make 2nd operand 16bit` | -| `OP_EXT3` | `-` | `make 1st and 2nd operands 16bit` | -| `OP_STOP` | `-` | `stop VM` | -| ------------------ | -------------- | ---------------------------------------------------------- | + + +# mruby Bytecode + +mruby uses 8-bit instruction opcodes. Each instruction is a single byte, +allowing up to 256 opcodes. Instructions can take 0 to 3 operands. + +## Operands + +The size of operands can be either 8 bits, 16 bits, or 24 bits. +In the instruction table below, the operand type field describes +the size of each operand. + +- `Z`: no operand +- `B`: 8 bits +- `S`: 16 bits +- `W`: 24 bits + +If the first and second operands are of type `B` (8 bits), they may be +extended to 16 bits by the operand extension instruction immediately +preceding them. +See also `OP_EXT1`, `OP_EXT2` and `OP_EXT3`. + +## Instruction Table + +| No. | Instruction Name | Operand type | Semantics | +| --: | ---------------- | ------------ | ----------------------------------------------------------------- | +| 0 | `OP_NOP` | `Z` | no operation | +| 1 | `OP_MOVE` | `BB` | `R[a] = R[b]` | +| 2 | `OP_LOADL` | `BB` | `R[a] = Pool[b]` | +| 3 | `OP_LOADI8` | `BB` | `R[a] = mrb_int(b)` | +| 4 | `OP_LOADINEG` | `BB` | `R[a] = mrb_int(-b)` | +| 5 | `OP_LOADI__1` | `B` | `R[a] = mrb_int(-1)` | +| 6 | `OP_LOADI_0` | `B` | `R[a] = mrb_int(0)` | +| 7 | `OP_LOADI_1` | `B` | `R[a] = mrb_int(1)` | +| 8 | `OP_LOADI_2` | `B` | `R[a] = mrb_int(2)` | +| 9 | `OP_LOADI_3` | `B` | `R[a] = mrb_int(3)` | +| 10 | `OP_LOADI_4` | `B` | `R[a] = mrb_int(4)` | +| 11 | `OP_LOADI_5` | `B` | `R[a] = mrb_int(5)` | +| 12 | `OP_LOADI_6` | `B` | `R[a] = mrb_int(6)` | +| 13 | `OP_LOADI_7` | `B` | `R[a] = mrb_int(7)` | +| 14 | `OP_LOADI16` | `BS` | `R[a] = mrb_int(b)` | +| 15 | `OP_LOADI32` | `BSS` | `R[a] = mrb_int((b<<16)+c)` | +| 16 | `OP_LOADSYM` | `BB` | `R[a] = Syms[b]` | +| 17 | `OP_LOADNIL` | `B` | `R[a] = nil` | +| 18 | `OP_LOADSELF` | `B` | `R[a] = self` | +| 19 | `OP_LOADTRUE` | `B` | `R[a] = true` | +| 20 | `OP_LOADFALSE` | `B` | `R[a] = false` | +| 21 | `OP_GETGV` | `BB` | `R[a] = getglobal(Syms[b])` | +| 22 | `OP_SETGV` | `BB` | `setglobal(Syms[b], R[a])` | +| 23 | `OP_GETSV` | `BB` | `R[a] = Special[Syms[b]]` | +| 24 | `OP_SETSV` | `BB` | `Special[Syms[b]] = R[a]` | +| 25 | `OP_GETIV` | `BB` | `R[a] = ivget(Syms[b])` | +| 26 | `OP_SETIV` | `BB` | `ivset(Syms[b],R[a])` | +| 27 | `OP_GETCV` | `BB` | `R[a] = cvget(Syms[b])` | +| 28 | `OP_SETCV` | `BB` | `cvset(Syms[b],R[a])` | +| 29 | `OP_GETCONST` | `BB` | `R[a] = constget(Syms[b])` | +| 30 | `OP_SETCONST` | `BB` | `constset(Syms[b],R[a])` | +| 31 | `OP_GETMCNST` | `BB` | `R[a] = R[a]::Syms[b]` | +| 32 | `OP_SETMCNST` | `BB` | `R[a+1]::Syms[b] = R[a]` | +| 33 | `OP_GETUPVAR` | `BBB` | `R[a] = uvget(b,c)` | +| 34 | `OP_SETUPVAR` | `BBB` | `uvset(b,c,R[a])` | +| 35 | `OP_GETIDX` | `B` | `R[a] = R[a][R[a+1]]` | +| 36 | `OP_GETIDX0` | `BB` | `R[a] = R[b][0]` | +| 37 | `OP_SETIDX` | `B` | `R[a][R[a+1]] = R[a+2]` | +| 38 | `OP_JMP` | `S` | `pc += a` | +| 39 | `OP_JMPIF` | `BS` | `if R[a] pc += b` | +| 40 | `OP_JMPNOT` | `BS` | `if !R[a] pc += b` | +| 41 | `OP_JMPNIL` | `BS` | `if R[a]==nil pc += b` | +| 42 | `OP_JMPUW` | `S` | `unwind_and_jump_to(a)` | +| 43 | `OP_EXCEPT` | `B` | `R[a] = exc` | +| 44 | `OP_RESCUE` | `BB` | `R[b] = R[a].isa?(R[b])` | +| 45 | `OP_RAISEIF` | `B` | `raise(R[a]) if R[a]` | +| 46 | `OP_MATCHERR` | `B` | `raise NoMatchingPatternError unless R[a]` | +| 47 | `OP_SSEND` | `BBB` | `R[a] = self.send(Syms[b],R[a+1]..,R[a+n+1]:R[a+n+2]..) (c=n\ | +| 48 | `OP_SSEND0` | `BB` | `R[a] = self.send(Syms[b])` (no args) | +| 49 | `OP_SSENDB` | `BBB` | `R[a] = self.send(Syms[b],R[a+1]..,&R[a+n+2k+1])` | +| 50 | `OP_SEND` | `BBB` | `R[a] = R[a].send(Syms[b],R[a+1]..,R[a+n+1]:R[a+n+2]..) (c=n\ | +| 51 | `OP_SEND0` | `BB` | `R[a] = R[a].send(Syms[b])` (no args) | +| 52 | `OP_SENDB` | `BBB` | `R[a] = R[a].send(Syms[b],R[a+1]..,&R[a+n+2k+1])` | +| 53 | `OP_CALL` | `Z` | `self.call(*, **, &)` (tailcall) | +| 54 | `OP_BLKCALL` | `BB` | `R[a] = R[a].call(R[a+1],...,R[a+b])` (direct block call) | +| 55 | `OP_SUPER` | `BB` | `R[a] = super(R[a+1],...,R[a+b+1])` | +| 56 | `OP_ARGARY` | `BS` | `R[a] = argument array (16=m5:r1:m5:d1:lv4)` | +| 57 | `OP_ENTER` | `W` | `arg setup according to flags (24=n1:m5:o5:r1:m5:k5:d1:b1)` | +| 58 | `OP_KEY_P` | `BB` | `R[a] = kdict.key?(Syms[b])` | +| 59 | `OP_KEYEND` | `Z` | `raise unless kdict.empty?` | +| 60 | `OP_KARG` | `BB` | `R[a] = kdict[Syms[b]]; kdict.delete(Syms[b])` | +| 61 | `OP_RETURN` | `B` | `return R[a]` (normal) | +| 62 | `OP_RETURN_BLK` | `B` | `return R[a]` (in-block return) | +| 63 | `OP_RETSELF` | `Z` | `return self` | +| 64 | `OP_RETNIL` | `Z` | `return nil` | +| 65 | `OP_RETTRUE` | `Z` | `return true` | +| 66 | `OP_RETFALSE` | `Z` | `return false` | +| 67 | `OP_BREAK` | `B` | `break R[a]` | +| 68 | `OP_BLKPUSH` | `BS` | `R[a] = block (16=m5:r1:m5:d1:lv4)` | +| 69 | `OP_ADD` | `B` | `R[a] = R[a] + R[a+1]` | +| 70 | `OP_ADDI` | `BB` | `R[a] = R[a] + mrb_int(b)` | +| 71 | `OP_SUB` | `B` | `R[a] = R[a] - R[a+1]` | +| 72 | `OP_SUBI` | `BB` | `R[a] = R[a] - mrb_int(b)` | +| 73 | `OP_ADDILV` | `BBB` | `R[a] = R[a] + mrb_int(c)` (with local variable fallback) | +| 74 | `OP_SUBILV` | `BBB` | `R[a] = R[a] - mrb_int(c)` (with local variable fallback) | +| 75 | `OP_MUL` | `B` | `R[a] = R[a] * R[a+1]` | +| 76 | `OP_DIV` | `B` | `R[a] = R[a] / R[a+1]` | +| 77 | `OP_EQ` | `B` | `R[a] = R[a] == R[a+1]` | +| 78 | `OP_LT` | `B` | `R[a] = R[a] < R[a+1]` | +| 79 | `OP_LE` | `B` | `R[a] = R[a] <= R[a+1]` | +| 80 | `OP_GT` | `B` | `R[a] = R[a] > R[a+1]` | +| 81 | `OP_GE` | `B` | `R[a] = R[a] >= R[a+1]` | +| 82 | `OP_ARRAY` | `BB` | `R[a] = ary_new(R[a],R[a+1]..R[a+b])` | +| 83 | `OP_ARRAY2` | `BBB` | `R[a] = ary_new(R[b],R[b+1]..R[b+c])` | +| 84 | `OP_ARYCAT` | `B` | `ary_cat(R[a],R[a+1])` | +| 85 | `OP_ARYPUSH` | `BB` | `ary_push(R[a],R[a+1]..R[a+b])` | +| 86 | `OP_ARYSPLAT` | `B` | `R[a] = ary_splat(R[a])` | +| 87 | `OP_AREF` | `BBB` | `R[a] = R[b][c]` | +| 88 | `OP_ASET` | `BBB` | `R[b][c] = R[a]` | +| 89 | `OP_APOST` | `BBB` | `*R[a],R[a+1]..R[a+c] = R[a][b..]` | +| 90 | `OP_INTERN` | `B` | `R[a] = intern(R[a])` | +| 91 | `OP_SYMBOL` | `BB` | `R[a] = intern(Pool[b])` | +| 92 | `OP_STRING` | `BB` | `R[a] = str_dup(Pool[b])` | +| 93 | `OP_STRCAT` | `B` | `str_cat(R[a],R[a+1])` | +| 94 | `OP_HASH` | `BB` | `R[a] = hash_new(R[a],R[a+1]..R[a+b*2-1])` | +| 95 | `OP_HASHADD` | `BB` | `hash_push(R[a],R[a+1]..R[a+b*2])` | +| 96 | `OP_HASHCAT` | `B` | `R[a] = hash_cat(R[a],R[a+1])` | +| 97 | `OP_LAMBDA` | `BB` | `R[a] = lambda(Irep[b],L_LAMBDA)` | +| 98 | `OP_BLOCK` | `BB` | `R[a] = lambda(Irep[b],L_BLOCK)` | +| 99 | `OP_METHOD` | `BB` | `R[a] = lambda(Irep[b],L_METHOD)` | +| 100 | `OP_RANGE_INC` | `B` | `R[a] = range_new(R[a],R[a+1],FALSE)` | +| 101 | `OP_RANGE_EXC` | `B` | `R[a] = range_new(R[a],R[a+1],TRUE)` | +| 102 | `OP_OCLASS` | `B` | `R[a] = ::Object` | +| 103 | `OP_CLASS` | `BB` | `R[a] = newclass(R[a],Syms[b],R[a+1])` | +| 104 | `OP_MODULE` | `BB` | `R[a] = newmodule(R[a],Syms[b])` | +| 105 | `OP_EXEC` | `BB` | `R[a] = blockexec(R[a],Irep[b])` | +| 106 | `OP_DEF` | `BB` | `R[a].newmethod(Syms[b],R[a+1]); R[a] = Syms[b]` | +| 107 | `OP_TDEF` | `BBB` | `target_class.newmethod(Syms[b],Irep[c]); R[a] = Syms[b]` | +| 108 | `OP_SDEF` | `BBB` | `R[a].singleton_class.newmethod(Syms[b],Irep[c]); R[a] = Syms[b]` | +| 109 | `OP_ALIAS` | `BB` | `alias_method(target_class,Syms[a],Syms[b])` | +| 110 | `OP_UNDEF` | `B` | `undef_method(target_class,Syms[a])` | +| 111 | `OP_SCLASS` | `B` | `R[a] = R[a].singleton_class` | +| 112 | `OP_TCLASS` | `B` | `R[a] = target_class` | +| 113 | `OP_DEBUG` | `BBB` | `print a,b,c` | +| 114 | `OP_ERR` | `B` | `raise(LocalJumpError, Pool[a])` | +| 115 | `OP_EXT1` | `Z` | make 1st operand (a) 16 bit | +| 116 | `OP_EXT2` | `Z` | make 2nd operand (b) 16 bit | +| 117 | `OP_EXT3` | `Z` | make 1st and 2nd operands 16 bit | +| 118 | `OP_STOP` | `Z` | stop VM | + +## Notes + +### OP_SEND0 / OP_SSEND0 + +These are optimized versions of `OP_SEND` / `OP_SSEND` for zero-argument +method calls (no operand `c` needed). + +### OP_RETSELF / OP_RETNIL / OP_RETTRUE / OP_RETFALSE + +These are optimized return instructions that avoid loading a value into +a register before returning. Common patterns like `attr_reader` methods +(`return self.@x`) and predicate methods (`return true`/`return false`) +benefit from these specialized opcodes. + +### OP_BLKCALL + +Direct block invocation that bypasses method dispatch. Used when calling +a block argument directly (e.g., `yield` or `block.call`). + +### OP_ADDILV / OP_SUBILV + +Optimized integer increment/decrement that keeps operands for method +call fallback when the receiver is not a Fixnum. + +### OP_TDEF / OP_SDEF + +Optimized method definition. `OP_TDEF` defines a method on the +`target_class` directly from an irep without creating an intermediate +`RProc`. `OP_SDEF` does the same for singleton methods. + +### OP_MATCHERR + +Raises `NoMatchingPatternError` when a pattern match fails. Used by +the `case`/`in` pattern matching syntax. + +### OP_GETIDX / OP_GETIDX0 / OP_SETIDX Optimization + +These instructions optimize `[]` and `[]=` access for Array, Hash, and String. + +**OP_GETIDX** uses direct function calls: + +- `Array`: `mrb_ary_entry()` (integer index only) +- `Hash`: `mrb_hash_get()` +- `String`: `mrb_str_aref()` (integer/string/range index) + +**OP_GETIDX0** is a specialized variant for index 0 (e.g., `ary[0]`). + +**OP_SETIDX** uses direct function calls: + +- `Array`: `mrb_ary_set()` (integer index only) +- `Hash`: `mrb_hash_set()` + +**Fallback to method dispatch** occurs when: + +- Object is a subclass (e.g., `MyArray < Array`) +- Object has a singleton class (singleton methods defined) +- Index type is not supported (e.g., non-integer for Array) + +This allows subclasses to override `[]`/`[]=` while base classes remain optimized. diff --git a/doc/internal/vm.md b/doc/internal/vm.md new file mode 100644 index 0000000000..10af1a2828 --- /dev/null +++ b/doc/internal/vm.md @@ -0,0 +1,325 @@ + + +# Virtual Machine Internals + +This document describes mruby's virtual machine for developers +working on `src/vm.c` and related code. + +**Read this if you are:** debugging method dispatch or call frame +issues, working on exception handling, implementing new opcodes, +modifying fiber/coroutine behavior, or optimizing the dispatch loop. + +For the instruction set, see [opcode.md](opcode.md). For the +compiler that generates bytecode, see [compiler.md](compiler.md). + +## Execution Model + +mruby uses a **register-based VM**. Local variables and temporaries +occupy fixed register slots determined at compile time. Each method +call gets its own register window on a shared value stack. + +## Execution Context + +The VM state is stored in `mrb_context`: + +```text +mrb_context ++-- stbase..stend value stack (mrb_value[]) ++-- cibase..ciend call info stack (mrb_callinfo[]) ++-- ci current call frame pointer ++-- status fiber state ++-- prev previous context (fiber chain) ++-- vmexec VM execution state flag +``` + +The value stack and call info stack grow independently. Each fiber +has its own `mrb_context`. + +### Stack Sizing + +- Initial value stack: 128 entries (`STACK_INIT_SIZE`) +- Initial call info stack: 32 entries (`CALLINFO_INIT_SIZE`) +- Growth factor: 1.5x (or 2x with `MRB_STACK_EXTEND_DOUBLING`) +- Minimum growth: 128 entries (`MRB_STACK_GROWTH`) +- Max stack depth: `MRB_STACK_MAX` (0x40000 - 128) +- Max call depth: `MRB_CALL_LEVEL_MAX` (512, or 128 with ASAN) + +Exceeding either limit raises `SystemStackError`. + +When the value stack is reallocated, all `REnv` objects and +`mrb_callinfo` stack pointers are adjusted by the delta +(`envadjust` function). + +## Call Frames + +Each method or block call pushes a `mrb_callinfo` frame: + +```text +mrb_callinfo ++-- n:4 positional argument count (0-14, 15 = varargs) ++-- nk:4 keyword argument count (0-14, 15 = varargs) ++-- cci call context info (NONE, DIRECT, SKIP, RESUMED) ++-- vis visibility flags (public/private/protected) ++-- mid method symbol ++-- proc current RProc ++-- blk block argument (RProc*) ++-- stack pointer into value stack ++-- pc program counter (bytecode position) ++-- u.env closure environment (REnv*) ++-- u.target_class receiver's class +``` + +### Stack Layout Per Frame + +```text +ci->stack: + [0] self (receiver) + [1..n] positional arguments + [n+1..] keyword argument pairs (key, value, key, value, ...) + [bidx] block argument + [bidx+1..] local variables and temporaries +``` + +### Argument Count Encoding + +The `n` and `nk` fields are 4 bits each (0-15). When `n == 15`, +positional arguments are packed into a single Array in register 1. +When `nk == 15`, keyword arguments are packed into a single Hash. + +The block index is calculated by `mrb_bidx(n, nk)`: + +```text +if n == 15: n = 1 (array) +if nk == 15: n += 1 (hash) +else: n += nk * 2 (key-value pairs) +return n + 1 (skip self) +``` + +### Call Context Info (cci) + +| Value | Name | Meaning | +| ----- | --------------- | ------------------------------------- | +| 0 | `CINFO_NONE` | Normal VM-to-VM call | +| 1 | `CINFO_DIRECT` | Explicit VM call (block, lambda.call) | +| 2 | `CINFO_SKIP` | Skip frame in stack traces | +| 3 | `CINFO_RESUMED` | Fiber resumed (stop execution) | + +## Dispatch Loop + +The main loop in `mrb_vm_run()` decodes and dispatches opcodes. +Two dispatch strategies are available: + +- **Computed goto** (default on GCC/Clang): a jump table of label + addresses (`optable[]`) for direct dispatch. Faster due to + better branch prediction. +- **Switch-based** (`MRB_USE_VM_SWITCH_DISPATCH`): a standard + `switch(insn)` statement. Default on MSVC and other compilers. + +The dispatch loop is wrapped in `MRB_TRY`/`MRB_CATCH` for exception +handling (see [Exception Handling](#exception-handling)). + +## Method Dispatch + +When `OP_SEND` (or `OP_SSEND`, `OP_SUPER`) executes: + +### 1. Prepare Arguments + +Determine argument layout. If argument count < 15, the fast path +uses inline registers. Otherwise, arguments are packed into an +Array (varargs mode). + +### 2. Push Call Frame + +```c +ci = cipush(mrb, a, CINFO_DIRECT, NULL, NULL, blk, mid, argc); +``` + +The new frame's stack starts at the previous frame's stack + `a` +(the receiver's register index). + +### 3. Method Lookup + +The lookup sequence: + +1. **Method cache check**: hash table lookup by `(class, mid)`. + Default cache size: `MRB_METHOD_CACHE_SIZE` (256 entries). +2. **Method table walk**: if cache misses, search the receiver's + class method table (`mt`), then walk the superclass chain. +3. **Cache store**: on successful lookup, store in the cache. + +The method cache is invalidated when classes are modified +(`mrb_mc_clear_by_class`). + +### 4. Invoke + +- **Ruby method** (irep-based): extend the stack to `irep->nregs`, + set `ci->pc` to `irep->iseq`, and jump to the new bytecode. +- **C function**: call `func(mrb, recv)` directly, then pop the + call frame and store the return value. + +### 5. Visibility Check + +Private methods are only callable without an explicit receiver. +Protected methods are callable from the same class hierarchy. +Violations raise `NoMethodError`. + +## Exception Handling + +### setjmp/longjmp + +By default, mruby uses `setjmp`/`longjmp` for exception control +flow: + +```c +MRB_TRY(&c_jmp) { + mrb->jmp = &c_jmp; + /* dispatch loop */ +} +MRB_CATCH(&c_jmp) { + /* handle exception */ +} +MRB_END_EXC(&c_jmp); +``` + +With `MRB_USE_CXX_EXCEPTION`, C++ `try`/`catch` is used instead. + +### Handler Table + +Each irep contains a catch handler table (appended after iseq in +memory) with entries for `rescue` and `ensure` blocks: + +```text +mrb_irep_catch_handler ++-- type RESCUE (0) or ENSURE (1) ++-- begin[4] start PC of protected range ++-- end[4] end PC of protected range ++-- target[4] jump target when handler matches +``` + +### Unwinding Process + +When an exception occurs: + +1. Search the current irep's catch handler table (reverse order) + for a handler covering the current PC +2. If an `ensure` handler is found: execute it (may re-raise) +3. If a `rescue` handler is found: jump to handler code +4. If no handler: pop the call frame (`cipop`) and repeat with + the parent frame +5. `CINFO_DIRECT` frames are destroyed during propagation + +## Block and Closure Handling + +### REnv (Environment) + +Closures capture their enclosing scope's variables through `REnv`: + +```text +REnv ++-- stack pointer to captured variable values ++-- cxt owning context (NULL if detached from stack) ++-- mid method symbol ++-- flags length, block index, visibility +``` + +While the defining scope is active, `REnv::stack` points directly +into the VM value stack (shared). This avoids copying. + +### Environment Unsharing + +When a closure outlives its defining scope, `mrb_env_unshare()` +copies the captured variables from the stack to a heap-allocated +buffer: + +```c +mrb_env_unshare(mrb, env, noraise); +``` + +After unsharing, `MRB_ENV_CLOSE(env)` sets `cxt = NULL` to indicate +the environment is detached. A write barrier is issued for GC +correctness. + +### Proc Types + +| Flag | Meaning | +| ------------------- | ------------------------------ | +| `MRB_PROC_CFUNC_FL` | C function (not irep-based) | +| `MRB_PROC_STRICT` | Lambda (strict argument check) | +| `MRB_PROC_ORPHAN` | No environment attachment | +| `MRB_PROC_ENVSET` | Has captured environment | +| `MRB_PROC_SCOPE` | Defines a new variable scope | + +## Fiber Switching + +Fibers are lightweight coroutines. Each fiber has its own +`mrb_context` with separate value and call info stacks. + +### Fiber States + +```text +CREATED --> RUNNING --> SUSPENDED --> TERMINATED + | ^ + +-----------+ + (yield/resume) + TRANSFERRED (via Fiber#transfer) +``` + +### Context Switch + +On `Fiber#resume`: + +1. Save current context state +2. Set `mrb->c` to the fiber's context +3. Push arguments onto the fiber's stack +4. Continue execution in the fiber + +On `Fiber.yield`: + +1. Save fiber context +2. Restore the parent context (`mrb->c = c->prev`) +3. Return yield values to the parent + +### Fiber Termination + +When a fiber completes (`fiber_terminate`): + +1. Unshare any environments that reference the fiber's stack +2. Set status to `TERMINATED` +3. Free the fiber's stacks +4. Switch to the previous context + +### C Function Boundary + +Fibers cannot yield across C function boundaries. You cannot call +`Fiber.yield` from within a C-implemented method (except via +`mrb_fiber_yield` at return). This is because C call frames cannot +be suspended and resumed. + +## GC Integration + +The VM saves the arena index at the start of the dispatch loop: + +```c +int ai = mrb_gc_arena_save(mrb); +``` + +After each C function call, the arena is shrunk back: + +```c +mrb_gc_arena_shrink(mrb, ai); +``` + +This prevents temporary objects created by C functions from +accumulating in the arena. + +Write barriers are issued when environments are detached or closed, +ensuring the incremental GC correctly tracks live references. + +## Source Files + +| File | Contents | +| ----------------------- | ---------------------------------------------- | +| `src/vm.c` | Dispatch loop, method invocation (~1900 lines) | +| `include/mruby.h` | `mrb_state`, `mrb_callinfo`, `mrb_context` | +| `include/mruby/proc.h` | `RProc`, `REnv` structures | +| `include/mruby/throw.h` | `MRB_TRY`/`MRB_CATCH` macros | diff --git a/doc/limitations.md b/doc/limitations.md index ae0ca5de00..b0908583a7 100644 --- a/doc/limitations.md +++ b/doc/limitations.md @@ -1,3 +1,5 @@ + + # Limitations and Differences The philosophy of mruby is to be a lightweight implementation of @@ -13,6 +15,23 @@ This document is collecting these limitations. This document does not contain a complete list of limitations. Please help to improve it by submitting your findings. +## Features provided by mrbgems + +Many Ruby features that CRuby builds into its core are provided by +mrbgems in mruby. Which features are actually available depends on +which mrbgems are linked into the build. The `default.gembox` and +`stdlib.gembox` cover the common cases, but a minimal build can omit +familiar features such as `Kernel#binding` (provided by +`mruby-binding`), `Kernel#catch`/`throw` (by `mruby-catch`), +`Enumerable` extensions, `Comparable`, IO, regular expressions, and +many more. + +This is by design rather than a limitation per se. When porting Ruby +code to mruby, a `NoMethodError` or `NameError` often means "the gem +providing this feature is not linked in" rather than "mruby does not +support it." Adding the relevant gem to the build configuration is +usually enough. + ## `Kernel.raise` in rescue clause `Kernel.raise` without arguments does not raise the current exception within @@ -26,11 +45,11 @@ rescue end ``` -#### Ruby [ruby 2.0.0p645 (2015-04-13 revision 50299)] +#### CRuby `ZeroDivisionError` is raised. -#### mruby [3.1.0 (2022-05-12)] +#### mruby `RuntimeError` is raised instead of `ZeroDivisionError`. To re-raise the exception, you have to do: @@ -62,200 +81,371 @@ end p Liste.new "foobar" ``` -#### Ruby [ruby 2.0.0p645 (2015-04-13 revision 50299)] +#### CRuby -` [] ` +`[]` -#### mruby [3.1.0 (2022-05-12)] +#### mruby `ArgumentError` is raised. -## Method visibility +## `defined?` -For simplicity reasons no method visibility (public/private/protected) is -supported. Those methods are defined, but they are dummy methods. +The `defined?` keyword is considered too complex to be fully +implemented. It is recommended to use `const_defined?` and +other reflection methods instead. ```ruby -class VisibleTest +defined?(Foo) +``` - def public_method; end +#### CRuby - private - def private_method; end +``` +nil +``` -end +#### mruby -p VisibleTest.new.respond_to?(:private_method, false) -p VisibleTest.new.respond_to?(:private_method, true) -``` +`NameError` is raised. -#### Ruby [ruby 2.0.0p645 (2015-04-13 revision 50299)] +## `alias` on global variables -``` -false -true +Aliasing a global variable works in CRuby but is not part +of the ISO standard. + +```ruby +alias $a $__a__ ``` -#### mruby [3.1.0 (2022-05-12)] +#### CRuby -``` -true -true -``` +`nil` -### Visibility Declaration +#### mruby -The declaration form of following visibility methods are not implemented. +Syntax error -* `public` -* `private` -* `protected` -* `module_function` +## Operator modification -Especially, `module_function` method is not dummy, but no declaration form. +Operators on some of the primitive classes cannot be overridden, as they are +optimized in the VM. -``` -module TestModule - module_function - def test_func - p 'test_func called' +```ruby +class String + def + end - - test_func end -p 'ok' +'a' + 'b' ``` -#### Ruby [ruby 2.5.5p157 (2019-03-15 revision 67260)] +#### CRuby -``` -ok -``` +`ArgumentError` is raised. +The re-defined `+` operator does not accept any arguments. -#### mruby [3.1.0 (2022-05-12)] +#### mruby -``` -test.rb:8: undefined method 'test_func' (NoMethodError) -``` +`'ab'` +Behavior of the operator wasn't changed. -## `defined?` +## `nil?` redefinition in conditional expressions -The `defined?` keyword is considered too complex to be fully -implemented. It is recommended to use `const_defined?` and -other reflection methods instead. +Redefinition of `nil?` is ignored in conditional expressions. ```ruby -defined?(Foo) +a = "a" +def a.nil? + true +end +puts(a.nil? ? "truthy" : "falsy") ``` -#### Ruby [ruby 2.0.0p645 (2015-04-13 revision 50299)] +Ruby outputs `truthy`. mruby outputs `falsy`. +## Argument Destructuring + +```ruby +def m(a,(b,c),d); p [a,b,c,d]; end +m(1,[2,3],4) # => [1,2,3,4] ``` -nil + +Destructured arguments (`b` and `c` in above example) cannot be accessed +from the default expression of optional arguments and keyword arguments, +since actual assignment is done after the evaluation of those default +expressions. Thus: + +```ruby +def f(a,(b,c),d=b) + p [a,b,c,d] +end +f(1,[2,3]) ``` -#### mruby [3.1.0 (2022-05-12)] +CRuby gives `[1,2,3,nil]`. mruby raises `NoMethodError` for `b`. -`NameError` is raised. +Keyword argument expansion has similar restrictions. The following example, gives `[1, 1]` for CRuby, mruby raises `NoMethodError` for `b`. -## `alias` on global variables +```ruby +def g(a: 1, b: a) + p [a,b] +end +g(a:1) +``` -Aliasing a global variable works in CRuby but is not part -of the ISO standard. +## No Double Dispatch in Module Loading + +To make implementation simpler, mruby does not use double dispatching in module loading (`include`/`prepend`/`extend`). +Those method internally called corresponding actual load methods (`append_features`/`prepend_features`/`extend_object`). +But they are rarely overloaded, consumes more memory, and make loading little bit slower. As a Ruby implementation for the smaller device, +we decided mruby simpler. ```ruby -alias $a $__a__ +module M + def self.append_features(mod) + p :append + end +end + +class C + include M +end ``` -#### Ruby [ruby 2.0.0p645 (2015-04-13 revision 50299)] +#### CRuby -` nil ` +Prints `:append`. -#### mruby [3.1.0 (2022-05-12)] +#### mruby -Syntax error +Nothing printed (since `include` does not call `append_features` internally). -## Operator modification +## No `#hash` call for small hashes + +For performance reasons, mruby avoids calling the `#hash` method on keys when a hash table is small. This means that custom `#hash` methods on key objects may not be executed. -An operator can't be overwritten by the user. +## Pattern Matching + +Pattern matching is only partially supported in mruby. Currently, only the rightward assignment operator (`=>`) with simple variable binding is implemented. ```ruby -class String - def + - end +expr => var # Supported: assigns expr to var +``` + +#### CRuby + +Full pattern matching with `case/in` syntax and various pattern types: + +```ruby +case [1, 2, 3] +in [a, b, c] + puts "#{a}, #{b}, #{c}" # => "1, 2, 3" end -'a' + 'b' +case {name: "Alice", age: 30} +in {name:, age:} + puts "#{name} is #{age}" # => "Alice is 30" +end ``` -#### Ruby [ruby 2.0.0p645 (2015-04-13 revision 50299)] +#### mruby -`ArgumentError` is raised. -The re-defined `+` operator does not accept any arguments. +Only rightward assignment with simple variable binding: -#### mruby [3.1.0 (2022-05-12)] +```ruby +[1, 2, 3] => x +puts x # => [1, 2, 3] +``` -` 'ab' ` -Behavior of the operator wasn't changed. +The following are **not supported**: + +- `case/in` syntax +- Array patterns: `in [a, b, c]` +- Hash patterns: `in {name:, age:}` +- Guard clauses: `in pattern if condition` +- Pin operator: `in ^variable` +- Find patterns: `in [*, x, *]` +- Alternative patterns: `in pattern1 | pattern2` +- Boolean pattern check: `value in pattern` + +Note: mruby does provide `Array#deconstruct` and `Hash#deconstruct_keys` methods for future pattern matching compatibility. -## `Kernel#binding` is not supported until [3.0.0 (2021-03-05)] +## No Refinements -`Kernel#binding` method is not supported. +Module refinements (`refine`, `using`) are not supported in mruby. -#### Ruby [ruby 2.5.1p57 (2018-03-29 revision 63029)] +## No `Encoding` Class +mruby does not have an `Encoding` class. Strings are treated as +byte sequences by default. UTF-8 aware string operations can be +enabled with the `MRB_UTF8_STRING` compile flag. + +## Integer Precision Varies by Boxing Mode + +Integer size depends on the value boxing configuration: + +| Configuration | Integer range | +| ----------------------------- | ---------------- | +| Word boxing, 64-bit (default) | roughly +/- 2^62 | +| Word boxing, 32-bit (default) | roughly +/- 2^30 | +| NaN boxing (64-bit only) | -2^31 to 2^31-1 | + +Code relying on 64-bit integer precision may behave differently +across configurations. The `mruby-bigint` gem provides +arbitrary-precision integers when included. + +## No `ObjectSpace.each_object` by Default + +`ObjectSpace` is only available via the `mruby-objectspace` gem +(included in the `stdlib` gembox). Even with the gem, +`ObjectSpace.each_object` has limited functionality compared +to CRuby. + +## No Implicit Type Conversion (`to_int`, `to_str`, `to_ary`, ...) + +mruby does not perform implicit type conversion through methods +like `to_int`, `to_str`, `to_ary`, or `to_hash`. CRuby uses these +to let user-defined classes duck-type as built-in types — for +example `Array#[]` calls `to_int` on its argument, `String#+` calls +`to_str`, and multiple assignment calls `to_ary` on its right-hand +side. mruby's built-in operations require the actual built-in type +and do not consult these conversion methods. + +```ruby +class MyInt; def to_int; 42; end; end +class MyStr; def to_str; "x"; end; end +class MyAry; def to_ary; [1,2,3]; end; end ``` -$ ruby -e 'puts Proc.new {}.binding' -# + +#### CRuby + +``` +[1,2,3][MyInt.new] # => nil (to_int called -> ary[42]) +"a" + MyStr.new # => "ax" (to_str called) +a, b, c = MyAry.new # => a=1, b=2, c=3 (to_ary called) ``` -#### mruby [3.0.0 (2021-03-05)] +#### mruby ``` -$ ./bin/mruby -e 'puts Proc.new {}.binding' -trace (most recent call last): - [0] -e:1 --e:1: undefined method 'binding' (NoMethodError) +[1,2,3][MyInt.new] # TypeError +"a" + MyStr.new # TypeError +a, b, c = MyAry.new # a=, b=nil, c=nil (treated as single value) ``` -#### mruby [3.1.0 (2022-05-12)] +Identity versions of `to_int`, `to_str`, `to_sym`, and `to_hash` +remain defined on the corresponding built-in types so that +`respond_to?(:to_str)`-style checks work for built-in instances. +`Float#to_int` and `Array#to_ary` are intentionally not defined. -`binding` has been supported since 3.1.0. +Explicit conversion methods (`to_i`, `to_s`, `to_a`) work as in +CRuby and are called by features such as string interpolation and +the splat operator (`*obj`). -See also [mrbgems/mruby-binding](https://github.com/mruby/mruby/tree/master/mrbgems/mruby-binding) and [mrbgems/mruby-binding-core](https://github.com/mruby/mruby/tree/master/mrbgems/mruby-binding-core). +This is a deliberate trade-off: implicit conversion forces every +coercion site to go through method dispatch and can silently mask +type-mismatch bugs. -## `nil?` redefinition in conditional expressions +## Nested `def` in Singleton-Method Context -Redefinition of `nil?` is ignored in conditional expressions. +`def` written inside a singleton method (`def self.foo`) is placed +on a different class in mruby than in CRuby. CRuby registers the +inner method as an instance method of the lexical enclosing class. +mruby registers it as a method of the enclosing receiver's +singleton class, which makes it visible as a class method of the +enclosing class. ```ruby -a = "a" -def a.nil? - true +class SomeClass + def self.class_method + def nested; 'nested!'; end + end end -puts(a.nil? ? "truthy" : "falsy") +SomeClass.class_method ``` -Ruby outputs `falsy`. mruby outputs `truthy`. +#### CRuby -## Argument Destructuring +``` +SomeClass.nested # NoMethodError +SomeClass.new.nested # => "nested!" (instance method) +``` -```ruby -def m(a,(b,c),d); p [a,b,c,d]; end -m(1,[2,3],4) # => [1,2,3,4] +#### mruby + +``` +SomeClass.nested # => "nested!" (class method) +SomeClass.new.nested # NoMethodError ``` -Destructured arguments (`b` and `c` in above example) cannot be accessed -from the default expression of optional arguments and keyword arguments, -since actual assignment is done after the evaluation of those default -expressions. Thus: +Writing nested `def` like this is unusual; this difference rarely +surfaces in practical code. + +## `Proc#dup` / `Proc#clone` is Always Orphan + +A `dup` or `clone` of a block given to a method is always treated as +an orphan block in mruby — calling it raises `LocalJumpError` if the +block contains `break` or `return`. CRuby is finer-grained: the copy +inherits the orphan status of its original, so the copy only becomes +orphan once the original yielding method returns. ```ruby -def f(a,(b,c),d=b) - p [a,b,c,d] +def m(&b) + b.dup end -f(1,[2,3]) + +x = m { break 1 } +x.call ``` -CRuby gives `[1,2,3,nil]`. mruby raises `NoMethodError` for `b`. +#### CRuby + +``` +LocalJumpError # raised only after m returns; if called inside m, + # the dup is still a live block +``` + +#### mruby + +``` +LocalJumpError # always raised — the dup is orphan from the moment + # it is created +``` + +mruby's stricter rule keeps `RProc` from needing a back-pointer to +the original block (which would also enlarge the GC mark set). + +## `Class#initialize` Can Be Re-Invoked + +CRuby raises `TypeError: already initialized class` when `initialize` +is invoked on a class that has already been set up. mruby's +`Class#initialize` has no such guard — invoking it on an existing +class through `__send__`, `send`, or `UnboundMethod#bind_call` +silently succeeds. The superclass argument is ignored in this case, +so the call cannot rewrite the class hierarchy; only the block (if +any) is evaluated with the class as receiver. + +```ruby +Klass = Class.new +Klass.__send__(:initialize) {} +``` + +#### CRuby + +``` +TypeError: already initialized class +``` + +#### mruby + +``` +The block is evaluated in the context of Klass; no error is raised. +The superclass is not changed even when one is passed as an argument. +``` + +`Module#initialize` is re-callable in both implementations, so this +divergence is `Class`-specific. Adding the CRuby check would require +an additional flag bit on every `RClass`; mruby leaves the bit +unspent because no destructive side effects are possible through +this path. diff --git a/doc/mruby3.0.md b/doc/mruby3.0.md index 359d82c7b0..aed22fce2d 100644 --- a/doc/mruby3.0.md +++ b/doc/mruby3.0.md @@ -7,17 +7,17 @@ Typical build configuration files are located in `build_config` directory. For examples: -* `default`: the default configuration -* `host-gprof`: compiles with `gprof` for performance tuning -* `host-m32`: compiles in gcc 32bit mode on 64bit platforms -* `boxing`: compiles all three boxing options -* `clang-asan`: compiles with `clang`'s Address Sanitizer +- `default`: the default configuration +- `host-gprof`: compiles with `gprof` for performance tuning +- `host-m32`: compiles in gcc 32-bit mode on 64-bit platforms +- `boxing`: compiles all three boxing options +- `clang-asan`: compiles with `clang`'s Address Sanitizer You can specify the build configuration file with the `MRUBY_CONFIG` environment variable (or `CONFIG` in short). If the value specified by `MRUBY_CONFIG` is not the path to the configuration file, `build_config/${MRUBY_CONFIG}.rb` is -used. So you can specify it as `rake MRUBY_CONFIG=boxing`, +used. So you can specify it as `rake MRUBY_CONFIG=boxing`, for example. # Build Configuration Contribution @@ -32,20 +32,20 @@ pull-request. We have ported some new syntax from CRuby. -* Single line pattern matching (`12 => x`); +- Single line pattern matching (`12 => x`); mruby matches only with local variables at the moment -* Numbered block parameter (`x.map{_1 * 2}`) -* End-less `def` (`def double(x) = x*2`) +- Numbered block parameter (`x.map{_1 * 2}`) +- End-less `def` (`def double(x) = x*2`) # Configuration Options Changed ## Renamed for consistency Some configuration macro names are changed for consistency (use `MRB_USE_XXX` - or `MRB_NO_XXX`). +or `MRB_NO_XXX`). -| mruby2 | mruby3 | -|--------------------------------|---------------------------| +| mruby2 | mruby3 | +| ------------------------------ | ------------------------- | | `MRB_ENABLE_ALL_SYMBOLS` | `MRB_USE_ALL_SYMBOLS` | | `MRB_ENABLE_CXX_ABI` | `MRB_USE_CXX_ABI` | | `MRB_ENABLE_CXX_EXCEPTION` | `MRB_USE_CXX_EXCEPTION` | @@ -59,11 +59,11 @@ Some configuration macro names are changed for consistency (use `MRB_USE_XXX` | `ENABLE_READLINE` | `MRB_USE_READLINE` | | `DISABLE_MIRB_UNDERSCORE` | `MRB_NO_MIRB_UNDERSCORE` | -* `MRB_USE_FLOAT32` is changed from `MRB_USE_FLOAT` to make sure `float` here - means using single precision float, and not the opposite of `MRB_NO_FLOAT`. -* `MRB_USE_METHOD_T_STRUCT` uses `struct` version of `mrb_method_t`. More - portable but consumes more memory. Turned on by default on 32bit platforms. -* `MRB_` prefix is added to those without. +- `MRB_USE_FLOAT32` is changed from `MRB_USE_FLOAT` to make sure `float` here + means using single-precision float, and not the opposite of `MRB_NO_FLOAT`. +- `MRB_USE_METHOD_T_STRUCT` uses `struct` version of `mrb_method_t`. More + portable but consumes more memory. Turned on by default on 32-bit platforms. +- `MRB_` prefix is added to those without. ## `MRB_NO_BOXING` @@ -81,7 +81,7 @@ does not fit in `Fixnum` integers. ## `MRB_NAN_BOXING` -Pack `mrb_value` in a floating pointer number. Nothing +Pack `mrb_value` in a floating-point number. Nothing changed from previous versions. ## `MRB_USE_MALLOC_TRIM` @@ -112,44 +112,45 @@ $ bin/mruby -r lib1.rb -r lib2.rb < app.mrb Instructions that access pool[i]/syms[i] where i>255. -* `OP_LOADL16` -* `OP_STRING16` -* `OP_LOADSYM16` +- `OP_LOADL16` +- `OP_STRING16` +- `OP_LOADSYM16` Instructions that load a 32-bit integer. -* `OP_LOADI32` +- `OP_LOADI32` Instruction that unwinds jump table for rescue/ensure. -* `OP_JMPUW` +- `OP_JMPUW` Renamed from `OP_RAISE` -* `OP_RAISEIF` +- `OP_RAISEIF` Instruction that is reserved for the future keyword argument support. -* OP_SENDVK +- OP_SENDVK ## Removed Instructions Instructions for old exception handling -* `OP_ONERR` -* `OP_POPERR` -* `OP_EPUSH` -* `OP_EPOP` +- `OP_ONERR` +- `OP_POPERR` +- `OP_EPUSH` +- `OP_EPOP` No more operand extension -* `OP_EXT1` -* `OP_EXT2` -* `OP_EXT3` +- `OP_EXT1` +- `OP_EXT2` +- `OP_EXT3` ## Changed Instructions -Jump addresses used to be specified by absolute offset from the start of `iseq`. Now they are relative offset from the address of the next instruction. +Jump addresses used to be specified by absolute offset from the start of `iseq`. Now they are relative offset from the +address of the next instruction. ## `Random` now use `xoshiro128++`. @@ -159,4 +160,4 @@ For better and faster random number generation. Preallocated symbols are interned at compile-time. They can be accessed via symbols macros (e.g. `MRB_SYM()`). -See [Symbols](https://github.com/mruby/mruby/blob/master/doc/guides/symbol.md). +See [Symbols](guides/symbol.md). diff --git a/doc/mruby3.1.md b/doc/mruby3.1.md index 158b8ea7e1..953536198c 100644 --- a/doc/mruby3.1.md +++ b/doc/mruby3.1.md @@ -20,7 +20,7 @@ Keyword arguments are basically separated from ordinal arguments. Some configuration macros are available: -- `MRB_WORDBOX_NO_FLOAT_TRUNCATE`: by default, float values are packed in the word if possible, but define this macro to allocate float values in the heap. +- `MRB_WORDBOX_NO_INLINE_FLOAT` (formerly `MRB_WORDBOX_NO_FLOAT_TRUNCATE`): by default, float values are packed in the word if possible, but define this macro to allocate float values in the heap. - `MRB_USE_RO_DATA_P_ETEXT`: define this macro if `_etext` is available on your platform. - `MRB_NO_DEFAULT_RO_DATA_P`: define this macro to avoid using predefined `mrb_ro_data_p()` function @@ -37,7 +37,7 @@ We have added several new build configurations in the `build_config` directory. - `nintendo_switch.rb` - `serenity.rb` - `minimal`: minimal configuration -- `host-f32`: compiles with `mrb_float` as 32 bit `float` +- `host-f32`: compiles with `mrb_float` as 32-bit `float` - `host-nofloat`: compiles with no float configuration - `android_arm64_v8a.rb`: renamed from `android_arm64-v8a.rb` @@ -141,8 +141,8 @@ Method calling instructions are unified. Now `OP_SEND` and `OP_SENDB` (method ca The brief description of the instructions: -|`OP_SEND` | BBB | `R[a] = R[a].call(Syms[b],R[a+1..n],R[a+n+1],R[a+n+2]..nk) c=n|nk<<4` | -|`OP_SENDB` | BBB | `R[a] = R[a].call(Syms[b],R[a+1..n],R[a+n+1..nk],R[a+n+2..nk],&R[a+n+2*nk+2]) c=n|nk<<4` | +|`OP_SEND` | BBB | `R[a] = R[a].call(Syms[b],R[a+1..n],R[a+n+1],R[a+n+2]..nk) c=n|nk<<4` | +|`OP_SENDB` | BBB | `R[a] = R[a].call(Syms[b],R[a+1..n],R[a+n+1..nk],R[a+n+2..nk],&R[a+n+2*nk+2]) c=n|nk<<4` | Operand C specifies the number of arguments. Lower 4 bits (`n`) represents the number of ordinal arguments, and higher 4 bits (`nk`) represents the number of keyword arguments. When `n == 15`, the method takes arguments packed in an array. When `nk == 15`, the method takes keyword arguments are packed in a hash. @@ -155,12 +155,12 @@ Now takes 2 operands and pushes multiple entries to an array. ### Word Boxing -`MRB_WORD_BOXING` now packs floating point numbers in the word, if the size of `mrb_float` is equal or smaller than the size of `mrb_int` by default. -If the size of `mrb_float` and `mrb_int` are same, the last 2 bits in the `mrb_float` are trimmed and used as flags. If you need full precision, you need to define `MRB_WORDBOX_NO_FLOAT_TRUNCATE` as described above. +`MRB_WORD_BOXING` now packs floating-point numbers in the word, if the size of `mrb_float` is equal or smaller than the size of `mrb_int` by default. +If the size of `mrb_float` and `mrb_int` are same, the last 2 bits in the `mrb_float` are trimmed and used as flags. If you need full precision, you need to define `MRB_WORDBOX_NO_INLINE_FLOAT` (formerly `MRB_WORDBOX_NO_FLOAT_TRUNCATE`) as described above. ### NaN Boxing -Previous NaN boxing packs values in NaN representation, but pointer retrievals are far more frequent than floating point number references. So we add constant offset to NaN representation to clear higher bits of pointer representation. This representation is called "Favor Pointer" NaN Boxing. +Previous NaN boxing packs values in NaN representation, but pointer retrievals are far more frequent than floating-point number references. So we add constant offset to NaN representation to clear higher bits of pointer representation. This representation is called "Favor Pointer" NaN Boxing. Also, previous NaN boxing limit the size of `mrb_int` to 4 bytes (32 bits) to fit in NaN values. Now we allocate integer values in the heap, if the value does not fit in the 32 bit range, just like we did in Word Boxing. @@ -197,7 +197,7 @@ For better and faster hash values. --- -# Major bug fixes +# Major bugfixes - Fix infinite recursive call bugs in integer division [98799aa6](https://github.com/mruby/mruby/commit/98799aa6) - Fix to raise TypeError with super inside instance_eval / class_eval [#5476](https://github.com/mruby/mruby/pull/5476) @@ -225,31 +225,31 @@ For better and faster hash values. Following CVEs are fixed in this release. -- [CVE-2021-4110](https://nvd.nist.gov/vuln/detail/CVE-2021-4110) -- [CVE-2021-4188](https://nvd.nist.gov/vuln/detail/CVE-2021-4188) -- [CVE-2022-0080](https://nvd.nist.gov/vuln/detail/CVE-2022-0080) -- [CVE-2022-0240](https://nvd.nist.gov/vuln/detail/CVE-2022-0240) -- [CVE-2022-0326](https://nvd.nist.gov/vuln/detail/CVE-2022-0326) -- [CVE-2022-0481](https://nvd.nist.gov/vuln/detail/CVE-2022-0481) -- [CVE-2022-0631](https://nvd.nist.gov/vuln/detail/CVE-2022-0631) -- [CVE-2022-0632](https://nvd.nist.gov/vuln/detail/CVE-2022-0632) -- [CVE-2022-0890](https://nvd.nist.gov/vuln/detail/CVE-2022-0890) -- [CVE-2022-1071](https://nvd.nist.gov/vuln/detail/CVE-2022-1071) -- [CVE-2022-1106](https://nvd.nist.gov/vuln/detail/CVE-2022-1106) -- [CVE-2022-1201](https://nvd.nist.gov/vuln/detail/CVE-2022-1201) -- [CVE-2022-1427](https://nvd.nist.gov/vuln/detail/CVE-2022-1427) +- [CVE-2021-4110](https://www.cve.org/CVERecord?id=CVE-2021-4110) +- [CVE-2021-4188](https://www.cve.org/CVERecord?id=CVE-2021-4188) +- [CVE-2022-0080](https://www.cve.org/CVERecord?id=CVE-2022-0080) +- [CVE-2022-0240](https://www.cve.org/CVERecord?id=CVE-2022-0240) +- [CVE-2022-0326](https://www.cve.org/CVERecord?id=CVE-2022-0326) +- [CVE-2022-0481](https://www.cve.org/CVERecord?id=CVE-2022-0481) +- [CVE-2022-0631](https://www.cve.org/CVERecord?id=CVE-2022-0631) +- [CVE-2022-0632](https://www.cve.org/CVERecord?id=CVE-2022-0632) +- [CVE-2022-0890](https://www.cve.org/CVERecord?id=CVE-2022-0890) +- [CVE-2022-1071](https://www.cve.org/CVERecord?id=CVE-2022-1071) +- [CVE-2022-1106](https://www.cve.org/CVERecord?id=CVE-2022-1106) +- [CVE-2022-1201](https://www.cve.org/CVERecord?id=CVE-2022-1201) +- [CVE-2022-1427](https://www.cve.org/CVERecord?id=CVE-2022-1427) ## Unaffected CVEs Following CVEs do not cause problems in this release. They are fixed in the later release. -- [CVE-2022-0481](https://nvd.nist.gov/vuln/detail/CVE-2022-0481) -- [CVE-2022-0525](https://nvd.nist.gov/vuln/detail/CVE-2022-0525) -- [CVE-2022-0570](https://nvd.nist.gov/vuln/detail/CVE-2022-0570) -- [CVE-2022-0614](https://nvd.nist.gov/vuln/detail/CVE-2022-0614) -- [CVE-2022-0623](https://nvd.nist.gov/vuln/detail/CVE-2022-0623) -- [CVE-2022-0630](https://nvd.nist.gov/vuln/detail/CVE-2022-0630) -- [CVE-2022-0717](https://nvd.nist.gov/vuln/detail/CVE-2022-0817) -- [CVE-2022-1212](https://nvd.nist.gov/vuln/detail/CVE-2022-1212) -- [CVE-2022-1276](https://nvd.nist.gov/vuln/detail/CVE-2022-1276) -- [CVE-2022-1286](https://nvd.nist.gov/vuln/detail/CVE-2022-1286) +- [CVE-2022-0481](https://www.cve.org/CVERecord?id=CVE-2022-0481) +- [CVE-2022-0525](https://www.cve.org/CVERecord?id=CVE-2022-0525) +- [CVE-2022-0570](https://www.cve.org/CVERecord?id=CVE-2022-0570) +- [CVE-2022-0614](https://www.cve.org/CVERecord?id=CVE-2022-0614) +- [CVE-2022-0623](https://www.cve.org/CVERecord?id=CVE-2022-0623) +- [CVE-2022-0630](https://www.cve.org/CVERecord?id=CVE-2022-0630) +- [CVE-2022-0717](https://www.cve.org/CVERecord?id=CVE-2022-0817) +- [CVE-2022-1212](https://www.cve.org/CVERecord?id=CVE-2022-1212) +- [CVE-2022-1276](https://www.cve.org/CVERecord?id=CVE-2022-1276) +- [CVE-2022-1286](https://www.cve.org/CVERecord?id=CVE-2022-1286) diff --git a/doc/mruby3.2.md b/doc/mruby3.2.md index bbb274e762..ec616bea53 100644 --- a/doc/mruby3.2.md +++ b/doc/mruby3.2.md @@ -6,12 +6,18 @@ - Anonymous arguments `*`, `**`, `&` can be passed for forwarding. - Multi-precision integer is available now via `mruby-bigint` gem. +# mruby VM and bytecode + +- `OP_ARYDUP` was renamed to `OP_ARYSPLAT`. The instruction name + was changed but instruction number and basic behavior have not + changed (except that `ARYDUP nil` makes `[]`). + # Tools ## `mruby` - `-b` only specifies the script is the binary. The files loaded by `-r` are not affected by the option. -- `mruby` now loads complied binary if the suffix is `.mrb`. +- `mruby` now loads compiled binary if the suffix is `.mrb`. ## `mrbc` @@ -19,26 +25,55 @@ # mrbgems -## mruby-errno - -- `mruby-errno` gem is now bundled. - ## mruby-class-ext - Add `Class#subclasses` method. - Add `Module#undefined_instance_methods` method. +## New bundled gems + +- mruby-errno from +- mruby-set from +- mruby-dir from +- mruby-data + +# Breaking Changes + +## `mrb_vm_run()` may detach top-level local variables referenced from blocks + +When the `mrb_vm_run()` function (including `mrb_top_run()`) is called, +the previous top-level local variables referenced from blocks is detached under either of the following conditions. + +- If the `stack_keep` parameter is given as 0. +- If the number of variables in `irep` to be executed is less than the number of previous top-level local variables. + +This change also affects API functions such as `mrb_load_string()` and `mrb_load_file()`. +The conditions under which the previous top-level local variables referenced from blocks is detached in these functions are as follows: + +- If the function has no `mrbc_context` pointer parameter, or the `mrbc_context` pointer parameter is set to `NULL`. +- If the number of variables held in the `mrbc_context` pointer is less than the number of previous top-level local variables. + +Intentional reliance on previous behavior may cause compatibility problems in your application. + # CVEs Following CVEs are fixed. -- [CVE-2022-0481](https://nvd.nist.gov/vuln/detail/CVE-2022-0481) -- [CVE-2022-0525](https://nvd.nist.gov/vuln/detail/CVE-2022-0525) -- [CVE-2022-0570](https://nvd.nist.gov/vuln/detail/CVE-2022-0570) -- [CVE-2022-0614](https://nvd.nist.gov/vuln/detail/CVE-2022-0614) -- [CVE-2022-0623](https://nvd.nist.gov/vuln/detail/CVE-2022-0623) -- [CVE-2022-0630](https://nvd.nist.gov/vuln/detail/CVE-2022-0630) -- [CVE-2022-0717](https://nvd.nist.gov/vuln/detail/CVE-2022-0817) -- [CVE-2022-1212](https://nvd.nist.gov/vuln/detail/CVE-2022-1212) -- [CVE-2022-1276](https://nvd.nist.gov/vuln/detail/CVE-2022-1276) -- [CVE-2022-1286](https://nvd.nist.gov/vuln/detail/CVE-2022-1286) +- [CVE-2022-0080](https://www.cve.org/CVERecord?id=CVE-2022-0080) +- [CVE-2022-0240](https://www.cve.org/CVERecord?id=CVE-2022-0240) +- [CVE-2022-0326](https://www.cve.org/CVERecord?id=CVE-2022-0326) +- [CVE-2022-0481](https://www.cve.org/CVERecord?id=CVE-2022-0481) +- [CVE-2022-0525](https://www.cve.org/CVERecord?id=CVE-2022-0525) +- [CVE-2022-0570](https://www.cve.org/CVERecord?id=CVE-2022-0570) +- [CVE-2022-0614](https://www.cve.org/CVERecord?id=CVE-2022-0614) +- [CVE-2022-0623](https://www.cve.org/CVERecord?id=CVE-2022-0623) +- [CVE-2022-0630](https://www.cve.org/CVERecord?id=CVE-2022-0630) +- [CVE-2022-0631](https://www.cve.org/CVERecord?id=CVE-2022-0631) +- [CVE-2022-0632](https://www.cve.org/CVERecord?id=CVE-2022-0632) +- [CVE-2022-0717](https://www.cve.org/CVERecord?id=CVE-2022-0717) +- [CVE-2022-0890](https://www.cve.org/CVERecord?id=CVE-2022-0890) +- [CVE-2022-1106](https://www.cve.org/CVERecord?id=CVE-2022-1106) +- [CVE-2022-1212](https://www.cve.org/CVERecord?id=CVE-2022-1212) +- [CVE-2022-1276](https://www.cve.org/CVERecord?id=CVE-2022-1276) +- [CVE-2022-1286](https://www.cve.org/CVERecord?id=CVE-2022-1286) +- [CVE-2022-1934](https://www.cve.org/CVERecord?id=CVE-2022-1934) diff --git a/doc/mruby3.3.md b/doc/mruby3.3.md new file mode 100644 index 0000000000..eec7732fb3 --- /dev/null +++ b/doc/mruby3.3.md @@ -0,0 +1,186 @@ +# User visible changes in `mruby3.3` from `mruby3.2` + +"**_NOTE_**:" are changes to be aware of. + +# The language + +- aliases work properly with `super` ([2ad3f0e](https://github.com/mruby/mruby/commit/2ad3f0e)) +- `callee` method work differently with aliases in mruby ([f2dc76e](https://github.com/mruby/mruby/commit/f2dc76e)) +- define `Kernel#respond_to_missing?` method ([347586e](https://github.com/mruby/mruby/commit/347586e)) +- `_inspect` method (`inspect` with recursive check) is removed + ([e2bbf75](https://github.com/mruby/mruby/commit/e2bbf75), [5cb0c74](https://github.com/mruby/mruby/commit/5cb0c74), [113565a](https://github.com/mruby/mruby/commit/113565a), + [0713f2a](https://github.com/mruby/mruby/commit/0713f2a), [6ae6b63](https://github.com/mruby/mruby/commit/6ae6b63), [fc9fffc](https://github.com/mruby/mruby/commit/fc9fffc)) +- `__printstr__` method is removed; use `print` instead + ([acecee0](https://github.com/mruby/mruby/commit/acecee0), [192e6e3](https://github.com/mruby/mruby/commit/192e6e3)) +- New method `String#bytesplice` ([5274647](https://github.com/mruby/mruby/commit/5274647), [a2e2e83](https://github.com/mruby/mruby/commit/a2e2e83)) +- Allow `return` in blocks to cross C boundaries ([#6125](https://github.com/mruby/mruby/pull/6125)) + +# Configuration + +- mruby can be built using Docker now. Try `docker-compose build` for example. ([#5961](https://github.com/mruby/mruby/pull/5961)) +- New Platform: DJGPP (MS-DOS) ([#6022](https://github.com/mruby/mruby/pull/6022)) +- New Platform: Nintendo Wii ([#6086](https://github.com/mruby/mruby/pull/6086)) +- Improved Platform: Android ([#6013](https://github.com/mruby/mruby/pull/6013)) +- Improved Platform: Dreamcast ([#6130](https://github.com/mruby/mruby/pull/6130)) +- Allow tests to be disabled for specific gems; warn about disabled tests ([#6012](https://github.com/mruby/mruby/pull/6012)) +- Replace `MRB_NO_DIRECT_THREADING` with `MRB_USE_VM_SWITCH_DISPATCH` ([#5902](https://github.com/mruby/mruby/pull/5902)) + +# mruby memory API + +- `mrb_default_allocf` can be overridden by the application ([34c5d96](https://github.com/mruby/mruby/commit/34c5d96)) +- `mrb_open_allocf` will be deprecated ([cfee5c2](https://github.com/mruby/mruby/commit/cfee5c2)) + +# Changes in C API + +- add new error handling API functions ([8c8bbd9](https://github.com/mruby/mruby/commit/8c8bbd9)) +- Add `mrb_vm_ci_env_clear()` function with `MRB_API` ([#5945](https://github.com/mruby/mruby/pull/5945)) +- a new function `mrb_check_frozen_value()` ([ccdf75c](https://github.com/mruby/mruby/commit/ccdf75c)) +- avoid formatting in `mrb_bug()` ([82a48bd](https://github.com/mruby/mruby/commit/82a48bd))
+ **_NOTE_**: If you are using it, you must give a simple string or replace it with a call to `mrb_raise()` series. +- stop using `mrbc_` prefix for compiler context ([c5e3cbe](https://github.com/mruby/mruby/commit/c5e3cbe))
+ The same names are provided as before, but we recommend replacing them. +- Allow `Class#allocate` to be prohibited + ([#5979](https://github.com/mruby/mruby/pull/5979), [#6122](https://github.com/mruby/mruby/pull/6122), [#6123](https://github.com/mruby/mruby/pull/6123))
+ To disable `#allocate`, use `MRB_UNDEF_ALLOCATOR()`. + This is also automatically applied when the subclass is created, but to explicitly allow it, use `MRB_DEFINE_ALLOCATOR()`. + +# Changes in mrbgems + +- **default.gembox**: Add mruby debugger mrdb (`mruby-bin-debugger`) ([#5966](https://github.com/mruby/mruby/pull/5966)) +- **mruby-bin-config**: new options `--cxx`, `--cxxflags`, `--as`, `--asflags`, `--objc`, `--objcflags` ([#6054](https://github.com/mruby/mruby/pull/6054)) +- **mruby-binding**: renamed from `mruby-binding-core` of mruby3.2 ([11af5db](https://github.com/mruby/mruby/commit/11af5db))
+ **_NOTE_**: If using `mruby-binding-core` of mruby 3.2, replace it with `mruby-binding`. +- **mruby-binding**: implemented `Binding#initialize_copy` method ([#5517](https://github.com/mruby/mruby/pull/5517)) +- **mruby-binding**: `Kernel#binding` responds only to calls from Ruby ([#5981](https://github.com/mruby/mruby/pull/5981)) +- **mruby-compar-ext**: Comparable#clamp to accept nil as arguments ([836bebc](https://github.com/mruby/mruby/commit/836bebc)) +- **mruby-compiler**: add print name for identifier tokens ([d7b2e3a](https://github.com/mruby/mruby/commit/d7b2e3a)) +- **mruby-data**: allow empty Data ([927a9df](https://github.com/mruby/mruby/commit/927a9df)) +- **mruby-enumerator**: remove internal attribute methods `obj`, `args`, `kwd`, `meth`, `fib`. ([735fa24](https://github.com/mruby/mruby/commit/735fa24)) +- **mruby-enumerator**: add Enumerator#size ([861f8bd](https://github.com/mruby/mruby/commit/861f8bd)) +- **mruby-eval**: merged `mruby-binding` of mruby3.2 ([501b22a](https://github.com/mruby/mruby/commit/501b22a), [#5989](https://github.com/mruby/mruby/pull/5989))
+ **_NOTE_**: If using `mruby-binding` of mruby 3.2, replace it with `mruby-eval`. +- **mruby-fiber**: Add a new `mrb_fiber_new()` with `MRB_API` ([#6097](https://github.com/mruby/mruby/pull/6097)) +- **mruby-fiber**: Allows calling `Fiber#resume` from C ([#6106](https://github.com/mruby/mruby/pull/6106)) +- **mruby-fiber**: `Fiber#to_s` format changed ([#6105](https://github.com/mruby/mruby/pull/6105)) +- **mruby-io**: add File#atime and File#ctime ([321cfe9](https://github.com/mruby/mruby/commit/321cfe9)) +- **mruby-io**: Add "x" mode option for `IO.open` ([#6081](https://github.com/mruby/mruby/pull/6081)) +- **mruby-io**: File.new should not take blocks ([53de964](https://github.com/mruby/mruby/commit/53de964)) +- **mruby-method**: `Method#to_s` format changed ([f5bc82f](https://github.com/mruby/mruby/commit/f5bc82f), [02f189c](https://github.com/mruby/mruby/commit/02f189c)) +- **mruby-numeric-ext**: `int.pow(n,m)` to take bigint as exponential ([d482eab](https://github.com/mruby/mruby/commit/d482eab)) +- **mruby-pack**: support new directives `j`, `J`, `b`, `B`, `#` + ([2a1e3a5](https://github.com/mruby/mruby/commit/2a1e3a5), [e7021f1](https://github.com/mruby/mruby/commit/e7021f1), [e17f325](https://github.com/mruby/mruby/commit/e17f325)) +- **mruby-range-ext**: new method `Range#overlap?` ([384d0e2](https://github.com/mruby/mruby/commit/384d0e2)) +- **mruby-string-ext**: Add `String#valid_encoding?` method ([eabe2d9](https://github.com/mruby/mruby/commit/eabe2d9)) +- **mruby-struct**: allow empty Struct when a name is not given ([c212ede](https://github.com/mruby/mruby/commit/c212ede)) +- **mruby-time**: should allow year before 1900 ([e5de08b](https://github.com/mruby/mruby/commit/e5de08b)) +- **mruby-time**: support bigint to time_t if necessary ([7096d27](https://github.com/mruby/mruby/commit/7096d27)) +- **mruby-time**: need to handle negative time_t ([b064d7e](https://github.com/mruby/mruby/commit/b064d7e)) + +# Changes in build system + +- Extended `rake install` task ([#5928](https://github.com/mruby/mruby/pull/5928))
+ **_NOTE_**: Due to this impact, executable files in the `mruby/bin/` directory by default are now symbolic links (batch files on Windows). + If previously relied on those executables, should be replaced with direct references to the entity created under the build directory (e.g. `mruby/build/host/bin/`). +- Encode and decode escape characters for presym ([#6011](https://github.com/mruby/mruby/pull/6011)) +- Rakefile: remove default build target directories in `deep_clean` ([#6032](https://github.com/mruby/mruby/pull/6032), [1e38569](https://github.com/mruby/mruby/commit/1e38569)) + +# Other breaking changes + +- `mrb_f_raise()` is now an internal function + ([#5923](https://github.com/mruby/mruby/pull/5923), [#6070](https://github.com/mruby/mruby/pull/6070)) +- `mrb_make_exception()` is now an internal function with different parameters + ([431f83e](https://github.com/mruby/mruby/commit/431f83e), [78137f3](https://github.com/mruby/mruby/commit/78137f3)) +- The `File#path` method no longer uses the `#to_path` method for implicit conversion + ([d86c4a7](https://github.com/mruby/mruby/commit/d86c4a7)) +- stop mrb isolation for each test file ([a20fbe5](https://github.com/mruby/mruby/commit/a20fbe5)) +- RBreak remembers the CI location ([#6103](https://github.com/mruby/mruby/pull/6103)) + +# Bugs Fixed + +- [#5724](https://github.com/mruby/mruby/issues/5724) Rational#\*\* is missing +- [#5725](https://github.com/mruby/mruby/issues/5725) weird const_missing exceptions in mrblib code +- [#5789](https://github.com/mruby/mruby/issues/5789) No memory release of backtrace information due to stack error +- [#5932](https://github.com/mruby/mruby/issues/5932) How to create a block using the C API? mrb_yield keeps crashing! +- [#5943](https://github.com/mruby/mruby/issues/5943) TCPSocket#write is failed +- [#5944](https://github.com/mruby/mruby/issues/5944) Behavior of calling method with a hash variable +- [#5946](https://github.com/mruby/mruby/pull/5946) Don't switch constant search path from modules to Object +- [#5949](https://github.com/mruby/mruby/issues/5949) Caller appears to report wrong line when block passed and brackets omitted +- [0906cd7](https://github.com/mruby/mruby/commit/0906cd7) numeric.c: fix rounding function issues with big numbers +- [#5974](https://github.com/mruby/mruby/issues/5974) Invalid escape sequences in gem_init.c on windows +- [#5975](https://github.com/mruby/mruby/issues/5975) Equals comparison fails on extreme ends of 64-bit integers +- [#5985](https://github.com/mruby/mruby/issues/5985) Sign extension with OP_LOADI32 in get_int_operand() +- [#5986](https://github.com/mruby/mruby/issues/5986) Fix bugs in String#bytesplice +- [#5987](https://github.com/mruby/mruby/issues/5987) ~(-1 << 64) is incorrect +- [#5991](https://github.com/mruby/mruby/issues/5991) 'gets' method not working in mruby-3.2.0 +- [#5994](https://github.com/mruby/mruby/pull/5994) fix typo in mrbgems/mruby-io/src/io.c +- [#5995](https://github.com/mruby/mruby/issues/5995) One seemingly unnecessary parameter is passed in the block parameters +- [#6008](https://github.com/mruby/mruby/pull/6008) Make "bintest" independent of directory +- [b47c8b7](https://github.com/mruby/mruby/commit/b47c8b7) gc.c (clear_all_old): fix a generational GC bug +- [#6029](https://github.com/mruby/mruby/issues/6029) mruby build fails under mrbgems directory +- [a264965](https://github.com/mruby/mruby/commit/a264965) mruby-os-memsize/memsize.c: fix irep size calculation +- [3310e10](https://github.com/mruby/mruby/commit/3310e10) mruby-test/mrbgem.rake: fix mrb_state handling bug +- [#6041](https://github.com/mruby/mruby/issues/6041) GC Performance may have degraded +- [#6044](https://github.com/mruby/mruby/issues/6044) Generated presym/table.h contains invalid characters +- [#6051](https://github.com/mruby/mruby/issues/6051) Null pointer dereference in mrb_addrinfo_unix_path +- [#6052](https://github.com/mruby/mruby/issues/6052) Null pointer dereference while handling the Proc class +- [#6055](https://github.com/mruby/mruby/pull/6055) Fix libmruby name for VisualC++ +- [#6060](https://github.com/mruby/mruby/issues/6060) SEGFAULT Issue Related to Fiber Usage in ngx_mruby Development +- [#6061](https://github.com/mruby/mruby/issues/6061) Performance issue in String#codepoints +- [#6064](https://github.com/mruby/mruby/issues/6064) MRUBY_PACKAGE_DIR does not always have a value. +- [#6065](https://github.com/mruby/mruby/issues/6065) Null pointer dereference while handling the Proc class +- [#6066](https://github.com/mruby/mruby/issues/6066) Null pointer dereference involving Struct.new() +- [#6067](https://github.com/mruby/mruby/issues/6067) Null pointer dereference in mrb_string_value_cstr +- [#6068](https://github.com/mruby/mruby/issues/6068) Stack overflow in mrb_vm_exec +- [#6076](https://github.com/mruby/mruby/pull/6076) Fixed unwinding block that could point to invalid PC +- [#6084](https://github.com/mruby/mruby/issues/6084) Incorrect symbolic sinks in binary built on Linux +- [#6087](https://github.com/mruby/mruby/issues/6087) 'Remote branch HEAD not found in upstream origin' error on build +- [#6089](https://github.com/mruby/mruby/issues/6089) binding.eval() handles def expressions differently from CRuby +- [#6098](https://github.com/mruby/mruby/issues/6098) Fails to call superclass of wrapped method +- [#6099](https://github.com/mruby/mruby/issues/6099) `ensure` section is not executed if the function exits via a return in a proc +- [#6108](https://github.com/mruby/mruby/issues/6108) VM crashes with break +- [#6118](https://github.com/mruby/mruby/pull/6118) Fixed IO#read with buf +- [#6120](https://github.com/mruby/mruby/pull/6120) Set EBADF if check_file_descriptor() fails +- [#6126](https://github.com/mruby/mruby/pull/6126) Fixed return value of `OP_RETURN_BLK` called directly under C function +- [#6134](https://github.com/mruby/mruby/issues/6134) String#unpack1 returns an array instead of a single string +- [#6136](https://github.com/mruby/mruby/pull/6136) Fixed when combined `mrb_fiber_resume()` and `Fiber#transfer` + +# Pull Requests (User Visible Ones) + +- [#5517](https://github.com/mruby/mruby/pull/5517) Fixed local variables not separated between copied binding objects +- [#5902](https://github.com/mruby/mruby/pull/5902) Replace `MRB_NO_DIRECT_THREADING` with `MRB_USE_VM_SWITCH_DISPATCH` +- [#5923](https://github.com/mruby/mruby/pull/5923) Demotion `mrb_f_raise()` from `MRB_API` +- [#5928](https://github.com/mruby/mruby/pull/5928) Improved `rake install` +- [#5945](https://github.com/mruby/mruby/pull/5945) Avoid exposure for `REnv` objects +- [#5946](https://github.com/mruby/mruby/pull/5946) Don't switch constant search path from modules to Object +- [#5966](https://github.com/mruby/mruby/pull/5966) Update default.gembox add mruby debugger mrdb +- [#5979](https://github.com/mruby/mruby/pull/5979) Allow Class#allocate to be prohibited +- [#5981](https://github.com/mruby/mruby/pull/5981) `Kernel#binding` responds only to calls from Ruby +- [#5989](https://github.com/mruby/mruby/pull/5989) Integrate mruby-binding-eval into mruby-eval +- [#5961](https://github.com/mruby/mruby/pull/5961) Add Docker to build and run all mruby tests. Run pre-commit and generate YARD docs with Docker +- [#5994](https://github.com/mruby/mruby/pull/5994) fix typo in mrbgems/mruby-io/src/io.c +- [#6008](https://github.com/mruby/mruby/pull/6008) Make "bintest" independent of directory +- [#6009](https://github.com/mruby/mruby/pull/6009) Avoid adding /bintest which does not exist +- [#6011](https://github.com/mruby/mruby/pull/6011) Encode and decode escape characters for presym +- [#6012](https://github.com/mruby/mruby/pull/6012) Allow tests to be disabled for specific gems; warn about disabled tests +- [#6013](https://github.com/mruby/mruby/pull/6013) Fix Android toolchain +- [#6022](https://github.com/mruby/mruby/pull/6022) Build configuration for MS-DOS and DJGPP +- [#6032](https://github.com/mruby/mruby/pull/6032) Rake: update task clean to remove bin and build folders +- [#6045](https://github.com/mruby/mruby/pull/6045) Fixes escape sequence bug and enhancements in Presym scanning +- [#6054](https://github.com/mruby/mruby/pull/6054) Extends `bin/mruby-config` +- [#6055](https://github.com/mruby/mruby/pull/6055) Fix libmruby name for VisualC++ +- [#6070](https://github.com/mruby/mruby/pull/6070) Demotion mrb_f_raise() in kernel.c from MRB_API too +- [#6076](https://github.com/mruby/mruby/pull/6076) Fixed unwinding block that could point to invalid PC +- [#6081](https://github.com/mruby/mruby/pull/6081) Add "x" mode option for IO.open +- [#6086](https://github.com/mruby/mruby/pull/6086) Add build config for Nintendo Wii +- [#6097](https://github.com/mruby/mruby/pull/6097) Add a new mrb_fiber_new() with MRB_API +- [#6103](https://github.com/mruby/mruby/pull/6103) RBreak remembers the CI location +- [#6105](https://github.com/mruby/mruby/pull/6105) Implement `Fiber#to_s` method +- [#6106](https://github.com/mruby/mruby/pull/6106) Ease fiber limitations +- [#6118](https://github.com/mruby/mruby/pull/6118) Fixed IO#read with buf +- [#6120](https://github.com/mruby/mruby/pull/6120) Set EBADF if check_file_descriptor() fails +- [#6122](https://github.com/mruby/mruby/pull/6122) Prohibit `Class#allocate` in a different way +- [#6123](https://github.com/mruby/mruby/pull/6123) Inherit `MRB_FL_UNDEF_ALLOCATE` in subclasses +- [#6125](https://github.com/mruby/mruby/pull/6125) Allow `OP_RETURN_BLK` to cross C boundaries +- [#6126](https://github.com/mruby/mruby/pull/6126) Fixed return value of `OP_RETURN_BLK` called directly under C function +- [#6130](https://github.com/mruby/mruby/pull/6130) `dreamcast_shelf build config`: complete overhaul +- [#6136](https://github.com/mruby/mruby/pull/6136) Fixed when combined `mrb_fiber_resume()` and `Fiber#transfer` diff --git a/doc/mruby3.4.md b/doc/mruby3.4.md new file mode 100644 index 0000000000..a22c08f5d2 --- /dev/null +++ b/doc/mruby3.4.md @@ -0,0 +1,251 @@ +# User visible changes in `mruby3.4` from `mruby3.3` + +"**_NOTE_**:" are changes to be aware of. + +# The language + +- mruby now supports `private` and `protected` visibility ([b0db0bd](https://github.com/mruby/mruby/commit/b0db0bd)) +- Maximum length of inlined symbols reduced from 5 to 4 characters to provide space for visibility flags ([6442a01](https://github.com/mruby/mruby/commit/6442a01)) +- Many methods are made private according to CRuby visibility ([4a0e806](https://github.com/mruby/mruby/commit/4a0e806)) +- Generate OP_SSEND for `self.method` type calls ([111fe4b](https://github.com/mruby/mruby/commit/111fe4b)) +- `initialize` method will be always private ([eb8b412](https://github.com/mruby/mruby/commit/eb8b412)) +- Add new hooks `method_removed`, `method_undefined` ([9c74f6e](https://github.com/mruby/mruby/commit/9c74f6e)) +- Add new hooks `singleton_method_removed`, `singleton_method_undefined` ([0863c08](https://github.com/mruby/mruby/commit/0863c08)) +- Updated `OP_DEF` output from codedump ([3a3e877](https://github.com/mruby/mruby/commit/3a3e877)) +- Better handling of binary strings, e.g. String#b ([b0127f0](https://github.com/mruby/mruby/commit/b0127f0)) +- Hash `to_s` format has changed ([baeeb5e](https://github.com/mruby/mruby/commit/baeeb5e)) +- Some encoding related method such as `#force_encoding` ([e47b4ca](https://github.com/mruby/mruby/commit/e47b4ca)), `#b` ([b0127f0](https://github.com/mruby/mruby/commit/b0127f0)) +- Constant folding for `String#+` ([6687bdd](https://github.com/mruby/mruby/commit/6687bdd)) +- Remove Float bit-operation ([db8368f](https://github.com/mruby/mruby/commit/db8368f)) +- use SWAR technique for strlen performance ([cbb31e6](https://github.com/mruby/mruby/commit/cbb31e6)) +- use merge sort for `Array#sort` ([5bd63d6](https://github.com/mruby/mruby/commit/5bd63d6)) + +# Changes in C API + +- pool.c renamed to mempool.c (and mrb_pool to mrb_mempool) ([49525fa](https://github.com/mruby/mruby/commit/49525fa)) +- mrb_pool_value renamed to mrb_irep_pool to reduce confusion ([62ef5db](https://github.com/mruby/mruby/commit/62ef5db)) +- rename BOXNIX_SET_VALUE to BOXNO_SET_VALUE ([#6397](https://github.com/mruby/mruby/pull/6397)) +- `MRB_FROZEN_P()` is replaced by `mrb_frozen_p()` ([c11d18e](https://github.com/mruby/mruby/commit/c11d18e)) +- rename `color` to `gc_color` ([0e79f6b](https://github.com/mruby/mruby/commit/0e79f6b), [1e36d76](https://github.com/mruby/mruby/commit/1e36d76)) +- add `obj->frozen` instead of flags `MRB_SET_FROZEN_FLAG`/`MRB_UNSET_FROZEN_FLAG` ([8276143](https://github.com/mruby/mruby/commit/8276143)) + +# Build & Configuration + +- New Build Target: `test:run:serial`, `test:run:serial:lib`, `test:run:serial:bin` ([#6423](https://github.com/mruby/mruby/pull/6423)) +- New Platform: PlayStation Portable ([#6022](https://github.com/mruby/mruby/pull/6465)) +- New Platform: emscripten ([#6487](https://github.com/mruby/mruby/pull/6487)) +- New Config: no-float (with MRB_NO_FLOAT) ([32200f1](https://github.com/mruby/mruby/commit/32200f1)) + +# Changes in mrbgems + +- **mruby-print**: removed; if you do not use `mruby-io`, mruby use `#print` etc. in the core ([8c8bbd9](https://github.com/mruby/mruby/commit/8c8bbd9)) +- **mruby-enum-lazy**: Add Enumerable::Lazy#grep_v to mruby-enum-lazy ([#6171](https://github.com/mruby/mruby/pull/6171)) +- **mruby-io**: Add `level` argument to `File.dirname` ([#6463](https://github.com/mruby/mruby/pull/6463)) +- **mruby-io**: File.absolute_path? ([#6482](https://github.com/mruby/mruby/pull/6482)) +- **mruby-io**: File.absolute_path ([96113a2](https://github.com/mruby/mruby/commit/96113a2)) +- **mruby-toplevel-ext**: top-level public/private/protected moved to the core ([2a876d2](https://github.com/mruby/mruby/commit/2a876d2)) +- **mruby-metaprog**: method list methods now works according to the visibility ([9229da1](https://github.com/mruby/mruby/commit/9229da1)) +- **mruby-metaprog**: `public_instance_methods`, `private_instance_methods`, `protected_instance_methods` ([9e3e7b2](https://github.com/mruby/mruby/commit/9e3e7b2)) +- **mruby-encoding**: MRB_UTF8_STRING turned on automatically with this gem ([74bdae9](https://github.com/mruby/mruby/commit/74bdae9)) + +# Fixed GitHub Issues + +- [#6173](https://github.com/mruby/mruby/issues/6173) Fails to build with tcc(Tiny C Compiler) +- [#6156](https://github.com/mruby/mruby/issues/6156) '/LIBPATH' issue +- [#6183](https://github.com/mruby/mruby/issues/6183) ".e".to_f returns NAN +- [#6182](https://github.com/mruby/mruby/issues/6182) mrb_read_float() converts "0.3" with a small error compared to strtod() +- [#6210](https://github.com/mruby/mruby/issues/6210) Unary minus seems broken +- [#6255](https://github.com/mruby/mruby/issues/6255) Wrong number of characters in broken UTF-8 string +- [#4038](https://github.com/mruby/mruby/issues/4038) Heap buffer overflow in OP_ENTER +- [#6262](https://github.com/mruby/mruby/issues/6262) Unable to define == for objects when using Array#delete +- [#6267](https://github.com/mruby/mruby/issues/6267) When MRB_UTF8_STRING is enabled, giving byte characters for String#index and String#split gives wrong results +- [#6277](https://github.com/mruby/mruby/issues/6277) MSVC: can't use malloc() in string.c with WIN32_LEAN_AND_MEAN +- [#6240](https://github.com/mruby/mruby/issues/6240) Differentiate between lib and lib64 in the build settings. +- [#6304](https://github.com/mruby/mruby/issues/6304) Calling method_missing with only Kwargs passes arguments incorrectly +- [#6317](https://github.com/mruby/mruby/issues/6317) mrb_gc_register() may cause GC and collect the object being protected +- [#6307](https://github.com/mruby/mruby/issues/6307) Planned change patch for mrb_vm_exec() +- [#6298](https://github.com/mruby/mruby/issues/6298) foo :bar {} is legal in mruby? +- [#6326](https://github.com/mruby/mruby/issues/6326) Detect “Use-after-free” with address sanitizer +- [#5358](https://github.com/mruby/mruby/issues/5358) static warning from getpwnam +- [#6339](https://github.com/mruby/mruby/issues/6339) mrb_ary_delete() may refer to an invalid address (use-after-free) +- [#6346](https://github.com/mruby/mruby/issues/6346) Block kwargs are passed as last positional arg when using yield +- [#6365](https://github.com/mruby/mruby/issues/6365) powl() not available when compiling for Dreamcast +- [#6369](https://github.com/mruby/mruby/issues/6369) 100x Performance Regression from 3.1 +- [#6270](https://github.com/mruby/mruby/issues/6270) NODE_ZSUPER from deeply nested blocks will result in a truncated digits for block index in OP_ARGARY +- [#6297](https://github.com/mruby/mruby/issues/6297) Assigning to a block variable changes the actual block (thus affecting block_given? and yield) +- [#6389](https://github.com/mruby/mruby/issues/6389) instance_exec named block args don't work properly +- [#6388](https://github.com/mruby/mruby/issues/6388) Recent commit broke my windows build +- [#6411](https://github.com/mruby/mruby/issues/6411) Wrong function unwinding when using return in a block +- [#6439](https://github.com/mruby/mruby/issues/6439) OP_JMPUW does not call the ensure block when it jumps to the beginning of the begin block +- [#6441](https://github.com/mruby/mruby/issues/6441) break inside while loop will execute the ensure block outside of the while loop +- [#6453](https://github.com/mruby/mruby/issues/6453) Bigint: incorrect behavior of ^ operator +- [#6452](https://github.com/mruby/mruby/issues/6452) Bigint: weird mod behavior +- [#6451](https://github.com/mruby/mruby/issues/6451) Bigint: incorrect division behavior +- [#6456](https://github.com/mruby/mruby/issues/6456) bigint: bug with division of a small number by a bigint +- [#6466](https://github.com/mruby/mruby/issues/6466) mruby-bin-mruby using Kernel#p and Kernel#print in bintest fails test +- [#6467](https://github.com/mruby/mruby/issues/6467) Heap-Use-After-Free due to Recursive group_by Calls +- [#6471](https://github.com/mruby/mruby/issues/6471) Discrepancy in codegen for binary operations between master branch and 3.3.0 +- [#6477](https://github.com/mruby/mruby/issues/6477) heap-buffer-overflow in mrb_vm_exec +- [#6485](https://github.com/mruby/mruby/issues/6485) Hash#rehash does not check if the hash is frozen +- [#6483](https://github.com/mruby/mruby/issues/6483) Hash#default_proc= accepts arbitrary objects that are not callable +- [#6491](https://github.com/mruby/mruby/issues/6491) Destroy existing string literals when composing string literals + +# Merged Pull Requests (User Visible Ones) + +- [#6171](https://github.com/mruby/mruby/pull/6171) Add Enumerable::Lazy#grep_v to mruby-enum-lazy +- [#6174](https://github.com/mruby/mruby/pull/6174) Fix MRUBY_PACKAGE_DIR in mruby-config.bat +- [#6175](https://github.com/mruby/mruby/pull/6175) Corrected strange conditional in mrb_vm_run() +- [#6176](https://github.com/mruby/mruby/pull/6176) Stop assuming alias proc in CI_PROC_SET() +- [#6177](https://github.com/mruby/mruby/pull/6177) gha: add macOS 14 to the build +- [#6184](https://github.com/mruby/mruby/pull/6184) Remove the L_STOP label +- [#6185](https://github.com/mruby/mruby/pull/6185) Added mrb_callinfo::u.keep_context for clarity +- [#6186](https://github.com/mruby/mruby/pull/6186) Omit NULL check of e->cxt in OP_RETURN_BLK +- [#6191](https://github.com/mruby/mruby/pull/6191) Speed up symbol equality comparison +- [#6192](https://github.com/mruby/mruby/pull/6192) Fix `OP_STOP` with exception +- [#6193](https://github.com/mruby/mruby/pull/6193) Fix wrong assertion in `OP_SENDB`. +- [#6194](https://github.com/mruby/mruby/pull/6194) Simplify the calculation of the number of closures in `MRB_TT_FIBER` +- [#6197](https://github.com/mruby/mruby/pull/6197) Fix int_xor to call flo_xor. +- [#6201](https://github.com/mruby/mruby/pull/6201) tasks/doc.rake: standardize the `rake doc` error messages +- [#6202](https://github.com/mruby/mruby/pull/6202) Remove the `.yardoc` folder with `rake doc:clean:api` +- [#6204](https://github.com/mruby/mruby/pull/6204) Clean up the `.editorconfig` file +- [#6209](https://github.com/mruby/mruby/pull/6209) Minor `.gitignore` clean up +- [#6211](https://github.com/mruby/mruby/pull/6211) Minor `.gitignore` clean up; order entries +- [#6216](https://github.com/mruby/mruby/pull/6216) Shared empty `iv_tbl` of module +- [#6217](https://github.com/mruby/mruby/pull/6217) Strict declaration for `mrb_istruct_size()` +- [#6219](https://github.com/mruby/mruby/pull/6219) Avoid assigning a fixed value in the loop +- [#6220](https://github.com/mruby/mruby/pull/6220) Reorganize `mrb_cache_entry` and `mrb_method_t` types +- [#6221](https://github.com/mruby/mruby/pull/6221) Arranging `each_backtrace()` +- [#6222](https://github.com/mruby/mruby/pull/6222) Need to synchronize `dbg->regs` after VM call in `mrdb` +- [#6224](https://github.com/mruby/mruby/pull/6224) `mrb_env_unshare()` to break the link to fiber +- [#6225](https://github.com/mruby/mruby/pull/6225) Revert "Adjust environment when `mrb_exec_irep` happened." +- [#6227](https://github.com/mruby/mruby/pull/6227) fix: `Array#shuffle(!)` result distribution +- [#6228](https://github.com/mruby/mruby/pull/6228) Revert "`env` referred from top-level callinfo should not be unshared; fix #4019" +- [#6230](https://github.com/mruby/mruby/pull/6230) Fix status of fiber after switched by exception raised +- [#6231](https://github.com/mruby/mruby/pull/6231) Add a way to let other gems handle closing of fds in mruby-io +- [#6232](https://github.com/mruby/mruby/pull/6232) Fold the code for freeing `env` +- [#6233](https://github.com/mruby/mruby/pull/6233) Free stack memory at end of fiber +- [#6235](https://github.com/mruby/mruby/pull/6235) fix `Array#delete` always firing the block when deleting `nil` +- [#6236](https://github.com/mruby/mruby/pull/6236) unify the code for filter methods (and speed up `#reject!`) +- [#6237](https://github.com/mruby/mruby/pull/6237) Stricter env objects to attach to ci +- [#6238](https://github.com/mruby/mruby/pull/6238) Minimize zero initialization of the stack +- [#6243](https://github.com/mruby/mruby/pull/6243) Fixed base64 decoding in `mruby-pack` +- [#6244](https://github.com/mruby/mruby/pull/6244) Revise scope of role of `mrb_vm_run()` +- [#6246](https://github.com/mruby/mruby/pull/6246) Fix typo in `test/t/hash.rb` +- [#6249](https://github.com/mruby/mruby/pull/6249) Fix spelling in `src/vm.c` +- [#6250](https://github.com/mruby/mruby/pull/6250) Fix spelling +- [#6251](https://github.com/mruby/mruby/pull/6251) Clean up root move `CODEOWNERS` to `.github` directory +- [#6253](https://github.com/mruby/mruby/pull/6253) Allow recycling fibers by GC if not referenced directly +- [#6256](https://github.com/mruby/mruby/pull/6256) Update documentation for `mrb_top_run()` +- [#6257](https://github.com/mruby/mruby/pull/6257) fix some mrbconf.md typos +- [#6260](https://github.com/mruby/mruby/pull/6260) Remove `exc_caught` from `mrb_vm_exec()` +- [#6261](https://github.com/mruby/mruby/pull/6261) fix: `to_a` integer ranges with `begin > end` failing +- [#6263](https://github.com/mruby/mruby/pull/6263) fix: `Array#delete` mistakenly calling block even if not passed +- [#6264](https://github.com/mruby/mruby/pull/6264) Must pass keyword arguments for `Kernel#to_enum` +- [#6265](https://github.com/mruby/mruby/pull/6265) Fixes `Dir.children` and `Dir.each_child` +- [#6266](https://github.com/mruby/mruby/pull/6266) Passes the nonexistent key as a block argument in `Array#delete` +- [#6273](https://github.com/mruby/mruby/pull/6273) Improvements to `mrb_protect_atexit()` +- [#6275](https://github.com/mruby/mruby/pull/6275) Fixed `Binding#eval` that failed to assign to the same variable +- [#6276](https://github.com/mruby/mruby/pull/6276) Always run `atexit` on the top-level call frame +- [#6279](https://github.com/mruby/mruby/pull/6279) Include headers for malloc() explicitly; fix #6277 +- [#6280](https://github.com/mruby/mruby/pull/6280) Remove `MRB_ENV_CLOSED` flag +- [#6281](https://github.com/mruby/mruby/pull/6281) Fixes local variables in `mruby-binding`. +- [#6283](https://github.com/mruby/mruby/pull/6283) Simplify `uvenv()` +- [#6288](https://github.com/mruby/mruby/pull/6288) Detach `env` of ci explicitly on atexit +- [#6289](https://github.com/mruby/mruby/pull/6289) Simplify `OP_RETURN_BLK` and `OP_BREAK` +- [#6290](https://github.com/mruby/mruby/pull/6290) Allow to change the output directory name of the `libmruby` file +- [#6293](https://github.com/mruby/mruby/pull/6293) Changed the instruction table in `opcode.md` +- [#6294](https://github.com/mruby/mruby/pull/6294) Optimise `mrb_iv_get` +- [#6302](https://github.com/mruby/mruby/pull/6302) Minor cleanup in mrb_str_init +- [#6303](https://github.com/mruby/mruby/pull/6303) mrb_str_aset_m() should return replace instead of str +- [#6305](https://github.com/mruby/mruby/pull/6305) Protect keyword arguments in `prepare_missing()` +- [#6308](https://github.com/mruby/mruby/pull/6308) Assume that `MRB_CATCH()` has `mrb->exc` set +- [#6310](https://github.com/mruby/mruby/pull/6310) Doubling the call stack when extending it +- [#6311](https://github.com/mruby/mruby/pull/6311) Added fast-path for positional arguments less than 15 in `OP_SEND` +- [#6312](https://github.com/mruby/mruby/pull/6312) Omit error checking at `OP_RETURN`, `OP_RETURN_BLK` and `OP_BREAK` +- [#6313](https://github.com/mruby/mruby/pull/6313) Fix wrong column number in opcode.md +- [#6314](https://github.com/mruby/mruby/pull/6314) Optimize even?/odd? for big integers +- [#6318](https://github.com/mruby/mruby/pull/6318) Shrink variables in `mrb_vm_exec()` +- [#6320](https://github.com/mruby/mruby/pull/6320) Shrinking the code in `OP_BREAK` and `OP_RETURN_BLK` +- [#6321](https://github.com/mruby/mruby/pull/6321) Avoid warnings in `lib/**/*.rb` +- [#6322](https://github.com/mruby/mruby/pull/6322) Fix mrb_ro_data_p on Intel Mac +- [#6324](https://github.com/mruby/mruby/pull/6324) Remove `localjump_error()` +- [#6327](https://github.com/mruby/mruby/pull/6327) fix ncurses linking issues +- [#6328](https://github.com/mruby/mruby/pull/6328) Fix use-after-free in `obj_free()` for env objects +- [#6329](https://github.com/mruby/mruby/pull/6329) Fix use-after-free in `mrb_obj_alloc()` +- [#6330](https://github.com/mruby/mruby/pull/6330) Add a precondition to call `mrb_env_unshare()`. +- [#6331](https://github.com/mruby/mruby/pull/6331) Restore the GC arena with tests +- [#6332](https://github.com/mruby/mruby/pull/6332) Must not depend on the “host” build to generate `mruby-compiler/core/y.tab.c` +- [#6333](https://github.com/mruby/mruby/pull/6333) Reduce the number of branch instructions in the `heap_p()` +- [#6335](https://github.com/mruby/mruby/pull/6335) Add `return_ci` in `CHECKPOINT_MAIN()` of `OP_RETURN` +- [#6338](https://github.com/mruby/mruby/pull/6338) Need to place static proc objects into 8-byte alignments +- [#6340](https://github.com/mruby/mruby/pull/6340) Fix use-after-free for `Array#<=>` +- [#6341](https://github.com/mruby/mruby/pull/6341) Need to restore the GC arena after some function calls +- [#6344](https://github.com/mruby/mruby/pull/6344) prefer using `mrb_yield` to call block arguments +- [#6347](https://github.com/mruby/mruby/pull/6347) codegen.c,parse.y: remove flattening of `yield` arguments; fix #6346 +- [#6348](https://github.com/mruby/mruby/pull/6348) Cancel the warning disablement +- [#6349](https://github.com/mruby/mruby/pull/6349) Perform GC before deleting directories +- [#6350](https://github.com/mruby/mruby/pull/6350) Fixed character encoding conversion function mismatch +- [#6351](https://github.com/mruby/mruby/pull/6351) Remove unnecessary `mrb_gc_arena_restore()` +- [#6353](https://github.com/mruby/mruby/pull/6353) Fix use-after-free in `mrb_ary_delete()` +- [#6356](https://github.com/mruby/mruby/pull/6356) Making splat argument objects invisible from Ruby side +- [#6373](https://github.com/mruby/mruby/pull/6373) Add build config for Milk-V Duo (RISC-V Linux) board +- [#6382](https://github.com/mruby/mruby/pull/6382) Make array objects invisible in `mrb_gc_register()` +- [#6385](https://github.com/mruby/mruby/pull/6385) Small improvements for `mrb_gc_register()` +- [#6386](https://github.com/mruby/mruby/pull/6386) Avoid calling `mrb_gv_set()` from `mrb_gc_unregister()` +- [#6387](https://github.com/mruby/mruby/pull/6387) Small improvements for `mrb_gc_unregister()` +- [#6390](https://github.com/mruby/mruby/pull/6390) Fix use-after-free by `mrb_gc_unregistor()` +- [#6391](https://github.com/mruby/mruby/pull/6391) Fixed argument forwarding in `instance_exec` +- [#6392](https://github.com/mruby/mruby/pull/6392) Fix argument forwarding in `mrb_exec_irep()` +- [#6393](https://github.com/mruby/mruby/pull/6393) Follow-up to #6391 +- [#6395](https://github.com/mruby/mruby/pull/6395) Storing method-id inside Symbol#to_proc +- [#6396](https://github.com/mruby/mruby/pull/6396) Milk-V Build Config: update GPIO gem URL +- [#6397](https://github.com/mruby/mruby/pull/6397) boxing_no.h: rename BOXNIX_SET_VALUE -> BOXNO_SET_VALUE +- [#6399](https://github.com/mruby/mruby/pull/6399) Add macOS 15 to the build +- [#6405](https://github.com/mruby/mruby/pull/6405) `io_read`: use `%i` instead of `%d` in call to `mrb_raisef` +- [#6407](https://github.com/mruby/mruby/pull/6407) Allow to exclude specific files in `rake install` +- [#6408](https://github.com/mruby/mruby/pull/6408) Improve compliance with C++ standards +- [#6410](https://github.com/mruby/mruby/pull/6410) Put `#include ` in `parse.y` +- [#6412](https://github.com/mruby/mruby/pull/6412) Distinguish the call frame of the generator with `OP_RETURN_BLK` +- [#6413](https://github.com/mruby/mruby/pull/6413) Add links to documentation in `README.md` +- [#6415](https://github.com/mruby/mruby/pull/6415) Fix numbered parameters when used as a singleton +- [#6416](https://github.com/mruby/mruby/pull/6416) Optimize the "new" method's iseq +- [#6419](https://github.com/mruby/mruby/pull/6419) Follow Ruby's behavior for numbered parameters in -> {} +- [#6420](https://github.com/mruby/mruby/pull/6420) Update `labeler.yml`: add label for the `tools` directory +- [#6422](https://github.com/mruby/mruby/pull/6422) Add annotations for function names defined in the preprocessor +- [#6423](https://github.com/mruby/mruby/pull/6423) Adding a serialized test task +- [#6427](https://github.com/mruby/mruby/pull/6427) Need to update `ci` variable after re-entry to VM +- [#6428](https://github.com/mruby/mruby/pull/6428) Change the limits of OP_ADDI and OP_SUBI from 0-127 to 0-255. +- [#6429](https://github.com/mruby/mruby/pull/6429) Fix numbered parameters when used as hash keys +- [#6432](https://github.com/mruby/mruby/pull/6432) Moving code in macro arguments out of macros +- [#6434](https://github.com/mruby/mruby/pull/6434) Added document "Layout of the mruby filesystem" +- [#6436](https://github.com/mruby/mruby/pull/6436) Make `rake doc:update-index` prettier friendly +- [#6437](https://github.com/mruby/mruby/pull/6437) Add more details to the pre-commit config +- [#6438](https://github.com/mruby/mruby/pull/6438) Remove unused `MRuby::Build#list_install_excludes` method +- [#6440](https://github.com/mruby/mruby/pull/6440) Fix `redo` keyword +- [#6442](https://github.com/mruby/mruby/pull/6442) Fixed wrong range condition in `OP_JMPUW` +- [#6443](https://github.com/mruby/mruby/pull/6443) Fix NODE_NEGATE for bigints +- [#6444](https://github.com/mruby/mruby/pull/6444) Add test cases for bigints +- [#6446](https://github.com/mruby/mruby/pull/6446) Omit the `_WIN64` definition check +- [#6447](https://github.com/mruby/mruby/pull/6447) Fixed `File.expand_path` +- [#6448](https://github.com/mruby/mruby/pull/6448) Suppress presym in `mruby/ext/io.h` file +- [#6449](https://github.com/mruby/mruby/pull/6449) Using presym in the `mruby-io/src/file_test.c` file +- [#6450](https://github.com/mruby/mruby/pull/6450) Change `MRB_WITH_IO_PREAD_PWRITE` configuration name +- [#6454](https://github.com/mruby/mruby/pull/6454) mruby-bigint: handle rhs bigint in int_mod and int_divmod functions +- [#6455](https://github.com/mruby/mruby/pull/6455) mruby-bigint: fix rounding behavior in mpz_mdiv and mpz_mdivmod functions +- [#6457](https://github.com/mruby/mruby/pull/6457) bigint: fix bug with division of a small number by a bigint +- [#6459](https://github.com/mruby/mruby/pull/6459) `FileTest` is a module +- [#6461](https://github.com/mruby/mruby/pull/6461) To create a release package file in draft +- [#6462](https://github.com/mruby/mruby/pull/6462) Properly cast the return value of `memchr()` +- [#6463](https://github.com/mruby/mruby/pull/6463) Add `level` argument to `File.dirname` +- [#6465](https://github.com/mruby/mruby/pull/6465) Add initial PlayStation portable crossbuild support +- [#6468](https://github.com/mruby/mruby/pull/6468) Fixed missing changes to `IB_FIND_BY_KEY()` parameter names +- [#6469](https://github.com/mruby/mruby/pull/6469) Add more `const` qualifier for `RProc` +- [#6472](https://github.com/mruby/mruby/pull/6472) Moved tests for `Integer#quo` +- [#6473](https://github.com/mruby/mruby/pull/6473) Hide `mpz_and()` symbol +- [#6473](https://github.com/mruby/mruby/pull/6473) Hide mpz_and() symbol +- [#6474](https://github.com/mruby/mruby/pull/6474) Avoid array object creation with “unknown keyword” error +- [#6475](https://github.com/mruby/mruby/pull/6475) Don't include deleted mruby-print +- [#6478](https://github.com/mruby/mruby/pull/6478) Fixed buffer overrun in function `chars2bytes()` +- [#6479](https://github.com/mruby/mruby/pull/6479) Reimplementation of `File.expand_path` method +- [#6482](https://github.com/mruby/mruby/pull/6482) Add `File.absolute_path?` method +- [#6487](https://github.com/mruby/mruby/pull/6487) Add Emscripten toolchain & build_config diff --git a/doc/mruby4.0.md b/doc/mruby4.0.md new file mode 100644 index 0000000000..200b58b9e3 --- /dev/null +++ b/doc/mruby4.0.md @@ -0,0 +1,367 @@ +# User visible changes in `mruby4.0` from `mruby3.4` + +"**_NOTE_**:" are changes to be aware of. + +# The language + +## Pattern Matching + +mruby now supports pattern matching (case/in) syntax: + +- Basic pattern matching with `case`/`in` syntax ([dadfac6](https://github.com/mruby/mruby/commit/dadfac6)) +- Array pattern matching ([ec67fd9](https://github.com/mruby/mruby/commit/ec67fd9)) +- Hash pattern matching ([2147263](https://github.com/mruby/mruby/commit/2147263)) +- Find pattern matching (`[*pre, target, *post]`) ([6c4d98b](https://github.com/mruby/mruby/commit/6c4d98b)) +- Pin operator (`^variable`) ([1de6340](https://github.com/mruby/mruby/commit/1de6340)) +- Guard clauses (`if`/`unless` conditions) ([07ac110](https://github.com/mruby/mruby/commit/07ac110)) +- One-line pattern matching (`expr in pattern`) ([e76ce24](https://github.com/mruby/mruby/commit/e76ce24)) +- Brace-less hash pattern support ([e8096bf](https://github.com/mruby/mruby/commit/e8096bf)) + +## Other Language Changes + +- `&nil` in formal parameters to explicitly opt out of block arguments ([b07518e](https://github.com/mruby/mruby/commit/b07518e)) +- Trailing comma in method definition parameters: `def foo(a, b,)` ([f78334b](https://github.com/mruby/mruby/commit/f78334b)) +- Array/Hash/String subclasses can now override `[]` and `[]=` methods ([#6675](https://github.com/mruby/mruby/pull/6675)) +- `OP_SETIDX` optimization for Array and Hash ([ddd8fe1](https://github.com/mruby/mruby/commit/ddd8fe1)) +- `case`/`in` without `else` now raises `NoMatchingPatternError` ([d8de35b](https://github.com/mruby/mruby/commit/d8de35b)) +- Allow compound statement in parenthesized argument context ([919cbd8](https://github.com/mruby/mruby/commit/919cbd8)) + +# Changes in C API + +- **_NOTE_**: `mrb_alloca()` renamed to `mrb_temp_alloc()` ([7fe5c2e](https://github.com/mruby/mruby/commit/7fe5c2e)) +- **_NOTE_**: `mruby/ext/io.h` renamed to `mruby/io.h` ([2813f79](https://github.com/mruby/mruby/commit/2813f79)) +- `mrb_gc_add_region()` for contiguous heap region support ([072855a](https://github.com/mruby/mruby/commit/072855a)) +- `mrb_class_outer()` to get the outer class/module ([3a1b771](https://github.com/mruby/mruby/commit/3a1b771)) +- `MRB_ENSURE()` macro for exception-safe cleanup ([3ac682b](https://github.com/mruby/mruby/commit/3ac682b)) +- `mrb_time_get_tm()` for accessing struct tm ([daaaafe](https://github.com/mruby/mruby/commit/daaaafe)) +- `MRB_OPEN_FAILURE()` macro for checking mrb_open result ([40b0cb9](https://github.com/mruby/mruby/commit/40b0cb9)) +- `mrb_print_error()` now handles NULL gracefully ([8e50a45](https://github.com/mruby/mruby/commit/8e50a45)) +- `mrb_open()` returns mrb_state with exc set on init failure ([05ffe0c](https://github.com/mruby/mruby/commit/05ffe0c)) +- `mrb_utf8_to_buf()` for UTF-8 encoding consolidation ([7e28e68](https://github.com/mruby/mruby/commit/7e28e68)) +- `kh_is_end()` macro for safe khash iteration ([893cc75](https://github.com/mruby/mruby/commit/893cc75)) +- `mrb_bigint_p()` always defined regardless of bigint gem presence ([6c4a8c0](https://github.com/mruby/mruby/commit/6c4a8c0)) +- `RInteger` and `RFloat` added to `RVALUE` union ([13dbca0](https://github.com/mruby/mruby/commit/13dbca0)) + +# ROM Method Tables + +All built-in classes and most extension gems now use read-only method +tables stored in `.rodata` instead of heap-allocated hash tables. Method +definitions no longer consume heap memory, significantly reducing memory +footprint for embedded use. + +Core classes converted: BasicObject, Object, Module, Class, Kernel, +String, Array, Hash, Numeric, Integer, Float, NilClass, TrueClass, +FalseClass, Range, Symbol, Exception, Proc. + +Extension gems converted: mruby-string-ext, mruby-array-ext, mruby-set, +mruby-struct, mruby-class-ext, mruby-numeric-ext, mruby-random, +mruby-kernel-ext, mruby-complex, mruby-rational, mruby-io, mruby-socket, +mruby-method, mruby-metaprog, mruby-time, mruby-hash-ext, mruby-proc-ext, +mruby-symbol-ext, mruby-range-ext, mruby-object-ext. + +# GC and Memory + +- **_NOTE_**: `MRB_NO_PRESYM` removed; presym is now always enabled ([81689045](https://github.com/mruby/mruby/commit/81689045)) +- Replace `gcnext` gray linked list with fixed-size gray stack, reducing per-object overhead ([31fea170](https://github.com/mruby/mruby/commit/31fea170)) +- `mrb_gc_add_region()` for providing contiguous memory buffers as GC heap pages ([072855a](https://github.com/mruby/mruby/commit/072855a)) +- Chunk-based pool for symbol string allocation ([e05bd8f](https://github.com/mruby/mruby/commit/e05bd8f)) +- Reduce `IV_INITIAL_SIZE` from 4 to 2 ([6bd1f51](https://github.com/mruby/mruby/commit/6bd1f51)) +- Lossless float encoding using rotation in word boxing ([b6148c8](https://github.com/mruby/mruby/commit/b6148c8)) +- Lossless rotation encoding for 32-bit float32 word boxing ([14a5cfb](https://github.com/mruby/mruby/commit/14a5cfb)) +- Consolidated irep allocation for .mrb loading ([74fb045](https://github.com/mruby/mruby/commit/74fb045)) +- Object shapes (hidden classes) for `MRB_TT_OBJECT` IV storage, sharing key layouts across objects with the same instance variable assignment order ([8d10056](https://github.com/mruby/mruby/commit/8d10056)) + +# Build & Configuration + +- **_NOTE_**: `MRB_WORDBOX_NO_FLOAT_TRUNCATE` renamed to `MRB_WORDBOX_NO_INLINE_FLOAT` (old name still works) ([59e1fe2](https://github.com/mruby/mruby/commit/59e1fe2)) +- **_NOTE_**: `MRB_INT64` on 32-bit now requires `MRB_NO_BOXING` (other boxing modes cannot guarantee alignment for heap-allocated 64-bit integers) ([eaaa66b](https://github.com/mruby/mruby/commit/eaaa66b)) +- Amalgamation support via `rake amalgam` task ([d995ca2](https://github.com/mruby/mruby/commit/d995ca2)) +- New Platform: Cosmopolitan Libc ([#6681](https://github.com/mruby/mruby/pull/6681)) +- Emscripten: use native WASM exception handling ([ca364e3](https://github.com/mruby/mruby/commit/ca364e3)) +- HAL (Hardware Abstraction Layer) for platform abstraction in mruby-io, mruby-socket, mruby-dir, mruby-task ([74ca22f](https://github.com/mruby/mruby/commit/74ca22f)) +- `MRUBY_MIRB_READLINE` environment variable to control readline library selection ([0aafb83](https://github.com/mruby/mruby/commit/0aafb83)) +- MSYS2 drive letter support in build script ([77f6ffe](https://github.com/mruby/mruby/commit/77f6ffe)) +- Inter-gem headers separated from external API headers ([#6671](https://github.com/mruby/mruby/pull/6671)) + +# Changes in mrbgems + +## New Gems + +- **mruby-task**: Cooperative multitasking with preemptive scheduling ([ae0d7a0](https://github.com/mruby/mruby/commit/ae0d7a0)) +- **mruby-benchmark**: Benchmarking gem ([2f40f3d](https://github.com/mruby/mruby/commit/2f40f3d)) +- **mruby-strftime**: Time#strftime implementation ([b31e22f](https://github.com/mruby/mruby/commit/b31e22f)) + +## mruby-bin-mirb Improvements + +- Custom multi-line editor replacing readline ([527018c](https://github.com/mruby/mruby/commit/527018c)) +- Syntax highlighting for keywords, strings, result values, hash key symbols ([624272b](https://github.com/mruby/mruby/commit/624272b), [1713d4a](https://github.com/mruby/mruby/commit/1713d4a)) +- Automatic light/dark theme detection via OSC 11 ([db4c8d9](https://github.com/mruby/mruby/commit/db4c8d9)) +- Tab completion support ([2f15282](https://github.com/mruby/mruby/commit/2f15282)) +- Colored output for prompts and errors ([b36e0b4](https://github.com/mruby/mruby/commit/b36e0b4)) +- Auto-indentation and auto-dedent ([d52f318](https://github.com/mruby/mruby/commit/d52f318), [e901b6d](https://github.com/mruby/mruby/commit/e901b6d)) +- Command history with Up/Down navigation ([5f85c1b](https://github.com/mruby/mruby/commit/5f85c1b)) +- Line numbers in multi-line prompts ([5a3f0e2](https://github.com/mruby/mruby/commit/5a3f0e2)) +- UTF-8 multibyte character support ([4a97da3](https://github.com/mruby/mruby/commit/4a97da3)) + +## mruby-bigint Improvements + +- Toom-3 multiplication for large numbers ([99620804](https://github.com/mruby/mruby/commit/99620804)) +- Karatsuba multiplication for medium-sized numbers ([85e81072](https://github.com/mruby/mruby/commit/85e81072)) +- Balance multiplication for asymmetric operands ([0220ec2b](https://github.com/mruby/mruby/commit/0220ec2b)) +- Divide-and-conquer optimization for `to_s` ([990ff90f](https://github.com/mruby/mruby/commit/990ff90f)) +- Consolidated mpn layer for low-level limb operations ([9ef3362f](https://github.com/mruby/mruby/commit/9ef3362f)) +- Always use 32-bit limbs by default ([c747c77f](https://github.com/mruby/mruby/commit/c747c77f)) + +## Other Gem Changes + +- **_NOTE_**: `Hash#deconstruct_keys` removed for CRuby compatibility ([34b9412](https://github.com/mruby/mruby/commit/34b9412)) +- **mruby-enum-lazy**: Fix `Lazy#flat_map` to handle non-enumerable block return values ([#6765](https://github.com/mruby/mruby/pull/6765)) +- **mruby-array-ext**: Add `Array#find` and `Array#rfind` methods +- **mruby-io**: Add `IO#putc` and `Kernel#putc` ([baff6e6](https://github.com/mruby/mruby/commit/baff6e6)) +- **mruby-random**: Replace xoshiro with PCG for better memory efficiency ([f1bab01](https://github.com/mruby/mruby/commit/f1bab01)) +- **mruby-compiler**: Variable-sized AST nodes for reduced memory usage +- **mruby-compiler**: `no_return_value` context flag for script optimization ([613b03a](https://github.com/mruby/mruby/commit/613b03a)) +- `initialize_copy` and `respond_to_missing?` defined as private ([#6708](https://github.com/mruby/mruby/pull/6708)) +- Struct keyword argument initialization ([#6574](https://github.com/mruby/mruby/pull/6574)) + +# Compiler Improvements + +- Variable-sized AST nodes for reduced memory consumption ([821b989](https://github.com/mruby/mruby/commit/821b989)) +- Pattern matching bytecode optimizations ([21d4135](https://github.com/mruby/mruby/commit/21d4135)) +- Optimized masgn to generate literals directly into target registers ([fb5d966](https://github.com/mruby/mruby/commit/fb5d966)) +- Optimized splat of literal arrays in args/literals ([1cb8d73](https://github.com/mruby/mruby/commit/1cb8d73)) +- Early termination after too many parse errors ([510ebd7](https://github.com/mruby/mruby/commit/510ebd7)) +- Chunk array literals at 64 elements to reduce register pressure ([f98d641](https://github.com/mruby/mruby/commit/f98d641)) +- Chunk `%w()` and `%i()` literals to reduce register pressure ([62cf0dc](https://github.com/mruby/mruby/commit/62cf0dc)) + +# VM Optimizations + +New super-instructions that fuse common opcode sequences to reduce bytecode size and improve performance: + +- `OP_SEND0`/`OP_SSEND0`: Zero-argument method call, avoiding argument count setup ([9123ef4](https://github.com/mruby/mruby/commit/9123ef4)) +- `OP_TDEF`/`OP_SDEF`: Fused method definition combining TCLASS/SCLASS+METHOD+DEF into single instruction, saving 4 bytes per method ([8d4f47e](https://github.com/mruby/mruby/commit/8d4f47e)) +- `OP_GETIDX0`: Fast path for `array[0]` and `Array#first` access ([680f7ec](https://github.com/mruby/mruby/commit/680f7ec)) +- `OP_ADDILV`/`OP_SUBILV`: Local variable increment/decrement fusion for `i += n` patterns ([43f64b9](https://github.com/mruby/mruby/commit/43f64b9)) +- `OP_RETSELF`: Single-byte instruction for `return self` pattern ([a71db8c](https://github.com/mruby/mruby/commit/a71db8c)) +- `OP_RETNIL`: Single-byte instruction for `return nil` pattern ([64e30bf](https://github.com/mruby/mruby/commit/64e30bf)) +- `OP_RETTRUE`/`OP_RETFALSE`: Single-byte instructions for `return true`/`return false` patterns ([0b15727](https://github.com/mruby/mruby/commit/0b15727)) +- `OP_MATCHERR`: Pattern matching error with conditional execution ([944168a](https://github.com/mruby/mruby/commit/944168a)) +- `OP_BLKCALL`: Direct block call for `yield`, bypassing method dispatch (13-17% faster) ([3aa2872](https://github.com/mruby/mruby/commit/3aa2872)) + +Other optimizations: + +- 1.5x stack growth instead of linear growth for reduced reallocations ([f7988c93](https://github.com/mruby/mruby/commit/f7988c93)) +- Skip keyword argument hash duplication ([5970e350](https://github.com/mruby/mruby/commit/5970e350)) + +# Fixed GitHub Issues + +- [#5531](https://github.com/mruby/mruby/issues/5531) Hash recursion detection +- [#6506](https://github.com/mruby/mruby/issues/6506) Constant lookup in singleton class +- [#6507](https://github.com/mruby/mruby/issues/6507) tally multi-values +- [#6508](https://github.com/mruby/mruby/issues/6508) Enumerable#sum index +- [#6509](https://github.com/mruby/mruby/issues/6509) scope_new nregs initialization +- [#6515](https://github.com/mruby/mruby/issues/6515) y.tab.c in repository +- [#6516](https://github.com/mruby/mruby/issues/6516) Private backquote +- [#6554](https://github.com/mruby/mruby/issues/6554) Socket private #initialize +- [#6570](https://github.com/mruby/mruby/issues/6570) instance_eval crash +- [#6613](https://github.com/mruby/mruby/issues/6613) const_added hook during bootstrapping +- [#6635](https://github.com/mruby/mruby/issues/6635), [#6636](https://github.com/mruby/mruby/issues/6636) Colon3 constant lookup +- [#6637](https://github.com/mruby/mruby/issues/6637) arm64 mingw64 builtin setjmp/longjmp +- [#6642](https://github.com/mruby/mruby/issues/6642) Task segfault when sleep called from C +- [#6645](https://github.com/mruby/mruby/issues/6645) Set memory leak from double initialization +- [#6646](https://github.com/mruby/mruby/issues/6646) IO#gets negative length +- [#6647](https://github.com/mruby/mruby/issues/6647) IO#ungetc buffer overflow +- [#6648](https://github.com/mruby/mruby/issues/6648) sprintf buffer overread +- [#6649](https://github.com/mruby/mruby/issues/6649) Array#sort! use-after-realloc +- [#6650](https://github.com/mruby/mruby/issues/6650) Array#fill validation +- [#6652](https://github.com/mruby/mruby/issues/6652) Array comparison use-after-realloc +- [#6657](https://github.com/mruby/mruby/issues/6657) Exception handling for ||= on class variables +- [#6659](https://github.com/mruby/mruby/issues/6659) Super with keyword arguments +- [#6660](https://github.com/mruby/mruby/issues/6660) Regression on struct/array/hash == override with super +- [#6662](https://github.com/mruby/mruby/issues/6662) Array set operations use-after-free +- [#6664](https://github.com/mruby/mruby/issues/6664) Set#flatten memory leak +- [#6666](https://github.com/mruby/mruby/issues/6666) Regexp literal with encoding +- [#6668](https://github.com/mruby/mruby/issues/6668) Method#== for aliased methods and comparison bug +- [#6671](https://github.com/mruby/mruby/issues/6671) Separate inter-gem headers from external API headers +- [#6674](https://github.com/mruby/mruby/issues/6674) Document pattern matching limitations +- [#6675](https://github.com/mruby/mruby/issues/6675) Allow Hash#[] to be aliased again +- [#6687](https://github.com/mruby/mruby/issues/6687) Expand MRB_SYM/MRB_GVSYM support for symbols with special characters +- [#6698](https://github.com/mruby/mruby/issues/6698) Bigint tests fail on architectures other than x86_64 and i386 +- [#6701](https://github.com/mruby/mruby/issues/6701) Heap-use-after-free in mrb_vm_exec involving mruby-rational / mruby-bigint +- [#6702](https://github.com/mruby/mruby/issues/6702) mruby-bigint doesn't compile in C++ project +- [#6704](https://github.com/mruby/mruby/issues/6704) Heap-buffer-overflow in mrb_vm_exec via malformed source code +- [#6705](https://github.com/mruby/mruby/issues/6705) Can't get outer class of an object in C +- [#6713](https://github.com/mruby/mruby/issues/6713) mruby-polarssl not work +- [#6720](https://github.com/mruby/mruby/issues/6720) Random float range: different behavior from CRuby +- [#6722](https://github.com/mruby/mruby/issues/6722) RBreak size overflow on 32-bit platforms with MRB_NO_BOXING +- [#6740](https://github.com/mruby/mruby/issues/6740) `%w()`/`%i()` register pressure with large literals +- [#6741](https://github.com/mruby/mruby/issues/6741) `case`/`in` without `else` should raise `NoMatchingPatternError` +- [#6760](https://github.com/mruby/mruby/issues/6760) `mrb_gc_unregister()` not removing all matching entries + +# Merged Pull Requests + +- [#6418](https://github.com/mruby/mruby/pull/6418) Add `ls-lint` with GitHub Actions +- [#6492](https://github.com/mruby/mruby/pull/6492) fix a typo, update specs +- [#6493](https://github.com/mruby/mruby/pull/6493) Fix TYPO in memory.md +- [#6495](https://github.com/mruby/mruby/pull/6495) Remove `MRB_ENDIAN_LOHI()` that is no longer in use +- [#6497](https://github.com/mruby/mruby/pull/6497) gha: update `build.yml` try `windows-2025` image +- [#6498](https://github.com/mruby/mruby/pull/6498) Clean up and standardize the pre-commit config +- [#6501](https://github.com/mruby/mruby/pull/6501) Update pre-commit Node.js version to `v22.14.0 LTS` +- [#6502](https://github.com/mruby/mruby/pull/6502) pre-commit: update prettier to the latest version +- [#6503](https://github.com/mruby/mruby/pull/6503) misc: fix typos +- [#6505](https://github.com/mruby/mruby/pull/6505) mrbgems: fix spelling +- [#6510](https://github.com/mruby/mruby/pull/6510) Fixed class method visibility via `module_function` +- [#6511](https://github.com/mruby/mruby/pull/6511) Exclude the external project "lrama" from pre-commit +- [#6513](https://github.com/mruby/mruby/pull/6513) mruby 3.4.0 released +- [#6517](https://github.com/mruby/mruby/pull/6517) core/codegen.c: remove unneeded duplicate semicolon +- [#6518](https://github.com/mruby/mruby/pull/6518) Change mrbc_args.flags bit width from 2 to 3 +- [#6519](https://github.com/mruby/mruby/pull/6519) Add `tools/lrama` to `.prettierignore` +- [#6520](https://github.com/mruby/mruby/pull/6520) pre-commit: autoupdate and update node LTS version +- [#6521](https://github.com/mruby/mruby/pull/6521) Add codespell config file `.codespellrc` +- [#6522](https://github.com/mruby/mruby/pull/6522) gha: label more files +- [#6523](https://github.com/mruby/mruby/pull/6523) add `rand(Range)` and unify implementations of `Random#rand` and `Kernel#rand` +- [#6524](https://github.com/mruby/mruby/pull/6524) Fix Kernel#p when no argument +- [#6525](https://github.com/mruby/mruby/pull/6525) Skip adding empty input to mirb history +- [#6526](https://github.com/mruby/mruby/pull/6526) Add build config for Luckfox Pico embedded SBC +- [#6528](https://github.com/mruby/mruby/pull/6528) misc: fix spelling +- [#6530](https://github.com/mruby/mruby/pull/6530) Revert "class.c (find_visibility_scope): when callinfo returns, \*ep == NULL; #6512" +- [#6531](https://github.com/mruby/mruby/pull/6531) Improve method table performance by rehashing at 75% load factor +- [#6532](https://github.com/mruby/mruby/pull/6532) Reverted method table optimizations to prioritize memory savings +- [#6533](https://github.com/mruby/mruby/pull/6533) Fix calling `extended` callback +- [#6534](https://github.com/mruby/mruby/pull/6534) Add descriptive comment to mrb_read_float function +- [#6535](https://github.com/mruby/mruby/pull/6535) Added descriptive comments for functions/macros in src/mempool.c +- [#6536](https://github.com/mruby/mruby/pull/6536) Add descriptive comments to public functions in src/debug.c +- [#6537](https://github.com/mruby/mruby/pull/6537) Updated comments in `cdump.c` to remove the `@brief` tag +- [#6539](https://github.com/mruby/mruby/pull/6539) Add descriptive comments for functions in src/load.c +- [#6540](https://github.com/mruby/mruby/pull/6540) Add descriptive comments to MRB_API functions in object.c +- [#6541](https://github.com/mruby/mruby/pull/6541) Add descriptive comments for MRB_API functions in src/array.c +- [#6542](https://github.com/mruby/mruby/pull/6542) Add descriptive comments to MRB_API functions in src/symbol.c +- [#6543](https://github.com/mruby/mruby/pull/6543) Add descriptive comments to several functions in src/dump.c +- [#6544](https://github.com/mruby/mruby/pull/6544) Fix build strings that must be mutable +- [#6545](https://github.com/mruby/mruby/pull/6545) Add descriptive comments for MRB_API functions in src/class.c +- [#6548](https://github.com/mruby/mruby/pull/6548) Add descriptive comments to MRB_API functions in src/etc.c +- [#6549](https://github.com/mruby/mruby/pull/6549) Add descriptive comments to kernel functions +- [#6550](https://github.com/mruby/mruby/pull/6550) Add descriptive comments for MRB_API functions in src/proc.c +- [#6551](https://github.com/mruby/mruby/pull/6551) Add descriptive comments for MRB_API functions in src/state.c +- [#6552](https://github.com/mruby/mruby/pull/6552) Fix: Correct placement of comments in src/variable.c +- [#6553](https://github.com/mruby/mruby/pull/6553) Add descriptive comments for MRB_API functions in src/vm.c +- [#6555](https://github.com/mruby/mruby/pull/6555) `mrb_mt_foreach()` needs to update the pointer at each loop +- [#6556](https://github.com/mruby/mruby/pull/6556) `iv_foreach()` needs to update the pointer at each loop +- [#6560](https://github.com/mruby/mruby/pull/6560) Refactor: Improve Set GC marking and freeing +- [#6561](https://github.com/mruby/mruby/pull/6561) pre-commit updates and fix prettier entrypoint +- [#6562](https://github.com/mruby/mruby/pull/6562) misc: fix spelling word case +- [#6563](https://github.com/mruby/mruby/pull/6563) pre-commit add rubocop with one rule spaces for indentation +- [#6564](https://github.com/mruby/mruby/pull/6564) Remove jumanjihouse pre-commit hooks no longer maintained +- [#6565](https://github.com/mruby/mruby/pull/6565) Rubocop: fix target Ruby version; add two more cops; fix lint error +- [#6566](https://github.com/mruby/mruby/pull/6566) Removed unreferenced variables in `CrossBuild#run_bintest` +- [#6567](https://github.com/mruby/mruby/pull/6567) Avoid array object creation in `cmd_bin` method in bintest +- [#6568](https://github.com/mruby/mruby/pull/6568) mruby-bin-debugger depends on mruby-bin-mrbc in bintest +- [#6569](https://github.com/mruby/mruby/pull/6569) sed s/Mruby/MRuby/g +- [#6571](https://github.com/mruby/mruby/pull/6571) Update limitations.md to add behavior on small hash +- [#6572](https://github.com/mruby/mruby/pull/6572) Add Claude Code GitHub Workflow +- [#6573](https://github.com/mruby/mruby/pull/6573) pre-commit fixes and updates +- [#6574](https://github.com/mruby/mruby/pull/6574) Support initializing structs via keyword arguments +- [#6575](https://github.com/mruby/mruby/pull/6575) Fix typo in file time methods +- [#6581](https://github.com/mruby/mruby/pull/6581) Merge `mrb_obj_iv_inspect()` into `mrb_obj_inspect()` +- [#6582](https://github.com/mruby/mruby/pull/6582) Stricter type tag in `mrb_obj_alloc()` +- [#6583](https://github.com/mruby/mruby/pull/6583) Add fallback to local build_config.rb before using default configuration +- [#6585](https://github.com/mruby/mruby/pull/6585) Fix typo in mruby3.2 docs +- [#6586](https://github.com/mruby/mruby/pull/6586) Makefile: refactor add docs and add command line `help` target +- [#6587](https://github.com/mruby/mruby/pull/6587) Add Set#hash tests +- [#6588](https://github.com/mruby/mruby/pull/6588) Add CodeQL Analysis for GitHub Actions +- [#6589](https://github.com/mruby/mruby/pull/6589) Add pre-commit hook `check-zip-file-is-not-committed` +- [#6591](https://github.com/mruby/mruby/pull/6591) mruby-eval fix license link in README +- [#6593](https://github.com/mruby/mruby/pull/6593) README: Add Contributors Avatars, Star History, Table of Contents +- [#6598](https://github.com/mruby/mruby/pull/6598) Fix heap buffer overflow in `#method_missing` +- [#6599](https://github.com/mruby/mruby/pull/6599) pre-commit: run `markdown-link-check`, `oxipng`, `prettier` manually +- [#6600](https://github.com/mruby/mruby/pull/6600) `dreamcast_shelf build config`: update to use KallistiOS wrappers +- [#6601](https://github.com/mruby/mruby/pull/6601) fix: skip local build_config.rb when working in MRUBY_ROOT +- [#6602](https://github.com/mruby/mruby/pull/6602) Improved iseq annotations for `new` and `!=` +- [#6604](https://github.com/mruby/mruby/pull/6604) pre-commit config updates +- [#6607](https://github.com/mruby/mruby/pull/6607) fix bigint on raspberry pi +- [#6610](https://github.com/mruby/mruby/pull/6610) Extract golden ratio prime into constant +- [#6614](https://github.com/mruby/mruby/pull/6614) Fix uninitialized variable in io_gets causing segmentation fault +- [#6617](https://github.com/mruby/mruby/pull/6617) Fix various minor problems and speed up build +- [#6618](https://github.com/mruby/mruby/pull/6618) Stop generating unnecessary C++ files in mruby-bin-mruby +- [#6621](https://github.com/mruby/mruby/pull/6621) Set up all GEMS before mruby core tasks definition +- [#6624](https://github.com/mruby/mruby/pull/6624) Fixed wrong `MRuby::Build.current` at the top level of `mrbgem.rake` +- [#6628](https://github.com/mruby/mruby/pull/6628) Revert `File.absolute_path` logic +- [#6629](https://github.com/mruby/mruby/pull/6629) pre-commit update +- [#6631](https://github.com/mruby/mruby/pull/6631) Revert "Rakefile: make the whole thing parallel unless SERIAL=1" +- [#6633](https://github.com/mruby/mruby/pull/6633) Fix a heap-buffer-overflow in str strip! methods +- [#6643](https://github.com/mruby/mruby/pull/6643) Fix crash caused by an incorrect node type check in `codegen_masgn` +- [#6651](https://github.com/mruby/mruby/pull/6651) Address stack-use-after-return in the mruby bigint implementation +- [#6653](https://github.com/mruby/mruby/pull/6653) Improve HAL-related components for MinGW +- [#6655](https://github.com/mruby/mruby/pull/6655) Preventing Memory Leaks in `Array#__combination_init` +- [#6656](https://github.com/mruby/mruby/pull/6656) Fix integer overflow in allocation size calculation +- [#6663](https://github.com/mruby/mruby/pull/6663) Added the `kh_is_end()` macro function +- [#6665](https://github.com/mruby/mruby/pull/6665) Fixed use-after-free with `Set#join` +- [#6670](https://github.com/mruby/mruby/pull/6670) Arranging VM dispatch macros +- [#6673](https://github.com/mruby/mruby/pull/6673) Adjust broken license links; clean up Markdown +- [#6677](https://github.com/mruby/mruby/pull/6677) gha: run pre-commit with `--color=always` +- [#6678](https://github.com/mruby/mruby/pull/6678) Put ls-lint and pre-commit in separate workflow files +- [#6679](https://github.com/mruby/mruby/pull/6679) pre-commit autoupdate; update node and prettier +- [#6681](https://github.com/mruby/mruby/pull/6681) Add Cosmopolitan Libc build configuration +- [#6689](https://github.com/mruby/mruby/pull/6689) docs: fix pre-commit manual hooks; fix link +- [#6694](https://github.com/mruby/mruby/pull/6694) Fix mirb build under Cosmopolitan +- [#6695](https://github.com/mruby/mruby/pull/6695) Dependabot: add a cooldown period for new releases +- [#6696](https://github.com/mruby/mruby/pull/6696) Fix parse error with required kwargs and omitted parens +- [#6699](https://github.com/mruby/mruby/pull/6699) Fix mruby-task for PicoRuby Integration +- [#6700](https://github.com/mruby/mruby/pull/6700) Fix float/double pack/unpack on s390x +- [#6706](https://github.com/mruby/mruby/pull/6706) Refactor task class to use symbol IDs +- [#6708](https://github.com/mruby/mruby/pull/6708) `initialize_copy` and `respond_to_missing?` defined as private +- [#6709](https://github.com/mruby/mruby/pull/6709) Add the `MRB_ENSURE()` macro +- [#6711](https://github.com/mruby/mruby/pull/6711) Fix out of bounds read and write in IO.select +- [#6714](https://github.com/mruby/mruby/pull/6714) Fix OP_DEBUG operand type and add NULL check for debug_op_hook +- [#6716](https://github.com/mruby/mruby/pull/6716) Fixes identity for proc object +- [#6717](https://github.com/mruby/mruby/pull/6717) Fix mruby-task: wrapping by critical section and setting initial task receiver +- [#6718](https://github.com/mruby/mruby/pull/6718) Add installation instructions for conda and Homebrew +- [#6723](https://github.com/mruby/mruby/pull/6723) Add `RInteger` and `RFloat` to `RVALUE` +- [#6727](https://github.com/mruby/mruby/pull/6727) Language documentation: update wording of "overloading" section +- [#6729](https://github.com/mruby/mruby/pull/6729) Simplifying dependency addition for gensym task +- [#6730](https://github.com/mruby/mruby/pull/6730) Simplifying presym file generation actions +- [#6733](https://github.com/mruby/mruby/pull/6733) Include `mruby/presym.h` for all source files +- [#6734](https://github.com/mruby/mruby/pull/6734) Chunk array literals at 64 elements to reduce register pressure +- [#6735](https://github.com/mruby/mruby/pull/6735) Prevent full recompilation without changes to presym file +- [#6739](https://github.com/mruby/mruby/pull/6739) Fix MSYS2 build error with drive letters +- [#6743](https://github.com/mruby/mruby/pull/6743) Chunk `%w()` and `%i()` literals to reduce register pressure +- [#6744](https://github.com/mruby/mruby/pull/6744) Raise `NoMatchingPatternError` in `case`/`in` without `else` +- [#6747](https://github.com/mruby/mruby/pull/6747) Correctly handle empty hash as default named argument +- [#6749](https://github.com/mruby/mruby/pull/6749) Fix microcontroller profile +- [#6750](https://github.com/mruby/mruby/pull/6750) Fix out-of-bounds read and divide-by-zero in `Array#product` +- [#6752](https://github.com/mruby/mruby/pull/6752) Fix `attr_reader`-generated methods accepting extra arguments +- [#6753](https://github.com/mruby/mruby/pull/6753) Further optimize `Array#product` +- [#6754](https://github.com/mruby/mruby/pull/6754) Mark `attr_reader` procs as noarg +- [#6755](https://github.com/mruby/mruby/pull/6755) Reload `ci` after `mrb_hash_delete_key()` in keyword argument handling +- [#6756](https://github.com/mruby/mruby/pull/6756) Avoid impact of object modifications caused by `mrb_vm_exec()` calls +- [#6758](https://github.com/mruby/mruby/pull/6758) Don't assign result of `mrb_funcall()` directly to `regs` +- [#6759](https://github.com/mruby/mruby/pull/6759) Define `mrb_bigint_p()` always +- [#6761](https://github.com/mruby/mruby/pull/6761) Fix `mrb_gc_unregister()` to remove all matching entries +- [#6762](https://github.com/mruby/mruby/pull/6762) Write generated test C files atomically to avoid build race condition +- [#6765](https://github.com/mruby/mruby/pull/6765) Fix `Lazy#flat_map` to handle non-enumerable block return values +- [#6767](https://github.com/mruby/mruby/pull/6767) Allow compound statement in parenthesized argument context +- [#6780](https://github.com/mruby/mruby/pull/6780) Fix `String#prepend` with self-referencing arguments +- [#6781](https://github.com/mruby/mruby/pull/6781) Protect `sprintf` format string from mutation during callbacks +- [#6783](https://github.com/mruby/mruby/pull/6783) Pin GitHub Actions workflows to commit hashes + +# Security Fixes + +- Buffer overflow in bigint uadd ([3f2611e](https://github.com/mruby/mruby/commit/3f2611e)) +- Stack buffer overflow in Montgomery reduction ([edce0a3](https://github.com/mruby/mruby/commit/edce0a3)) +- Buffer overflow in pack_uu encoding ([2993302](https://github.com/mruby/mruby/commit/2993302)) +- Buffer overflow in IO#ungetc ([01ab2ff](https://github.com/mruby/mruby/commit/01ab2ff)) +- Heap-buffer-overflow in pattern alternation codegen ([eea9e30](https://github.com/mruby/mruby/commit/eea9e30)) +- Out of bounds read and write in IO.select ([44831711](https://github.com/mruby/mruby/commit/44831711)) +- Off-by-one in bounds check for symbol names and pool strings in load.c ([b3b8c01](https://github.com/mruby/mruby/commit/b3b8c01)) +- Use-after-free in Set operations ([a6b55e7](https://github.com/mruby/mruby/commit/a6b55e7)) +- Use-after-free in Array set operations ([729b84c](https://github.com/mruby/mruby/commit/729b84c)) +- Use-after-free in Set#join ([0e653eb](https://github.com/mruby/mruby/commit/0e653eb)) +- Use-after-realloc in Array#sort! ([eb39897](https://github.com/mruby/mruby/commit/eb39897)) +- Heap-use-after-free in insertion_sort ([099d2c47](https://github.com/mruby/mruby/commit/099d2c47)) +- Integer overflow in str_check_length ([6afff1c3](https://github.com/mruby/mruby/commit/6afff1c3)) +- Integer overflow in Integer#lcm ([070bef24](https://github.com/mruby/mruby/commit/070bef24)) +- Heap buffer overflow in `#method_missing` ([550d10a](https://github.com/mruby/mruby/commit/550d10a)) +- Out-of-bounds read and divide-by-zero in `Array#product` ([8441eaf](https://github.com/mruby/mruby/commit/8441eaf)) +- Heap buffer overflow in `String#prepend` with self-referencing arguments ([18ba026](https://github.com/mruby/mruby/commit/18ba026)) +- Use-after-free in `sprintf` via `to_s` callback mutating format string ([48fc422](https://github.com/mruby/mruby/commit/48fc422)) +- Multiple memory leak fixes in bigint, Set, Array, and Task gems diff --git a/doc/mruby_logo_red_icon.png b/doc/mruby_logo_red_icon.png index 9006c84eb9..e36f34a1f2 100644 Binary files a/doc/mruby_logo_red_icon.png and b/doc/mruby_logo_red_icon.png differ diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000000..ef6a2d6d45 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,13 @@ +version: "3.8" +services: + test: + build: + context: . + command: sh -c 'rake deep_clean && rake -m test:run:serial' + environment: + - MRUBY_CONFIG=ci/gcc-clang + - CC=gcc + - CXX=g++ + - LD=gcc + - SKIP=check-executables-have-shebangs + working_dir: /app diff --git a/examples/mrbgems/c_and_ruby_extension_example/README.md b/examples/mrbgems/c_and_ruby_extension_example/README.md index 0b428b0b6e..7678da41dc 100644 --- a/examples/mrbgems/c_and_ruby_extension_example/README.md +++ b/examples/mrbgems/c_and_ruby_extension_example/README.md @@ -1,4 +1,3 @@ -C and Ruby Extension Example -========= +# C and Ruby Extension Example This is an example gem which implements a C and Ruby extension. diff --git a/examples/mrbgems/c_extension_example/README.md b/examples/mrbgems/c_extension_example/README.md index 3803c20654..87ec7bc785 100644 --- a/examples/mrbgems/c_extension_example/README.md +++ b/examples/mrbgems/c_extension_example/README.md @@ -1,4 +1,3 @@ -C Extension Example -========= +# C Extension Example This is an example gem which implements a C extension. diff --git a/examples/mrbgems/cdata_extension_example/README.md b/examples/mrbgems/cdata_extension_example/README.md new file mode 100644 index 0000000000..1739b63e15 --- /dev/null +++ b/examples/mrbgems/cdata_extension_example/README.md @@ -0,0 +1,3 @@ +# C Data Extension Example + +This is an example gem which implements a C extension with Data. diff --git a/examples/mrbgems/cdata_extension_example/mrbgem.rake b/examples/mrbgems/cdata_extension_example/mrbgem.rake new file mode 100644 index 0000000000..b642d0fa7e --- /dev/null +++ b/examples/mrbgems/cdata_extension_example/mrbgem.rake @@ -0,0 +1,23 @@ +MRuby::Gem::Specification.new('cdata_extension_example') do |spec| + spec.license = 'MIT' + spec.author = 'mruby developers' + + # Add compile flags + # spec.cc.flags << '-g' + + # Add cflags to all + # spec.mruby.cc.flags << '-g' + + # Add libraries + # spec.linker.libraries << 'external_lib' + + # Default build files + # spec.rbfiles = Dir.glob("#{dir}/mrblib/*.rb") + # spec.objs = Dir.glob("#{dir}/src/*.{c,cpp,m,asm,S}").map { |f| objfile(f.relative_path_from(dir).pathmap("#{build_dir}/%X")) } + # spec.test_rbfiles = Dir.glob("#{dir}/test/*.rb") + # spec.test_objs = Dir.glob("#{dir}/test/*.{c,cpp,m,asm,S}").map { |f| objfile(f.relative_path_from(dir).pathmap("#{build_dir}/%X")) } + # spec.test_preload = 'test/assert.rb' + + # Values accessible as TEST_ARGS inside test scripts + # spec.test_args = {'tmp_dir' => Dir::tmpdir} +end diff --git a/examples/mrbgems/cdata_extension_example/src/example.c b/examples/mrbgems/cdata_extension_example/src/example.c new file mode 100644 index 0000000000..4be26489dc --- /dev/null +++ b/examples/mrbgems/cdata_extension_example/src/example.c @@ -0,0 +1,77 @@ +#include +#include +#include +#include +#include + +static void mrb_foo_free(mrb_state *mrb, void *ptr) { + /* custom destructor */ + mrb_free(mrb, ptr); +} +struct mrb_data_type mrb_foo_type = { "Foo", mrb_foo_free }; + +struct Foo { + int bar; + char baz[32]; +}; + +static mrb_value +mrb_foo_initialize(mrb_state *mrb, mrb_value self) +{ + struct Foo *f; + + f = (struct Foo*)mrb_malloc(mrb, sizeof(struct Foo)); + f->bar = 0; + + DATA_PTR(self) = f; + DATA_TYPE(self) = &mrb_foo_type; + + return self; +} + +static mrb_value +mrb_foo_get_bar(mrb_state *mrb, mrb_value self) +{ + struct Foo *f; + + f = (struct Foo*)mrb_data_get_ptr(mrb, self, &mrb_foo_type); + if (f == NULL) { + mrb_raise(mrb, E_RUNTIME_ERROR, "uninitialized data"); + } + + return mrb_fixnum_value(f->bar); +} + +static mrb_value +mrb_foo_set_bar(mrb_state *mrb, mrb_value self) +{ + struct Foo *f; + int v; + + f = (struct Foo*)mrb_data_get_ptr(mrb, self, &mrb_foo_type); + if (f == NULL) { + mrb_raise(mrb, E_RUNTIME_ERROR, "uninitialized data"); + } + + mrb_get_args(mrb, "i", &v); + + f->bar = v; + + return mrb_fixnum_value(f->bar); +} + +void +mrb_cdata_extension_example_gem_init(mrb_state* mrb) { + struct RClass *class_foo; + + class_foo = mrb_define_class(mrb, "Foo", mrb->object_class); + MRB_SET_INSTANCE_TT(class_foo, MRB_TT_CDATA); + mrb_define_method(mrb, class_foo, "initialize", mrb_foo_initialize, MRB_ARGS_NONE()); + mrb_define_method(mrb, class_foo, "bar", mrb_foo_get_bar, MRB_ARGS_NONE()); + mrb_define_method(mrb, class_foo, "bar=", mrb_foo_set_bar, MRB_ARGS_REQ(1)); +} + +void +mrb_cdata_extension_example_gem_final(mrb_state* mrb) { + /* gem finalizer */ +} diff --git a/examples/mrbgems/cdata_extension_example/test/example.c b/examples/mrbgems/cdata_extension_example/test/example.c new file mode 100644 index 0000000000..271de62609 --- /dev/null +++ b/examples/mrbgems/cdata_extension_example/test/example.c @@ -0,0 +1,7 @@ +#include + +void +mrb_cdata_extension_example_gem_test(mrb_state *mrb) +{ + /* test initializer in C */ +} diff --git a/examples/mrbgems/mruby-YOUR-bigint/TODO-HINT.md b/examples/mrbgems/mruby-YOUR-bigint/TODO-HINT.md new file mode 100644 index 0000000000..4926afe15e --- /dev/null +++ b/examples/mrbgems/mruby-YOUR-bigint/TODO-HINT.md @@ -0,0 +1,39 @@ +# Hints for creating your own bigint GEM + +This example gem, mruby-YOUR-bigint, is available under the Creative Commons Zero License (CC0). + +This file is placed for the purpose of describing hints for creating a `mruby-bigint` compatible GEM to realize multiple integers. + +The file structure in this example is as follows: + +``` ++- mruby-YOUR-bigint/ <- Make this directory public if necessary. + | Change the name of copied directory. + | + +- TODO-HINT.md <- You are currently viewing this file. + | Remove this from copied directory. + | + +- core/ + | | + | +- bigint.c <- Body of the implementation. + | + +- mrbgem.rake <- GEM name is "mruby-bigint". + May be depended on by other GEMs. +``` + +Implementers of their own bigints should copy below this directory to another directory and do the following: + +- Rewrite `spec.author`, `spec.license`, `spec.homepage` and `spec.summary` in `/mrbgem.rake` file to those of your own implementers. +- Implement the respective functions in `/core/bigint.c`. + - Define and use an object structure for `MRB_TT_BIGINT` type-tag. + It is recommended to use `mrb_static_assert_object_size()` to ensure that the size of the object structure is within six words. +- Delete this file from the destination of the copy. + +If you wish to use it as an alternative to the `mruby-bigint` provided by mruby, please leave the GEM name in `/mrbgem.rake` as it is. +This is an important factor when it is depended from other GEMs with `spec.add_dependency 'mruby-bigint'`. + +The name of the top directory of the GEM can be changed arbitrarily. +The name of the Git repository can also be changed arbitrarily. + +Note that there is no need for an initialization function as there is in a normal GEM. +If you need it, create a file `/src/bigint.c` for example, and implement the `mrb_mruby_bigint_gem_init()` function. diff --git a/examples/mrbgems/mruby-YOUR-bigint/core/bigint.c b/examples/mrbgems/mruby-YOUR-bigint/core/bigint.c new file mode 100644 index 0000000000..1174968efc --- /dev/null +++ b/examples/mrbgems/mruby-YOUR-bigint/core/bigint.c @@ -0,0 +1,65 @@ +/* + * If placed under the "mruby/examples/mrbgems/mruby-YOUR-bigint" directory, + * this file is available under the Creative Commons Zero License (CC0). + * Note that file is incomplete. + * + * TODO: If this file is copied and another implementation is written, + * remove this comment block from the copied file. + */ + +#include +#include + +/* + * The "mruby/internal.h" file should be placed after the other mruby header files. + */ +#include + +/* + * Define your own struct RBigint. + * + * - Object type must be MRB_TT_BIGINT. + * - If the structure is named RBigint, MRB_OBJ_ALLOC() can be used as is. + */ +struct RBigint { + /* + * Put MRB_OBJECT_HEADER before the first member of the structure. + */ + MRB_OBJECT_HEADER; + + /* + * Up to 3 words can be freely configured. + */ + size_t len; + size_t capa; + uintptr_t *num; +}; + +/* + * Assert with mrb_static_assert_object_size() that the entire structure is within 6 words. + */ +mrb_static_assert_object_size(struct RBigint); + +/* + * The lower 16 bits of the object flags (`obj->flags`) can be used freely by the GEM author. + */ +#define MY_BIGINT_NEGATIVE_FLAG 1 +#define MY_BIGINT_NEGATIVE_P(obj) ((obj)->flags & MY_BIGINT_NEGATIVE_FLAG) + +/* + * Implement the functions declared in `#ifdef MRUBY_USE_BIGINT ... #endif` in the "mruby/internal.h" file. + */ + +mrb_value +mrb_bint_new_int(mrb_state *mrb, mrb_int x) +{ + struct RBigint *obj = MRB_OBJ_ALLOC(mrb, MRB_TT_BIGINT, mrb->integer_class); + + ... + + return mrb_obj_value(obj); +} + +/* + * The implementation function continues... + */ diff --git a/examples/mrbgems/mruby-YOUR-bigint/mrbgem.rake b/examples/mrbgems/mruby-YOUR-bigint/mrbgem.rake new file mode 100644 index 0000000000..9695a3f838 --- /dev/null +++ b/examples/mrbgems/mruby-YOUR-bigint/mrbgem.rake @@ -0,0 +1,12 @@ +MRuby::Gem::Specification.new('mruby-bigint') do |spec| + spec.author = 'YOUR-NAME-HERE' + spec.license = 'YOUR-LICENSE-HERE' + spec.summary = 'Yet another multi-precision Integer extension' + spec.homepage = 'https://gem.example/for/mruby-YOUR-bigint' + spec.build.defines << 'MRB_USE_BIGINT' + #spec.build.linker.libraries << 'gmp' # when uses libgmp + + spec.build.libmruby_core_objs << Dir.glob(File.join(__dir__, 'core/**/*.c')).map { |fn| + objfile(fn.relative_path_from(__dir__).pathmap("#{spec.build_dir}/%X")) + } +end diff --git a/examples/mrbgems/ruby_extension_example/README.md b/examples/mrbgems/ruby_extension_example/README.md index 906a0d8f2e..9f383d673e 100644 --- a/examples/mrbgems/ruby_extension_example/README.md +++ b/examples/mrbgems/ruby_extension_example/README.md @@ -1,4 +1,3 @@ -Pure Ruby Extension Example -========= +# Pure Ruby Extension Example This is an example gem which implements a pure Ruby extension. diff --git a/examples/mrbgems/ruby_extension_example/mrbgem.rake b/examples/mrbgems/ruby_extension_example/mrbgem.rake index 6e5a5b729e..57cf1a69e1 100644 --- a/examples/mrbgems/ruby_extension_example/mrbgem.rake +++ b/examples/mrbgems/ruby_extension_example/mrbgem.rake @@ -11,8 +11,6 @@ MRuby::Gem::Specification.new('ruby_extension_example') do |spec| # Add libraries # spec.linker.libraries << 'external_lib' - spec.add_dependency('mruby-print', :core => 'mruby-print') - # Default build files # spec.rbfiles = Dir.glob("#{dir}/mrblib/*.rb") # spec.objs = Dir.glob("#{dir}/src/*.{c,cpp,m,asm,S}").map { |f| objfile(f.relative_path_from(dir).pathmap("#{build_dir}/%X")) } diff --git a/include/mrbconf.h b/include/mrbconf.h index 0f8aba9f7c..8938e6287a 100644 --- a/include/mrbconf.h +++ b/include/mrbconf.h @@ -18,7 +18,7 @@ #endif #if defined(MRB_32BIT) && defined(MRB_64BIT) -#error Cannot build for 32 and 64 bit architecture at the same time +#error Cannot build for 32 and 64-bit architecture at the same time #endif /* configuration options: */ @@ -42,21 +42,6 @@ #error Cannot define MRB_USE_FLOAT32 and MRB_NO_FLOAT at the same time #endif -/* add -DMRB_NO_METHOD_CACHE to disable method cache to save memory */ -//#define MRB_NO_METHOD_CACHE -/* size of the method cache (need to be the power of 2) */ -//#define MRB_METHOD_CACHE_SIZE (1<<8) - -/* add -DMRB_USE_METHOD_T_STRUCT on machines that use higher bits of function pointers */ -/* no MRB_USE_METHOD_T_STRUCT requires highest 2 bits of function pointers to be zero */ -#ifndef MRB_USE_METHOD_T_STRUCT - // can't use highest 2 bits of function pointers at least on 32bit - // Windows and 32bit Linux. -# ifdef MRB_32BIT -# define MRB_USE_METHOD_T_STRUCT -# endif -#endif - /* define on big endian machines; used by MRB_NAN_BOXING, etc. */ #ifndef MRB_ENDIAN_BIG # if (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN) || \ @@ -79,28 +64,41 @@ # define MRB_WORD_BOXING #endif -/* if defined mruby allocates Float objects in the heap to keep full precision if needed */ -//#define MRB_WORDBOX_NO_FLOAT_TRUNCATE +/* if defined mruby does not inline float values in word boxing; + all floats are heap-allocated as RFloat objects */ +//#define MRB_WORDBOX_NO_INLINE_FLOAT + +/* obsolete configuration */ +#if defined(MRB_WORDBOX_NO_FLOAT_TRUNCATE) +# define MRB_WORDBOX_NO_INLINE_FLOAT +#endif -/* add -DMRB_INT32 to use 32bit integer for mrb_int; conflict with MRB_INT64; +/* add -DMRB_INT32 to use 32-bit integer for mrb_int; conflict with MRB_INT64; Default for 32-bit CPU mode. */ //#define MRB_INT32 -/* add -DMRB_INT64 to use 64bit integer for mrb_int; conflict with MRB_INT32; +/* add -DMRB_INT64 to use 64-bit integer for mrb_int; conflict with MRB_INT32; Default for 64-bit CPU mode (unless using MRB_NAN_BOXING). */ //#define MRB_INT64 /* if no specific integer type is chosen */ #if !defined(MRB_INT32) && !defined(MRB_INT64) # if defined(MRB_64BIT) && !defined(MRB_NAN_BOXING) -/* Use 64bit integers on 64bit architecture (without MRB_NAN_BOXING) */ +/* Use 64-bit integers on 64-bit architecture (without MRB_NAN_BOXING) */ # define MRB_INT64 # else -/* Otherwise use 32bit integers */ +/* Otherwise use 32-bit integers */ # define MRB_INT32 # endif #endif +/* MRB_INT64 on 32-bit with word/NaN boxing causes alignment issues + for heap-allocated RInteger (int64_t needs 8-byte alignment but + GC heap slots may not guarantee it); use MRB_NO_BOXING instead */ +#if defined(MRB_INT64) && defined(MRB_32BIT) && !defined(MRB_NO_BOXING) +#error "MRB_INT64 on 32-bit requires MRB_NO_BOXING" +#endif + /* call malloc_trim(0) from mrb_full_gc() */ //#define MRB_USE_MALLOC_TRIM @@ -140,8 +138,8 @@ /* turn off generational GC by default */ //#define MRB_GC_TURN_OFF_GENERATIONAL -/* default size of khash table bucket */ -//#define KHASH_DEFAULT_SIZE 32 +/* initial size of khash table bucket */ +//#define KHASH_INITIAL_SIZE 32 /* allocated memory address alignment */ //#define POOL_ALIGNMENT 4 @@ -168,15 +166,25 @@ //#define MRB_USE_DEBUG_HOOK /* hooks for debugger */ //#define MRB_USE_ALL_SYMBOLS /* Symbol.all_symbols */ -/* obsolete configurations */ -#ifdef MRB_METHOD_T_STRUCT -# define MRB_USE_METHOD_T_STRUCT +/* Symbol table configuration */ +/* Threshold for switching from linear search to hash table */ +#ifndef MRB_SYMBOL_LINEAR_THRESHOLD +#define MRB_SYMBOL_LINEAR_THRESHOLD 256 #endif + +/* Maximum number of dynamic symbols (created at runtime via to_sym etc.) + Presyms, inline symbols, and mrb_intern_static symbols are excluded. + Set to 0 to disable the limit. */ +#ifndef MRB_SYMBOL_MAX +#define MRB_SYMBOL_MAX 4096 +#endif + +/* obsolete configurations */ #if defined(DISABLE_STDIO) || defined(MRB_DISABLE_STDIO) # define MRB_NO_STDIO #endif -#ifdef MRB_DISABLE_DIRECT_THREADING -# define MRB_NO_DIRECT_THREADING +#if defined(MRB_DISABLE_DIRECT_THREADING) || defined(MRB_NO_DIRECT_THREADING) +# define MRB_USE_VM_SWITCH_DISPATCH #endif #if defined(ENABLE_DEBUG) || defined(MRB_ENABLE_DEBUG_HOOK) # define MRB_USE_DEBUG_HOOK @@ -207,8 +215,8 @@ # define MRB_NO_METHOD_CACHE # endif -# ifndef KHASH_DEFAULT_SIZE -# define KHASH_DEFAULT_SIZE 16 +# ifndef KHASH_INITIAL_SIZE +# define KHASH_INITIAL_SIZE 16 # endif # ifndef MRB_HEAP_PAGE_SIZE diff --git a/include/mruby.h b/include/mruby.h index 6427164775..4a51b09730 100644 --- a/include/mruby.h +++ b/include/mruby.h @@ -1,7 +1,7 @@ /* ** mruby - An embeddable Ruby implementation ** -** Copyright (c) mruby developers 2010-2021 +** Copyright (c) mruby developers 2010- ** ** Permission is hereby granted, free of charge, to any person obtaining ** a copy of this software and associated documentation files (the @@ -113,6 +113,8 @@ #include "mrbconf.h" +typedef struct mrb_state mrb_state; + #include #include #include @@ -139,7 +141,7 @@ #endif /** - * MRuby C API entry point + * mruby C API entry point */ MRB_BEGIN_DECL @@ -155,18 +157,6 @@ typedef uint8_t mrb_code; typedef uint32_t mrb_aspec; typedef struct mrb_irep mrb_irep; -struct mrb_state; - -/** - * Function pointer type of custom allocator used in @see mrb_open_allocf. - * - * The function pointing it must behave similarly as realloc except: - * - If ptr is NULL it must allocate new space. - * - If s is NULL, ptr must be freed. - * - * See @see mrb_default_allocf for the default implementation. - */ -typedef void* (*mrb_allocf) (struct mrb_state *mrb, void*, size_t, void *ud); #ifndef MRB_FIXED_STATE_ATEXIT_STACK_SIZE #define MRB_FIXED_STATE_ATEXIT_STACK_SIZE 5 @@ -176,6 +166,8 @@ typedef struct { uint8_t n:4; /* (15=*) c=n|nk<<4 */ uint8_t nk:4; /* (15=*) */ uint8_t cci; /* called from C function */ + uint8_t vis; /* 5(ZERO):1(separate module):2(method visibility) */ + /* under 3-bit flags are copied to env, and after that, env takes precedence */ mrb_sym mid; const struct RProc *proc; struct RProc *blk; @@ -184,6 +176,7 @@ typedef struct { union { struct REnv *env; struct RClass *target_class; + const void *keep_context; /* if NULL, it means that the fiber has switched; for internal use */ } u; } mrb_callinfo; @@ -196,6 +189,10 @@ enum mrb_fiber_state { MRB_FIBER_TERMINATED, }; +/* Task context status aliases */ +#define MRB_TASK_CREATED MRB_FIBER_CREATED +#define MRB_TASK_STOPPED MRB_FIBER_TERMINATED + struct mrb_context { struct mrb_context *prev; @@ -227,19 +224,16 @@ mrb_static_assert_powerof2(MRB_METHOD_CACHE_SIZE); * @param self The self object * @return [mrb_value] The function's return value */ -typedef mrb_value (*mrb_func_t)(struct mrb_state *mrb, mrb_value self); +typedef mrb_value (*mrb_func_t)(mrb_state *mrb, mrb_value self); -#ifndef MRB_USE_METHOD_T_STRUCT -typedef uintptr_t mrb_method_t; -#else typedef struct { - uint8_t flags; + uint32_t flags; /* method flags (no symbol packed) */ + union { - struct RProc *proc; + const struct RProc *proc; mrb_func_t func; - }; + } as; } mrb_method_t; -#endif #ifndef MRB_NO_METHOD_CACHE struct mrb_cache_entry { @@ -249,16 +243,45 @@ struct mrb_cache_entry { }; #endif +#ifdef MRB_CONST_CACHE_SIZE +# undef MRB_NO_CONST_CACHE +mrb_static_assert_powerof2(MRB_CONST_CACHE_SIZE); +#else +/* default constant cache size: 64 */ +/* cache size needs to be power of 2 */ +# define MRB_CONST_CACHE_SIZE (1<<6) +#endif + +#ifndef MRB_NO_CONST_CACHE +struct mrb_const_cache_entry { + const struct mrb_irep *irep; + mrb_sym sym; + mrb_value value; +}; +#endif + struct mrb_jmpbuf; -typedef void (*mrb_atexit_func)(struct mrb_state*); +typedef void (*mrb_atexit_func)(mrb_state*); + +#ifdef MRB_USE_TASK_SCHEDULER +struct mrb_task; + +typedef struct mrb_task_state { + struct mrb_task *queues[4]; /* Task queues (dormant, ready, waiting, suspended) */ + volatile uint32_t tick; /* Current tick count */ + volatile uint32_t wakeup_tick; /* Next wakeup tick */ + volatile mrb_bool switching; /* Context switch pending flag */ + struct mrb_task *main_task; /* Main task wrapper for root context */ + uint8_t scheduler_lock; /* Lock counter for synchronous execution */ + mrb_bool loop_running; /* Active mrb_task_run loop flag */ + mrb_bool exception_as_result; /* Return unhandled task exceptions as values */ +} mrb_task_state; +#endif -typedef struct mrb_state { +struct mrb_state { struct mrb_jmpbuf *jmp; - mrb_allocf allocf; /* memory allocation function */ - void *allocf_ud; /* auxiliary data of allocf */ - struct mrb_context *c; struct mrb_context *root_c; struct iv_tbl *globals; /* global variable table */ @@ -287,27 +310,34 @@ typedef struct mrb_state { mrb_gc gc; + mrb_bool bootstrapping; + #ifndef MRB_NO_METHOD_CACHE struct mrb_cache_entry cache[MRB_METHOD_CACHE_SIZE]; #endif +#ifndef MRB_NO_CONST_CACHE + struct mrb_const_cache_entry const_cache[MRB_CONST_CACHE_SIZE]; +#endif + mrb_sym symidx; const char **symtbl; - uint8_t *symlink; - uint8_t *symflags; - mrb_sym symhash[256]; + uint8_t *sym_flags; /* per-symbol flags (SYM_FL_*) */ size_t symcapa; + struct mrb_sym_hash_table *symhash; + void *sym_pool; + mrb_sym dynamic_sym_count; /* count of dynamic (GC-candidate) symbols */ #ifndef MRB_USE_ALL_SYMBOLS - char symbuf[8]; /* buffer for small symbol names */ + char symbuf[8]; /* buffer for small symbol names */ #endif #ifdef MRB_USE_DEBUG_HOOK - void (*code_fetch_hook)(struct mrb_state* mrb, const struct mrb_irep *irep, const mrb_code *pc, mrb_value *regs); - void (*debug_op_hook)(struct mrb_state* mrb, const struct mrb_irep *irep, const mrb_code *pc, mrb_value *regs); + void (*code_fetch_hook)(mrb_state* mrb, const struct mrb_irep *irep, const mrb_code *pc, mrb_value *regs); + void (*debug_op_hook)(mrb_state* mrb, const struct mrb_irep *irep, const mrb_code *pc, mrb_value *regs); #endif #ifdef MRB_BYTECODE_DECODE_OPTION - mrb_code (*bytecode_decoder)(struct mrb_state* mrb, mrb_code code); + mrb_code (*bytecode_decoder)(mrb_state* mrb, mrb_code code); #endif struct RClass *eException_class; @@ -318,6 +348,10 @@ typedef struct mrb_state { struct RObject *arena_err; /* pre-allocated arena overflow error */ #endif + struct mrb_mt_rom_list *rom_mt; /* heap-allocated ROM wrappers (freed at close) */ + + struct mrb_iv_shape *root_shape; /* root of IV shape tree */ + void *ud; /* auxiliary data */ #ifdef MRB_FIXED_STATE_ATEXIT_STACK @@ -326,7 +360,11 @@ typedef struct mrb_state { mrb_atexit_func *atexit_stack; #endif uint16_t atexit_stack_len; -} mrb_state; + +#ifdef MRB_USE_TASK_SCHEDULER + mrb_task_state task; /* Task scheduler state */ +#endif +}; /** * Defines a new class. @@ -362,7 +400,18 @@ MRB_API struct RClass *mrb_define_class_id(mrb_state *mrb, mrb_sym name, struct MRB_API struct RClass *mrb_define_module(mrb_state *mrb, const char *name); MRB_API struct RClass *mrb_define_module_id(mrb_state *mrb, mrb_sym name); +/** + * Returns the singleton class of an object. + * + * Raises a `TypeError` exception for immediate values. + */ MRB_API mrb_value mrb_singleton_class(mrb_state *mrb, mrb_value val); + +/** + * Returns the singleton class of an object. + * + * Returns `NULL` for immediate values, + */ MRB_API struct RClass *mrb_singleton_class_ptr(mrb_state *mrb, mrb_value val); /** @@ -392,7 +441,7 @@ MRB_API void mrb_include_module(mrb_state *mrb, struct RClass *cla, struct RClas MRB_API void mrb_prepend_module(mrb_state *mrb, struct RClass *cla, struct RClass *prepended); /** - * Defines a global function in ruby. + * Defines a global function in Ruby. * * If you're creating a gem it may look something like this * @@ -409,14 +458,17 @@ MRB_API void mrb_prepend_module(mrb_state *mrb, struct RClass *cla, struct RClas * mrb_define_method(mrb, mrb->kernel_module, "example_method", example_method, MRB_ARGS_NONE()); * } * - * @param mrb The MRuby state reference. + * @param mrb The mruby state reference. * @param cla The class pointer where the method will be defined. * @param name The name of the method being defined. * @param func The function pointer to the method definition. * @param aspec The method parameters declaration. */ + MRB_API void mrb_define_method(mrb_state *mrb, struct RClass *cla, const char *name, mrb_func_t func, mrb_aspec aspec); MRB_API void mrb_define_method_id(mrb_state *mrb, struct RClass *c, mrb_sym mid, mrb_func_t func, mrb_aspec aspec); +MRB_API void mrb_define_private_method(mrb_state *mrb, struct RClass *cla, const char *name, mrb_func_t func, mrb_aspec aspec); +MRB_API void mrb_define_private_method_id(mrb_state *mrb, struct RClass *c, mrb_sym mid, mrb_func_t func, mrb_aspec aspec); /** * Defines a class method. @@ -437,7 +489,7 @@ MRB_API void mrb_define_method_id(mrb_state *mrb, struct RClass *c, mrb_sym mid, * foo = mrb_define_class(mrb, "Foo", mrb->object_class); * mrb_define_class_method(mrb, foo, "bar", bar_method, MRB_ARGS_NONE()); * } - * @param mrb The MRuby state reference. + * @param mrb The mruby state reference. * @param cla The class where the class method will be defined. * @param name The name of the class method being defined. * @param fun The function pointer to the class method definition. @@ -473,7 +525,7 @@ MRB_API void mrb_define_singleton_method_id(mrb_state *mrb, struct RObject *cla, * foo = mrb_define_module(mrb, "Foo"); * mrb_define_module_function(mrb, foo, "bar", bar_method, MRB_ARGS_NONE()); * } - * @param mrb The MRuby state reference. + * @param mrb The mruby state reference. * @param cla The module where the module function will be defined. * @param name The name of the module function being defined. * @param fun The function pointer to the module function definition. @@ -503,7 +555,7 @@ MRB_API void mrb_define_module_function_id(mrb_state *mrb, struct RClass *cla, m * mrb_value * mrb_example_gem_final(mrb_state* mrb){ * } - * @param mrb The MRuby state reference. + * @param mrb The mruby state reference. * @param cla A class or module the constant is defined in. * @param name The name of the constant being defined. * @param val The value for the constant. @@ -787,6 +839,8 @@ MRB_API struct RClass * mrb_module_get_under_id(mrb_state *mrb, struct RClass *o MRB_API void mrb_notimplement(mrb_state*); /* a function to be replacement of unimplemented method */ MRB_API mrb_value mrb_notimplement_m(mrb_state*, mrb_value); +/* just return it self */ +MRB_API mrb_value mrb_obj_itself(mrb_state*, mrb_value); /** * Duplicate an object. @@ -897,6 +951,11 @@ MRB_API struct RClass* mrb_define_module_under_id(mrb_state *mrb, struct RClass */ #define MRB_ARGS_BLOCK() ((mrb_aspec)1) +/** + * Function does not accept a block (&nil) + */ +#define MRB_ARGS_NOBLOCK() ((mrb_aspec)(1 << 23)) + /** * Function accepts any number of arguments */ @@ -931,7 +990,7 @@ MRB_API struct RClass* mrb_define_module_under_id(mrb_state *mrb, struct RClass * | `I` | inline struct | void *, struct RClass | `I!` gives `NULL` for `nil` | * | `&` | block | {mrb_value} | &! raises exception if no block given. | * | `*` | rest arguments | const {mrb_value} *, {mrb_int} | Receive the rest of arguments as an array; `*!` avoid copy of the stack. | - * | \| | optional | | After this spec following specs would be optional. | + * | `\|` | optional | | After this spec following specs would be optional. | * | `?` | optional given | {mrb_bool} | `TRUE` if preceding argument is given. Used to check optional argument is given. | * | `:` | keyword args | {mrb_kwargs} const | Get keyword arguments. @see mrb_kwargs | * @@ -965,8 +1024,8 @@ typedef const char *mrb_args_format; * * // def method(a: 1, b: 2) * - * uint32_t kw_num = 2; - * uint32_t kw_required = 0; + * mrb_int kw_num = 2; + * mrb_int kw_required = 0; * mrb_sym kw_names[] = { mrb_intern_lit(mrb, "a"), mrb_intern_lit(mrb, "b") }; * mrb_value kw_values[kw_num]; * mrb_kwargs kwargs = { kw_num, kw_required, kw_names, kw_values, NULL }; @@ -981,7 +1040,7 @@ typedef const char *mrb_args_format; * mrb_value str, kw_rest; * uint32_t kw_num = 3; * uint32_t kw_required = 1; - * // Note that `#include ` is required beforehand because `MRB_SYM()` is used. + * // `MRB_SYM()` is available via `mruby.h` (which includes `mruby/presym.h`). * // If the usage of `MRB_SYM()` is not desired, replace it with `mrb_intern_lit()`. * mrb_sym kw_names[] = { MRB_SYM(x), MRB_SYM(y), MRB_SYM(z) }; * mrb_value kw_values[kw_num]; @@ -996,8 +1055,8 @@ typedef struct mrb_kwargs mrb_kwargs; struct mrb_kwargs { - uint32_t num; /* number of keyword arguments */ - uint32_t required; /* number of required keyword arguments */ + mrb_int num; /* number of keyword arguments */ + mrb_int required; /* number of required keyword arguments */ const mrb_sym *table; /* C array of symbols for keyword names */ mrb_value *values; /* keyword argument values */ mrb_value *rest; /* keyword rest (dict) */ @@ -1006,7 +1065,7 @@ struct mrb_kwargs /** * Retrieve arguments from mrb_state. * - * @param mrb The current MRuby state. + * @param mrb The current mruby state. * @param format is a list of format specifiers * @param ... The passing variadic arguments must be a pointer of retrieving type. * @return the number of arguments retrieved. @@ -1064,13 +1123,13 @@ MRB_API mrb_bool mrb_block_given_p(mrb_state *mrb); #define mrb_strlen_lit(lit) (sizeof(lit "") - 1) /** - * Call existing ruby functions. + * Call existing Ruby functions. * * Example: * * #include * #include - * #include "mruby/compile.h" + * #include * * int * main() @@ -1097,11 +1156,11 @@ MRB_API mrb_bool mrb_block_given_p(mrb_state *mrb); MRB_API mrb_value mrb_funcall(mrb_state *mrb, mrb_value val, const char *name, mrb_int argc, ...); MRB_API mrb_value mrb_funcall_id(mrb_state *mrb, mrb_value val, mrb_sym mid, mrb_int argc, ...); /** - * Call existing ruby functions. This is basically the type safe version of mrb_funcall. + * Call existing Ruby functions. This is basically the type safe version of mrb_funcall. * * #include * #include - * #include "mruby/compile.h" + * #include * int * main() * { @@ -1112,7 +1171,7 @@ MRB_API mrb_value mrb_funcall_id(mrb_state *mrb, mrb_value val, mrb_sym mid, mrb * * FILE *fp = fopen("test.rb","r"); * mrb_value obj = mrb_load_file(mrb,fp); - * mrb_funcall_argv(mrb, obj, MRB_SYM(method_name), 1, &obj); // Calling ruby function from test.rb. + * mrb_funcall_argv(mrb, obj, MRB_SYM(method_name), 1, &obj); // Calling Ruby function from test.rb. * fclose(fp); * mrb_close(mrb); * } @@ -1125,13 +1184,29 @@ MRB_API mrb_value mrb_funcall_id(mrb_state *mrb, mrb_value val, mrb_sym mid, mrb * @see mrb_funcall */ MRB_API mrb_value mrb_funcall_argv(mrb_state *mrb, mrb_value val, mrb_sym name, mrb_int argc, const mrb_value *argv); +/* + * Convenience wrappers for `mrb_funcall_argv` with a fixed argument count. + * Avoids the 16-slot fixed argv buffer used by the variadic `mrb_funcall_id`. + */ +MRB_INLINE mrb_value +mrb_funcall_argv1(mrb_state *mrb, mrb_value val, mrb_sym name, mrb_value a1) +{ + return mrb_funcall_argv(mrb, val, name, 1, &a1); +} +MRB_INLINE mrb_value +mrb_funcall_argv2(mrb_state *mrb, mrb_value val, mrb_sym name, mrb_value a1, mrb_value a2) +{ + const mrb_value argv[] = { a1, a2 }; + return mrb_funcall_argv(mrb, val, name, 2, argv); +} /** - * Call existing ruby functions with a block. + * Call existing Ruby functions with a block. */ MRB_API mrb_value mrb_funcall_with_block(mrb_state *mrb, mrb_value val, mrb_sym name, mrb_int argc, const mrb_value *argv, mrb_value block); /** - * Create a symbol from C string. But usually it's better to use MRB_SYM, - * MRB_OPSYM, MRB_CVSYM, MRB_IVSYM, MRB_SYM_B, MRB_SYM_Q, MRB_SYM_E macros. + * Create a symbol from C string. But usually it's better to + * use MRB_SYM, MRB_OPSYM, MRB_CVSYM, MRB_IVSYM, MRB_GVSYM, + * MRB_SYM_B, MRB_SYM_Q, MRB_SYM_E macros. * * Example: * @@ -1224,49 +1299,100 @@ MRB_API char* mrb_locale_from_utf8(const char *p, int len); MRB_API mrb_state* mrb_open(void); /** - * Create new mrb_state with custom allocators. + * Create new mrb_state with just the mruby core * * @param f * Reference to the allocation function. + * Use mrb_basic_alloc_func for the default * @param ud * User data will be passed to custom allocator f. * If user data isn't required just pass NULL. * @return * Pointer to the newly created mrb_state. */ -MRB_API mrb_state* mrb_open_allocf(mrb_allocf f, void *ud); +MRB_API mrb_state* mrb_open_core(void); /** - * Create new mrb_state with just the MRuby core + * Closes and frees a mrb_state. * - * @param f - * Reference to the allocation function. - * Use mrb_default_allocf for the default - * @param ud - * User data will be passed to custom allocator f. - * If user data isn't required just pass NULL. + * @param mrb + * Pointer to the mrb_state to be closed. + */ +MRB_API void mrb_close(mrb_state *mrb); +#ifndef MRB_NO_METHOD_CACHE +MRB_API void mrb_method_cache_clear(mrb_state *mrb); +#else +#define mrb_method_cache_clear(mrb) ((void)0) +#endif +#ifndef MRB_NO_CONST_CACHE +MRB_API void mrb_const_cache_clear(mrb_state *mrb); +#else +#define mrb_const_cache_clear(mrb) ((void)0) +#endif + +/** + * Check if mrb_open() failed + * + * @param mrb + * Pointer returned from mrb_open() or mrb_open_core(). * @return - * Pointer to the newly created mrb_state. + * Non-zero if initialization failed, 0 if succeeded. + * @note + * mrb_open() may return non-NULL even on failure (with mrb->exc set). + * Use this macro to check for failure: + * @code + * mrb_state *mrb = mrb_open(); + * if (MRB_OPEN_FAILURE(mrb)) { + * if (mrb) { + * // Inspect mrb->exc for error details + * mrb_close(mrb); + * } + * return EXIT_FAILURE; + * } + * @endcode */ -MRB_API mrb_state* mrb_open_core(mrb_allocf f, void *ud); +#define MRB_OPEN_FAILURE(mrb) (!(mrb) || (mrb)->exc) /** - * Closes and frees a mrb_state. + * Check if mrb_open() succeeded * * @param mrb - * Pointer to the mrb_state to be closed. + * Pointer returned from mrb_open() or mrb_open_core(). + * @return + * Non-zero if initialization succeeded, 0 if failed. */ -MRB_API void mrb_close(mrb_state *mrb); +#define MRB_OPEN_SUCCESS(mrb) (!MRB_OPEN_FAILURE(mrb)) /** - * The default allocation function. + * The memory allocation function. You can redefine this function for your own allocator. * - * @see mrb_allocf */ -MRB_API void* mrb_default_allocf(mrb_state*, void*, size_t, void*); +MRB_API void* mrb_basic_alloc_func(void*, size_t); MRB_API mrb_value mrb_top_self(mrb_state *mrb); + +/** + * Enter the mruby VM and execute the proc. + * + * @param mrb + * The current mruby state. + * @param proc + * An object containing `irep`. + * If supplied an object containing anything other than `irep`, it will probably crash. + * @param self + * `self` on the execution context of `proc`. + * @param stack_keep + * Specifies the number of values to hold from the stack top. + * Values on the stack outside this range will be initialized to `nil`. + * + * @note + * When called from a C function defined as a method, the current stack is destroyed. + * If you want to use arguments obtained by `mrb_get_args()` or other methods after `mrb_top_run()`, + * you must protect them by `mrb_gc_protect()` or other ways before this function. + * Or consider using `mrb_yield()` family functions. + */ MRB_API mrb_value mrb_top_run(mrb_state *mrb, const struct RProc *proc, mrb_value self, mrb_int stack_keep); + MRB_API mrb_value mrb_vm_run(mrb_state *mrb, const struct RProc *proc, mrb_value self, mrb_int stack_keep); MRB_API mrb_value mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *iseq); /* compatibility macros */ @@ -1292,26 +1418,36 @@ MRB_API mrb_bool mrb_eql(mrb_state *mrb, mrb_value obj1, mrb_value obj2); /* mrb_cmp(mrb, obj1, obj2): 1:0:-1; -2 for error */ MRB_API mrb_int mrb_cmp(mrb_state *mrb, mrb_value obj1, mrb_value obj2); -MRB_INLINE int -mrb_gc_arena_save(mrb_state *mrb) -{ - return mrb->gc.arena_idx; -} +/* recursion detection */ +MRB_API mrb_bool mrb_recursive_method_p(mrb_state *mrb, mrb_sym mid, mrb_value obj1, mrb_value obj2); +MRB_API mrb_bool mrb_recursive_func_p(mrb_state *mrb, mrb_sym mid, mrb_value obj1, mrb_value obj2); -MRB_INLINE void -mrb_gc_arena_restore(mrb_state *mrb, int idx) -{ - mrb->gc.arena_idx = idx; -} +#define MRB_RECURSIVE_P(mrb, mid, obj1, obj2) \ + mrb_recursive_method_p(mrb, mid, obj1, obj2) + +#define MRB_RECURSIVE_UNARY_P(mrb, mid, obj) \ + mrb_recursive_method_p(mrb, mid, obj, mrb_nil_value()) + +#define MRB_RECURSIVE_BINARY_P(mrb, mid, obj1, obj2) \ + mrb_recursive_method_p(mrb, mid, obj1, obj2) + +#define MRB_RECURSIVE_FUNC_P(mrb, mid, obj) \ + mrb_recursive_func_p(mrb, mid, obj, mrb_nil_value()) + +#define MRB_RECURSIVE_BINARY_FUNC_P(mrb, mid, obj1, obj2) \ + mrb_recursive_func_p(mrb, mid, obj1, obj2) + +#define mrb_gc_arena_save(mrb) ((mrb)->gc.arena_idx) +#define mrb_gc_arena_restore(mrb, idx) ((mrb)->gc.arena_idx = (idx)) MRB_API void mrb_garbage_collect(mrb_state*); MRB_API void mrb_full_gc(mrb_state*); -MRB_API void mrb_incremental_gc(mrb_state *); +MRB_API void mrb_incremental_gc(mrb_state*); MRB_API void mrb_gc_mark(mrb_state*,struct RBasic*); #define mrb_gc_mark_value(mrb,val) do {\ if (!mrb_immediate_p(val)) mrb_gc_mark((mrb), mrb_basic_ptr(val)); \ } while (0) -MRB_API void mrb_field_write_barrier(mrb_state *, struct RBasic*, struct RBasic*); +MRB_API void mrb_field_write_barrier(mrb_state*, struct RBasic*, struct RBasic*); #define mrb_field_write_barrier_value(mrb, obj, val) do{\ if (!mrb_immediate_p(val)) mrb_field_write_barrier((mrb), (obj), mrb_basic_ptr(val)); \ } while (0) @@ -1355,7 +1491,7 @@ MRB_API mrb_noreturn void mrb_name_error(mrb_state *mrb, mrb_sym id, const char MRB_API mrb_noreturn void mrb_frozen_error(mrb_state *mrb, void *frozen_obj); MRB_API mrb_noreturn void mrb_argnum_error(mrb_state *mrb, mrb_int argc, int min, int max); MRB_API void mrb_warn(mrb_state *mrb, const char *fmt, ...); -MRB_API mrb_noreturn void mrb_bug(mrb_state *mrb, const char *fmt, ...); +MRB_API mrb_noreturn void mrb_bug(mrb_state *mrb, const char *mesg); MRB_API void mrb_print_backtrace(mrb_state *mrb); MRB_API void mrb_print_error(mrb_state *mrb); /* function for `raisef` formatting */ @@ -1367,6 +1503,8 @@ MRB_API mrb_value mrb_vformat(mrb_state *mrb, const char *format, va_list ap); + exception objects obtained from those macros are local to mrb */ #define MRB_ERROR_SYM(sym) mrb_intern_lit(mrb, #sym) +#define E_EXCEPTION mrb->eException_class +#define E_STANDARD_ERROR mrb->eStandardError_class #define E_RUNTIME_ERROR mrb_exc_get_id(mrb, MRB_ERROR_SYM(RuntimeError)) #define E_TYPE_ERROR mrb_exc_get_id(mrb, MRB_ERROR_SYM(TypeError)) #define E_ZERODIV_ERROR mrb_exc_get_id(mrb, MRB_ERROR_SYM(ZeroDivisionError)) @@ -1425,12 +1563,8 @@ MRB_API mrb_value mrb_ensure_int_type(mrb_state *mrb, mrb_value val); /* string type checking (contrary to the name, it doesn't convert) */ MRB_API void mrb_check_type(mrb_state *mrb, mrb_value x, enum mrb_vtype t); - -MRB_INLINE void mrb_check_frozen(mrb_state *mrb, void *o) -{ - if (mrb_frozen_p((struct RBasic*)o)) mrb_frozen_error(mrb, o); -} - +MRB_API void mrb_check_frozen(mrb_state *mrb, void *); +MRB_API void mrb_check_frozen_value(mrb_state *mrb, mrb_value v); MRB_API void mrb_define_alias(mrb_state *mrb, struct RClass *c, const char *a, const char *b); MRB_API void mrb_define_alias_id(mrb_state *mrb, struct RClass *c, mrb_sym a, mrb_sym b); MRB_API const char *mrb_class_name(mrb_state *mrb, struct RClass* klass); @@ -1439,16 +1573,25 @@ MRB_API void mrb_define_global_const(mrb_state *mrb, const char *name, mrb_value MRB_API mrb_value mrb_attr_get(mrb_state *mrb, mrb_value obj, mrb_sym id); MRB_API mrb_bool mrb_respond_to(mrb_state *mrb, mrb_value obj, mrb_sym mid); -MRB_API mrb_bool mrb_obj_is_instance_of(mrb_state *mrb, mrb_value obj, struct RClass* c); +MRB_API mrb_bool mrb_obj_is_instance_of(mrb_state *mrb, mrb_value obj, const struct RClass* c); MRB_API mrb_bool mrb_func_basic_p(mrb_state *mrb, mrb_value obj, mrb_sym mid, mrb_func_t func); /* obsolete function(s); will be removed */ #define mrb_int(mrb, val) mrb_as_int(mrb, val) +/** + * Create a new Fiber from proc object + * + * Implemented in mruby-fiber + */ +MRB_API mrb_value mrb_fiber_new(mrb_state *mrb, const struct RProc *proc); + /** * Resume a Fiber * * Implemented in mruby-fiber + * + * Switches to the specified fiber and executes. Like the `Fiber#resume` method. */ MRB_API mrb_value mrb_fiber_resume(mrb_state *mrb, mrb_value fib, mrb_int argc, const mrb_value *argv); @@ -1456,6 +1599,15 @@ MRB_API mrb_value mrb_fiber_resume(mrb_state *mrb, mrb_value fib, mrb_int argc, * Yield a Fiber * * Implemented in mruby-fiber + * + * Passes control to the caller fiber of the running fiber. Like the `Fiber.yield` method. + * + * @note This function is only available from inside a function defined as a method by, + * for example, `mrb_define_method()`. + * Also, the work following `mrb_fiber_yield()` cannot be performed, + * and the return value of `mrb_fiber_yield()` must be returned as is. + * + * return mrb_fiber_yield(mrb, argc, argv); */ MRB_API mrb_value mrb_fiber_yield(mrb_state *mrb, mrb_int argc, const mrb_value *argv); @@ -1474,15 +1626,9 @@ MRB_API mrb_value mrb_fiber_alive_p(mrb_state *mrb, mrb_value fib); #define E_FIBER_ERROR mrb_exc_get_id(mrb, MRB_ERROR_SYM(FiberError)) MRB_API void mrb_stack_extend(mrb_state*, mrb_int); -/* memory pool implementation */ -typedef struct mrb_pool mrb_pool; -MRB_API struct mrb_pool* mrb_pool_open(mrb_state*); -MRB_API void mrb_pool_close(struct mrb_pool*); -MRB_API void* mrb_pool_alloc(struct mrb_pool*, size_t); -MRB_API void* mrb_pool_realloc(struct mrb_pool*, void*, size_t oldlen, size_t newlen); -MRB_API mrb_bool mrb_pool_can_realloc(struct mrb_pool*, void*, size_t); /* temporary memory allocation, only effective while GC arena is kept */ -MRB_API void* mrb_alloca(mrb_state *mrb, size_t); +MRB_API void* mrb_temp_alloc(mrb_state *mrb, size_t); +#define mrb_alloca(mrb, size) mrb_temp_alloc(mrb, size) /* for compatibility */ MRB_API void mrb_state_atexit(mrb_state *mrb, mrb_atexit_func func); @@ -1493,6 +1639,8 @@ MRB_API mrb_value mrb_format(mrb_state *mrb, const char *format, ...); #ifdef MRB_PRESYM_SCANNING # include +#else +# include #endif #if 0 @@ -1522,6 +1670,12 @@ mrbmemset(void *s, int c, size_t n) #define memset(a,b,c) mrbmemset(a,b,c) #endif +#define mrb_int_hash_func(mrb,key) (uint32_t)((key)^((key)<<2)^((key)>>2)) + +#define MRB_UNIQNAME(name) MRB_UNIQNAME_1(name, __LINE__) +#define MRB_UNIQNAME_1(name, line) MRB_UNIQNAME_2(name, line) +#define MRB_UNIQNAME_2(name, line) name##line + MRB_END_DECL #endif /* MRUBY_H */ diff --git a/include/mruby/array.h b/include/mruby/array.h index df5a16a91a..6e63db557a 100644 --- a/include/mruby/array.h +++ b/include/mruby/array.h @@ -20,7 +20,17 @@ typedef struct mrb_shared_array { mrb_value *ptr; } mrb_shared_array; -#if defined(MRB_32BIT) && defined(MRB_NO_BOXING) && !defined(MRB_USE_FLOAT32) && !defined(MRB_ARY_NO_EMBED) +/* On 32-bit platforms whose ABI gives 8-byte members 8-byte alignment + (ARM, MIPS, xtensa, ...), an embedded mrb_value array forces 8-byte + alignment of the inner union, padding the heap-form layout and + inflating struct size past the 5-word RVALUE limit. Disable embedding + whenever mrb_value contains an 8-byte aligned member: nan-boxing + (uint64_t), or no-boxing with int64_t/double inside the union. */ +#if defined(MRB_32BIT) && \ + (defined(MRB_NAN_BOXING) || \ + (defined(MRB_NO_BOXING) && \ + (!defined(MRB_USE_FLOAT32) || defined(MRB_INT64)))) && \ + !defined(MRB_ARY_NO_EMBED) # define MRB_ARY_NO_EMBED #endif @@ -57,7 +67,7 @@ struct RArray { #define ARY_UNSET_EMBED_FLAG(a) (void)0 #define ARY_EMBED_LEN(a) 0 #define ARY_SET_EMBED_LEN(a,len) (void)0 -#define ARY_EMBED_PTR(a) 0 +#define ARY_EMBED_PTR(a) ((mrb_value*)NULL) #else #define MRB_ARY_EMBED_MASK 7 #define ARY_EMBED_P(a) ((a)->flags & MRB_ARY_EMBED_MASK) @@ -71,6 +81,7 @@ struct RArray { #define ARY_PTR(a) (ARY_EMBED_P(a)?ARY_EMBED_PTR(a):(a)->as.heap.ptr) #define RARRAY_LEN(a) ARY_LEN(RARRAY(a)) #define RARRAY_PTR(a) ARY_PTR(RARRAY(a)) +#define RARRAY_GETMEM(a, ptr, len) ARY_GETMEM(RARRAY(a), ptr, len) #define ARY_SET_LEN(a,n) do {\ if (ARY_EMBED_P(a)) {\ mrb_assert((n) <= MRB_ARY_EMBED_LEN_MAX); \ @@ -84,8 +95,21 @@ struct RArray { #define ARY_SHARED_P(a) ((a)->flags & MRB_ARY_SHARED) #define ARY_SET_SHARED_FLAG(a) ((a)->flags |= MRB_ARY_SHARED) #define ARY_UNSET_SHARED_FLAG(a) ((a)->flags &= ~MRB_ARY_SHARED) +#define ARY_GETMEM(a, ptr, len) do { \ + struct RArray *MRB_UNIQNAME(_a_) = (a); \ + if (ARY_EMBED_P(MRB_UNIQNAME(_a_))) { \ + (len) = ARY_EMBED_LEN(MRB_UNIQNAME(_a_)); \ + (ptr) = ARY_EMBED_PTR(MRB_UNIQNAME(_a_)); \ + } \ + else { \ + (len) = MRB_UNIQNAME(_a_)->as.heap.len; \ + (ptr) = MRB_UNIQNAME(_a_)->as.heap.ptr; \ + } \ +} while (0) MRB_API void mrb_ary_modify(mrb_state*, struct RArray*); +MRB_API mrb_value mrb_ary_dup(mrb_state*, mrb_value ary); +MRB_API mrb_value mrb_ary_make_shared_copy(mrb_state*, mrb_value ary); MRB_API mrb_value mrb_ary_new_capa(mrb_state*, mrb_int); /* diff --git a/include/mruby/boxing_nan.h b/include/mruby/boxing_nan.h index f412568dda..6fcbcf9447 100644 --- a/include/mruby/boxing_nan.h +++ b/include/mruby/boxing_nan.h @@ -98,6 +98,17 @@ mrb_type(mrb_value o) } } +MRB_INLINE enum mrb_vtype +mrb_unboxed_type(mrb_value o) +{ + if (!mrb_float_p(o) && mrb_nb_tt(o) == MRB_NANBOX_TT_OBJECT && o.u != 0) { + return ((struct RBasic*)(uintptr_t)o.u)->tt; + } + else { + return MRB_TT_FALSE; + } +} + #define NANBOX_SET_MISC_VALUE(r,t,i) NANBOX_SET_VALUE(r, MRB_NANBOX_TT_MISC, ((uint64_t)(t)<<32) | (i)) #define mrb_float(o) mrb_nan_boxing_value_float(o) @@ -140,7 +151,7 @@ mrb_nan_boxing_value_int(mrb_value v) #define SET_TRUE_VALUE(r) NANBOX_SET_MISC_VALUE(r, MRB_TT_TRUE, 1) #define SET_BOOL_VALUE(r,b) NANBOX_SET_MISC_VALUE(r, (b) ? MRB_TT_TRUE : MRB_TT_FALSE, 1) #ifdef MRB_INT64 -MRB_API mrb_value mrb_boxing_int_value(struct mrb_state*, mrb_int); +MRB_API mrb_value mrb_boxing_int_value(mrb_state*, mrb_int); #define SET_INT_VALUE(mrb, r, n) ((r) = mrb_boxing_int_value(mrb, n)) #else #define SET_INT_VALUE(mrb, r, n) SET_FIXNUM_VALUE(r, n) diff --git a/include/mruby/boxing_no.h b/include/mruby/boxing_no.h index 84908a0d81..efb108b12a 100644 --- a/include/mruby/boxing_no.h +++ b/include/mruby/boxing_no.h @@ -26,33 +26,34 @@ typedef struct mrb_value { enum mrb_vtype tt; } mrb_value; -#define mrb_ptr(o) (o).value.p -#define mrb_cptr(o) mrb_ptr(o) +#define mrb_ptr(o) (o).value.p +#define mrb_cptr(o) mrb_ptr(o) #ifndef MRB_NO_FLOAT -#define mrb_float(o) (o).value.f +#define mrb_float(o) (o).value.f #endif -#define mrb_fixnum(o) (o).value.i -#define mrb_integer(o) mrb_fixnum(o) -#define mrb_symbol(o) (o).value.sym -#define mrb_type(o) (o).tt +#define mrb_fixnum(o) (o).value.i +#define mrb_integer(o) mrb_fixnum(o) +#define mrb_symbol(o) (o).value.sym +#define mrb_type(o) (o).tt +#define mrb_unboxed_type(o) (o).tt -#define BOXNIX_SET_VALUE(o, ttt, attr, v) do {\ +#define BOXNO_SET_VALUE(o, ttt, attr, v) do {\ (o).tt = ttt;\ (o).attr = v;\ } while (0) -#define SET_NIL_VALUE(r) BOXNIX_SET_VALUE(r, MRB_TT_FALSE, value.i, 0) -#define SET_FALSE_VALUE(r) BOXNIX_SET_VALUE(r, MRB_TT_FALSE, value.i, 1) -#define SET_TRUE_VALUE(r) BOXNIX_SET_VALUE(r, MRB_TT_TRUE, value.i, 1) -#define SET_BOOL_VALUE(r,b) BOXNIX_SET_VALUE(r, b ? MRB_TT_TRUE : MRB_TT_FALSE, value.i, 1) -#define SET_INT_VALUE(mrb,r,n) BOXNIX_SET_VALUE(r, MRB_TT_INTEGER, value.i, (n)) -#define SET_FIXNUM_VALUE(r,n) BOXNIX_SET_VALUE(r, MRB_TT_INTEGER, value.i, (n)) +#define SET_NIL_VALUE(r) BOXNO_SET_VALUE(r, MRB_TT_FALSE, value.i, 0) +#define SET_FALSE_VALUE(r) BOXNO_SET_VALUE(r, MRB_TT_FALSE, value.i, 1) +#define SET_TRUE_VALUE(r) BOXNO_SET_VALUE(r, MRB_TT_TRUE, value.i, 1) +#define SET_BOOL_VALUE(r,b) BOXNO_SET_VALUE(r, b ? MRB_TT_TRUE : MRB_TT_FALSE, value.i, 1) +#define SET_INT_VALUE(mrb,r,n) BOXNO_SET_VALUE(r, MRB_TT_INTEGER, value.i, (n)) +#define SET_FIXNUM_VALUE(r,n) BOXNO_SET_VALUE(r, MRB_TT_INTEGER, value.i, (n)) #ifndef MRB_NO_FLOAT -#define SET_FLOAT_VALUE(mrb,r,v) BOXNIX_SET_VALUE(r, MRB_TT_FLOAT, value.f, (v)) +#define SET_FLOAT_VALUE(mrb,r,v) BOXNO_SET_VALUE(r, MRB_TT_FLOAT, value.f, (v)) #endif -#define SET_SYM_VALUE(r,v) BOXNIX_SET_VALUE(r, MRB_TT_SYMBOL, value.sym, (v)) -#define SET_OBJ_VALUE(r,v) BOXNIX_SET_VALUE(r, (((struct RObject*)(v))->tt), value.p, (v)) -#define SET_CPTR_VALUE(mrb,r,v) BOXNIX_SET_VALUE(r, MRB_TT_CPTR, value.p, v) -#define SET_UNDEF_VALUE(r) BOXNIX_SET_VALUE(r, MRB_TT_UNDEF, value.i, 0) +#define SET_SYM_VALUE(r,v) BOXNO_SET_VALUE(r, MRB_TT_SYMBOL, value.sym, (v)) +#define SET_OBJ_VALUE(r,v) BOXNO_SET_VALUE(r, (((struct RObject*)(v))->tt), value.p, (v)) +#define SET_CPTR_VALUE(mrb,r,v) BOXNO_SET_VALUE(r, MRB_TT_CPTR, value.p, v) +#define SET_UNDEF_VALUE(r) BOXNO_SET_VALUE(r, MRB_TT_UNDEF, value.i, 0) #endif /* MRUBY_BOXING_NO_H */ diff --git a/include/mruby/boxing_word.h b/include/mruby/boxing_word.h index 3dff13e1ae..b9bf1a13d4 100644 --- a/include/mruby/boxing_word.h +++ b/include/mruby/boxing_word.h @@ -7,15 +7,44 @@ #ifndef MRUBY_BOXING_WORD_H #define MRUBY_BOXING_WORD_H -#if defined(MRB_32BIT) && !defined(MRB_USE_FLOAT32) && !defined(MRB_WORDBOX_NO_FLOAT_TRUNCATE) -# define MRB_WORDBOX_NO_FLOAT_TRUNCATE +#if defined(MRB_32BIT) && !defined(MRB_USE_FLOAT32) && !defined(MRB_WORDBOX_NO_INLINE_FLOAT) +# define MRB_WORDBOX_NO_INLINE_FLOAT #endif -#if !defined(MRB_NO_FLOAT) && defined(MRB_WORDBOX_NO_FLOAT_TRUNCATE) +#ifndef MRB_NO_FLOAT struct RFloat { MRB_OBJECT_HEADER; +#ifdef MRB_WORDBOX_NO_INLINE_FLOAT + /* avoid 8-byte alignment on 32-bit; use memcpy-based accessors */ + char f[sizeof(mrb_float)]; +#else mrb_float f; +#endif }; + +#include + +static inline mrb_float +mrb_rfloat_value(const struct RFloat *p) +{ +#ifdef MRB_WORDBOX_NO_INLINE_FLOAT + mrb_float f; + memcpy(&f, p->f, sizeof(mrb_float)); + return f; +#else + return p->f; +#endif +} + +static inline void +mrb_rfloat_set(struct RFloat *p, mrb_float f) +{ +#ifdef MRB_WORDBOX_NO_INLINE_FLOAT + memcpy(p->f, &f, sizeof(mrb_float)); +#else + p->f = f; +#endif +} #endif struct RInteger { @@ -50,8 +79,9 @@ enum mrb_special_consts { #define WORDBOX_FIXNUM_FLAG (1 << (WORDBOX_FIXNUM_BIT_POS - 1)) #define WORDBOX_FIXNUM_MASK ((1 << WORDBOX_FIXNUM_BIT_POS) - 1) -#if defined(MRB_WORDBOX_NO_FLOAT_TRUNCATE) +#if defined(MRB_WORDBOX_NO_INLINE_FLOAT) || defined(MRB_NO_FLOAT) /* floats are allocated in heaps */ +#define WORDBOX_IMMEDIATE_MASK 0x03 #define WORDBOX_SYMBOL_BIT_POS 2 #define WORDBOX_SYMBOL_SHIFT WORDBOX_SYMBOL_BIT_POS #define WORDBOX_SYMBOL_FLAG (1 << (WORDBOX_SYMBOL_BIT_POS - 1)) @@ -68,7 +98,9 @@ enum mrb_special_consts { #define WORDBOX_SYMBOL_MASK 0x1f #endif +#ifndef WORDBOX_IMMEDIATE_MASK #define WORDBOX_IMMEDIATE_MASK 0x07 +#endif #define WORDBOX_SET_SHIFT_VALUE(o,n,v) \ ((o).w = (((uintptr_t)(v)) << WORDBOX_##n##_SHIFT) | WORDBOX_##n##_FLAG) @@ -80,34 +112,29 @@ enum mrb_special_consts { /* * mrb_value representation: * - * 64bit word with inline float: + * 64-bit word with inline float (rotation encoding, lossless): * nil : ...0000 0000 (all bits are 0) * false : ...0000 0100 (mrb_fixnum(v) != 0) * true : ...0000 1100 * undef : ...0001 0100 * symbol: ...0001 1100 (use only upper 32-bit as symbol value with MRB_64BIT) * fixnum: ...IIII III1 - * float : ...FFFF FF10 (51 bit significands; require MRB_64BIT) + * float : ...FFFF FF10 (rotl64(float64-ADDEND, 3); exponent [-255,+256]) * object: ...PPPP P000 + * (floats outside inline range are heap-allocated as RFloat) * - * 32bit word with inline float: - * nil : ...0000 0000 (all bits are 0) - * false : ...0000 0100 (mrb_fixnum(v) != 0) - * true : ...0000 1100 - * undef : ...0001 0100 - * symbol: ...SSS1 0100 (symbol occupies 20bits) - * fixnum: ...IIII III1 - * float : ...FFFF FF10 (22 bit significands; require MRB_64BIT) - * object: ...PPPP P000 + * 64-bit word with inline float32 (MRB_USE_FLOAT32): + * float : ...FFFF FF10 (float32 shifted left by 2) + * (other values same as above) * - * and word boxing without inline float (MRB_WORDBOX_NO_FLOAT_TRUNCATE): + * word boxing without inline float (MRB_WORDBOX_NO_INLINE_FLOAT): * nil : ...0000 0000 (all bits are 0) * false : ...0000 0100 (mrb_fixnum(v) != 0) * true : ...0000 1100 * undef : ...0001 0100 * fixnum: ...IIII III1 * symbol: ...SSSS SS10 - * object: ...PPPP P000 (any bits are 1) + * object: ...PPPP PP00 (any bits are 1) */ typedef struct mrb_value { uintptr_t w; @@ -117,10 +144,9 @@ union mrb_value_ { void *p; struct RBasic *bp; #ifndef MRB_NO_FLOAT -#ifndef MRB_WORDBOX_NO_FLOAT_TRUNCATE - mrb_float f; -#else struct RFloat *fp; +#if !defined(MRB_WORDBOX_NO_INLINE_FLOAT) && defined(MRB_USE_FLOAT32) + mrb_float f; #endif #endif struct RInteger *ip; @@ -139,22 +165,26 @@ mrb_val_union(mrb_value v) return x; } -MRB_API mrb_value mrb_word_boxing_cptr_value(struct mrb_state*, void*); +MRB_API mrb_value mrb_word_boxing_cptr_value(mrb_state*, void*); #ifndef MRB_NO_FLOAT -MRB_API mrb_value mrb_word_boxing_float_value(struct mrb_state*, mrb_float); +MRB_API mrb_value mrb_word_boxing_float_value(mrb_state*, mrb_float); #endif -MRB_API mrb_value mrb_boxing_int_value(struct mrb_state*, mrb_int); +MRB_API mrb_value mrb_boxing_int_value(mrb_state*, mrb_int); +#if WORDBOX_IMMEDIATE_MASK == 0x3 +#define mrb_immediate_p(o) ((o).w & WORDBOX_IMMEDIATE_MASK || (o).w <= MRB_Qundef) +#else #define mrb_immediate_p(o) ((o).w & WORDBOX_IMMEDIATE_MASK || (o).w == MRB_Qnil) +#endif #define mrb_ptr(o) mrb_val_union(o).p #define mrb_cptr(o) mrb_val_union(o).vp->p #ifndef MRB_NO_FLOAT -#ifndef MRB_WORDBOX_NO_FLOAT_TRUNCATE +#ifndef MRB_WORDBOX_NO_INLINE_FLOAT MRB_API mrb_float mrb_word_boxing_value_float(mrb_value v); #define mrb_float(o) mrb_word_boxing_value_float(o) #else -#define mrb_float(o) mrb_val_union(o).fp->f +#define mrb_float(o) mrb_rfloat_value(mrb_val_union(o).fp) #endif #endif #define mrb_fixnum(o) (mrb_int)(((intptr_t)(o).w) >> WORDBOX_FIXNUM_SHIFT) @@ -175,11 +205,16 @@ mrb_integer_func(mrb_value o) { #define mrb_false_p(o) ((o).w == MRB_Qfalse) #define mrb_true_p(o) ((o).w == MRB_Qtrue) #ifndef MRB_NO_FLOAT -#ifndef MRB_WORDBOX_NO_FLOAT_TRUNCATE +#ifdef MRB_WORDBOX_NO_INLINE_FLOAT +#define mrb_float_p(o) WORDBOX_OBJ_TYPE_P(o, FLOAT) +#elif defined(MRB_USE_FLOAT32) && defined(MRB_64BIT) #define mrb_float_p(o) WORDBOX_SHIFT_VALUE_P(o, FLOAT) #else -#define mrb_float_p(o) WORDBOX_OBJ_TYPE_P(o, FLOAT) +/* rotation encoding: most floats inline, edge cases on heap */ +#define mrb_float_p(o) (WORDBOX_SHIFT_VALUE_P(o, FLOAT) || WORDBOX_OBJ_TYPE_P(o, FLOAT)) #endif +#else +#define mrb_float_p(o) FALSE #endif #define mrb_array_p(o) WORDBOX_OBJ_TYPE_P(o, ARRAY) #define mrb_string_p(o) WORDBOX_OBJ_TYPE_P(o, STRING) @@ -222,10 +257,23 @@ mrb_type(mrb_value o) mrb_fixnum_p(o) ? MRB_TT_INTEGER : mrb_symbol_p(o) ? MRB_TT_SYMBOL : mrb_undef_p(o) ? MRB_TT_UNDEF : -#ifndef MRB_NO_FLOAT mrb_float_p(o) ? MRB_TT_FLOAT : -#endif mrb_val_union(o).bp->tt; } +MRB_INLINE enum mrb_vtype +mrb_unboxed_type(mrb_value o) +{ + if (mrb_nil_p(o)) { + return MRB_TT_FALSE; + } + else if ((o.w & WORDBOX_IMMEDIATE_MASK) == 0) { + return mrb_val_union(o).bp->tt; + } + else { + return MRB_TT_FALSE; + } +} + + #endif /* MRUBY_BOXING_WORD_H */ diff --git a/include/mruby/class.h b/include/mruby/class.h index ff16f1ac11..33c0b90e7f 100644 --- a/include/mruby/class.h +++ b/include/mruby/class.h @@ -17,7 +17,7 @@ MRB_BEGIN_DECL struct RClass { MRB_OBJECT_HEADER; struct iv_tbl *iv; - struct mt_tbl *mt; + struct mrb_mt_tbl *mt; struct RClass *super; }; @@ -26,6 +26,10 @@ struct RClass { MRB_INLINE struct RClass* mrb_class(mrb_state *mrb, mrb_value v) { + if (!mrb_immediate_p(v)) { + return mrb_obj_ptr(v)->c; + } + switch (mrb_type(v)) { case MRB_TT_FALSE: if (mrb_fixnum(v)) @@ -43,20 +47,19 @@ mrb_class(mrb_state *mrb, mrb_value v) #endif case MRB_TT_CPTR: return mrb->object_class; - case MRB_TT_ENV: - return NULL; default: - return mrb_obj_ptr(v)->c; + return NULL; } } /* flags: - 20: frozen - 19: is_prepended - 18: is_origin - 17: is_inherited (used by method cache) - 16: unused - 0-15: instance type + 20: frozen + 19: is_prepended + 18: is_origin + 17: is_inherited (used by method cache) + 7-16: unused + 6: prohibit Class#allocate + 0-5: instance type */ #define MRB_FL_CLASS_IS_PREPENDED (1 << 19) #define MRB_FL_CLASS_IS_ORIGIN (1 << 18) @@ -69,9 +72,13 @@ mrb_class(mrb_state *mrb, mrb_value v) }\ } while (0) #define MRB_FL_CLASS_IS_INHERITED (1 << 17) -#define MRB_INSTANCE_TT_MASK (0xFF) +#define MRB_INSTANCE_TT_MASK (0x1F) #define MRB_SET_INSTANCE_TT(c, tt) ((c)->flags = (((c)->flags & ~MRB_INSTANCE_TT_MASK) | (char)(tt))) #define MRB_INSTANCE_TT(c) (enum mrb_vtype)((c)->flags & MRB_INSTANCE_TT_MASK) +#define MRB_FL_UNDEF_ALLOCATE (1 << 6) +#define MRB_UNDEF_ALLOCATOR(c) (mrb_assert((c)->tt == MRB_TT_CLASS), (c)->flags |= MRB_FL_UNDEF_ALLOCATE) +#define MRB_UNDEF_ALLOCATOR_P(c) ((c)->flags & MRB_FL_UNDEF_ALLOCATE) +#define MRB_DEFINE_ALLOCATOR(c) ((c)->flags &= ~MRB_FL_UNDEF_ALLOCATE) MRB_API void mrb_define_method_raw(mrb_state*, struct RClass*, mrb_sym, mrb_method_t); MRB_API void mrb_alias_method(mrb_state*, struct RClass *c, mrb_sym a, mrb_sym b); @@ -81,6 +88,7 @@ MRB_API mrb_method_t mrb_method_search_vm(mrb_state*, struct RClass**, mrb_sym); MRB_API mrb_method_t mrb_method_search(mrb_state*, struct RClass*, mrb_sym); MRB_API struct RClass* mrb_class_real(struct RClass* cl); +MRB_API struct RClass* mrb_class_outer(mrb_state *mrb, struct RClass *c); #ifndef MRB_NO_METHOD_CACHE void mrb_mc_clear_by_class(mrb_state *mrb, struct RClass* c); @@ -92,6 +100,61 @@ void mrb_mc_clear_by_class(mrb_state *mrb, struct RClass* c); typedef int (mrb_mt_foreach_func)(mrb_state*,mrb_sym,mrb_method_t,void*); MRB_API void mrb_mt_foreach(mrb_state*, struct RClass*, mrb_mt_foreach_func*, void*); +/* ROM method table types for static method registration. + * NOTE: `func` is kept as the first union member so that positional + * aggregate initialization in MRB_MT_ENTRY works without C99 + * designated initializers (required for legacy C++ compilers). */ +union mrb_mt_ptr { + mrb_func_t func; + const struct RProc *proc; +}; + +/* entry combining function pointer, symbol key, and flags */ +typedef struct mrb_mt_entry { + union mrb_mt_ptr val; + mrb_sym key; /* pure symbol ID (no flags packed) */ + uint32_t flags; /* method flags + aspec */ +} mrb_mt_entry; + +typedef struct mrb_mt_tbl { + int size; + int alloc; /* bit 30: MRB_MT_READONLY_BIT, bit 29: MRB_MT_FROZEN_BIT */ + mrb_mt_entry *ptr; + struct mrb_mt_tbl *next; +} mrb_mt_tbl; + +#define MRB_MT_READONLY_BIT (1 << 30) +#define MRB_MT_FROZEN_BIT (1 << 29) +#define MRB_MT_FUNC (1 << 24) /* MRB_METHOD_FUNC_FL */ +#define MRB_MT_PUBLIC 0 +#define MRB_MT_PRIVATE (1 << 25) /* MRB_METHOD_PRIVATE_FL */ + +/* ROM table entry: 3rd param is MRB_ARGS_*() optionally OR'd with MRB_MT_PRIVATE. */ +#define MRB_MT_ENTRY(fn, sym, flags) \ + { { (fn) }, (sym), (flags) | MRB_MT_FUNC } +#define MRB_MT_ASPEC(flags) ((mrb_aspec)((flags) & 0xffffff)) + +/* "removed" tombstone: MRB_MT_FUNC flag set with NULL function pointer. + This combination never occurs naturally (C functions are never NULL). + Unlike undef (proc=NULL without MRB_MT_FUNC), a removed marker makes + mt_get() return 0 ("not found"), blocking ROM chain walk while + allowing superclass lookup. */ +#define MRB_MT_REMOVED_P(e) (((e).flags&MRB_MT_FUNC) && (e).val.func==NULL) + +/* Singly-linked list node for tracking heap-allocated ROM wrappers. */ +struct mrb_mt_rom_list { + mrb_mt_tbl *tbl; + struct mrb_mt_rom_list *next; +}; + +/* Allocate a per-state ROM layer wrapping the const entries array, + and push it onto the class's method table chain. */ +void mrb_mt_init_rom(mrb_state *mrb, struct RClass *c, + const mrb_mt_entry *entries, int size); +#define MRB_MT_INIT_ROM(mrb, cls, entries) \ + mrb_mt_init_rom(mrb, cls, entries, \ + (int)(sizeof(entries)/sizeof(entries[0]))) + MRB_END_DECL #endif /* MRUBY_CLASS_H */ diff --git a/include/mruby/common.h b/include/mruby/common.h index 59214d3c1e..9faf4a0c72 100644 --- a/include/mruby/common.h +++ b/include/mruby/common.h @@ -59,6 +59,35 @@ MRB_BEGIN_DECL # define mrb_deprecated #endif +/** Branch prediction hints for optimization. */ +#if defined(__GNUC__) || defined(__clang__) +# define mrb_likely(x) __builtin_expect(!!(x), 1) +# define mrb_unlikely(x) __builtin_expect(!!(x), 0) +#else +# define mrb_likely(x) (x) +# define mrb_unlikely(x) (x) +#endif + +/** Declare a type or object as an alignment requirement. */ +#ifndef mrb_alignas +# if defined(__cplusplus) && __cplusplus >= 201103L +# // https://en.cppreference.com/w/cpp/language/alignas +# define mrb_alignas(n) alignas(n) +# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# // https://en.cppreference.com/w/c/language/_Alignas +# define mrb_alignas(n) _Alignas(n) +# elif defined(_MSC_VER) || defined(__INTEL_COMPILER) +# // https://learn.microsoft.com/en-us/cpp/cpp/align-cpp?view=msvc-170 +# define mrb_alignas(n) __declspec(align(n)) +# elif defined(__GNUC__) || defined(__clang__) +# // https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-aligned-type-attribute +# define mrb_alignas(n) __attribute__((aligned(n))) +# else +# // `mrb_alignas` defined as dummy. If necessary, send issues to https://github.com/mruby/mruby . +# define mrb_alignas(n) +# endif +#endif + /** Declare a function as always inlined. */ #if defined _MSC_VER && _MSC_VER < 1900 # ifndef __cplusplus @@ -67,7 +96,7 @@ MRB_BEGIN_DECL #endif #define MRB_INLINE static inline -/** Declare a public MRuby API function. */ +/** Declare a public mruby API function. */ #ifndef MRB_API #if defined(MRB_BUILD_AS_DLL) #if defined(MRB_CORE) || defined(MRB_LIB) diff --git a/include/mruby/compile.h b/include/mruby/compile.h index 70cf9a93b0..9abc50f579 100644 --- a/include/mruby/compile.h +++ b/include/mruby/compile.h @@ -8,9 +8,10 @@ #define MRUBY_COMPILE_H #include "common.h" +#include "mruby/mempool.h" /** - * MRuby Compiler + * mruby Compiler */ MRB_BEGIN_DECL @@ -18,7 +19,7 @@ MRB_BEGIN_DECL struct mrb_parser_state; /* load context */ -typedef struct mrbc_context { +typedef struct mrb_ccontext { mrb_sym *syms; int slen; char *filename; @@ -32,22 +33,28 @@ typedef struct mrbc_context { mrb_bool keep_lv:1; mrb_bool no_optimize:1; mrb_bool no_ext_ops:1; + mrb_bool no_return_value:1; const struct RProc *upper; size_t parser_nerr; -} mrbc_context; - -MRB_API mrbc_context* mrbc_context_new(mrb_state *mrb); -MRB_API void mrbc_context_free(mrb_state *mrb, mrbc_context *cxt); -MRB_API const char *mrbc_filename(mrb_state *mrb, mrbc_context *c, const char *s); -MRB_API void mrbc_partial_hook(mrb_state *mrb, mrbc_context *c, int (*partial_hook)(struct mrb_parser_state*), void*data); -MRB_API void mrbc_cleanup_local_variables(mrb_state *mrb, mrbc_context *c); +} mrb_ccontext; /* compiler context */ + +MRB_API mrb_ccontext* mrb_ccontext_new(mrb_state *mrb); +MRB_API void mrb_ccontext_free(mrb_state *mrb, mrb_ccontext *cxt); +MRB_API const char *mrb_ccontext_filename(mrb_state *mrb, mrb_ccontext *c, const char *s); +MRB_API void mrb_ccontext_partial_hook(mrb_ccontext *c, int (*partial_hook)(struct mrb_parser_state*), void*data); +MRB_API void mrb_ccontext_cleanup_local_variables(mrb_ccontext *c); + +/* compatibility macros */ +#define mrbc_context mrb_ccontext +#define mrbc_context_new mrb_ccontext_new +#define mrbc_context_free mrb_ccontext_free +#define mrbc_filename mrb_ccontext_filename +#define mrbc_partial_hook mrb_ccontext_partial_hook +#define mrbc_cleanup_local_variables mrb_ccontext_cleanup_local_variables /* AST node structure */ -typedef struct mrb_ast_node { - struct mrb_ast_node *car, *cdr; - uint16_t lineno, filename_index; -} mrb_ast_node; +typedef struct mrb_ast_node mrb_ast_node; /* lexer states */ enum mrb_lex_state_enum { @@ -72,56 +79,20 @@ struct mrb_parser_message { char* message; }; -#define STR_FUNC_PARSING 0x01 -#define STR_FUNC_EXPAND 0x02 -#define STR_FUNC_REGEXP 0x04 -#define STR_FUNC_WORD 0x08 -#define STR_FUNC_SYMBOL 0x10 -#define STR_FUNC_ARRAY 0x20 -#define STR_FUNC_HEREDOC 0x40 -#define STR_FUNC_XQUOTE 0x80 - -enum mrb_string_type { - str_not_parsing = (0), - str_squote = (STR_FUNC_PARSING), - str_dquote = (STR_FUNC_PARSING|STR_FUNC_EXPAND), - str_regexp = (STR_FUNC_PARSING|STR_FUNC_REGEXP|STR_FUNC_EXPAND), - str_sword = (STR_FUNC_PARSING|STR_FUNC_WORD|STR_FUNC_ARRAY), - str_dword = (STR_FUNC_PARSING|STR_FUNC_WORD|STR_FUNC_ARRAY|STR_FUNC_EXPAND), - str_ssym = (STR_FUNC_PARSING|STR_FUNC_SYMBOL), - str_ssymbols = (STR_FUNC_PARSING|STR_FUNC_SYMBOL|STR_FUNC_ARRAY), - str_dsymbols = (STR_FUNC_PARSING|STR_FUNC_SYMBOL|STR_FUNC_ARRAY|STR_FUNC_EXPAND), - str_heredoc = (STR_FUNC_PARSING|STR_FUNC_HEREDOC), - str_xquote = (STR_FUNC_PARSING|STR_FUNC_XQUOTE|STR_FUNC_EXPAND), -}; - -/* heredoc structure */ -struct mrb_parser_heredoc_info { - mrb_bool allow_indent:1; - mrb_bool remove_indent:1; - mrb_bool line_head:1; - size_t indent; - mrb_ast_node *indented; - enum mrb_string_type type; - const char *term; - int term_len; - mrb_ast_node *doc; -}; - #define MRB_PARSER_TOKBUF_MAX (UINT16_MAX-1) #define MRB_PARSER_TOKBUF_SIZE 256 /* parser structure */ struct mrb_parser_state { mrb_state *mrb; - struct mrb_pool *pool; + mempool *pool; mrb_ast_node *cells; const char *s, *send; #ifndef MRB_NO_STDIO /* If both f and s are non-null, it will be taken preferentially from s until s < send. */ FILE *f; #endif - mrbc_context *cxt; + mrb_ccontext *cxt; mrb_sym filename_sym; uint16_t lineno; int column; @@ -133,7 +104,7 @@ struct mrb_parser_state { unsigned int cmdarg_stack; int paren_nest; int lpar_beg; - int in_def, in_single; + int in_def, in_single, in_kwarg; mrb_bool cmd_start:1; mrb_ast_node *locals; @@ -155,6 +126,7 @@ struct mrb_parser_state { mrb_bool no_optimize:1; mrb_bool capture_errors:1; mrb_bool no_ext_ops:1; + mrb_bool no_return_value:1; const struct RProc *upper; struct mrb_parser_message error_buffer[10]; struct mrb_parser_message warn_buffer[10]; @@ -162,44 +134,53 @@ struct mrb_parser_state { mrb_sym* filename_table; uint16_t filename_table_length; uint16_t current_filename_index; + uint16_t prev_file_lineno; /* saved lineno before partial_hook file switch */ + /* Variable-sized node management */ mrb_ast_node *nvars; }; MRB_API struct mrb_parser_state* mrb_parser_new(mrb_state*); MRB_API void mrb_parser_free(struct mrb_parser_state*); -MRB_API void mrb_parser_parse(struct mrb_parser_state*,mrbc_context*); +MRB_API void mrb_parser_parse(struct mrb_parser_state*,mrb_ccontext*); MRB_API void mrb_parser_set_filename(struct mrb_parser_state*, char const*); MRB_API mrb_sym mrb_parser_get_filename(struct mrb_parser_state*, uint16_t idx); /* utility functions */ #ifndef MRB_NO_STDIO -MRB_API struct mrb_parser_state* mrb_parse_file(mrb_state*,FILE*,mrbc_context*); +MRB_API struct mrb_parser_state* mrb_parse_file(mrb_state*,FILE*,mrb_ccontext*); #endif -MRB_API struct mrb_parser_state* mrb_parse_string(mrb_state*,const char*,mrbc_context*); -MRB_API struct mrb_parser_state* mrb_parse_nstring(mrb_state*,const char*,size_t,mrbc_context*); +MRB_API struct mrb_parser_state* mrb_parse_string(mrb_state*,const char*,mrb_ccontext*); +MRB_API struct mrb_parser_state* mrb_parse_nstring(mrb_state*,const char*,size_t,mrb_ccontext*); MRB_API struct RProc* mrb_generate_code(mrb_state*, struct mrb_parser_state*); -MRB_API mrb_value mrb_load_exec(mrb_state *mrb, struct mrb_parser_state *p, mrbc_context *c); - -/** program load functions -* Please note! Currently due to interactions with the GC calling these functions will -* leak one RProc object per function call. -* To prevent this save the current memory arena before calling and restore the arena -* right after, like so -* int ai = mrb_gc_arena_save(mrb); -* mrb_value status = mrb_load_string(mrb, buffer); -* mrb_gc_arena_restore(mrb, ai); -*/ +MRB_API mrb_value mrb_load_exec(mrb_state *mrb, struct mrb_parser_state *p, mrb_ccontext *c); + +/** + * program load functions + * + * Please note! Currently due to interactions with the GC calling these functions will + * leak one RProc object per function call. + * To prevent this save the current memory arena before calling and restore the arena + * right after, like so + * + * int ai = mrb_gc_arena_save(mrb); + * mrb_value status = mrb_load_string(mrb, buffer); + * mrb_gc_arena_restore(mrb, ai); + * + * Also, when called from a C function defined as a method, the current stack is destroyed. + * If processing continues after this function, the objects obtained from the arguments + * must be protected as needed before this function. + */ #ifndef MRB_NO_STDIO MRB_API mrb_value mrb_load_file(mrb_state*,FILE*); -MRB_API mrb_value mrb_load_file_cxt(mrb_state*,FILE*, mrbc_context *cxt); -MRB_API mrb_value mrb_load_detect_file_cxt(mrb_state *mrb, FILE *fp, mrbc_context *c); +MRB_API mrb_value mrb_load_file_cxt(mrb_state*,FILE*, mrb_ccontext *cxt); +MRB_API mrb_value mrb_load_detect_file_cxt(mrb_state *mrb, FILE *fp, mrb_ccontext *c); #endif MRB_API mrb_value mrb_load_string(mrb_state *mrb, const char *s); MRB_API mrb_value mrb_load_nstring(mrb_state *mrb, const char *s, size_t len); -MRB_API mrb_value mrb_load_string_cxt(mrb_state *mrb, const char *s, mrbc_context *cxt); -MRB_API mrb_value mrb_load_nstring_cxt(mrb_state *mrb, const char *s, size_t len, mrbc_context *cxt); +MRB_API mrb_value mrb_load_string_cxt(mrb_state *mrb, const char *s, mrb_ccontext *cxt); +MRB_API mrb_value mrb_load_nstring_cxt(mrb_state *mrb, const char *s, size_t len, mrb_ccontext *cxt); /** @} */ MRB_END_DECL diff --git a/include/mruby/data.h b/include/mruby/data.h index 7bdf1c34e8..a64301a57d 100644 --- a/include/mruby/data.h +++ b/include/mruby/data.h @@ -41,12 +41,12 @@ MRB_API struct RData *mrb_data_object_alloc(mrb_state *mrb, struct RClass* klass #define Data_Make_Struct(mrb,klass,strct,type,sval,data_obj) do { \ (data_obj) = Data_Wrap_Struct(mrb,klass,type,NULL);\ - (sval) = (strct *)mrb_malloc(mrb, sizeof(strct)); \ + (sval) = (strct*)mrb_malloc(mrb, sizeof(strct)); \ { static const strct zero = { 0 }; *(sval) = zero; };\ (data_obj)->data = (sval);\ } while (0) -#define RDATA(obj) ((struct RData *)(mrb_ptr(obj))) +#define RDATA(obj) ((struct RData*)(mrb_ptr(obj))) #define DATA_PTR(d) (RDATA(d)->data) #define DATA_TYPE(d) (RDATA(d)->type) MRB_API void mrb_data_check_type(mrb_state *mrb, mrb_value, const mrb_data_type*); diff --git a/include/mruby/debug.h b/include/mruby/debug.h index 4a62cce426..f0409351b0 100644 --- a/include/mruby/debug.h +++ b/include/mruby/debug.h @@ -10,7 +10,7 @@ #include "common.h" /** - * MRuby Debugging. + * mruby Debugging. */ MRB_BEGIN_DECL @@ -46,7 +46,7 @@ typedef struct mrb_irep_debug_info { } mrb_irep_debug_info; /* - * get line from irep's debug info and program counter + * get filename from irep's debug info and program counter * @return returns NULL if not found */ MRB_API const char *mrb_debug_get_filename(mrb_state *mrb, const mrb_irep *irep, uint32_t pc); @@ -57,6 +57,12 @@ MRB_API const char *mrb_debug_get_filename(mrb_state *mrb, const mrb_irep *irep, */ MRB_API int32_t mrb_debug_get_line(mrb_state *mrb, const mrb_irep *irep, uint32_t pc); +/* + * get line and filename from irep's debug info and program counter + * @return returns FALSE if not found + */ +MRB_API mrb_bool mrb_debug_get_position(mrb_state *mrb, const mrb_irep *irep, uint32_t pc, int32_t *lp, const char **fp); + MRB_API mrb_irep_debug_info *mrb_debug_info_alloc(mrb_state *mrb, mrb_irep *irep); MRB_API mrb_irep_debug_info_file *mrb_debug_info_append_file( mrb_state *mrb, mrb_irep_debug_info *info, diff --git a/include/mruby/dump.h b/include/mruby/dump.h index 863f22649e..e3abf8ccb0 100644 --- a/include/mruby/dump.h +++ b/include/mruby/dump.h @@ -19,11 +19,11 @@ MRB_BEGIN_DECL /* flags for mrb_dump_irep{,_binary,_cfunc,_cstruct} */ #define MRB_DUMP_DEBUG_INFO 1 #define MRB_DUMP_STATIC 2 -#define DUMP_DEBUG_INFO MRB_DUMP_DEBUG_INFO /* deprecated */ +#define MRB_DUMP_NO_LVAR 4 #ifndef MRB_NO_STDIO MRB_API mrb_value mrb_load_irep_file(mrb_state*,FILE*); -MRB_API mrb_value mrb_load_irep_file_cxt(mrb_state*, FILE*, mrbc_context*); +MRB_API mrb_value mrb_load_irep_file_cxt(mrb_state*, FILE*, mrb_ccontext*); mrb_irep *mrb_read_irep_file(mrb_state*, FILE*); int mrb_dump_irep_binary(mrb_state*, const mrb_irep*, uint8_t, FILE*); #endif @@ -52,13 +52,13 @@ MRB_API mrb_irep *mrb_read_irep_buf(mrb_state*, const void*, size_t); /* Binary Format Version Major:Minor */ /* Major: Incompatible to prior versions */ /* Minor: Upper-compatible to prior versions */ -#define RITE_BINARY_MAJOR_VER "03" +#define RITE_BINARY_MAJOR_VER "04" #define RITE_BINARY_MINOR_VER "00" #define RITE_BINARY_FORMAT_VER RITE_BINARY_MAJOR_VER RITE_BINARY_MINOR_VER #define RITE_COMPILER_NAME "MATZ" #define RITE_COMPILER_VERSION "0000" -#define RITE_VM_VER "0300" +#define RITE_VM_VER "0400" #define RITE_BINARY_EOF "END\0" #define RITE_SECTION_IREP_IDENT "IREP" diff --git a/include/mruby/endian.h b/include/mruby/endian.h index 477f3bc94e..940c47ea77 100644 --- a/include/mruby/endian.h +++ b/include/mruby/endian.h @@ -34,7 +34,7 @@ static inline int check_little_endian(void) { unsigned int n = 1; - return (*(unsigned char *)&n == 1); + return (*(unsigned char*)&n == 1); } # define littleendian check_little_endian() #endif diff --git a/include/mruby/error.h b/include/mruby/error.h index ccf2cdb6e1..dbe4bf7584 100644 --- a/include/mruby/error.h +++ b/include/mruby/error.h @@ -8,17 +8,18 @@ #define MRUBY_ERROR_H #include "common.h" +#include /** - * MRuby error handling. + * mruby error handling. */ MRB_BEGIN_DECL struct RException { MRB_OBJECT_HEADER; struct iv_tbl *iv; - struct RObject *mesg; // NULL or probably RString - struct RObject *backtrace; // NULL, RArray or RData + struct RBasic *mesg; // NULL or probably RString + struct RBasic *backtrace; // NULL, RArray or RData }; /* error that should terminate execution */ @@ -34,45 +35,84 @@ struct RException { MRB_API mrb_noreturn void mrb_sys_fail(mrb_state *mrb, const char *mesg); MRB_API mrb_value mrb_exc_new_str(mrb_state *mrb, struct RClass* c, mrb_value str); #define mrb_exc_new_lit(mrb, c, lit) mrb_exc_new_str(mrb, c, mrb_str_new_lit(mrb, lit)) -MRB_API mrb_value mrb_make_exception(mrb_state *mrb, mrb_int argc, const mrb_value *argv); MRB_API mrb_noreturn void mrb_no_method_error(mrb_state *mrb, mrb_sym id, mrb_value args, const char *fmt, ...); -/* declaration for `fail` method */ -MRB_API mrb_value mrb_f_raise(mrb_state*, mrb_value); - -#if defined(MRB_64BIT) || defined(MRB_USE_FLOAT32) || defined(MRB_NAN_BOXING) || defined(MRB_WORD_BOXING) +/* On 32-bit platforms whose ABI gives uint64_t/double 8-byte alignment + (ARM, MIPS, PowerPC, xtensa, ...), embedding mrb_value directly in + RBreak forces 8-byte alignment that pushes the struct past the 5-word + RVALUE budget via padding. Store the value bits as a uint32_t array + (4-byte aligned) to dodge that padding. Word-boxing's mrb_value is + just a uintptr_t with no over-alignment, and 64-bit platforms have no + alignment gap to begin with, so neither needs the workaround. */ +#if defined(MRB_64BIT) || defined(MRB_WORD_BOXING) +#undef MRB_USE_RBREAK_VALUE_UNION +#else +#define MRB_USE_RBREAK_VALUE_UNION 1 +#endif + +/* + * flags: + * 0..7: enum mrb_vtype (only when MRB_USE_RBREAK_VALUE_UNION and + * !MRB_NAN_BOXING; nan-boxing encodes the type in the bits) + * 8..10: RBREAK_TAGs in src/vm.c (otherwise, set to 0) + */ struct RBreak { MRB_OBJECT_HEADER; - const struct RProc *proc; + uintptr_t ci_break_index; // The top-level ci index to break. One before the return destination. +#ifndef MRB_USE_RBREAK_VALUE_UNION mrb_value val; +#elif defined(MRB_NAN_BOXING) + /* nan-boxing: mrb_value is a single 64-bit word (type encoded in NaN bits) */ + uint32_t value[sizeof(mrb_value) / sizeof(uint32_t)]; +#else + /* no-boxing: store only the union bits; tt goes in flags */ + uint32_t value[sizeof(union mrb_value_union) / sizeof(uint32_t)]; +#endif }; + +#ifndef MRB_USE_RBREAK_VALUE_UNION #define mrb_break_value_get(brk) ((brk)->val) #define mrb_break_value_set(brk, v) ((brk)->val = v) +#elif defined(MRB_NAN_BOXING) +static inline mrb_value +mrb_break_value_get(struct RBreak *brk) +{ + mrb_value val; + memcpy(&val, brk->value, sizeof(val)); + return val; +} +static inline void +mrb_break_value_set(struct RBreak *brk, mrb_value val) +{ + memcpy(brk->value, &val, sizeof(val)); +} #else -struct RBreak { - MRB_OBJECT_HEADER; - const struct RProc *proc; - union mrb_value_union value; -}; #define RBREAK_VALUE_TT_MASK ((1 << 8) - 1) static inline mrb_value mrb_break_value_get(struct RBreak *brk) { mrb_value val; - val.value = brk->value; + memcpy(&val.value, brk->value, sizeof(val.value)); val.tt = (enum mrb_vtype)(brk->flags & RBREAK_VALUE_TT_MASK); return val; } static inline void mrb_break_value_set(struct RBreak *brk, mrb_value val) { - brk->value = val.value; + memcpy(brk->value, &val.value, sizeof(val.value)); brk->flags &= ~RBREAK_VALUE_TT_MASK; brk->flags |= val.tt; } -#endif /* MRB_64BIT || MRB_USE_FLOAT32 || MRB_NAN_BOXING || MRB_WORD_BOXING */ -#define mrb_break_proc_get(brk) ((brk)->proc) -#define mrb_break_proc_set(brk, p) ((brk)->proc = p) +#endif /* MRB_USE_RBREAK_VALUE_UNION */ + +/** + * Error check + * + */ +/* clear error status in the mrb_state structure */ +MRB_API void mrb_clear_error(mrb_state *mrb); +/* returns TRUE if error in the previous call; internally calls mrb_clear_error() */ +MRB_API mrb_bool mrb_check_error(mrb_state *mrb); /** * Protect @@ -113,6 +153,42 @@ MRB_API mrb_value mrb_rescue_exceptions(mrb_state *mrb, mrb_func_t body, mrb_val mrb_func_t rescue, mrb_value r_data, mrb_int len, struct RClass **classes); +/** + * Calls `func` via `mrb_protect_error()` and then always executes the user block exactly once. + * Even if a global jump (similar to a Ruby exception) occurs within `func`, the block will be executed, + * and after the block's completion, the global jump will be re-thrown. + * + * By checking `mrb->exc != NULL` within the block, you can determine if a global jump occurred in `func`. + * + * If you want to suppress the global jump and continue processing, use `mrb_clear_error(mrb); break;`. + * + * - `mrb`: The mruby state reference + * - `result_var`: Pre-defined mrb_value type variable (to receive `func`'s return value) + * - `func`: Function to call (compatible with `mrb_protect_error_func`) + * - `data`: User data to pass to `func` + * + * Example: + * + * mrb_value result; + * MRB_ENSURE(mrb, result, body_func, userdata) { + * // This block is always executed (equivalent to Ruby's ensure) + * + * if (mrb->exc) { + * // Post-processing when an exception occurs + * } + * + * // To ignore the global jump, use `mrb_clear_error(mrb); break;` here + * } + */ +#define MRB_ENSURE(mrb, result_var, func, data) \ + for (mrb_bool MRB_UNIQNAME(_break_) = FALSE; \ + !MRB_UNIQNAME(_break_) && \ + (((result_var) = mrb_protect_error(mrb, func, data, &MRB_UNIQNAME(_break_))), \ + ((mrb)->exc = (MRB_UNIQNAME(_break_) ? mrb_obj_ptr((result_var)) : NULL)), \ + TRUE); \ + (void)(MRB_UNIQNAME(_break_) && (mrb)->jmp && (mrb_exc_raise(mrb, result_var), TRUE)), \ + MRB_UNIQNAME(_break_) = TRUE) + MRB_END_DECL #endif /* MRUBY_ERROR_H */ diff --git a/include/mruby/gc.h b/include/mruby/gc.h index ba56cefd7a..43eb46d50e 100644 --- a/include/mruby/gc.h +++ b/include/mruby/gc.h @@ -14,78 +14,69 @@ */ MRB_BEGIN_DECL - -struct mrb_state; - #define MRB_EACH_OBJ_OK 0 #define MRB_EACH_OBJ_BREAK 1 -typedef int (mrb_each_object_callback)(struct mrb_state *mrb, struct RBasic *obj, void *data); -void mrb_objspace_each_objects(struct mrb_state *mrb, mrb_each_object_callback *callback, void *data); +typedef int (mrb_each_object_callback)(mrb_state *mrb, struct RBasic *obj, void *data); +void mrb_objspace_each_objects(mrb_state *mrb, mrb_each_object_callback *callback, void *data); size_t mrb_objspace_page_slot_size(void); -MRB_API void mrb_free_context(struct mrb_state *mrb, struct mrb_context *c); +MRB_API void mrb_free_context(mrb_state *mrb, struct mrb_context *c); #ifndef MRB_GC_ARENA_SIZE #define MRB_GC_ARENA_SIZE 100 #endif +#ifndef MRB_GRAY_STACK_SIZE +#define MRB_GRAY_STACK_SIZE 1024 +#endif + typedef enum { MRB_GC_STATE_ROOT = 0, MRB_GC_STATE_MARK, MRB_GC_STATE_SWEEP } mrb_gc_state; -/* Disable MSVC warning "C4200: nonstandard extension used: zero-sized array - * in struct/union" when in C++ mode */ -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4200) -#endif - -typedef struct mrb_heap_page { - struct RBasic *freelist; - struct mrb_heap_page *prev; - struct mrb_heap_page *next; - struct mrb_heap_page *free_next; - struct mrb_heap_page *free_prev; - mrb_bool old:1; - /* Flexible array members area a C99 feature, not C++ compatible */ - /* void* objects[]; */ -} mrb_heap_page; - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - typedef struct mrb_gc { - mrb_heap_page *heaps; /* heaps for GC */ - mrb_heap_page *sweeps; - mrb_heap_page *free_heaps; - size_t live; /* count of live objects */ + struct mrb_heap_page *heaps; /* all heaps pages */ + struct mrb_heap_page *free_heaps;/* heaps for allocation */ + struct mrb_heap_page *sweeps; /* page where sweep starts */ + struct mrb_heap_region *regions; /* contiguous heap regions */ + struct RBasic *gray_stack[MRB_GRAY_STACK_SIZE]; /* stack of gray objects */ + size_t gray_stack_top; /* top index of gray stack */ + mrb_bool gray_overflow:1; /* gray stack overflowed; needs heap rescan */ + size_t live; /* count of live objects */ + size_t live_after_mark; /* old generation objects */ + mrb_int gc_debt; /* <0:credit, >0:needs GC */ + size_t oldgen_threshold; /* threshold to kick major GC */ + mrb_gc_state state; /* current state of gc */ + int interval_ratio; + int step_ratio; + int current_white_part :2; /* make white object by white_part */ + mrb_bool iterating :1; /* currently iterating over objects */ + mrb_bool disabled :1; /* GC disabled */ + mrb_bool generational :1; /* generational GC mode */ + mrb_bool full :1; /* major GC mode */ + mrb_bool out_of_memory :1; /* out-of-memory error occurred */ + size_t step_limit; /* 0=unlimited, >0=absolute step cap */ + size_t malloc_increase; /* malloc bytes since last GC cycle */ + size_t malloc_threshold; /* 0=disabled, >0=bytes to trigger GC */ + #ifdef MRB_GC_FIXED_ARENA struct RBasic *arena[MRB_GC_ARENA_SIZE]; /* GC protection array */ #else struct RBasic **arena; /* GC protection array */ - int arena_capa; + int arena_capa; /* size of protection array */ #endif int arena_idx; - mrb_gc_state state; /* state of gc */ - int current_white_part; /* make white object by white_part */ - struct RBasic *gray_list; /* list of gray objects to be traversed incrementally */ - struct RBasic *atomic_gray_list; /* list of objects to be traversed atomically */ - size_t live_after_mark; - size_t threshold; - int interval_ratio; - int step_ratio; - mrb_bool iterating :1; - mrb_bool disabled :1; - mrb_bool full :1; - mrb_bool generational :1; - mrb_bool out_of_memory :1; - size_t majorgc_old_threshold; +#ifdef MRB_GC_STATS + uint32_t gc_total_count; /* total GC invocations */ + uint32_t minor_gc_count; /* minor GC count */ + uint32_t major_gc_count; /* major GC count */ +#endif } mrb_gc; -MRB_API mrb_bool mrb_object_dead_p(struct mrb_state *mrb, struct RBasic *object); +MRB_API mrb_bool mrb_object_dead_p(mrb_state *mrb, struct RBasic *object); +MRB_API int mrb_gc_add_region(mrb_state *mrb, void *start, size_t size); #define MRB_GC_RED 7 diff --git a/include/mruby/hash.h b/include/mruby/hash.h index e591172ea8..44d870555b 100644 --- a/include/mruby/hash.h +++ b/include/mruby/hash.h @@ -14,7 +14,7 @@ */ MRB_BEGIN_DECL -/* offset of `iv` must be 3 words */ +/* offset of `iv` must match struct RObject */ struct RHash { MRB_OBJECT_HEADER; #ifdef MRB_64BIT diff --git a/include/mruby/internal.h b/include/mruby/internal.h index 30b1edf522..0b5a88af82 100644 --- a/include/mruby/internal.h +++ b/include/mruby/internal.h @@ -22,11 +22,18 @@ mrb_value mrb_class_find_path(mrb_state*, struct RClass*); mrb_value mrb_mod_to_s(mrb_state *, mrb_value); void mrb_method_added(mrb_state *mrb, struct RClass *c, mrb_sym mid); mrb_noreturn void mrb_method_missing(mrb_state *mrb, mrb_sym name, mrb_value self, mrb_value args); +mrb_method_t mrb_vm_find_method(mrb_state *mrb, struct RClass *c, struct RClass **cp, mrb_sym mid); +mrb_value mrb_mod_const_missing(mrb_state *mrb, mrb_value mod); +mrb_value mrb_const_missing(mrb_state *mrb, mrb_value mod, mrb_sym sym); +size_t mrb_class_mt_memsize(mrb_state*, struct RClass*); +mrb_value mrb_obj_extend(mrb_state*, mrb_value obj); #endif +mrb_value mrb_obj_equal_m(mrb_state *mrb, mrb_value); + /* debug */ size_t mrb_packed_int_len(uint32_t num); -size_t mrb_packed_int_encode(uint32_t num, uint8_t *p, uint8_t *pend); +size_t mrb_packed_int_encode(uint32_t num, uint8_t *p); uint32_t mrb_packed_int_decode(const uint8_t *p, const uint8_t **newpos); /* dump */ @@ -38,29 +45,49 @@ int mrb_dump_irep_cstruct(mrb_state *mrb, const mrb_irep*, uint8_t flags, FILE * #endif #endif +/* codedump */ +void mrb_codedump_all(mrb_state *mrb, struct RProc *proc); +#ifndef MRB_NO_STDIO +void mrb_codedump_all_file(mrb_state *mrb, struct RProc *proc, FILE *out); +#endif + /* error */ mrb_value mrb_exc_inspect(mrb_state *mrb, mrb_value exc); mrb_value mrb_exc_backtrace(mrb_state *mrb, mrb_value exc); mrb_value mrb_get_backtrace(mrb_state *mrb); void mrb_exc_mesg_set(mrb_state *mrb, struct RException *exc, mrb_value mesg); mrb_value mrb_exc_mesg_get(mrb_state *mrb, struct RException *exc); +mrb_value mrb_f_raise(mrb_state*, mrb_value); +mrb_value mrb_make_exception(mrb_state *mrb, mrb_value exc, mrb_value mesg); +mrb_value mrb_exc_get_output(mrb_state *mrb, struct RObject *exc); + +struct RBacktrace { + MRB_OBJECT_HEADER; + size_t len; + struct mrb_backtrace_location *locations; +}; + +struct mrb_backtrace_location { + mrb_sym method_id; + int32_t idx; + const mrb_irep *irep; +}; /* gc */ -void mrb_gc_mark_mt(mrb_state*, struct RClass*); -size_t mrb_gc_mark_mt_size(mrb_state*, struct RClass*); +size_t mrb_gc_mark_mt(mrb_state*, struct RClass*); void mrb_gc_free_mt(mrb_state*, struct RClass*); /* hash */ size_t mrb_hash_memsize(mrb_value obj); -void mrb_gc_mark_hash(mrb_state*, struct RHash*); -size_t mrb_gc_mark_hash_size(mrb_state*, struct RHash*); +size_t mrb_gc_mark_hash(mrb_state*, struct RHash*); void mrb_gc_free_hash(mrb_state*, struct RHash*); +mrb_value mrb_hash_first_key(mrb_state*, mrb_value); +uint32_t mrb_obj_hash_code(mrb_state *mrb, mrb_value key); /* irep */ struct mrb_insn_data mrb_decode_insn(const mrb_code *pc); #ifdef MRUBY_IREP_H void mrb_irep_free(mrb_state*, struct mrb_irep*); -void mrb_irep_remove_lv(mrb_state *mrb, mrb_irep *irep); static inline const struct mrb_irep_catch_handler * mrb_irep_catch_handler_table(const struct mrb_irep *irep) @@ -82,6 +109,9 @@ mrb_value mrb_int_sub(mrb_state *mrb, mrb_value x, mrb_value y); mrb_value mrb_int_mul(mrb_state *mrb, mrb_value x, mrb_value y); mrb_noreturn void mrb_int_zerodiv(mrb_state *mrb); mrb_noreturn void mrb_int_overflow(mrb_state *mrb, const char *reason); +#ifndef MRB_NO_FLOAT +void mrb_check_num_exact(mrb_state *mrb, mrb_float num); +#endif #ifdef MRB_USE_COMPLEX mrb_value mrb_complex_new(mrb_state *mrb, mrb_float x, mrb_float y); @@ -97,19 +127,32 @@ mrb_value mrb_rational_add(mrb_state *mrb, mrb_value x, mrb_value y); mrb_value mrb_rational_sub(mrb_state *mrb, mrb_value x, mrb_value y); mrb_value mrb_rational_mul(mrb_state *mrb, mrb_value x, mrb_value y); mrb_value mrb_rational_div(mrb_state *mrb, mrb_value x, mrb_value y); +mrb_value mrb_as_rational(mrb_state *mrb, mrb_value x); void mrb_rational_copy(mrb_state *mrb, mrb_value x, mrb_value y); +int mrb_rational_mark(mrb_state *mrb, struct RBasic *rat); +#endif +#ifdef MRB_USE_SET +size_t mrb_gc_mark_set(mrb_state *mrb, struct RBasic *set); +void mrb_gc_free_set(mrb_state *mrb, struct RBasic *set); +size_t mrb_set_memsize(mrb_value); #endif #ifdef MRUBY_PROC_H struct RProc *mrb_closure_new(mrb_state*, const mrb_irep*); -void mrb_proc_copy(mrb_state *mrb, struct RProc *a, struct RProc *b); +void mrb_proc_copy(mrb_state *mrb, struct RProc *a, const struct RProc *b); mrb_int mrb_proc_arity(const struct RProc *p); +struct REnv *mrb_env_new(mrb_state *mrb, struct mrb_context *c, mrb_callinfo *ci, int nstacks, mrb_value *stack, struct RClass *tc); +void mrb_proc_merge_lvar(mrb_state *mrb, mrb_irep *irep, struct REnv *env, int num, const mrb_sym *lv, const mrb_value *stack); +mrb_value mrb_proc_local_variables(mrb_state *mrb, const struct RProc *proc); +const struct RProc *mrb_proc_get_caller(mrb_state *mrb, struct REnv **env); +mrb_value mrb_proc_get_self(mrb_state *mrb, const struct RProc *p, struct RClass **target_class_p); +mrb_bool mrb_proc_eql(mrb_state *mrb, mrb_value self, mrb_value other); #endif /* range */ #ifdef MRUBY_RANGE_H mrb_value mrb_get_values_at(mrb_state *mrb, mrb_value obj, mrb_int olen, mrb_int argc, const mrb_value *argv, mrb_value (*func)(mrb_state*, mrb_value, mrb_int)); -void mrb_gc_mark_range(mrb_state *mrb, struct RRange *r); +size_t mrb_gc_mark_range(mrb_state *mrb, struct RRange *r); #endif /* string */ @@ -120,9 +163,13 @@ mrb_value mrb_str_inspect(mrb_state *mrb, mrb_value str); mrb_bool mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp); mrb_value mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len); mrb_value mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value idx, mrb_value len); +mrb_bool mrb_strcasecmp_p(const char *s1, mrb_int len1, const char *s2, mrb_int len2); +#define MRB_STR_CASECMP_P(str, lit) \ + mrb_strcasecmp_p(RSTRING_PTR(str), RSTRING_LEN(str), lit, sizeof(lit"")-1) uint32_t mrb_byte_hash(const uint8_t*, mrb_int); uint32_t mrb_byte_hash_step(const uint8_t*, mrb_int, uint32_t); +mrb_int mrb_utf8_to_buf(char *buf, uint32_t cp); #ifdef MRB_UTF8_STRING mrb_int mrb_utf8len(const char *str, const char *end); mrb_int mrb_utf8_strlen(const char *str, mrb_int byte_len); @@ -134,50 +181,69 @@ void mrb_vm_special_set(mrb_state*, mrb_sym, mrb_value); mrb_value mrb_vm_cv_get(mrb_state*, mrb_sym); void mrb_vm_cv_set(mrb_state*, mrb_sym, mrb_value); mrb_value mrb_vm_const_get(mrb_state*, mrb_sym); -void mrb_vm_const_set(mrb_state*, mrb_sym, mrb_value); size_t mrb_obj_iv_tbl_memsize(mrb_value); -mrb_value mrb_obj_iv_inspect(mrb_state*, struct RObject*); void mrb_obj_iv_set_force(mrb_state *mrb, struct RObject *obj, mrb_sym sym, mrb_value v); mrb_value mrb_mod_constants(mrb_state *mrb, mrb_value mod); +mrb_value mrb_mod_const_at(mrb_state *mrb, struct RClass *c, mrb_value ary); mrb_value mrb_f_global_variables(mrb_state *mrb, mrb_value self); mrb_value mrb_obj_instance_variables(mrb_state*, mrb_value); mrb_value mrb_mod_class_variables(mrb_state*, mrb_value); -mrb_value mrb_mod_cv_get(mrb_state *mrb, struct RClass * c, mrb_sym sym); -mrb_bool mrb_mod_cv_defined(mrb_state *mrb, struct RClass * c, mrb_sym sym); +mrb_value mrb_mod_cv_get(mrb_state *mrb, struct RClass *c, mrb_sym sym); +mrb_bool mrb_mod_cv_defined(mrb_state *mrb, struct RClass *c, mrb_sym sym); mrb_bool mrb_ident_p(const char *s, mrb_int len); +mrb_value mrb_exc_const_get(mrb_state *mrb, mrb_sym sym); /* GC functions */ void mrb_gc_mark_gv(mrb_state*); void mrb_gc_free_gv(mrb_state*); -void mrb_gc_mark_iv(mrb_state*, struct RObject*); -size_t mrb_gc_mark_iv_size(mrb_state*, struct RObject*); +size_t mrb_gc_mark_iv(mrb_state*, struct RObject*); void mrb_gc_free_iv(mrb_state*, struct RObject*); +/* IV shape tree */ +void mrb_init_shape(mrb_state*); +void mrb_free_shape(mrb_state*); + /* VM */ +#define MRB_CI_VISIBILITY(ci) MRB_FLAGS_GET((ci)->vis, 0, 2) +#define MRB_CI_SET_VISIBILITY(ci, visi) MRB_FLAGS_SET((ci)->vis, 0, 2, visi) +#define MRB_CI_VISIBILITY_BREAK_P(ci) MRB_FLAG_CHECK((ci)->vis, 2) +#define MRB_CI_SET_VISIBILITY_BREAK(ci) MRB_FLAG_ON((ci)->vis, 2) mrb_int mrb_ci_bidx(mrb_callinfo *ci); -mrb_value mrb_exec_irep(mrb_state *mrb, mrb_value self, struct RProc *p); +mrb_int mrb_ci_nregs(mrb_callinfo *ci); +mrb_value mrb_exec_irep(mrb_state *mrb, mrb_value self, const struct RProc *p); mrb_value mrb_obj_instance_eval(mrb_state*, mrb_value); +mrb_value mrb_object_exec(mrb_state *mrb, mrb_value self, struct RClass *target_class); mrb_value mrb_mod_module_eval(mrb_state*, mrb_value); +mrb_value mrb_f_send(mrb_state *mrb, mrb_value self); +mrb_value mrb_f_public_send(mrb_state *mrb, mrb_value self); #ifdef MRB_USE_BIGINT mrb_value mrb_bint_new_int(mrb_state *mrb, mrb_int x); +#ifdef MRB_INT64 +#define mrb_bint_new_int64(mrb,x) mrb_bint_new_int((mrb),(mrb_int)(x)) +#else +mrb_value mrb_bint_new_int64(mrb_state *mrb, int64_t x); +#endif +mrb_value mrb_bint_new_uint64(mrb_state *mrb, uint64_t x); mrb_value mrb_bint_new_str(mrb_state *mrb, const char *x, mrb_int len, mrb_int base); mrb_value mrb_as_bint(mrb_state *mrb, mrb_value x); mrb_value mrb_bint_add(mrb_state *mrb, mrb_value x, mrb_value y); mrb_value mrb_bint_sub(mrb_state *mrb, mrb_value x, mrb_value y); +mrb_value mrb_bint_add_n(mrb_state *mrb, mrb_value x, mrb_value y); +mrb_value mrb_bint_sub_n(mrb_state *mrb, mrb_value x, mrb_value y); mrb_value mrb_bint_mul(mrb_state *mrb, mrb_value x, mrb_value y); mrb_value mrb_bint_div(mrb_state *mrb, mrb_value x, mrb_value y); mrb_value mrb_bint_divmod(mrb_state *mrb, mrb_value x, mrb_value y); mrb_value mrb_bint_add_ii(mrb_state *mrb, mrb_int x, mrb_int y); mrb_value mrb_bint_sub_ii(mrb_state *mrb, mrb_int x, mrb_int y); mrb_value mrb_bint_mul_ii(mrb_state *mrb, mrb_int x, mrb_int y); -mrb_value mrb_bint_div_ii(mrb_state *mrb, mrb_int x, mrb_int y); mrb_value mrb_bint_mod(mrb_state *mrb, mrb_value x, mrb_value y); mrb_value mrb_bint_rem(mrb_state *mrb, mrb_value x, mrb_value y); mrb_value mrb_bint_pow(mrb_state *mrb, mrb_value x, mrb_value y); -mrb_value mrb_bint_powm(mrb_state *mrb, mrb_value x, mrb_int y, mrb_value z); +mrb_value mrb_bint_powm(mrb_state *mrb, mrb_value x, mrb_value y, mrb_value z); mrb_value mrb_bint_and(mrb_state *mrb, mrb_value x, mrb_value y); mrb_value mrb_bint_or(mrb_state *mrb, mrb_value x, mrb_value y); +mrb_value mrb_bint_neg(mrb_state *mrb, mrb_value x); mrb_value mrb_bint_xor(mrb_state *mrb, mrb_value x, mrb_value y); mrb_value mrb_bint_rev(mrb_state *mrb, mrb_value x); mrb_value mrb_bint_lshift(mrb_state *mrb, mrb_value x, mrb_int width); @@ -188,11 +254,32 @@ mrb_value mrb_bint_new_float(mrb_state *mrb, mrb_float x); mrb_float mrb_bint_as_float(mrb_state *mrb, mrb_value x); #endif mrb_int mrb_bint_as_int(mrb_state *mrb, mrb_value x); +#ifdef MRB_INT64 +#define mrb_bint_as_int64(mrb, x) mrb_bint_as_int((mrb), (x)) +#else +int64_t mrb_bint_as_int64(mrb_state *mrb, mrb_value x); +#endif +uint64_t mrb_bint_as_uint64(mrb_state *mrb, mrb_value x); mrb_int mrb_bint_cmp(mrb_state *mrb, mrb_value x, mrb_value y); void mrb_gc_free_bint(mrb_state *mrb, struct RBasic *x); void mrb_bint_copy(mrb_state *mrb, mrb_value x, mrb_value y); size_t mrb_bint_memsize(mrb_value x); mrb_value mrb_bint_hash(mrb_state *mrb, mrb_value x); +mrb_value mrb_bint_sqrt(mrb_state *mrb, mrb_value x); +mrb_int mrb_bint_size(mrb_state *mrb, mrb_value bint); +mrb_value mrb_bint_from_bytes(mrb_state *mrb, const uint8_t *bytes, mrb_int len); +mrb_int mrb_bint_sign(mrb_state *mrb, mrb_value bint); +mrb_value mrb_bint_gcd(mrb_state *mrb, mrb_value x, mrb_value y); +mrb_value mrb_bint_lcm(mrb_state *mrb, mrb_value x, mrb_value y); +mrb_value mrb_bint_abs(mrb_state *mrb, mrb_value x); #endif +#ifdef MRB_USE_TASK_SCHEDULER +/* GC marking for task scheduler */ +void mrb_task_mark_all(mrb_state *mrb); +#endif + +/* Internal object allocation without type validation (gc.c) */ +struct RBasic* mrb_obj_alloc_core(mrb_state*, enum mrb_vtype, struct RClass*); + #endif /* MRUBY_INTERNAL_H */ diff --git a/include/mruby/irep.h b/include/mruby/irep.h index ba390a05de..ab0b125c37 100644 --- a/include/mruby/irep.h +++ b/include/mruby/irep.h @@ -16,19 +16,19 @@ MRB_BEGIN_DECL enum irep_pool_type { - IREP_TT_STR = 0, /* string (need free) */ - IREP_TT_SSTR = 2, /* string (static) */ - IREP_TT_INT32 = 1, /* 32bit integer */ - IREP_TT_INT64 = 3, /* 64bit integer */ - IREP_TT_BIGINT = 7, /* big integer (not yet supported) */ - IREP_TT_FLOAT = 5, /* float (double/float) */ + IREP_TT_STR = 0, /* string (need free) */ + IREP_TT_SSTR = 2, /* string (static) */ + IREP_TT_INT32 = 1, /* 32-bit integer */ + IREP_TT_INT64 = 3, /* 64-bit integer */ + IREP_TT_BIGINT = 7, /* big integer */ + IREP_TT_FLOAT = 5, /* float (double/float) */ }; -#define IREP_TT_NFLAG 1 /* number (non string) flag */ -#define IREP_TT_SFLAG 2 /* static string flag */ +#define IREP_TT_NFLAG 1 /* number (non string) flag */ +#define IREP_TT_SFLAG 2 /* static string flag */ -typedef struct mrb_pool_value { - uint32_t tt; /* packed type and length (for string) */ +typedef struct mrb_irep_pool { + uint32_t tt; /* packed type and length (for string) */ union { const char *str; int32_t i32; @@ -37,7 +37,7 @@ typedef struct mrb_pool_value { mrb_float f; #endif } u; -} mrb_pool_value; +} mrb_irep_pool; enum mrb_catch_type { MRB_CATCH_RESCUE = 0, @@ -45,32 +45,33 @@ enum mrb_catch_type { }; struct mrb_irep_catch_handler { - uint8_t type; /* enum mrb_catch_type */ - uint8_t begin[4]; /* The starting address to match the handler. Includes this. */ - uint8_t end[4]; /* The endpoint address that matches the handler. Not Includes this. */ - uint8_t target[4]; /* The address to jump to if a match is made. */ + uint8_t type; /* enum mrb_catch_type */ + uint8_t begin[4]; /* The starting address to match the handler. Includes this. */ + uint8_t end[4]; /* The endpoint address that matches the handler. Not Includes this. */ + uint8_t target[4]; /* The address to jump to if a match is made. */ }; /* Program data array struct */ struct mrb_irep { - uint16_t nlocals; /* Number of local variables */ - uint16_t nregs; /* Number of register variables */ - uint16_t clen; /* Number of catch handlers */ + uint16_t nlocals; /* Number of local variables */ + uint16_t nregs; /* Number of register variables */ + uint16_t clen; /* Number of catch handlers */ uint8_t flags; const mrb_code *iseq; /* * A catch handler table is placed after the iseq entity. - * The reason it doesn't add fields to the structure is to keep the mrb_irep structure from bloating. - * The catch handler table can be obtained with `mrb_irep_catch_handler_table(irep)`. + * The reason it doesn't add fields to the structure is to keep the mrb_irep + * structure from bloating. The catch handler table can be obtained with + * `mrb_irep_catch_handler_table(irep)`. */ - const mrb_pool_value *pool; + const mrb_irep_pool *pool; const mrb_sym *syms; - const struct mrb_irep * const *reps; + const struct mrb_irep *const *reps; const mrb_sym *lv; /* debug info */ - struct mrb_irep_debug_info* debug_info; + struct mrb_irep_debug_info *debug_info; uint32_t ilen; uint16_t plen, slen; @@ -80,37 +81,47 @@ struct mrb_irep { #define MRB_ISEQ_NO_FREE 1 #define MRB_IREP_NO_FREE 2 -#define MRB_IREP_STATIC (MRB_ISEQ_NO_FREE | MRB_IREP_NO_FREE) +#define MRB_IREP_STATIC (MRB_ISEQ_NO_FREE | MRB_IREP_NO_FREE) +#define MRB_IREP_CONSOLIDATED 4 /* pool/syms/reps packed with irep struct */ MRB_API mrb_irep *mrb_add_irep(mrb_state *mrb); -/** load mruby bytecode functions -* Please note! Currently due to interactions with the GC calling these functions will -* leak one RProc object per function call. -* To prevent this save the current memory arena before calling and restore the arena -* right after, like so -* int ai = mrb_gc_arena_save(mrb); -* mrb_value status = mrb_load_irep(mrb, buffer); -* mrb_gc_arena_restore(mrb, ai); -*/ +/** + * load mruby bytecode functions + * + * Please note! Currently due to interactions with the GC calling these + * functions will leak one RProc object per function call. To prevent this save + * the current memory arena before calling and restore the arena right after, + * like so + * + * int ai = mrb_gc_arena_save(mrb); + * mrb_value status = mrb_load_irep(mrb, buffer); + * mrb_gc_arena_restore(mrb, ai); + * + * Also, when called from a C function defined as a method, the current stack is + * destroyed. If processing continues after this function, the objects obtained + * from the arguments must be protected as needed before this function. + */ /* @param [const uint8_t*] irep code, expected as a literal */ -MRB_API mrb_value mrb_load_irep(mrb_state*, const uint8_t*); +MRB_API mrb_value mrb_load_irep(mrb_state *, const uint8_t *); /* * @param [const void*] irep code * @param [size_t] size of irep buffer. */ -MRB_API mrb_value mrb_load_irep_buf(mrb_state*, const void*, size_t); +MRB_API mrb_value mrb_load_irep_buf(mrb_state *, const void *, size_t); /* @param [const uint8_t*] irep code, expected as a literal */ -MRB_API mrb_value mrb_load_irep_cxt(mrb_state*, const uint8_t*, mrbc_context*); +MRB_API mrb_value mrb_load_irep_cxt(mrb_state *, const uint8_t *, + mrbc_context *); /* * @param [const void*] irep code * @param [size_t] size of irep buffer. */ -MRB_API mrb_value mrb_load_irep_buf_cxt(mrb_state*, const void*, size_t, mrbc_context*); +MRB_API mrb_value mrb_load_irep_buf_cxt(mrb_state *, const void *, size_t, + mrbc_context *); struct mrb_insn_data { uint8_t insn; @@ -120,13 +131,13 @@ struct mrb_insn_data { const mrb_code *addr; }; -#define mrb_irep_catch_handler_pack(n, v) uint32_to_bin(n, v) -#define mrb_irep_catch_handler_unpack(v) bin_to_uint32(v) +#define mrb_irep_catch_handler_pack(n, v) uint32_to_bin(n, v) +#define mrb_irep_catch_handler_unpack(v) bin_to_uint32(v) -void mrb_irep_incref(mrb_state*, struct mrb_irep*); -void mrb_irep_decref(mrb_state*, struct mrb_irep*); -void mrb_irep_cutref(mrb_state*, struct mrb_irep*); +void mrb_irep_incref(mrb_state *, struct mrb_irep *); +void mrb_irep_decref(mrb_state *, struct mrb_irep *); +void mrb_irep_cutref(mrb_state *, struct mrb_irep *); MRB_END_DECL -#endif /* MRUBY_IREP_H */ +#endif /* MRUBY_IREP_H */ diff --git a/include/mruby/istruct.h b/include/mruby/istruct.h index d6b6116a7c..d36d2807d5 100644 --- a/include/mruby/istruct.h +++ b/include/mruby/istruct.h @@ -30,7 +30,7 @@ struct RIStruct { #define RISTRUCT(obj) ((struct RIStruct*)(mrb_ptr(obj))) #define ISTRUCT_PTR(obj) (RISTRUCT(obj)->inline_data) -MRB_INLINE mrb_int mrb_istruct_size() +MRB_INLINE mrb_int mrb_istruct_size(void) { return ISTRUCT_DATA_SIZE; } diff --git a/include/mruby/khash.h b/include/mruby/khash.h index 1fb6eecbba..8d7139e87d 100644 --- a/include/mruby/khash.h +++ b/include/mruby/khash.h @@ -20,12 +20,13 @@ MRB_BEGIN_DECL typedef uint32_t khint_t; typedef khint_t khiter_t; -#ifndef KHASH_DEFAULT_SIZE -# define KHASH_DEFAULT_SIZE 32 +#ifndef KHASH_INITIAL_SIZE +# define KHASH_INITIAL_SIZE 32 #endif #define KHASH_MIN_SIZE 8 +#define KHASH_SMALL_LIMIT 4 -#define UPPER_BOUND(x) ((x)>>2|(x)>>1) +#define KH_UPPER_BOUND(x) ((x) - ((x)>>3)) /* 87.5% load factor */ /* extern uint8_t __m[]; */ @@ -48,7 +49,21 @@ static const uint8_t __m_either[] = {0x03, 0x0c, 0x30, 0xc0}; v++;\ } while (0) #define khash_mask(h) ((h)->n_buckets-1) -#define khash_upper_bound(h) (UPPER_BOUND((h)->n_buckets)) +#define khash_upper_bound(h) (KH_UPPER_BOUND((h)->n_buckets)) + +/* BREAKING CHANGE: khash structure optimized for 50% memory reduction + * + * The structure now uses a single data pointer instead of separate keys, + * vals, and ed_flags pointers, reducing size from 32 to 16 bytes. + * + * MIGRATION REQUIRED for field access macros: + * - OLD: kh_key(h, x) NEW: kh_key(typename, h, x) + * - OLD: kh_val(h, x) NEW: kh_val(typename, h, x) + * - OLD: kh_exist(h, x) NEW: kh_exist(typename, h, x) + * - OLD: KHASH_FOREACH() NEW: KHASH_FOREACH(typename, ...) + * + * Function-style macros (kh_get, kh_put, etc.) remain unchanged. + */ /* declare struct kh_xxx and kh_xxx_funcs @@ -59,13 +74,22 @@ static const uint8_t __m_either[] = {0x03, 0x0c, 0x30, 0xc0}; */ #define KHASH_DECLARE(name, khkey_t, khval_t, kh_is_map) \ typedef struct kh_##name { \ - khint_t n_buckets; \ - khint_t size; \ - uint8_t *ed_flags; \ - khkey_t *keys; \ - khval_t *vals; \ + void *data; /* Single allocation: [keys][vals][flags] */ \ + khint_t n_buckets; /* Number of buckets (power of 2) */ \ + khint_t size; /* Number of elements */ \ } kh_##name##_t; \ - void kh_alloc_##name(mrb_state *mrb, kh_##name##_t *h); \ + /* Address calculation functions for optimized memory layout */ \ + static inline khkey_t* kh_keys_##name(const kh_##name##_t *h) { \ + return (khkey_t*)(h)->data; \ + } \ + static inline khval_t* kh_vals_##name(const kh_##name##_t *h) { \ + return kh_is_map ? \ + (khval_t*)((uint8_t*)(h)->data + sizeof(khkey_t) * (h)->n_buckets) : NULL; \ + } \ + static inline uint8_t* kh_flags_##name(const kh_##name##_t *h) { \ + return (uint8_t*)(h)->data + sizeof(khkey_t) * (h)->n_buckets + \ + (kh_is_map ? sizeof(khval_t) * (h)->n_buckets : 0); \ + } \ kh_##name##_t *kh_init_##name##_size(mrb_state *mrb, khint_t size); \ kh_##name##_t *kh_init_##name(mrb_state *mrb); \ void kh_destroy_##name(mrb_state *mrb, kh_##name##_t *h); \ @@ -74,15 +98,10 @@ static const uint8_t __m_either[] = {0x03, 0x0c, 0x30, 0xc0}; khint_t kh_put_##name(mrb_state *mrb, kh_##name##_t *h, khkey_t key, int *ret); \ void kh_resize_##name(mrb_state *mrb, kh_##name##_t *h, khint_t new_n_buckets); \ void kh_del_##name(mrb_state *mrb, kh_##name##_t *h, khint_t x); \ - kh_##name##_t *kh_copy_##name(mrb_state *mrb, kh_##name##_t *h); - -static inline void -kh_fill_flags(uint8_t *p, uint8_t c, size_t len) -{ - while (len-- > 0) { - *p++ = c; - } -} + kh_##name##_t *kh_copy_##name(mrb_state *mrb, kh_##name##_t *h); \ + void kh_init_data_##name(mrb_state *mrb, kh_##name##_t *h, khint_t size); \ + void kh_destroy_data_##name(mrb_state *mrb, kh_##name##_t *h); \ + void kh_replace_##name(mrb_state *mrb, kh_##name##_t *dst, const kh_##name##_t *src); /* define kh_xxx_funcs @@ -95,64 +114,136 @@ kh_fill_flags(uint8_t *p, uint8_t c, size_t len) */ #define KHASH_DEFINE(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ mrb_noreturn void mrb_raise_nomemory(mrb_state *mrb); \ - int kh_alloc_simple_##name(mrb_state *mrb, kh_##name##_t *h) \ - { \ + /* Internal helper functions */ \ + static inline size_t kh__kv_size_##name(khint_t count) { \ + return sizeof(khkey_t) * count + \ + (kh_is_map ? sizeof(khval_t) * count : 0); \ + } \ + static inline size_t kh__htable_size_##name(khint_t n_buckets) { \ + return kh__kv_size_##name(n_buckets) + n_buckets / 4; \ + } \ + static inline void kh__mark_occupied_##name(kh_##name##_t *h, khint_t i) { \ + uint8_t *flags = kh_flags_##name(h); \ + flags[i/4] &= ~__m_either[i%4]; /* Clear both empty and deleted bits */ \ + } \ + static inline void kh__mark_deleted_##name(kh_##name##_t *h, khint_t i) { \ + uint8_t *flags = kh_flags_##name(h); \ + flags[i/4] |= __m_del[i%4]; /* Set deleted bit */ \ + } \ + static inline khint_t kh__key_idx_##name(mrb_state *mrb, khkey_t key, kh_##name##_t *h) { \ + return __hash_func(mrb, key) & khash_mask(h); \ + } \ + static inline khint_t kh__next_probe_##name(khint_t k, khint_t *step, kh_##name##_t *h) { \ + return (k+(++(*step))) & khash_mask(h); \ + } \ + static inline khint_t kh__insert_key_##name(kh_##name##_t *h, khint_t index, khkey_t key) { \ + khkey_t *keys = kh_keys_##name(h); \ + keys[index] = key; \ + kh__mark_occupied_##name(h, index); \ + h->size++; \ + return index; \ + } \ + static inline void kh__clear_flags_##name(kh_##name##_t *h, khint_t n_buckets) { \ + memset(kh_flags_##name(h), 0xaa, n_buckets/4); \ + } \ + static inline void kh__alloc_##name(mrb_state *mrb, kh_##name##_t *h) { \ khint_t sz = h->n_buckets; \ - size_t len = sizeof(khkey_t) + (kh_is_map ? sizeof(khval_t) : 0); \ - uint8_t *p = (uint8_t*)mrb_malloc_simple(mrb, sizeof(uint8_t)*sz/4+len*sz); \ - if (!p) { return 1; } \ + uint8_t *p = (uint8_t*)mrb_malloc(mrb, kh__htable_size_##name(sz)); \ h->size = 0; \ - h->keys = (khkey_t *)p; \ - h->vals = kh_is_map ? (khval_t *)(p+sizeof(khkey_t)*sz) : NULL; \ - h->ed_flags = p+len*sz; \ - kh_fill_flags(h->ed_flags, 0xaa, sz/4); \ - return 0; \ + h->data = p; /* Single data pointer for optimized layout */ \ + kh__clear_flags_##name(h, sz); \ } \ - void kh_alloc_##name(mrb_state *mrb, kh_##name##_t *h) \ - { \ - if (kh_alloc_simple_##name(mrb, h)) { \ - mrb_raise_nomemory(mrb); \ + /* Small table optimization functions */ \ + static inline int kh__is_small_##name(const kh_##name##_t *h) { \ + return h->n_buckets == 0; /* Small table marker */ \ + } \ + static inline khint_t kh__get_small_##name(mrb_state *mrb, kh_##name##_t *h, khkey_t key) { \ + khkey_t *keys = kh_keys_##name(h); \ + for (khint_t i = 0; i < h->size; i++) { \ + if (__hash_equal(mrb, keys[i], key)) return i; \ + } \ + return h->size; /* Not found - return end position */ \ + } \ + static inline void kh__rebuild_##name(mrb_state *mrb, kh_##name##_t *h, khint_t new_n_buckets) { \ + kh_##name##_t hh; \ + hh.data = NULL; \ + hh.size = 0; \ + kh_init_data_##name(mrb, &hh, new_n_buckets); \ + /* Rehash from old 'h' to 'hh' */ \ + khkey_t *old_keys = kh_keys_##name(h); \ + khval_t *old_vals = kh_vals_##name(h); \ + uint8_t *old_flags = kh__is_small_##name(h) ? NULL : kh_flags_##name(h); \ + khint_t limit = old_flags ? h->n_buckets : h->size; \ + for (khint_t i = 0; i < limit; i++) { \ + if (old_flags && __ac_iseither(old_flags, i)) continue; \ + khint_t k = kh_put_##name(mrb, &hh, old_keys[i], NULL); \ + if (kh_is_map) { \ + kh_val(name, &hh, k) = old_vals[i]; \ + } \ } \ + /* Final Swap */ \ + mrb_free(mrb, h->data); \ + h->data = hh.data; \ + h->n_buckets = hh.n_buckets; \ + h->size = hh.size; \ + } \ + static inline khint_t kh__put_small_##name(mrb_state *mrb, kh_##name##_t *h, khkey_t key, int *ret) { \ + /* First check if key exists */ \ + khint_t pos = kh__get_small_##name(mrb, h, key); \ + if (pos < h->size) { \ + if (ret) *ret = 0; /* Key exists */ \ + return pos; \ + } \ + /* Check if we need to convert to hash table */ \ + if (h->size >= KHASH_SMALL_LIMIT) { \ + /* Convert from small table to hash table */ \ + kh__rebuild_##name(mrb, h, KHASH_MIN_SIZE); \ + /* Now add the new key using regular hash table */ \ + return kh_put_##name(mrb, h, key, ret); \ + } \ + /* Add new element to small table */ \ + khkey_t *keys = kh_keys_##name(h); \ + keys[h->size] = key; \ + h->size++; \ + if (ret) *ret = 1; /* New key */ \ + return h->size - 1; \ } \ kh_##name##_t *kh_init_##name##_size(mrb_state *mrb, khint_t size) { \ kh_##name##_t *h = (kh_##name##_t*)mrb_calloc(mrb, 1, sizeof(kh_##name##_t)); \ - if (size < KHASH_MIN_SIZE) \ - size = KHASH_MIN_SIZE; \ - khash_power2(size); \ - h->n_buckets = size; \ - if (kh_alloc_simple_##name(mrb, h)) { \ - mrb_free(mrb, h); \ - mrb_raise_nomemory(mrb); \ - } \ + kh_init_data_##name(mrb, h, size); \ return h; \ } \ kh_##name##_t *kh_init_##name(mrb_state *mrb) { \ - return kh_init_##name##_size(mrb, KHASH_DEFAULT_SIZE); \ + return kh_init_##name##_size(mrb, KHASH_INITIAL_SIZE); \ } \ void kh_destroy_##name(mrb_state *mrb, kh_##name##_t *h) \ { \ - if (h) { \ - mrb_free(mrb, h->keys); \ - mrb_free(mrb, h); \ - } \ + kh_destroy_data_##name(mrb, h); \ + mrb_free(mrb, h); \ } \ void kh_clear_##name(mrb_state *mrb, kh_##name##_t *h) \ { \ (void)mrb; \ - if (h && h->ed_flags) { \ - kh_fill_flags(h->ed_flags, 0xaa, h->n_buckets/4); \ + if (h && h->data) { \ + kh__clear_flags_##name(h, h->n_buckets); \ h->size = 0; \ } \ } \ khint_t kh_get_##name(mrb_state *mrb, kh_##name##_t *h, khkey_t key) \ { \ - khint_t k = __hash_func(mrb,key) & khash_mask(h), step = 0; \ + if (kh__is_small_##name(h)) { \ + return kh__get_small_##name(mrb, h, key); \ + } \ + /* Cache calculated pointers for performance */ \ + khkey_t *keys = kh_keys_##name(h); \ + uint8_t *ed_flags = kh_flags_##name(h); \ + khint_t k = kh__key_idx_##name(mrb, key, h), step = 0; \ (void)mrb; \ - while (!__ac_isempty(h->ed_flags, k)) { \ - if (!__ac_isdel(h->ed_flags, k)) { \ - if (__hash_equal(mrb,h->keys[k], key)) return k; \ + while (!__ac_isempty(ed_flags, k)) { \ + if (!__ac_isdel(ed_flags, k)) { \ + if (__hash_equal(mrb, keys[k], key)) return k; \ } \ - k = (k+(++step)) & khash_mask(h); \ + k = kh__next_probe_##name(k, &step, h); \ } \ return kh_end(h); \ } \ @@ -161,38 +252,25 @@ kh_fill_flags(uint8_t *p, uint8_t c, size_t len) if (new_n_buckets < KHASH_MIN_SIZE) \ new_n_buckets = KHASH_MIN_SIZE; \ khash_power2(new_n_buckets); \ - { \ - kh_##name##_t hh; \ - uint8_t *old_ed_flags = h->ed_flags; \ - khkey_t *old_keys = h->keys; \ - khval_t *old_vals = h->vals; \ - khint_t old_n_buckets = h->n_buckets; \ - khint_t i; \ - hh.n_buckets = new_n_buckets; \ - kh_alloc_##name(mrb, &hh); \ - /* relocate */ \ - for (i=0 ; isize >= khash_upper_bound(h)) { \ kh_resize_##name(mrb, h, h->n_buckets*2); \ } \ - k = __hash_func(mrb,key) & khash_mask(h); \ + /* Cache calculated pointers for performance */ \ + khkey_t *keys = kh_keys_##name(h); \ + uint8_t *ed_flags = kh_flags_##name(h); \ + k = kh__key_idx_##name(mrb, key, h); \ del_k = kh_end(h); \ - while (!__ac_isempty(h->ed_flags, k)) { \ - if (!__ac_isdel(h->ed_flags, k)) { \ - if (__hash_equal(mrb,h->keys[k], key)) { \ + while (!__ac_isempty(ed_flags, k)) { \ + if (!__ac_isdel(ed_flags, k)) { \ + if (__hash_equal(mrb, keys[k], key)) { \ if (ret) *ret = 0; \ return k; \ } \ @@ -200,21 +278,17 @@ kh_fill_flags(uint8_t *p, uint8_t c, size_t len) else if (del_k == kh_end(h)) { \ del_k = k; \ } \ - k = (k+(++step)) & khash_mask(h); \ + k = kh__next_probe_##name(k, &step, h); \ } \ if (del_k != kh_end(h)) { \ /* put at del */ \ - h->keys[del_k] = key; \ - h->ed_flags[del_k/4] &= ~__m_del[del_k%4]; \ - h->size++; \ + kh__insert_key_##name(h, del_k, key); \ if (ret) *ret = 2; \ return del_k; \ } \ else { \ /* put at empty */ \ - h->keys[k] = key; \ - h->ed_flags[k/4] &= ~__m_empty[k%4]; \ - h->size++; \ + kh__insert_key_##name(h, k, key); \ if (ret) *ret = 1; \ return k; \ } \ @@ -222,23 +296,81 @@ kh_fill_flags(uint8_t *p, uint8_t c, size_t len) void kh_del_##name(mrb_state *mrb, kh_##name##_t *h, khint_t x) \ { \ (void)mrb; \ - mrb_assert(x != h->n_buckets && !__ac_iseither(h->ed_flags, x)); \ - h->ed_flags[x/4] |= __m_del[x%4]; \ - h->size--; \ + if (kh__is_small_##name(h)) { \ + /* Small table deletion: shift elements down */ \ + mrb_assert(x < h->size); \ + khkey_t *keys = kh_keys_##name(h); \ + khval_t *vals = kh_vals_##name(h); \ + for (khint_t i = x; i < h->size - 1; i++) { \ + keys[i] = keys[i + 1]; \ + if (kh_is_map) vals[i] = vals[i + 1]; \ + } \ + h->size--; \ + } \ + else { \ + /* Regular hash table deletion */ \ + mrb_assert(x != h->n_buckets && !__ac_iseither(kh_flags_##name(h), x)); \ + kh__mark_deleted_##name(h, x); \ + h->size--; \ + } \ } \ kh_##name##_t *kh_copy_##name(mrb_state *mrb, kh_##name##_t *h) \ { \ - kh_##name##_t *h2; \ - khiter_t k, k2; \ - \ - h2 = kh_init_##name(mrb); \ - for (k = kh_begin(h); k != kh_end(h); k++) { \ - if (kh_exist(h, k)) { \ - k2 = kh_put_##name(mrb, h2, kh_key(h, k), NULL); \ - if (kh_is_map) kh_value(h2, k2) = kh_value(h, k); \ - } \ - } \ + kh_##name##_t *h2 = (kh_##name##_t*)mrb_calloc(mrb, 1, sizeof(kh_##name##_t)); \ + kh_replace_##name(mrb, h2, h); \ return h2; \ + } \ + void kh_init_data_##name(mrb_state *mrb, kh_##name##_t *h, khint_t size) { \ + if (size <= KHASH_SMALL_LIMIT) { \ + /* Start as small table */ \ + h->n_buckets = 0; /* Small table marker */ \ + h->data = mrb_malloc(mrb, kh__kv_size_##name(KHASH_SMALL_LIMIT)); \ + h->size = 0; \ + } \ + else { \ + /* Start as regular hash table */ \ + if (size < KHASH_MIN_SIZE) \ + size = KHASH_MIN_SIZE; \ + khash_power2(size); \ + h->n_buckets = size; \ + kh__alloc_##name(mrb, h); \ + } \ + } \ + void kh_destroy_data_##name(mrb_state *mrb, kh_##name##_t *h) \ + { \ + if (h && h->data) { \ + mrb_free(mrb, h->data); /* Free only the data allocation */ \ + h->data = NULL; \ + } \ + } \ + void kh_replace_##name(mrb_state *mrb, kh_##name##_t *dst, const kh_##name##_t *src) \ + { \ + if (!src || (src->n_buckets == 0 && src->size == 0)) { \ + /* Empty source */ \ + kh_destroy_data_##name(mrb, dst); \ + dst->data = NULL; \ + dst->n_buckets = 0; \ + dst->size = 0; \ + } \ + else if (src->n_buckets == 0) { \ + /* Small table case */ \ + size_t data_size = kh__kv_size_##name(KHASH_SMALL_LIMIT); \ + dst->data = mrb_realloc(mrb, dst->data, data_size); \ + dst->size = src->size; \ + dst->n_buckets = 0; \ + /* Copy only the used portion of keys and values */ \ + size_t copy_size = kh__kv_size_##name(src->size); \ + memcpy(dst->data, src->data, copy_size); \ + } \ + else { \ + /* Regular hash table case */ \ + size_t data_size = kh__htable_size_##name(src->n_buckets); \ + dst->data = mrb_realloc(mrb, dst->data, data_size); \ + dst->size = src->size; \ + dst->n_buckets = src->n_buckets; \ + /* Copy the entire data block: [keys][vals][flags] */ \ + memcpy(dst->data, src->data, data_size); \ + } \ } @@ -254,24 +386,39 @@ kh_fill_flags(uint8_t *p, uint8_t c, size_t len) #define kh_get(name, mrb, h, k) kh_get_##name(mrb, h, k) #define kh_del(name, mrb, h, k) kh_del_##name(mrb, h, k) #define kh_copy(name, mrb, h) kh_copy_##name(mrb, h) +#define kh_init_data(name, mrb, h, size) kh_init_data_##name(mrb, h, size) +#define kh_destroy_data(name, mrb, h) kh_destroy_data_##name(mrb, h) +#define kh_replace(name, mrb, dst, src) kh_replace_##name(mrb, dst, src) + +/* BREAKING CHANGE: Field access macros now require type name as first parameter + * The macros keep their familiar names but now need the hash type name. + * + * MIGRATION: Add type name as first parameter: + * kh_key(h, x) -> kh_key(typename, h, x) + * kh_val(h, x) -> kh_val(typename, h, x) + * kh_exist(h, x) -> kh_exist(typename, h, x) + * kh_value(h, x) -> kh_value(typename, h, x) + */ -#define kh_exist(h, x) (!__ac_iseither((h)->ed_flags, (x))) -#define kh_key(h, x) ((h)->keys[x]) -#define kh_val(h, x) ((h)->vals[x]) -#define kh_value(h, x) ((h)->vals[x]) +/* Type-aware access macros - same names, now with type parameter */ +#define kh_exist(name, h, x) ((h)->n_buckets == 0 ? ((x) < (h)->size) : (!__ac_iseither(kh_flags_##name(h), (x)))) +#define kh_key(name, h, x) (kh_keys_##name(h)[x]) +#define kh_val(name, h, x) (kh_vals_##name(h)[x]) +#define kh_value(name, h, x) (kh_vals_##name(h)[x]) #define kh_begin(h) (khint_t)(0) -#define kh_end(h) ((h)->n_buckets) +#define kh_end(h) ((h)->n_buckets == 0 ? (h)->size : (h)->n_buckets) +#define kh_is_end(h, i) ((i) >= kh_end(h)) #define kh_size(h) ((h)->size) #define kh_n_buckets(h) ((h)->n_buckets) -#define kh_int_hash_func(mrb,key) (khint_t)((key)^((key)<<2)^((key)>>2)) +#define kh_int_hash_func(mrb,key) mrb_int_hash_func(mrb,key) #define kh_int_hash_equal(mrb,a, b) (a == b) #define kh_int64_hash_func(mrb,key) (khint_t)((key)>>33^(key)^(key)<<11) #define kh_int64_hash_equal(mrb,a, b) (a == b) static inline khint_t __ac_X31_hash_string(const char *s) { khint_t h = *s; - if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s; + if (h) for (++s; *s; ++s) h = (h << 5) - h + *s; return h; } #define kh_str_hash_func(mrb,key) __ac_X31_hash_string(key) @@ -281,4 +428,28 @@ typedef const char *kh_cstr_t; MRB_END_DECL +/** + * Macro for iterating over all elements in a khash. + * + * Usage: + * KHASH_FOREACH(typename, kh, k) { + * // k is the khiter_t iterator + * // Access the key with kh_key(typename, kh, k) + * // Access the value with kh_val(typename, kh, k) if applicable + * // Your code here + * } + * + * @param name The hash type name + * @param kh The khash to iterate over + * @param k The name to use for the khiter_t iterator variable + */ +/* BREAKING CHANGE: KHASH_FOREACH now requires type name as first parameter + * OLD: KHASH_FOREACH(mrb, kh, k) + * NEW: KHASH_FOREACH(name, kh, k) + */ +#define KHASH_FOREACH(name, kh, k) \ + if (kh) \ + for (khiter_t k = kh_begin(kh); !kh_is_end(kh, k); k++) \ + if (kh_exist(name, kh, k)) + #endif /* MRUBY_KHASH_H */ diff --git a/include/mruby/mempool.h b/include/mruby/mempool.h new file mode 100644 index 0000000000..6a4cd7811b --- /dev/null +++ b/include/mruby/mempool.h @@ -0,0 +1,19 @@ +/* +** mempool.h - memory pool +** +** See Copyright Notice in mruby.h +*/ + +/* memory pool implementation */ +typedef struct mempool mempool; +MRB_API struct mempool* mempool_open(void); +MRB_API void mempool_close(struct mempool*); +MRB_API void* mempool_alloc(struct mempool*, size_t); +MRB_API void* mempool_realloc(struct mempool*, void*, size_t oldlen, size_t newlen); + +/* compatibility layer */ +typedef struct mempool mrb_mempool; +#define mrb_mempool_open(m) mempool_open() +#define mrb_mempool_close(m) mempool_close(m) +#define mrb_mempool_alloc(m, size) mempool_alloc((m),(size)) +#define mrb_mempool_realloc(m, ptr, oldlen, newlen) mempool_realloc((m),(ptr),(oldlen),(newlen)) diff --git a/include/mruby/numeric.h b/include/mruby/numeric.h index 48adea3d1a..bd64a2bb6d 100644 --- a/include/mruby/numeric.h +++ b/include/mruby/numeric.h @@ -119,12 +119,12 @@ mrb_int_mul_overflow(mrb_int a, mrb_int b, mrb_int *c) *c = (mrb_int)n; return n > MRB_INT_MAX || n < MRB_INT_MIN; #else /* MRB_INT64 */ + *c = a * b; if (a > 0 && b > 0 && a > MRB_INT_MAX / b) return TRUE; if (a < 0 && b > 0 && a < MRB_INT_MIN / b) return TRUE; if (a > 0 && b < 0 && b < MRB_INT_MIN / a) return TRUE; if (a < 0 && b < 0 && (a <= MRB_INT_MIN || b <= MRB_INT_MIN || -a > MRB_INT_MAX / -b)) return TRUE; - *c = a * b; return FALSE; #endif } diff --git a/include/mruby/object.h b/include/mruby/object.h index a6ecab1ebd..0540644147 100644 --- a/include/mruby/object.h +++ b/include/mruby/object.h @@ -9,10 +9,10 @@ #define MRB_OBJECT_HEADER \ struct RClass *c; \ - struct RBasic *gcnext; \ enum mrb_vtype tt:8; \ - uint32_t color:3; \ - uint32_t flags:21 + unsigned int gc_color:3; \ + unsigned int frozen:1; \ + uint32_t flags:20 #define MRB_FLAG_TEST(obj, flag) ((obj)->flags & (flag)) @@ -21,11 +21,15 @@ struct RBasic { }; #define mrb_basic_ptr(v) ((struct RBasic*)(mrb_ptr(v))) -#define MRB_FL_OBJ_IS_FROZEN (1 << 20) -#define MRB_FROZEN_P(o) ((o)->flags & MRB_FL_OBJ_IS_FROZEN) -#define MRB_SET_FROZEN_FLAG(o) ((o)->flags |= MRB_FL_OBJ_IS_FROZEN) -#define MRB_UNSET_FROZEN_FLAG(o) ((o)->flags &= ~MRB_FL_OBJ_IS_FROZEN) -#define mrb_frozen_p(o) MRB_FROZEN_P(o) +#define MRB_OBJ_IS_FROZEN 1 +#define mrb_frozen_p(o) ((o)->frozen) + +/* Object shape flag -- when set, obj->iv is shaped, not iv_tbl* */ +/* Bit 5: avoids conflict with MRB_INSTANCE_TT_MASK (bits 0-4); + but conflicts with MRB_HASH_AR_EA_N_USED on 32-bit, so the + predicate must also check tt to avoid false positives */ +#define MRB_FL_OBJ_SHAPED (1 << 5) +#define MRB_OBJ_SHAPED_P(o) ((o)->tt == MRB_TT_OBJECT && ((o)->flags & MRB_FL_OBJ_SHAPED)) struct RObject { MRB_OBJECT_HEADER; @@ -41,7 +45,7 @@ struct RFiber { }; #define mrb_static_assert_object_size(st) \ - mrb_static_assert(sizeof(st) <= sizeof(void*) * 6, \ - #st " size must be within 6 words") + mrb_static_assert(sizeof(st) <= sizeof(void*) * 5, \ + #st " size must be within 5 words") #endif /* MRUBY_OBJECT_H */ diff --git a/include/mruby/opcode.h b/include/mruby/opcode.h index 2495981617..0a954d96b7 100644 --- a/include/mruby/opcode.h +++ b/include/mruby/opcode.h @@ -9,20 +9,20 @@ enum mrb_insn { #define OPCODE(x,_) OP_ ## x, -#include "mruby/ops.h" +#include #undef OPCODE }; +/* backward compatibility aliases */ +#define OP_LOADT OP_LOADTRUE +#define OP_LOADF OP_LOADFALSE + #define OP_L_STRICT 1 #define OP_L_CAPTURE 2 #define OP_L_METHOD OP_L_STRICT #define OP_L_LAMBDA (OP_L_STRICT|OP_L_CAPTURE) #define OP_L_BLOCK OP_L_CAPTURE -#define OP_R_NORMAL 0 -#define OP_R_BREAK 1 -#define OP_R_RETURN 2 - #define PEEK_B(pc) (*(pc)) #define PEEK_S(pc) ((pc)[0]<<8|(pc)[1]) #define PEEK_W(pc) ((pc)[0]<<16|(pc)[1]<<8|(pc)[2]) diff --git a/include/mruby/ops.h b/include/mruby/ops.h index 7f3b77bf6f..5ded349e88 100644 --- a/include/mruby/ops.h +++ b/include/mruby/ops.h @@ -15,7 +15,7 @@ operation code operands semantics OPCODE(NOP, Z) /* no operation */ OPCODE(MOVE, BB) /* R[a] = R[b] */ OPCODE(LOADL, BB) /* R[a] = Pool[b] */ -OPCODE(LOADI, BB) /* R[a] = mrb_int(b) */ +OPCODE(LOADI8, BB) /* R[a] = mrb_int(b) */ OPCODE(LOADINEG, BB) /* R[a] = mrb_int(-b) */ OPCODE(LOADI__1, B) /* R[a] = mrb_int(-1) */ OPCODE(LOADI_0, B) /* R[a] = mrb_int(0) */ @@ -31,8 +31,8 @@ OPCODE(LOADI32, BSS) /* R[a] = mrb_int((b<<16)+c) */ OPCODE(LOADSYM, BB) /* R[a] = Syms[b] */ OPCODE(LOADNIL, B) /* R[a] = nil */ OPCODE(LOADSELF, B) /* R[a] = self */ -OPCODE(LOADT, B) /* R[a] = true */ -OPCODE(LOADF, B) /* R[a] = false */ +OPCODE(LOADTRUE, B) /* R[a] = true */ +OPCODE(LOADFALSE, B) /* R[a] = false */ OPCODE(GETGV, BB) /* R[a] = getglobal(Syms[b]) */ OPCODE(SETGV, BB) /* setglobal(Syms[b], R[a]) */ OPCODE(GETSV, BB) /* R[a] = Special[Syms[b]] */ @@ -48,6 +48,7 @@ OPCODE(SETMCNST, BB) /* R[a+1]::Syms[b] = R[a] */ OPCODE(GETUPVAR, BBB) /* R[a] = uvget(b,c) */ OPCODE(SETUPVAR, BBB) /* uvset(b,c,R[a]) */ OPCODE(GETIDX, B) /* R[a] = R[a][R[a+1]] */ +OPCODE(GETIDX0, BB) /* R[a] = R[b][0]; a+1 for method call */ OPCODE(SETIDX, B) /* R[a][R[a+1]] = R[a+2] */ OPCODE(JMP, S) /* pc+=a */ OPCODE(JMPIF, BS) /* if R[a] pc+=b */ @@ -57,25 +58,35 @@ OPCODE(JMPUW, S) /* unwind_and_jump_to(a) */ OPCODE(EXCEPT, B) /* R[a] = exc */ OPCODE(RESCUE, BB) /* R[b] = R[a].isa?(R[b]) */ OPCODE(RAISEIF, B) /* raise(R[a]) if R[a] */ +OPCODE(MATCHERR, B) /* raise NoMatchingPatternError unless R[a] */ OPCODE(SSEND, BBB) /* R[a] = self.send(Syms[b],R[a+1]..,R[a+n+1]:R[a+n+2]..) (c=n|k<<4) */ +OPCODE(SSEND0, BB) /* R[a] = self.send(Syms[b]) (no args) */ OPCODE(SSENDB, BBB) /* R[a] = self.send(Syms[b],R[a+1]..,R[a+n+1]:R[a+n+2]..,&R[a+n+2k+1]) */ OPCODE(SEND, BBB) /* R[a] = R[a].send(Syms[b],R[a+1]..,R[a+n+1]:R[a+n+2]..) (c=n|k<<4) */ +OPCODE(SEND0, BB) /* R[a] = R[a].send(Syms[b]) (no args) */ OPCODE(SENDB, BBB) /* R[a] = R[a].send(Syms[b],R[a+1]..,R[a+n+1]:R[a+n+2]..,&R[a+n+2k+1]) */ -OPCODE(CALL, Z) /* R[0] = self.call(frame.argc, frame.argv) */ +OPCODE(CALL, Z) /* self.call(*, **, &) (But overlay the current call frame; tailcall) */ +OPCODE(BLKCALL, BB) /* R[a] = R[a].call(R[a+1],... ,R[a+b]); direct block call */ OPCODE(SUPER, BB) /* R[a] = super(R[a+1],... ,R[a+b+1]) */ OPCODE(ARGARY, BS) /* R[a] = argument array (16=m5:r1:m5:d1:lv4) */ -OPCODE(ENTER, W) /* arg setup according to flags (23=m5:o5:r1:m5:k5:d1:b1) */ +OPCODE(ENTER, W) /* arg setup according to flags (24=n1:m5:o5:r1:m5:k5:d1:b1) */ OPCODE(KEY_P, BB) /* R[a] = kdict.key?(Syms[b]) */ OPCODE(KEYEND, Z) /* raise unless kdict.empty? */ OPCODE(KARG, BB) /* R[a] = kdict[Syms[b]]; kdict.delete(Syms[b]) */ OPCODE(RETURN, B) /* return R[a] (normal) */ OPCODE(RETURN_BLK, B) /* return R[a] (in-block return) */ +OPCODE(RETSELF, Z) /* return self */ +OPCODE(RETNIL, Z) /* return nil */ +OPCODE(RETTRUE, Z) /* return true */ +OPCODE(RETFALSE, Z) /* return false */ OPCODE(BREAK, B) /* break R[a] */ OPCODE(BLKPUSH, BS) /* R[a] = block (16=m5:r1:m5:d1:lv4) */ OPCODE(ADD, B) /* R[a] = R[a]+R[a+1] */ OPCODE(ADDI, BB) /* R[a] = R[a]+mrb_int(b) */ OPCODE(SUB, B) /* R[a] = R[a]-R[a+1] */ OPCODE(SUBI, BB) /* R[a] = R[a]-mrb_int(b) */ +OPCODE(ADDILV, BBB) /* R[a] = R[a]+mrb_int(c); R[b],R[b+1] for method call */ +OPCODE(SUBILV, BBB) /* R[a] = R[a]-mrb_int(c); R[b],R[b+1] for method call */ OPCODE(MUL, B) /* R[a] = R[a]*R[a+1] */ OPCODE(DIV, B) /* R[a] = R[a]/R[a+1] */ OPCODE(EQ, B) /* R[a] = R[a]==R[a+1] */ @@ -87,7 +98,7 @@ OPCODE(ARRAY, BB) /* R[a] = ary_new(R[a],R[a+1]..R[a+b]) */ OPCODE(ARRAY2, BBB) /* R[a] = ary_new(R[b],R[b+1]..R[b+c]) */ OPCODE(ARYCAT, B) /* ary_cat(R[a],R[a+1]) */ OPCODE(ARYPUSH, BB) /* ary_push(R[a],R[a+1]..R[a+b]) */ -OPCODE(ARYDUP, B) /* R[a] = ary_dup(R[a]) */ +OPCODE(ARYSPLAT, B) /* R[a] = ary_splat(R[a]) */ OPCODE(AREF, BBB) /* R[a] = R[b][c] */ OPCODE(ASET, BBB) /* R[b][c] = R[a] */ OPCODE(APOST, BBB) /* *R[a],R[a+1]..R[a+c] = R[a][b..] */ @@ -108,6 +119,8 @@ OPCODE(CLASS, BB) /* R[a] = newclass(R[a],Syms[b],R[a+1]) */ OPCODE(MODULE, BB) /* R[a] = newmodule(R[a],Syms[b]) */ OPCODE(EXEC, BB) /* R[a] = blockexec(R[a],Irep[b]) */ OPCODE(DEF, BB) /* R[a].newmethod(Syms[b],R[a+1]); R[a] = Syms[b] */ +OPCODE(TDEF, BBB) /* target_class.newmethod(Syms[b],Irep[c]); R[a] = Syms[b] */ +OPCODE(SDEF, BBB) /* R[a].singleton_class.newmethod(Syms[b],Irep[c]); R[a] = Syms[b] */ OPCODE(ALIAS, BB) /* alias_method(target_class,Syms[a],Syms[b]) */ OPCODE(UNDEF, B) /* undef_method(target_class,Syms[a]) */ OPCODE(SCLASS, B) /* R[a] = R[a].singleton_class */ diff --git a/include/mruby/presym.h b/include/mruby/presym.h index 066b675e79..a677cf4f5b 100644 --- a/include/mruby/presym.h +++ b/include/mruby/presym.h @@ -7,18 +7,16 @@ #ifndef MRUBY_PRESYM_H #define MRUBY_PRESYM_H -#if defined(MRB_NO_PRESYM) -# include -#elif !defined(MRB_PRESYM_SCANNING) -# include -#endif +#if !defined(MRB_PRESYM_SCANNING) + +#include /* * Where `mrb_intern_lit` is allowed for symbol interning, it is directly - * replaced by the symbol ID if presym is enabled by using the following - * macros. + * replaced by the symbol ID using the following macros. * * MRB_OPSYM(xor) //=> ^ (Operator) + * MRB_GVSYM(xor) //=> $xor (Global Variable) * MRB_CVSYM(xor) //=> @@xor (Class Variable) * MRB_IVSYM(xor) //=> @xor (Instance Variable) * MRB_SYM_B(xor) //=> xor! (Method with Bang) @@ -29,12 +27,39 @@ * For `MRB_OPSYM`, specify the names corresponding to operators (see * `MRuby::Presym::OPERATORS` in `lib/mruby/presym.rb` for the names that * can be specified for it). Other than that, describe only word characters - * excluding leading and ending punctuations. + * excluding leading and ending punctuation. * - * These macros are expanded to `mrb_intern_lit` if presym is disabled, - * therefore the mruby state variable is required. The above macros can be - * used when the variable name is `mrb`. If you want to use other variable - * names, you need to use macros with `_2` suffix, such as `MRB_SYM_2`. + * These macros are expanded to compile-time integer constants. */ +#define MRB_OPSYM(name) MRB_OPSYM__##name +#define MRB_GVSYM(name) MRB_GVSYM__##name +#define MRB_CVSYM(name) MRB_CVSYM__##name +#define MRB_IVSYM(name) MRB_IVSYM__##name +#define MRB_SYM_B(name) MRB_SYM_B__##name +#define MRB_SYM_Q(name) MRB_SYM_Q__##name +#define MRB_SYM_E(name) MRB_SYM_E__##name +#define MRB_SYM(name) MRB_SYM__##name + +/* backward compatibility: _2 variants accept but ignore mrb_state* */ +#define MRB_OPSYM_2(mrb, name) MRB_OPSYM(name) +#define MRB_GVSYM_2(mrb, name) MRB_GVSYM(name) +#define MRB_CVSYM_2(mrb, name) MRB_CVSYM(name) +#define MRB_IVSYM_2(mrb, name) MRB_IVSYM(name) +#define MRB_SYM_B_2(mrb, name) MRB_SYM_B(name) +#define MRB_SYM_Q_2(mrb, name) MRB_SYM_Q(name) +#define MRB_SYM_E_2(mrb, name) MRB_SYM_E(name) +#define MRB_SYM_2(mrb, name) MRB_SYM(name) + +#define MRB_PRESYM_DEFINE_VAR_AND_INITER(name, size, ...) \ + static const mrb_sym name[] = {__VA_ARGS__}; + +#define MRB_PRESYM_INIT_SYMBOLS(mrb, name) (void)(mrb) + +/* use MRB_SYM() for E_RUNTIME_ERROR etc. */ +#undef MRB_ERROR_SYM +#define MRB_ERROR_SYM(sym) MRB_SYM(sym) + +#endif /* !MRB_PRESYM_SCANNING */ + #endif /* MRUBY_PRESYM_H */ diff --git a/include/mruby/presym/disable.h b/include/mruby/presym/disable.h deleted file mode 100644 index 45843fbf8c..0000000000 --- a/include/mruby/presym/disable.h +++ /dev/null @@ -1,70 +0,0 @@ -/** -** @file mruby/presym/disable.h - Disable Preallocated Symbols -** -** See Copyright Notice in mruby.h -*/ - -#ifndef MRUBY_PRESYM_DISABLE_H -#define MRUBY_PRESYM_DISABLE_H - -#include - -#define MRB_PRESYM_MAX 0 - -#define MRB_OPSYM(name) MRB_OPSYM__##name(mrb) -#define MRB_CVSYM(name) mrb_intern_lit(mrb, "@@" #name) -#define MRB_IVSYM(name) mrb_intern_lit(mrb, "@" #name) -#define MRB_SYM_B(name) mrb_intern_lit(mrb, #name "!") -#define MRB_SYM_Q(name) mrb_intern_lit(mrb, #name "?") -#define MRB_SYM_E(name) mrb_intern_lit(mrb, #name "=") -#define MRB_SYM(name) mrb_intern_lit(mrb, #name) - -#define MRB_OPSYM_2(mrb, name) MRB_OPSYM__##name(mrb) -#define MRB_CVSYM_2(mrb, name) mrb_intern_lit(mrb, "@@" #name) -#define MRB_IVSYM_2(mrb, name) mrb_intern_lit(mrb, "@" #name) -#define MRB_SYM_B_2(mrb, name) mrb_intern_lit(mrb, #name "!") -#define MRB_SYM_Q_2(mrb, name) mrb_intern_lit(mrb, #name "?") -#define MRB_SYM_E_2(mrb, name) mrb_intern_lit(mrb, #name "=") -#define MRB_SYM_2(mrb, name) mrb_intern_lit(mrb, #name) - -#define MRB_OPSYM__not(mrb) mrb_intern_lit(mrb, "!") -#define MRB_OPSYM__mod(mrb) mrb_intern_lit(mrb, "%") -#define MRB_OPSYM__and(mrb) mrb_intern_lit(mrb, "&") -#define MRB_OPSYM__mul(mrb) mrb_intern_lit(mrb, "*") -#define MRB_OPSYM__add(mrb) mrb_intern_lit(mrb, "+") -#define MRB_OPSYM__sub(mrb) mrb_intern_lit(mrb, "-") -#define MRB_OPSYM__div(mrb) mrb_intern_lit(mrb, "/") -#define MRB_OPSYM__lt(mrb) mrb_intern_lit(mrb, "<") -#define MRB_OPSYM__gt(mrb) mrb_intern_lit(mrb, ">") -#define MRB_OPSYM__xor(mrb) mrb_intern_lit(mrb, "^") -#define MRB_OPSYM__tick(mrb) mrb_intern_lit(mrb, "`") -#define MRB_OPSYM__or(mrb) mrb_intern_lit(mrb, "|") -#define MRB_OPSYM__neg(mrb) mrb_intern_lit(mrb, "~") -#define MRB_OPSYM__neq(mrb) mrb_intern_lit(mrb, "!=") -#define MRB_OPSYM__nmatch(mrb) mrb_intern_lit(mrb, "!~") -#define MRB_OPSYM__andand(mrb) mrb_intern_lit(mrb, "&&") -#define MRB_OPSYM__pow(mrb) mrb_intern_lit(mrb, "**") -#define MRB_OPSYM__plus(mrb) mrb_intern_lit(mrb, "+@") -#define MRB_OPSYM__minus(mrb) mrb_intern_lit(mrb, "-@") -#define MRB_OPSYM__lshift(mrb) mrb_intern_lit(mrb, "<<") -#define MRB_OPSYM__le(mrb) mrb_intern_lit(mrb, "<=") -#define MRB_OPSYM__eq(mrb) mrb_intern_lit(mrb, "==") -#define MRB_OPSYM__match(mrb) mrb_intern_lit(mrb, "=~") -#define MRB_OPSYM__ge(mrb) mrb_intern_lit(mrb, ">=") -#define MRB_OPSYM__rshift(mrb) mrb_intern_lit(mrb, ">>") -#define MRB_OPSYM__aref(mrb) mrb_intern_lit(mrb, "[]") -#define MRB_OPSYM__oror(mrb) mrb_intern_lit(mrb, "||") -#define MRB_OPSYM__cmp(mrb) mrb_intern_lit(mrb, "<=>") -#define MRB_OPSYM__eqq(mrb) mrb_intern_lit(mrb, "===") -#define MRB_OPSYM__aset(mrb) mrb_intern_lit(mrb, "[]=") - -#define MRB_PRESYM_DEFINE_VAR_AND_INITER(name, size, ...) \ - static mrb_sym name[size]; \ - static void presym_init_##name(mrb_state *mrb) { \ - mrb_sym name__[] = {__VA_ARGS__}; \ - memcpy(name, name__, sizeof(name)); \ - } - -#define MRB_PRESYM_INIT_SYMBOLS(mrb, name) presym_init_##name(mrb) - -#endif /* MRUBY_PRESYM_DISABLE_H */ diff --git a/include/mruby/presym/enable.h b/include/mruby/presym/enable.h deleted file mode 100644 index 8ca0c3cc64..0000000000 --- a/include/mruby/presym/enable.h +++ /dev/null @@ -1,37 +0,0 @@ -/** -** @file mruby/presym/enable.h - Enable Preallocated Symbols -** -** See Copyright Notice in mruby.h -*/ - -#ifndef MRUBY_PRESYM_ENABLE_H -#define MRUBY_PRESYM_ENABLE_H - -#include - -#define MRB_OPSYM(name) MRB_OPSYM__##name -#define MRB_CVSYM(name) MRB_CVSYM__##name -#define MRB_IVSYM(name) MRB_IVSYM__##name -#define MRB_SYM_B(name) MRB_SYM_B__##name -#define MRB_SYM_Q(name) MRB_SYM_Q__##name -#define MRB_SYM_E(name) MRB_SYM_E__##name -#define MRB_SYM(name) MRB_SYM__##name - -#define MRB_OPSYM_2(mrb, name) MRB_OPSYM__##name -#define MRB_CVSYM_2(mrb, name) MRB_CVSYM__##name -#define MRB_IVSYM_2(mrb, name) MRB_IVSYM__##name -#define MRB_SYM_B_2(mrb, name) MRB_SYM_B__##name -#define MRB_SYM_Q_2(mrb, name) MRB_SYM_Q__##name -#define MRB_SYM_E_2(mrb, name) MRB_SYM_E__##name -#define MRB_SYM_2(mrb, name) MRB_SYM__##name - -#define MRB_PRESYM_DEFINE_VAR_AND_INITER(name, size, ...) \ - static const mrb_sym name[] = {__VA_ARGS__}; - -#define MRB_PRESYM_INIT_SYMBOLS(mrb, name) (void)(mrb) - -/* use MRB_SYM() for E_RUNTIME_ERROR etc. */ -#undef MRB_ERROR_SYM -#define MRB_ERROR_SYM(sym) MRB_SYM(sym) - -#endif /* MRUBY_PRESYM_ENABLE_H */ diff --git a/include/mruby/presym/scanning.h b/include/mruby/presym/scanning.h index 20fe1c48ba..0359ebf81c 100644 --- a/include/mruby/presym/scanning.h +++ b/include/mruby/presym/scanning.h @@ -30,6 +30,7 @@ #define mrb_funcall(mrb, v, name, ...) MRB_PRESYM_SCANNING_TAGGED(name) (v) (__VA_ARGS__) #define MRB_OPSYM(name) MRB_OPSYM__##name(mrb) +#define MRB_GVSYM(name) MRB_PRESYM_SCANNING_TAGGED("$" #name) #define MRB_CVSYM(name) MRB_PRESYM_SCANNING_TAGGED("@@" #name) #define MRB_IVSYM(name) MRB_PRESYM_SCANNING_TAGGED("@" #name) #define MRB_SYM_B(name) MRB_PRESYM_SCANNING_TAGGED(#name "!") @@ -37,13 +38,15 @@ #define MRB_SYM_E(name) MRB_PRESYM_SCANNING_TAGGED(#name "=") #define MRB_SYM(name) MRB_PRESYM_SCANNING_TAGGED(#name) -#define MRB_OPSYM_2(mrb, name) MRB_OPSYM__##name(mrb) -#define MRB_CVSYM_2(mrb, name) MRB_PRESYM_SCANNING_TAGGED("@@" #name) -#define MRB_IVSYM_2(mrb, name) MRB_PRESYM_SCANNING_TAGGED("@" #name) -#define MRB_SYM_B_2(mrb, name) MRB_PRESYM_SCANNING_TAGGED(#name "!") -#define MRB_SYM_Q_2(mrb, name) MRB_PRESYM_SCANNING_TAGGED(#name "?") -#define MRB_SYM_E_2(mrb, name) MRB_PRESYM_SCANNING_TAGGED(#name "=") -#define MRB_SYM_2(mrb, name) MRB_PRESYM_SCANNING_TAGGED(#name) +/* backward compatibility: _2 variants accept but ignore mrb_state* */ +#define MRB_OPSYM_2(mrb, name) MRB_OPSYM(name) +#define MRB_GVSYM_2(mrb, name) MRB_GVSYM(name) +#define MRB_CVSYM_2(mrb, name) MRB_CVSYM(name) +#define MRB_IVSYM_2(mrb, name) MRB_IVSYM(name) +#define MRB_SYM_B_2(mrb, name) MRB_SYM_B(name) +#define MRB_SYM_Q_2(mrb, name) MRB_SYM_Q(name) +#define MRB_SYM_E_2(mrb, name) MRB_SYM_E(name) +#define MRB_SYM_2(mrb, name) MRB_SYM(name) #define MRB_OPSYM__not(mrb) MRB_PRESYM_SCANNING_TAGGED("!") #define MRB_OPSYM__mod(mrb) MRB_PRESYM_SCANNING_TAGGED("%") diff --git a/include/mruby/proc.h b/include/mruby/proc.h index d4e576f28d..01bc0f75f1 100644 --- a/include/mruby/proc.h +++ b/include/mruby/proc.h @@ -9,27 +9,38 @@ #include "common.h" #include +#include /** * Proc class */ MRB_BEGIN_DECL +/* + * env object (for internal used) + * + * - don't create multiple envs on one ci. + * - don't share a env to different ci. + * - don't attach a closed env to any ci. + */ struct REnv { MRB_OBJECT_HEADER; mrb_value *stack; - struct mrb_context *cxt; + struct mrb_context *cxt; /* if not null, it means that the stack is shared with the call frame */ mrb_sym mid; }; -/* flags (21bits): 1(close):1(touched):1(heap):8(cioff/bidx):8(stack_len) */ +/* flags (20bits): 1(ZERO):1(separate module):2(visibility):8(cioff/bidx):8(stack_len) */ #define MRB_ENV_SET_LEN(e,len) ((e)->flags = (((e)->flags & ~0xff)|((unsigned int)(len) & 0xff))) #define MRB_ENV_LEN(e) ((mrb_int)((e)->flags & 0xff)) -#define MRB_ENV_CLOSED (1<<20) -#define MRB_ENV_CLOSE(e) ((e)->flags |= MRB_ENV_CLOSED) -#define MRB_ENV_ONSTACK_P(e) (((e)->flags & MRB_ENV_CLOSED) == 0) +#define MRB_ENV_CLOSE(e) ((e)->cxt = NULL) +#define MRB_ENV_ONSTACK_P(e) ((e)->cxt != NULL) #define MRB_ENV_BIDX(e) (((e)->flags >> 8) & 0xff) #define MRB_ENV_SET_BIDX(e,idx) ((e)->flags = (((e)->flags & ~(0xff<<8))|((unsigned int)(idx) & 0xff)<<8)) +#define MRB_ENV_SET_VISIBILITY(e, vis) MRB_FLAGS_SET((e)->flags, 16, 2, vis) +#define MRB_ENV_VISIBILITY(e) MRB_FLAGS_GET((e)->flags, 16, 2) +#define MRB_ENV_VISIBILITY_BREAK_P(e) MRB_FLAG_CHECK((e)->flags, 18) +#define MRB_ENV_COPY_FLAGS_FROM_CI(e, ci) MRB_FLAGS_SET((e)->flags, 16, 3, (ci)->vis) /* * Returns TRUE on success. @@ -44,6 +55,7 @@ struct RProc { union { const mrb_irep *irep; mrb_func_t func; + mrb_sym mid; } body; const struct RProc *upper; union { @@ -60,6 +72,7 @@ struct RProc { #define MRB_ASPEC_KEY(a) (((a) >> 2) & 0x1f) #define MRB_ASPEC_KDICT(a) (((a) >> 1) & 0x1) #define MRB_ASPEC_BLOCK(a) ((a) & 1) +#define MRB_ASPEC_NOBLOCK(a) (((a) >> 23) & 0x1) #define MRB_PROC_CFUNC_FL 128 #define MRB_PROC_CFUNC_P(p) (((p)->flags & MRB_PROC_CFUNC_FL) != 0) @@ -84,8 +97,64 @@ struct RProc { } while (0) #define MRB_PROC_SCOPE 2048 #define MRB_PROC_SCOPE_P(p) (((p)->flags & MRB_PROC_SCOPE) != 0) -#define MRB_PROC_NOARG 4096 /* for MRB_PROC_CFUNC_FL, it would be something like MRB_ARGS_NONE() or MRB_METHOD_NOARG_FL */ +#define MRB_PROC_NOARG 4096 /* for MRB_PROC_CFUNC_FL, aspec == MRB_ARGS_NONE() */ #define MRB_PROC_NOARG_P(p) (((p)->flags & MRB_PROC_NOARG) != 0) +#define MRB_PROC_ALIAS 8192 +#define MRB_PROC_ALIAS_P(p) (((p)->flags & MRB_PROC_ALIAS) != 0) + +/* Compressed aspec for cfunc procs (13 bits in RProc.flags). + * Uses free bits 0-6 and 14-19 to store a compressed argument spec. + * Layout: block(0) kdict(1) key(2-3) post(4-5) rest(6) opt(14-16) req(17-19) + * Field widths are smaller than the full 24-bit aspec: req/opt max 7, post/key max 3. + * Values exceeding the compressed range are clamped and rest is forced to 1. */ +#define MRB_PROC_CASPEC_MASK 0xfc07fu /* bits 0-6 and 14-19 */ + +static inline uint32_t +mrb_proc_compress_aspec(mrb_aspec aspec) +{ + uint32_t req = MRB_ASPEC_REQ(aspec); + uint32_t opt = MRB_ASPEC_OPT(aspec); + uint32_t rest = MRB_ASPEC_REST(aspec); + uint32_t post = MRB_ASPEC_POST(aspec); + uint32_t key = MRB_ASPEC_KEY(aspec); + uint32_t kdict = MRB_ASPEC_KDICT(aspec); + uint32_t block = MRB_ASPEC_BLOCK(aspec); + + if (req > 7 || opt > 7 || post > 3 || key > 3) { + if (req > 7) req = 7; + if (opt > 7) opt = 7; + if (post > 3) post = 3; + if (key > 3) key = 3; + rest = 1; + } + + return block | (kdict << 1) | (key << 2) | (post << 4) | (rest << 6) + | (opt << 14) | (req << 17); +} + +static inline mrb_aspec +mrb_proc_decompress_caspec(uint32_t flags) +{ + return (((flags >> 17) & 0x7) << 18) /* req */ + | (((flags >> 14) & 0x7) << 13) /* opt */ + | (((flags >> 6) & 0x1) << 12) /* rest */ + | (((flags >> 4) & 0x3) << 7) /* post */ + | (((flags >> 2) & 0x3) << 2) /* key */ + | (((flags >> 1) & 0x1) << 1) /* kdict */ + | (flags & 0x1); /* block */ +} + +static inline void +mrb_proc_set_cfunc_aspec(struct RProc *p, mrb_aspec aspec) +{ + p->flags &= ~(MRB_PROC_NOARG | MRB_PROC_CASPEC_MASK); + if (aspec == 0) { + p->flags |= MRB_PROC_NOARG; + } + else { + p->flags |= mrb_proc_compress_aspec(aspec); + } +} #define mrb_proc_ptr(v) ((struct RProc*)(mrb_ptr(v))) @@ -99,43 +168,65 @@ MRB_API mrb_value mrb_proc_cfunc_env_get(mrb_state *mrb, mrb_int idx); /* old name */ #define mrb_cfunc_env_get(mrb, idx) mrb_proc_cfunc_env_get(mrb, idx) -#define MRB_METHOD_FUNC_FL 1 -#define MRB_METHOD_NOARG_FL 2 - -#ifndef MRB_USE_METHOD_T_STRUCT - -#define MRB_METHOD_FUNC_P(m) (((uintptr_t)(m))&MRB_METHOD_FUNC_FL) -#define MRB_METHOD_NOARG_P(m) ((((uintptr_t)(m))&MRB_METHOD_NOARG_FL)?1:0) -#define MRB_METHOD_NOARG_SET(m) ((m)=(mrb_method_t)(((uintptr_t)(m))|MRB_METHOD_NOARG_FL)) -#define MRB_METHOD_FUNC(m) ((mrb_func_t)((uintptr_t)(m)>>2)) -#define MRB_METHOD_FROM_FUNC(m,fn) ((m)=(mrb_method_t)((((uintptr_t)(fn))<<2)|MRB_METHOD_FUNC_FL)) -#define MRB_METHOD_FROM_PROC(m,pr) ((m)=(mrb_method_t)(pr)) -#define MRB_METHOD_PROC_P(m) (!MRB_METHOD_FUNC_P(m)) -#define MRB_METHOD_PROC(m) ((struct RProc*)(m)) -#define MRB_METHOD_UNDEF_P(m) ((m)==0) - -#else +#define MRB_METHOD_FUNC_FL (1 << 24) +#define MRB_METHOD_PUBLIC_FL 0 +#define MRB_METHOD_PRIVATE_FL (1 << 25) +#define MRB_METHOD_PROTECTED_FL (1 << 26) +#define MRB_METHOD_VDEFAULT_FL ((1 << 25) | (1 << 26)) +#define MRB_METHOD_VISIBILITY_MASK ((1 << 25) | (1 << 26)) #define MRB_METHOD_FUNC_P(m) ((m).flags&MRB_METHOD_FUNC_FL) -#define MRB_METHOD_NOARG_P(m) (((m).flags&MRB_METHOD_NOARG_FL)?1:0) -#define MRB_METHOD_FUNC(m) ((m).func) -#define MRB_METHOD_NOARG_SET(m) do{(m).flags|=MRB_METHOD_NOARG_FL;}while(0) -#define MRB_METHOD_FROM_FUNC(m,fn) do{(m).flags=MRB_METHOD_FUNC_FL;(m).func=(fn);}while(0) -#define MRB_METHOD_FROM_PROC(m,pr) do{(m).flags=0;(m).proc=(struct RProc*)(pr);}while(0) +#define MRB_METHOD_FUNC(m) ((m).as.func) +#define MRB_METHOD_FROM_FUNC(m,fn) do{(m).flags=MRB_METHOD_FUNC_FL;(m).as.func=(fn);}while(0) +#define MRB_METHOD_FROM_PROC(m,pr) do{(m).flags=0;(m).as.proc=(pr);}while(0) #define MRB_METHOD_PROC_P(m) (!MRB_METHOD_FUNC_P(m)) -#define MRB_METHOD_PROC(m) ((m).proc) -#define MRB_METHOD_UNDEF_P(m) ((m).proc==NULL) +#define MRB_METHOD_PROC(m) ((m).as.proc) +#define MRB_METHOD_UNDEF_P(m) ((m).as.proc==NULL) +#define MRB_METHOD_VISIBILITY(m) ((m).flags & MRB_METHOD_VISIBILITY_MASK) +#define MRB_SET_VISIBILITY_FLAGS(f,v) ((f)=(((f)&~MRB_METHOD_VISIBILITY_MASK)|(v))) +#define MRB_METHOD_SET_VISIBILITY(m,v) MRB_SET_VISIBILITY_FLAGS((m).flags,(v)) -#endif /* MRB_USE_METHOD_T_STRUCT */ - -#define MRB_METHOD_CFUNC_P(m) (MRB_METHOD_FUNC_P(m)?TRUE:(MRB_METHOD_PROC(m)?(MRB_PROC_CFUNC_P(MRB_METHOD_PROC(m))):FALSE)) -#define MRB_METHOD_CFUNC(m) (MRB_METHOD_FUNC_P(m)?MRB_METHOD_FUNC(m):((MRB_METHOD_PROC(m)&&MRB_PROC_CFUNC_P(MRB_METHOD_PROC(m)))?MRB_PROC_CFUNC(MRB_METHOD_PROC(m)):NULL)) - - -#include +#define MRB_METHOD_CFUNC_P(m) (MRB_METHOD_FUNC_P(m) || (MRB_METHOD_PROC(m)?(MRB_PROC_CFUNC_P(MRB_METHOD_PROC(m))):FALSE)) +/* use MRB_METHOD_CFUNC(m) only when MRB_METHOD_CFUNC_P(m) is true */ +#define MRB_METHOD_CFUNC(m) (MRB_METHOD_FUNC_P(m)?MRB_METHOD_FUNC(m):MRB_PROC_CFUNC(MRB_METHOD_PROC(m))) MRB_API mrb_value mrb_load_proc(mrb_state *mrb, const struct RProc *proc); +/** + * It can be used to isolate top-level scopes referenced by blocks generated by + * `mrb_load_string_cxt()` or similar called before entering the mruby VM (e.g. from `main()`). + * In that case, the `ci` parameter should be `mrb->c->cibase`. + * + * #include + * #include + * #include + * + * int + * main(int argc, char **argv) + * { + * mrb_state *mrb; + * mrb_ccontext *cxt; + * mrb_value blk, ret; + * + * mrb = mrb_open(); + * cxt = mrb_ccontext_new(mrb); + * blk = mrb_load_string_cxt(mrb, "x, y, z = 1, 2, 3; proc { [x, y, z] }", cxt); + * mrb_vm_ci_env_clear(mrb, mrb->c->cibase); + * mrb_load_string_cxt(mrb, "x, y, z = 4, 5, 6", cxt); + * ret = mrb_funcall(mrb, blk, "call", 0); + * mrb_p(mrb, ret); // => [1, 2, 3] + * // => [4, 5, 6] if `mrb_vm_ci_env_clear()` is commented out + * mrb_ccontext_free(mrb, cxt); + * mrb_close(mrb); + * + * return 0; + * } + * + * The top-level local variable names stored in `mrb_ccontext` are retained. + * Use also `mrb_ccontext_cleanup_local_variables()` at the same time, if necessary. + */ +MRB_API void mrb_vm_ci_env_clear(mrb_state *mrb, mrb_callinfo *ci); + void mrb_vm_ci_proc_set(mrb_callinfo *ci, const struct RProc *p); struct RClass * mrb_vm_ci_target_class(const mrb_callinfo *ci); void mrb_vm_ci_target_class_set(mrb_callinfo *ci, struct RClass *tc); diff --git a/include/mruby/string.h b/include/mruby/string.h index cd300f8bd1..22683b89a2 100644 --- a/include/mruby/string.h +++ b/include/mruby/string.h @@ -38,12 +38,25 @@ struct RStringEmbed { char ary[RSTRING_EMBED_LEN_MAX+1]; }; -#define RSTR_SET_TYPE_FLAG(s, type) (RSTR_UNSET_TYPE_FLAG(s), (s)->flags |= MRB_STR_##type) -#define RSTR_UNSET_TYPE_FLAG(s) ((s)->flags &= ~(MRB_STR_TYPE_MASK|MRB_STR_EMBED_LEN_MASK)) +#define RSTR_SET_TYPE(s, type) ((s)->flags = ((s)->flags & ~(MRB_STR_TYPE_MASK|MRB_STR_EMBED_LEN_MASK)) | MRB_STR_##type) + +#define MRB_STR_NORMAL 0 +#define MRB_STR_SHARED 1 +#define MRB_STR_FSHARED 2 +#define MRB_STR_NOFREE 4 +#define MRB_STR_EMBED 8 +#define MRB_STR_TYPE_MASK 15 + +#define MRB_STR_EMBED_LEN_SHIFT 6 +#define MRB_STR_EMBED_LEN_BITS 5 +#define MRB_STR_EMBED_LEN_MASK (((1 << MRB_STR_EMBED_LEN_BITS) - 1) << MRB_STR_EMBED_LEN_SHIFT) + +#define MRB_STR_BINARY 16 +#define MRB_STR_SINGLE_BYTE 32 +#define MRB_STR_STATE_MASK 48 #define RSTR_EMBED_P(s) ((s)->flags & MRB_STR_EMBED) #define RSTR_SET_EMBED_FLAG(s) ((s)->flags |= MRB_STR_EMBED) -#define RSTR_UNSET_EMBED_FLAG(s) ((s)->flags &= ~(MRB_STR_EMBED|MRB_STR_EMBED_LEN_MASK)) #define RSTR_SET_EMBED_LEN(s, n) do {\ size_t tmp_n = (n);\ (s)->flags &= ~MRB_STR_EMBED_LEN_MASK;\ @@ -67,30 +80,24 @@ struct RStringEmbed { #define RSTR_CAPA(s) (RSTR_EMBED_P(s) ? RSTRING_EMBED_LEN_MAX : (s)->as.heap.aux.capa) #define RSTR_SHARED_P(s) ((s)->flags & MRB_STR_SHARED) -#define RSTR_SET_SHARED_FLAG(s) ((s)->flags |= MRB_STR_SHARED) -#define RSTR_UNSET_SHARED_FLAG(s) ((s)->flags &= ~MRB_STR_SHARED) - #define RSTR_FSHARED_P(s) ((s)->flags & MRB_STR_FSHARED) -#define RSTR_SET_FSHARED_FLAG(s) ((s)->flags |= MRB_STR_FSHARED) -#define RSTR_UNSET_FSHARED_FLAG(s) ((s)->flags &= ~MRB_STR_FSHARED) - #define RSTR_NOFREE_P(s) ((s)->flags & MRB_STR_NOFREE) -#define RSTR_SET_NOFREE_FLAG(s) ((s)->flags |= MRB_STR_NOFREE) -#define RSTR_UNSET_NOFREE_FLAG(s) ((s)->flags &= ~MRB_STR_NOFREE) #ifdef MRB_UTF8_STRING -# define RSTR_ASCII_P(s) ((s)->flags & MRB_STR_ASCII) -# define RSTR_SET_ASCII_FLAG(s) ((s)->flags |= MRB_STR_ASCII) -# define RSTR_UNSET_ASCII_FLAG(s) ((s)->flags &= ~MRB_STR_ASCII) -# define RSTR_WRITE_ASCII_FLAG(s, v) (RSTR_UNSET_ASCII_FLAG(s), (s)->flags |= v) -# define RSTR_COPY_ASCII_FLAG(dst, src) RSTR_WRITE_ASCII_FLAG(dst, RSTR_ASCII_P(src)) +# define RSTR_SINGLE_BYTE_P(s) ((s)->flags & MRB_STR_SINGLE_BYTE) +# define RSTR_SET_SINGLE_BYTE_FLAG(s) ((s)->flags |= MRB_STR_SINGLE_BYTE) +# define RSTR_UNSET_SINGLE_BYTE_FLAG(s) ((s)->flags &= ~MRB_STR_SINGLE_BYTE) +# define RSTR_WRITE_SINGLE_BYTE_FLAG(s, v) (RSTR_UNSET_SINGLE_BYTE_FLAG(s), (s)->flags |= v) +# define RSTR_COPY_SINGLE_BYTE_FLAG(dst, src) RSTR_WRITE_SINGLE_BYTE_FLAG(dst, RSTR_SINGLE_BYTE_P(src)) #else -# define RSTR_ASCII_P(s) (void)0 -# define RSTR_SET_ASCII_FLAG(s) (void)0 -# define RSTR_UNSET_ASCII_FLAG(s) (void)0 -# define RSTR_WRITE_ASCII_FLAG(s, v) (void)0 -# define RSTR_COPY_ASCII_FLAG(dst, src) (void)0 +# define RSTR_SINGLE_BYTE_P(s) TRUE +# define RSTR_SET_SINGLE_BYTE_FLAG(s) (void)0 +# define RSTR_UNSET_SINGLE_BYTE_FLAG(s) (void)0 +# define RSTR_WRITE_SINGLE_BYTE_FLAG(s, v) (void)0 +# define RSTR_COPY_SINGLE_BYTE_FLAG(dst, src) (void)0 #endif +#define RSTR_SET_ASCII_FLAG(s) RSTR_SET_SINGLE_BYTE_FLAG(s) +#define RSTR_BINARY_P(s) ((s)->flags & MRB_STR_BINARY) /** * Returns a pointer from a Ruby string @@ -104,16 +111,6 @@ struct RStringEmbed { #define RSTRING_END(s) (RSTRING_PTR(s) + RSTRING_LEN(s)) #define RSTRING_CSTR(mrb,s) mrb_string_cstr(mrb, s) -#define MRB_STR_SHARED 1 -#define MRB_STR_FSHARED 2 -#define MRB_STR_NOFREE 4 -#define MRB_STR_EMBED 8 /* type flags up to here */ -#define MRB_STR_ASCII 16 -#define MRB_STR_EMBED_LEN_SHIFT 6 -#define MRB_STR_EMBED_LEN_BIT 5 -#define MRB_STR_EMBED_LEN_MASK (((1 << MRB_STR_EMBED_LEN_BIT) - 1) << MRB_STR_EMBED_LEN_SHIFT) -#define MRB_STR_TYPE_MASK 15 - MRB_API void mrb_str_modify(mrb_state *mrb, struct RString *s); /* mrb_str_modify() with keeping ASCII flag if set */ MRB_API void mrb_str_modify_keep_ascii(mrb_state *mrb, struct RString *s); @@ -345,6 +342,16 @@ MRB_API const char *mrb_string_value_cstr(mrb_state *mrb, mrb_value *str); */ MRB_API mrb_value mrb_str_dup(mrb_state *mrb, mrb_value str); +/** + * Returns a frozen string object. + * The string will be duplicated and frozen if it is not already frozen. + * + * @param mrb The current mruby state. + * @param str An original Ruby string. + * @return [mrb_value] Ruby frozen string. + */ +MRB_API mrb_value mrb_str_dup_frozen(mrb_state *mrb, mrb_value str); + /** * Returns a symbol from a passed in Ruby string. * diff --git a/include/mruby/throw.h b/include/mruby/throw.h index 52171e9b0e..a56bf00796 100644 --- a/include/mruby/throw.h +++ b/include/mruby/throw.h @@ -2,15 +2,37 @@ ** @file mruby/throw.h - mruby exception throwing handler ** ** See Copyright Notice in mruby.h +** +** WARNING: This header is for mruby core internal use only. +** Do not include this header in user code or mrbgems. +** +** When MRB_USE_CXX_EXCEPTION is defined, this header requires C++ +** compilation. C source files that include this header will fail +** to compile when linked into C++ projects using MRB_USE_CXX_EXCEPTION. +** +** For exception-safe code in mrbgems and user code, use the +** mrb_protect_error() API from instead: +** +** #include +** +** mrb_value my_func_body(mrb_state *mrb, void *data) { +** // code that may raise exceptions +** return result; +** } +** +** void my_func(mrb_state *mrb) { +** mrb_bool error; +** mrb_value result = mrb_protect_error(mrb, my_func_body, data, &error); +** // cleanup code runs here regardless of exception +** if (error) mrb_exc_raise(mrb, result); +** } */ #ifndef MRB_THROW_H #define MRB_THROW_H -#if defined(MRB_USE_CXX_ABI) -# if !defined(__cplusplus) +#if defined(MRB_USE_CXX_ABI) && !defined(__cplusplus) # error Trying to use C++ exception handling in C code -# endif #endif #if defined(MRB_USE_CXX_EXCEPTION) @@ -18,11 +40,11 @@ # if defined(__cplusplus) #define MRB_TRY(buf) try { -#define MRB_CATCH(buf) } catch(mrb_jmpbuf_impl e) { if (e != (buf)->impl) { throw e; } +#define MRB_CATCH(buf) } catch(mrb_jmpbuf *e) { if (e != (buf)) { throw e; } #define MRB_END_EXC(buf) } -#define MRB_THROW(buf) throw((buf)->impl) -typedef mrb_int mrb_jmpbuf_impl; +#define MRB_THROW(buf) throw(buf) +typedef void *mrb_jmpbuf_impl; # else # error "need to be compiled with C++ compiler" @@ -35,7 +57,7 @@ typedef mrb_int mrb_jmpbuf_impl; #if defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) #define MRB_SETJMP _setjmp #define MRB_LONGJMP _longjmp -#elif defined(__MINGW64__) && defined(__GNUC__) && __GNUC__ >= 4 +#elif defined(__MINGW64__) && !defined(_M_ARM64) && defined(__GNUC__) && __GNUC__ >= 4 #define MRB_SETJMP __builtin_setjmp #define MRB_LONGJMP __builtin_longjmp #else @@ -54,13 +76,6 @@ typedef mrb_int mrb_jmpbuf_impl; struct mrb_jmpbuf { mrb_jmpbuf_impl impl; - -#if defined(MRB_USE_CXX_EXCEPTION) - static mrb_int jmpbuf_id; -# if defined(__cplusplus) - mrb_jmpbuf() : impl(jmpbuf_id++) {} -# endif -#endif }; #endif /* MRB_THROW_H */ diff --git a/include/mruby/value.h b/include/mruby/value.h index 846452ce68..d6cd0a08a2 100644 --- a/include/mruby/value.h +++ b/include/mruby/value.h @@ -10,7 +10,7 @@ #include "common.h" /* - * MRuby Value definition functions and macros. + * mruby Value definition functions and macros. */ MRB_BEGIN_DECL @@ -55,8 +55,6 @@ typedef uint8_t mrb_bool; # endif #endif -struct mrb_state; - #if defined _MSC_VER && _MSC_VER < 1800 # define PRIo64 "llo" # define PRId64 "lld" @@ -94,15 +92,22 @@ struct mrb_state; # define MRB_PRIx PRIx32 #endif -#ifdef MRB_ENDIAN_BIG -# define MRB_ENDIAN_LOHI(a,b) a b -#else -# define MRB_ENDIAN_LOHI(a,b) b a -#endif +#define MRB_FLAGS_MASK(shift, width) (~(~0U << (width)) << (shift)) +#define MRB_FLAGS_GET(b, s, w) (((b) >> (s)) & MRB_FLAGS_MASK(0, w)) +#define MRB_FLAGS_SET(b, s, w, n) ((b) = MRB_FLAGS_ZERO(b, s, w) | MRB_FLAGS_MAKE(s, w, n)) +#define MRB_FLAGS_ZERO(b, s, w) ((b) & ~MRB_FLAGS_MASK(s, w)) +#define MRB_FLAGS_MAKE(s, w, n) (((n) & MRB_FLAGS_MASK(0, w)) << (s)) +#define MRB_FLAG_ON(b, s) ((b) |= MRB_FLAGS_MASK(s, 1)) +#define MRB_FLAG_OFF(b, s) ((b) &= ~MRB_FLAGS_MASK(s, 1)) +#define MRB_FLAG_CHECK(b, s) (!!((b) & MRB_FLAGS_MASK(s, 1))) -MRB_API mrb_int mrb_int_read(const char *p, const char *e, char **endp); +MRB_API mrb_bool mrb_read_int(const char *p, const char *e, char **endp, mrb_int *np); +/* obsolete; do not use mrb_int_read() */ +MRB_API mrb_int mrb_int_read(const char*, const char*, char**); #ifndef MRB_NO_FLOAT -MRB_API double mrb_float_read(const char*, char**); +MRB_API mrb_bool mrb_read_float(const char *p, char **endp, double *fp); +/* obsolete; do not use mrb_float_read() */ +MRB_API double mrb_float_read(const char *p, char **endp); #ifdef MRB_USE_FLOAT32 typedef float mrb_float; #else @@ -121,13 +126,13 @@ MRB_API int mrb_msvc_snprintf(char *s, size_t n, const char *format, ...); # define isinf(n) (!_finite(n) && !_isnan(n)) # define signbit(n) (_copysign(1.0, (n)) < 0.0) static const unsigned int IEEE754_INFINITY_BITS_SINGLE = 0x7F800000; -# define INFINITY (*(float *)&IEEE754_INFINITY_BITS_SINGLE) +# define INFINITY (*(float*)&IEEE754_INFINITY_BITS_SINGLE) # define NAN ((float)(INFINITY - INFINITY)) # endif #endif #define MRB_VTYPE_FOREACH(f) \ - /* mrb_vtype */ /* c type */ /* ruby class */ \ + /* mrb_vtype */ /* C type */ /* Ruby class */ \ f(MRB_TT_FALSE, void, "false") \ f(MRB_TT_TRUE, void, "true") \ f(MRB_TT_SYMBOL, void, "Symbol") \ @@ -139,23 +144,25 @@ static const unsigned int IEEE754_INFINITY_BITS_SINGLE = 0x7F800000; f(MRB_TT_OBJECT, struct RObject, "Object") \ f(MRB_TT_CLASS, struct RClass, "Class") \ f(MRB_TT_MODULE, struct RClass, "Module") \ - f(MRB_TT_ICLASS, struct RClass, "iClass") \ f(MRB_TT_SCLASS, struct RClass, "SClass") \ + f(MRB_TT_HASH, struct RHash, "Hash") \ + f(MRB_TT_CDATA, struct RData, "C data") \ + f(MRB_TT_EXCEPTION, struct RException, "Exception") \ + f(MRB_TT_ICLASS, struct RClass, "iClass") \ f(MRB_TT_PROC, struct RProc, "Proc") \ f(MRB_TT_ARRAY, struct RArray, "Array") \ - f(MRB_TT_HASH, struct RHash, "Hash") \ f(MRB_TT_STRING, struct RString, "String") \ f(MRB_TT_RANGE, struct RRange, "Range") \ - f(MRB_TT_EXCEPTION, struct RException, "Exception") \ f(MRB_TT_ENV, struct REnv, "env") \ - f(MRB_TT_DATA, struct RData, "Data") \ f(MRB_TT_FIBER, struct RFiber, "Fiber") \ f(MRB_TT_STRUCT, struct RArray, "Struct") \ f(MRB_TT_ISTRUCT, struct RIStruct, "istruct") \ f(MRB_TT_BREAK, struct RBreak, "break") \ f(MRB_TT_COMPLEX, struct RComplex, "Complex") \ f(MRB_TT_RATIONAL, struct RRational, "Rational") \ - f(MRB_TT_BIGINT, struct RBigint, "Integer") + f(MRB_TT_BIGINT, struct RBigint, "Integer") \ + f(MRB_TT_BACKTRACE, struct RBacktrace, "backtrace") \ + f(MRB_TT_SET, struct RSet, "Set") enum mrb_vtype { #define MRB_VTYPE_DEFINE(tt, type, name) tt, @@ -164,6 +171,9 @@ enum mrb_vtype { MRB_TT_MAXDEFINE }; +/* obsolete name for MRB_TT_CDATA */ +#define MRB_TT_DATA MRB_TT_CDATA + #define MRB_VTYPE_TYPEOF(tt) MRB_TYPEOF_##tt #define MRB_VTYPE_TYPEDEF(tt, type, name) typedef type MRB_VTYPE_TYPEOF(tt); @@ -179,7 +189,7 @@ MRB_VTYPE_FOREACH(MRB_VTYPE_TYPEDEF) /** * @abstract - * MRuby value boxing. + * mruby value boxing. * * Actual implementation depends on configured boxing type. * @@ -238,9 +248,11 @@ struct RCptr { #ifndef mrb_true_p #define mrb_true_p(o) (mrb_type(o) == MRB_TT_TRUE) #endif -#ifndef MRB_NO_FLOAT #ifndef mrb_float_p +#ifndef MRB_NO_FLOAT #define mrb_float_p(o) (mrb_type(o) == MRB_TT_FLOAT) +#else +#define mrb_float_p(o) FALSE #endif #endif #ifndef mrb_array_p @@ -286,7 +298,7 @@ struct RCptr { #define mrb_env_p(o) (mrb_type(o) == MRB_TT_ENV) #endif #ifndef mrb_data_p -#define mrb_data_p(o) (mrb_type(o) == MRB_TT_DATA) +#define mrb_data_p(o) (mrb_type(o) == MRB_TT_CDATA) #endif #ifndef mrb_fiber_p #define mrb_fiber_p(o) (mrb_type(o) == MRB_TT_FIBER) @@ -302,7 +314,11 @@ struct RCptr { #endif #define mrb_test(o) mrb_bool(o) #ifndef mrb_bigint_p +#ifdef MRB_USE_BIGINT #define mrb_bigint_p(o) (mrb_type(o) == MRB_TT_BIGINT) +#else +#define mrb_bigint_p(o) FALSE +#endif #endif /** @@ -311,7 +327,8 @@ struct RCptr { * Takes a float and boxes it into an mrb_value */ #ifndef MRB_NO_FLOAT -MRB_INLINE mrb_value mrb_float_value(struct mrb_state *mrb, mrb_float f) +MRB_INLINE mrb_value +mrb_float_value(mrb_state *mrb, mrb_float f) { mrb_value v; (void) mrb; @@ -321,7 +338,7 @@ MRB_INLINE mrb_value mrb_float_value(struct mrb_state *mrb, mrb_float f) #endif MRB_INLINE mrb_value -mrb_cptr_value(struct mrb_state *mrb, void *p) +mrb_cptr_value(mrb_state *mrb, void *p) { mrb_value v; (void) mrb; @@ -332,14 +349,16 @@ mrb_cptr_value(struct mrb_state *mrb, void *p) /** * Returns an integer in Ruby. */ -MRB_INLINE mrb_value mrb_int_value(struct mrb_state *mrb, mrb_int i) +MRB_INLINE mrb_value +mrb_int_value(mrb_state *mrb, mrb_int i) { mrb_value v; SET_INT_VALUE(mrb, v, i); return v; } -MRB_INLINE mrb_value mrb_fixnum_value(mrb_int i) +MRB_INLINE mrb_value +mrb_fixnum_value(mrb_int i) { mrb_value v; SET_FIXNUM_VALUE(v, i); @@ -368,7 +387,8 @@ mrb_obj_value(void *p) * @return * nil mrb_value object reference. */ -MRB_INLINE mrb_value mrb_nil_value(void) +MRB_INLINE mrb_value +mrb_nil_value(void) { mrb_value v; SET_NIL_VALUE(v); @@ -378,7 +398,8 @@ MRB_INLINE mrb_value mrb_nil_value(void) /** * Returns false in Ruby. */ -MRB_INLINE mrb_value mrb_false_value(void) +MRB_INLINE mrb_value +mrb_false_value(void) { mrb_value v; SET_FALSE_VALUE(v); @@ -388,7 +409,8 @@ MRB_INLINE mrb_value mrb_false_value(void) /** * Returns true in Ruby. */ -MRB_INLINE mrb_value mrb_true_value(void) +MRB_INLINE mrb_value +mrb_true_value(void) { mrb_value v; SET_TRUE_VALUE(v); @@ -426,10 +448,19 @@ mrb_ro_data_p(const char *p) #elif defined(__APPLE__) #define MRB_LINK_TIME_RO_DATA_P #include +#include // for _NSGetMachExecuteHeader static inline mrb_bool mrb_ro_data_p(const char *p) { - return (char*)get_etext() < p && p < (char*)get_edata(); +#ifdef __LP64__ + struct mach_header_64 *mhp; +#else + struct mach_header *mhp; +#endif + mhp = _NSGetMachExecuteHeader(); + unsigned long textsize; + char *text = (char*)getsegmentdata(mhp, SEG_TEXT, &textsize); + return text <= p && p < text + textsize; } #endif /* Linux or macOS */ #endif /* MRB_NO_DEFAULT_RO_DATA_P */ diff --git a/include/mruby/variable.h b/include/mruby/variable.h index 0a7b7b42e8..2af62f048c 100644 --- a/include/mruby/variable.h +++ b/include/mruby/variable.h @@ -7,6 +7,12 @@ #ifndef MRUBY_VARIABLE_H #define MRUBY_VARIABLE_H +#if defined(__GNUC__) || defined(__clang__) +#define MRB_MEM_PREFETCH(addr) __builtin_prefetch(addr, 0, 1) +#else +#define MRB_MEM_PREFETCH(addr) +#endif + #include "common.h" /** diff --git a/include/mruby/version.h b/include/mruby/version.h index cf713da2bc..cdb5efe26a 100644 --- a/include/mruby/version.h +++ b/include/mruby/version.h @@ -27,7 +27,7 @@ MRB_BEGIN_DECL /* * The version of Ruby used by mruby. */ -#define MRUBY_RUBY_VERSION "3.1" +#define MRUBY_RUBY_VERSION "4.0" /* * Ruby engine. @@ -37,12 +37,12 @@ MRB_BEGIN_DECL /* * Major release version number. */ -#define MRUBY_RELEASE_MAJOR 3 +#define MRUBY_RELEASE_MAJOR 4 /* * Minor release version number. */ -#define MRUBY_RELEASE_MINOR 1 +#define MRUBY_RELEASE_MINOR 0 /* * Tiny release version number. @@ -80,17 +80,17 @@ MRB_BEGIN_DECL /* * Release year. */ -#define MRUBY_RELEASE_YEAR 2022 +#define MRUBY_RELEASE_YEAR 2026 /* * Release month. */ -#define MRUBY_RELEASE_MONTH 5 +#define MRUBY_RELEASE_MONTH 4 /* * Release day. */ -#define MRUBY_RELEASE_DAY 12 +#define MRUBY_RELEASE_DAY 20 /* * Release date as a string. @@ -117,7 +117,7 @@ MRB_BEGIN_DECL #define MRUBY_BIRTH_YEAR 2010 /* - * MRuby's authors. + * mruby's authors. */ #define MRUBY_AUTHOR "mruby developers" diff --git a/lib/mruby/amalgam.rb b/lib/mruby/amalgam.rb new file mode 100644 index 0000000000..0aec03c75a --- /dev/null +++ b/lib/mruby/amalgam.rb @@ -0,0 +1,567 @@ +module MRuby + class Amalgam + # Top-level headers to include (internal dependencies are inlined recursively) + # mruby.h is the main header - it includes value.h, gc.h, version.h with proper macro order + # Note: internal.h is NOT included here - it goes in mruby.c + HEADER_ORDER = %w[ + mruby.h + mruby/array.h + mruby/string.h + mruby/hash.h + mruby/class.h + mruby/proc.h + mruby/range.h + mruby/variable.h + mruby/numeric.h + mruby/error.h + mruby/data.h + mruby/istruct.h + mruby/mempool.h + mruby/debug.h + mruby/dump.h + mruby/irep.h + mruby/opcode.h + mruby/re.h + mruby/throw.h + mruby/khash.h + mruby/endian.h + mruby/presym.h + mruby/compile.h + ].freeze + + # Boxing headers are conditionally included + BOXING_HEADERS = %w[ + mruby/boxing_no.h + mruby/boxing_word.h + mruby/boxing_nan.h + ].freeze + + # Core sources in recommended order + CORE_SOURCE_ORDER = %w[ + allocf.c + readnum.c + readint.c + fp_uscale.c + state.c + symbol.c + class.c + object.c + gc.c + mempool.c + variable.c + array.c + hash.c + string.c + range.c + numeric.c + numops.c + proc.c + kernel.c + enum.c + error.c + backtrace.c + vm.c + load.c + dump.c + cdump.c + codedump.c + print.c + debug.c + etc.c + version.c + init.c + ].freeze + + def initialize(build) + @build = build + @processed_guards = {} + @processed_headers = [] # Track header paths for include transformation + # Pre-collect gem header names for source include transformation + @gem_header_names = collect_gem_header_names + end + + def collect_gem_header_names + names = [] + library_gems.each do |gem| + gem_include = "#{gem.dir}/include" + next unless File.directory?(gem_include) + Dir.glob("#{gem_include}/**/*.h").each do |path| + rel_path = path.sub("#{gem_include}/", "") + names << rel_path + # Also track basename for simple includes like "io_hal.h" + names << File.basename(rel_path) + end + end + names.uniq + end + + def generate_header(output_path) + FileUtils.mkdir_p(File.dirname(output_path)) + _pp "GEN", output_path.relative_path + + File.open(output_path, "w:binary") do |f| + write_header_preamble(f) + write_ordered_headers(f) + # Boxing headers are inlined at their include point in value.h + # Presym headers are inlined via presym.h -> enable.h -> id.h + write_gem_headers(f) + write_header_postamble(f) + end + end + + def generate_source(output_path) + FileUtils.mkdir_p(File.dirname(output_path)) + _pp "GEN", output_path.relative_path + + File.open(output_path, "w:binary") do |f| + write_source_preamble(f) + write_internal_headers(f) + write_core_sources(f) + write_generated_sources(f) + write_gem_sources(f) + end + end + + private + + def include_dir + "#{MRUBY_ROOT}/include" + end + + def build_include_dir + "#{@build.build_dir}/include" + end + + def src_dir + "#{MRUBY_ROOT}/src" + end + + # Filter out binary gems (they have main() functions) + def library_gems + @build.gems.reject { |gem| gem.name.start_with?("mruby-bin-") } + end + + # ========== Header Generation ========== + + def write_header_preamble(f) + f.puts <<~PREAMBLE + /* + ** mruby amalgamated header + ** Generated from mruby source files + ** + ** This file is auto-generated. Do not edit directly. + */ + + #ifndef MRUBY_AMALGAM_H + #define MRUBY_AMALGAM_H + + #ifdef __cplusplus + #define __STDC_LIMIT_MACROS + #define __STDC_CONSTANT_MACROS + #define __STDC_FORMAT_MACROS + #endif + + #include + #include + #include + #include + #include + + PREAMBLE + + # Add build-level defines from gems (e.g., MRB_USE_TASK_SCHEDULER) + gem_defines = collect_gem_defines + unless gem_defines.empty? + f.puts "/* Gem-required defines */" + gem_defines.each do |d| + f.puts "#define #{d}" + end + f.puts + end + end + + # Collect defines added by gems that affect core headers + def collect_gem_defines + defines = [] + @build.defines.each do |d| + # Include defines that affect mrb_state or core functionality + defines << d if d =~ /^MRB_USE_|^MRB_UTF8_|^HAVE_MRUBY_/ + end + defines.uniq.sort + end + + def write_header_postamble(f) + f.puts <<~POSTAMBLE + + #endif /* MRUBY_AMALGAM_H */ + POSTAMBLE + end + + def write_ordered_headers(f) + # Process top-level headers; internal includes are recursively inlined + HEADER_ORDER.each do |header| + path = "#{include_dir}/#{header}" + next unless File.exist?(path) + write_header_content(f, header, path) + end + end + + def write_boxing_headers(f) + f.puts "\n/* Boxing type selection */" + f.puts "#if defined(MRB_NAN_BOXING)" + write_header_content(f, "mruby/boxing_nan.h", "#{include_dir}/mruby/boxing_nan.h") + f.puts "#elif defined(MRB_WORD_BOXING)" + write_header_content(f, "mruby/boxing_word.h", "#{include_dir}/mruby/boxing_word.h") + f.puts "#else" + write_header_content(f, "mruby/boxing_no.h", "#{include_dir}/mruby/boxing_no.h") + f.puts "#endif" + end + + def write_presym_headers(f) + presym_dir = "#{@build.build_dir}/include/mruby/presym" + return unless File.directory?(presym_dir) + + %w[id.h table.h].each do |header| + path = "#{presym_dir}/#{header}" + next unless File.exist?(path) + write_header_content(f, "mruby/presym/#{header}", path) + end + end + + def write_gem_headers(f) + library_gems.each do |gem| + gem_include = "#{gem.dir}/include" + next unless File.directory?(gem_include) + + Dir.glob("#{gem_include}/**/*.h").sort.each do |path| + rel_path = path.sub("#{gem_include}/", "") + write_header_content(f, "#{gem.name}: #{rel_path}", path) + # Also track the relative path for source include transformation + # (handles includes like #include "io_hal.h") + @processed_headers << rel_path unless @processed_headers.include?(rel_path) + end + end + end + + def write_header_content(f, name, path) + return unless File.exist?(path) + + content = File.read(path, mode: "rb") + guard = extract_include_guard(content) + + # Skip if already processed + if guard && @processed_guards[guard] + f.puts "/* #{name} - already included */" + return + end + @processed_guards[guard] = true if guard + @processed_headers << name # Track header path for include transformation + + f.puts "\n/* ======== #{name} ======== */" + content = strip_include_guard(content, guard) if guard + content = transform_includes(content, inline: true) + f.puts content + end + + # ========== Source Generation ========== + + def write_source_preamble(f) + f.puts <<~PREAMBLE + /* + ** mruby amalgamated source + ** Generated from mruby source files + ** + ** This file is auto-generated. Do not edit directly. + */ + + #include "mruby.h" + + PREAMBLE + end + + def write_internal_headers(f) + f.puts "/* ======== Internal headers ======== */" + + # Forward declarations needed for amalgamation + # (functions called before defined due to source file ordering) + # Note: mrb_irep_catch_handler_table is static inline in internal.h, no forward decl needed + f.puts <<~FORWARD + /* Forward declarations for amalgamation */ + static void mrb_irep_free(mrb_state *mrb, mrb_irep *irep); + static mrb_value mrb_class_find_path(mrb_state *mrb, struct RClass *c); + static void mrb_method_added(mrb_state *mrb, struct RClass *c, mrb_sym mid); + static void mrb_proc_copy(mrb_state *mrb, struct RProc *a, const struct RProc *b); + static size_t mrb_gc_mark_range(mrb_state *mrb, struct RRange *r); + static void mrb_ary_decref(mrb_state *mrb, mrb_shared_array *shared); + static mrb_int mrb_proc_arity(const struct RProc *p); + FORWARD + + # internal.h + internal_path = "#{include_dir}/mruby/internal.h" + if File.exist?(internal_path) + content = File.read(internal_path, mode: "rb") + content = strip_include_guard(content, extract_include_guard(content)) + content = transform_source_includes(content) + f.puts "\n/* mruby/internal.h */" + f.puts content + end + + # presym/table.h (generated, needed by symbol.c) + table_path = "#{build_include_dir}/mruby/presym/table.h" + if File.exist?(table_path) + content = File.read(table_path, mode: "rb") + content = transform_source_includes(content) + f.puts "\n/* mruby/presym/table.h */" + f.puts content + end + + # value_array.h (internal src header) + value_array_path = "#{src_dir}/value_array.h" + if File.exist?(value_array_path) + content = File.read(value_array_path, mode: "rb") + content = strip_include_guard(content, extract_include_guard(content)) + content = transform_source_includes(content) + f.puts "\n/* src/value_array.h */" + f.puts content + end + end + + def write_core_sources(f) + f.puts "\n/* ======== Core sources ======== */" + + CORE_SOURCE_ORDER.each do |source| + path = "#{src_dir}/#{source}" + next unless File.exist?(path) + write_source_content(f, "src/#{source}", path) + end + + # Clear potentially conflicting macros from core sources + write_macro_cleanup(f, "core") + end + + def write_generated_sources(f) + # mrblib.c - compiled Ruby stdlib + mrblib_path = "#{@build.build_dir}/mrblib/mrblib.c" + if File.exist?(mrblib_path) + write_source_content(f, "mrblib.c", mrblib_path) + end + end + + # Macros that may conflict between source files in amalgamation + # These are #undef-ed after each source file within a gem + CONFLICTING_MACROS = %w[ + mrb_stat + mrb_lstat + mrb_fstat + lesser + greater + CASE + NEXT + JUMP + CALL + node_type + push + pop + peek + ].freeze + + def write_gem_sources(f) + f.puts "\n/* ======== Gem sources ======== */" + + library_gems.each do |gem| + # Some gems use 'core/' instead of 'src/' (mruby-compiler, mruby-bigint) + source_dirs = ["#{gem.dir}/src", "#{gem.dir}/core"].select { |d| File.directory?(d) } + + # Include C sources if the gem has any + unless source_dirs.empty? + sources = source_dirs.flat_map { |d| Dir.glob("#{d}/**/*.c") }.sort + sources.each_with_index do |path, idx| + rel_path = path.sub("#{gem.dir}/", "") + write_source_content(f, "#{gem.name}: #{rel_path}", path) + # Clear macros between source files to avoid conflicts + # (e.g., mrb_stat macro in file.c vs function in file_test.c) + write_macro_cleanup(f, "#{gem.name}/#{File.basename(path)}") if idx < sources.size - 1 + end + end + + # Gem's compiled mrblib (Ruby-only gems like mruby-enum-ext have this) + gem_mrblib = "#{gem.build_dir}/gem_mrblib.c" + if File.exist?(gem_mrblib) + write_source_content(f, "#{gem.name}: gem_mrblib.c", gem_mrblib) + end + + # Gem's init functions (GENERATED_TMP_mrb_*_gem_init/final) + # Required for both C and Ruby-only gems + gem_init = "#{gem.build_dir}/gem_init.c" + if File.exist?(gem_init) + write_source_content(f, "#{gem.name}: gem_init.c", gem_init) + end + + # Clear potentially conflicting macros after each gem + write_macro_cleanup(f, gem.name) + end + + # gem_init.c - gem registration + gem_init_path = "#{@build.build_dir}/mrbgems/gem_init.c" + if File.exist?(gem_init_path) + write_source_content(f, "gem_init.c", gem_init_path) + end + end + + def write_source_content(f, name, path) + return unless File.exist?(path) + + content = File.read(path, mode: "rb") + # For source files, comment out all mruby includes (they're in the header) + # and inline local includes (like .cstub files) + source_dir = File.dirname(path) + content = transform_source_includes(content, source_dir) + + f.puts "\n/* ======== #{name} ======== */" + f.puts content + end + + def write_macro_cleanup(f, gem_name) + f.puts "\n/* Cleanup macros from #{gem_name} to avoid conflicts */" + CONFLICTING_MACROS.each do |macro| + f.puts "#ifdef #{macro}" + f.puts "#undef #{macro}" + f.puts "#endif" + end + end + + # X-macro pattern headers that must be inlined every time they're included + # (not commented out) because they expand differently based on macro definitions + XMACRO_HEADERS = %w[ + mruby/ops.h + ].freeze + + def transform_source_includes(content, source_dir = nil) + content.gsub(/^(\s*)(#\s*include\s+([<"])([^>"]+)[>"])/m) do |match| + prefix = $1 + include_stmt = $2 + quote_type = $3 # < or " + header = $4 + + # X-macro headers must be inlined every time (not commented out) + # because they expand differently based on surrounding macro definitions + if XMACRO_HEADERS.include?(header) + xmacro_path = "#{include_dir}/#{header}" + if File.exist?(xmacro_path) + xmacro_content = File.read(xmacro_path, mode: "rb") + "#{prefix}/* Inlined X-macro: #{header} */\n#{xmacro_content}" + else + match + end + # Comment out all mruby-related includes and any header already in amalgam + elsif mruby_header?(header) || header == "mruby.h" || + @processed_headers.include?(header) || @gem_header_names.include?(header) + "#{prefix}// #{include_stmt} - in amalgam header" + elsif source_dir && quote_type == '"' && !header.include?("/") + # Check for local includes like "known_errors_def.cstub" + local_path = "#{source_dir}/#{header}" + if File.exist?(local_path) + local_content = File.read(local_path, mode: "rb") + "#{prefix}/* Inlined: #{header} */\n#{local_content}" + else + match + end + else + match + end + end + end + + # ========== Content Transformation ========== + + def extract_include_guard(content) + # Match #ifndef GUARD_NAME at start of file (after comments) + if content =~ /\A(?:\/\*.*?\*\/\s*|\/\/[^\n]*\n)*\s*#ifndef\s+(\w+)\s*\n\s*#define\s+\1/m + $1 + end + end + + def strip_include_guard(content, guard) + return content unless guard + + # Remove opening #ifndef GUARD but KEEP #define GUARD (needed for #ifdef checks) + content = content.sub(/\A((?:\/\*.*?\*\/\s*|\/\/[^\n]*\n)*\s*)#ifndef\s+#{guard}\s*\n/m, '\1') + + # Remove closing #endif (any comment is ok, guard name may not match exactly) + content = content.sub(/\n#endif\s*(?:\/\*[^*]*\*\/|\/\/[^\n]*)?\s*\z/m, "\n") + + content + end + + def transform_includes(content, inline: false) + # Match both "#include" and "# include" (preprocessor allows spaces) + # Only match includes at the start of a line (real preprocessor directives) + content.gsub(/^(\s*)(#\s*include\s+[<"]([^>"]+)[>"])/m) do |match| + prefix = $1 + include_stmt = $2 + header = $3 + + if already_included?(header) + # Keep original whitespace, comment out the include + "#{prefix}// #{include_stmt} - in amalgam" + elsif inline && mruby_header?(header) + # Recursively inline this header + "#{prefix}#{inline_header(header)}" + else + match + end + end + end + + def already_included?(header) + # Check if header was already processed + # Only use end_with? matching for mruby headers to avoid matching + # system headers like against mruby/time.h + if mruby_header?(header) + @processed_headers.any? { |h| h == header || h.end_with?("/#{header}") } + else + @processed_headers.include?(header) + end + end + + def mruby_header?(header) + # Check if this is an mruby header that should be inlined/transformed + header == "mruby.h" || header.start_with?("mruby/") || + %w[mrbconf.h boxing_nan.h boxing_word.h boxing_no.h common.h object.h value_array.h].include?(header) + end + + def inline_header(header) + # Find the full path for this header + if header == "mruby.h" || header == "mrbconf.h" || header.start_with?("mruby/") + full_header = header + # Check both source include dir and build include dir (for generated headers) + path = "#{include_dir}/#{header}" + path = "#{build_include_dir}/#{header}" unless File.exist?(path) + else + # Relative includes like "boxing_word.h" -> "mruby/boxing_word.h" + full_header = "mruby/#{header}" + path = "#{include_dir}/#{full_header}" + path = "#{build_include_dir}/#{full_header}" unless File.exist?(path) + end + + return "/* #{header} - not found */" unless File.exist?(path) + + # Mark as processed to avoid infinite recursion + @processed_headers << header + @processed_headers << full_header + + content = File.read(path, mode: "rb") + guard = extract_include_guard(content) + + if guard && @processed_guards[guard] + return "/* #{header} - already included */" + end + @processed_guards[guard] = true if guard + + content = strip_include_guard(content, guard) if guard + content = transform_includes(content, inline: true) + + "\n/* ======== #{full_header} (inlined) ======== */\n#{content}" + end + end +end diff --git a/lib/mruby/build.rb b/lib/mruby/build.rb index 4925cee33c..995611a4a7 100644 --- a/lib/mruby/build.rb +++ b/lib/mruby/build.rb @@ -1,12 +1,16 @@ require "mruby/core_ext" require "mruby/build/load_gems" require "mruby/build/command" +autoload :Find, "find" module MRuby autoload :Gem, "mruby/gem" autoload :Lockfile, "mruby/lockfile" autoload :Presym, "mruby/presym" + INSTALL_PREFIX = ENV['PREFIX'] || ENV['INSTALL_PREFIX'] || '/usr/local' + INSTALL_DESTDIR = ENV['DESTDIR'] || '' + class << self def targets @targets ||= {} @@ -56,7 +60,11 @@ class << self def mruby_config_path path = ENV['MRUBY_CONFIG'] || ENV['CONFIG'] if path.nil? || path.empty? - path = "#{MRUBY_ROOT}/build_config/default.rb" + path = if Dir.pwd != MRUBY_ROOT && File.file?("./build_config.rb") + "./build_config.rb" + else + "#{MRUBY_ROOT}/build_config/default.rb" + end elsif !File.file?(path) && !Pathname.new(path).absolute? f = "#{MRUBY_ROOT}/build_config/#{path}.rb" path = File.exist?(f) ? f : File.extname(path).empty? ? f : path @@ -71,8 +79,9 @@ def install_dir include Rake::DSL include LoadGems - attr_accessor :name, :bins, :exts, :file_separator, :build_dir, :gem_clone_dir, :defines + attr_accessor :name, :bins, :exts, :file_separator, :build_dir, :gem_clone_dir, :defines, :libdir_name attr_reader :products, :libmruby_core_objs, :libmruby_objs, :gems, :toolchains, :presym, :mrbc_build, :gem_dir_to_repo_url + attr_reader :install_excludes, :port_names alias libmruby libmruby_objs @@ -97,6 +106,9 @@ def initialize(name='host', build_dir=nil, internal: false, &block) @file_separator = '/' @build_dir = "#{build_dir}/#{@name}" @gem_clone_dir = "#{build_dir}/repos/#{@name}" + @libdir_name = (self.kind_of?(MRuby::CrossBuild) ? nil : ENV["MRUBY_SYSTEM_LIBDIR_NAME"]) || "lib" + @install_prefix = nil + @install_excludes = [] @defines = [] @cc = Command::Compiler.new(self, %w(.c), label: "CC") @cxx = Command::Compiler.new(self, %w(.cc .cxx .cpp), label: "CXX") @@ -122,13 +134,20 @@ def initialize(name='host', build_dir=nil, internal: false, &block) @enable_bintest = false @enable_test = false @enable_lock = true - @enable_presym = true @enable_benchmark = true @mrbcfile_external = false @internal = internal @toolchains = [] + @port_names = nil @gem_dir_to_repo_url = {} + # Add lambda instead of string because libdir_name or lib may be changed by user configuration + libmruby_core_name = nil + @install_excludes << ->(file) { + libmruby_core_name ||= File.join(libdir_name, libfile("libmruby_core")) + file == libmruby_core_name + } + MRuby.targets[@name] = current = self end @@ -137,13 +156,9 @@ def initialize(name='host', build_dir=nil, internal: false, &block) current.instance_eval(&block) ensure if current.libmruby_enabled? && !current.mrbcfile_external? - if current.presym_enabled? - current.create_mrbc_build if current.host? || current.gems["mruby-bin-mrbc"] - elsif current.host? - current.build_mrbc_exec - end + current.create_mrbc_build if current.host? || current.gems["mruby-bin-mrbc"] end - current.presym = Presym.new(current) if current.presym_enabled? + current.presym = Presym.new(current) end end @@ -162,23 +177,34 @@ def debug_enabled? def enable_debug compilers.each do |c| c.defines += %w(MRB_DEBUG) - if toolchains.any? { |toolchain| toolchain == "gcc" } - c.flags += %w(-g3 -O0) - end + c.setup_debug(self) end @mrbc.compile_options += ' -g' @enable_debug = true end - def presym_enabled? - @enable_presym - end - - def disable_presym - if @enable_presym - @enable_presym = false - compilers.each{|c| c.defines << "MRB_NO_PRESYM"} + # Set target port names for this build. + # Each gem compiles the first matching ports// directory; + # later names in the list act as fallbacks for gems that don't + # ship a port for the earlier names. + # conf.ports :esp32 + # conf.ports :rp2040, :posix # use rp2040 if available, else posix + def ports(*names) + @port_names = names.map { |n| n.to_s } + end + + # Returns the effective port names for this build. + # If not explicitly set, auto-detects :posix or :win for host builds. + def effective_ports + return @port_names if @port_names + if kind_of?(MRuby::CrossBuild) + [] + elsif ENV['OS'] == 'Windows_NT' || + ('A'..'Z').any? { |v| Dir.exist?("#{v}:") } + ['win'] + else + ['posix'] end end @@ -252,7 +278,7 @@ def compile_as_cxx(src, cxx_src = nil, obj = nil, includes = []) if cxx_src obj ||= cxx_src + @exts.object dsts = [obj] - dsts << (cxx_src + @exts.presym_preprocessed) if presym_enabled? + dsts << (cxx_src + @exts.presym_preprocessed) defines = [] include_paths = ["#{MRUBY_ROOT}/src", *includes] dsts.each do |dst| @@ -366,18 +392,39 @@ def define_rules end [@cc, *(@cxx if cxx_exception_enabled?)].each do |compiler| compiler.define_rules(@build_dir, MRUBY_ROOT, @exts.object) - compiler.define_rules(@build_dir, MRUBY_ROOT, @exts.presym_preprocessed) if presym_enabled? + compiler.define_rules(@build_dir, MRUBY_ROOT, @exts.presym_preprocessed) end end - def define_installer(src) - dst = "#{self.class.install_dir}/#{File.basename(src)}" + def define_installer_outline(src, dst) file dst => src do - install_D src, dst + _pp "GEN", src.relative_path, dst.relative_path + mkdir_p(File.dirname(dst)) + yield dst end dst end + if ENV['OS'] == 'Windows_NT' + def define_installer(src) + dst = "#{self.class.install_dir}/#{File.basename(src)}".pathmap("%X.bat") + define_installer_outline(src, dst) do + File.write dst, <<~BATCHFILE + @echo off + call "#{File.expand_path(src)}" %* + BATCHFILE + end + end + else + def define_installer(src) + dst = "#{self.class.install_dir}/#{File.basename(src)}" + define_installer_outline(src, dst) do + File.unlink(dst) rescue nil + File.symlink(src.relative_path_from(self.class.install_dir), dst) + end + end + end + def define_installer_if_needed(bin) exe = exefile("#{build_dir}/bin/#{bin}") host? ? define_installer(exe) : exe @@ -440,10 +487,10 @@ def run_test def run_bintest puts ">>> Bintest #{name} <<<" targets = @gems.select { |v| File.directory? "#{v.dir}/bintest" }.map { |v| filename v.dir } - targets << filename(".") if File.directory? "./bintest" mrbc = @gems["mruby-bin-mrbc"] ? exefile("#{@build_dir}/bin/mrbc") : mrbcfile env = {"BUILD_DIR" => @build_dir, "MRBCFILE" => mrbc} - sh env, "ruby test/bintest.rb#{verbose_flag} #{targets.join ' '}" + bintest = File.join(MRUBY_ROOT, "test/bintest.rb") + sh env, "ruby #{bintest}#{verbose_flag} #{targets.join ' '}" end def print_build_summary @@ -466,11 +513,11 @@ def print_build_summary end def libmruby_static - libfile("#{build_dir}/lib/libmruby") + libfile("#{build_dir}/#{libdir_name}/libmruby") end def libmruby_core_static - libfile("#{build_dir}/lib/libmruby_core") + libfile("#{build_dir}/#{libdir_name}/libmruby_core") end def libraries @@ -485,12 +532,35 @@ def internal? @internal end + def each_header_files(&block) + return to_enum(__method__) unless block + + basedir = File.join(MRUBY_ROOT, "include") + Find.find(basedir) do |d| + next unless File.file? d + yield d + end + + @gems.each { |g| g.each_header_files(&block) } + + self + end + + def install_prefix + @install_prefix || (self.name == "host" ? MRuby::INSTALL_PREFIX : + File.join(MRuby::INSTALL_PREFIX, "mruby/#{self.name}")) + end + + def install_prefix=(dir) + @install_prefix = dir&.to_s + end + protected attr_writer :presym def create_mrbc_build - exclusions = %i[@name @build_dir @gems @enable_test @enable_bintest @internal] + exclusions = %i[@name @build_dir @gems @enable_test @enable_bintest @internal @install_excludes] name = "#{@name}/mrbc" MRuby.targets.delete(name) build = self.class.new(name, internal: true){} @@ -508,7 +578,7 @@ def create_mrbc_build end build.build_mrbc_exec build.disable_libmruby - build.disable_presym + build.presym = Presym.new(build) @mrbc_build = build self.mrbcfile = build.mrbcfile build @@ -531,7 +601,6 @@ def initialize(name, build_dir=nil, &block) conf.toolchain conf.build_mrbc_exec conf.disable_libmruby - conf.disable_presym end end end @@ -554,18 +623,15 @@ def run_test def run_bintest puts ">>> Bintest #{name} <<<" targets = @gems.select { |v| File.directory? "#{v.dir}/bintest" }.map { |v| filename v.dir } - targets << filename(".") if File.directory? "./bintest" mrbc = @gems["mruby-bin-mrbc"] ? exefile("#{@build_dir}/bin/mrbc") : mrbcfile - emulator = @test_runner.command - emulator = @test_runner.shellquote(emulator) if emulator - env = { "BUILD_DIR" => @build_dir, "MRBCFILE" => mrbc, "EMULATOR" => @test_runner.emulator, } - sh env, "ruby test/bintest.rb#{verbose_flag} #{targets.join ' '}" + bintest = File.join(MRUBY_ROOT, "test/bintest.rb") + sh env, "ruby #{bintest}#{verbose_flag} #{targets.join ' '}" end protected diff --git a/lib/mruby/build/command.rb b/lib/mruby/build/command.rb index 31a595ef04..d9b2f3636c 100644 --- a/lib/mruby/build/command.rb +++ b/lib/mruby/build/command.rb @@ -25,11 +25,7 @@ def clone end def shellquote(s) - if ENV['OS'] == 'Windows_NT' - "\"#{s}\"" - else - "#{s}" - end + "\"#{s}\"" end private @@ -103,12 +99,16 @@ def define_rules(build_dir, source_dir='', out_ext=build.exts.object) gemrake = File.join(source_dir, "mrbgem.rake") rakedep = File.exist?(gemrake) ? [ gemrake ] : [] - if build_dir.include? "mrbgems/" + bd = build_dir + if bd.start_with?(MRUBY_ROOT) + bd = bd.sub(MRUBY_ROOT, '') + end + if bd.include? "mrbgems/" generated_file_matcher = Regexp.new("^#{Regexp.escape build_dir}/(?!mrbc/)(.*)#{Regexp.escape out_ext}$") else generated_file_matcher = Regexp.new("^#{Regexp.escape build_dir}/(?!mrbc/|mrbgems/.+/)(.*)#{Regexp.escape out_ext}$") end - source_exts.each do |ext, compile| + source_exts.each do |ext| rule generated_file_matcher => [ proc { |file| file.sub(generated_file_matcher, "#{source_dir}/\\1#{ext}") @@ -133,18 +133,24 @@ def define_rules(build_dir, source_dir='', out_ext=build.exts.object) end end + # This method can be redefined as a singleton method where appropriate. + # Manipulate `flags`, `include_paths` and/or more if necessary. + def setup_debug(conf) + nil + end + private # # === Example of +.d+ file # - # ==== Without -MP compiler flag + # ==== Without `-MP` compiler flag # # /build/host/src/array.o: /src/array.c \ # /include/mruby/common.h /include/mruby/value.h \ # /src/value_array.h # - # ==== With -MP compiler flag + # ==== With `-MP` compiler flag # # /build/host/src/array.o: /src/array.c \ # /include/mruby/common.h /include/mruby/value.h \ @@ -243,7 +249,7 @@ class Command::Yacc < Command def initialize(build) super - @command = 'bison' + @command = "ruby #{MRUBY_ROOT}/tools/lrama/exe/lrama" @compile_options = %q[-o "%{outfile}" "%{infile}"] end @@ -260,7 +266,7 @@ class Command::Gperf < Command def initialize(build) super @command = 'gperf' - @compile_options = %q[-L ANSI-C -C -p -j1 -i 1 -g -o -t -N mrb_reserved_word -k"1,3,$" "%{infile}" > "%{outfile}"] + @compile_options = %q[-L ANSI-C -C -j1 -i 1 -o -t -N mrb_reserved_word -k"1,3,$" "%{infile}" > "%{outfile}"] end def run(outfile, infile) @@ -311,7 +317,7 @@ def run_reset_hard(dir, checksum_hash) end def commit_hash(dir) - `#{@command} --git-dir #{shellquote(dir +'/.git')} --work-tree #{shellquote(dir)} rev-parse --verify HEAD`.strip + `#{@command} --git-dir #{shellquote(dir + '/.git')} --work-tree #{shellquote(dir)} rev-parse --verify HEAD`.strip end def current_branch(dir) @@ -337,16 +343,21 @@ def run(out, infiles, funcname, cdump: true, static: false) opt = @compile_options % {funcname: funcname} opt << " -S" if cdump opt << " -s" if static + # Have mrbc write to a private tempfile (-o) instead of stdout (-o-) + # to avoid pipe-inheritance races with parallel rake on Windows MinGW, + # where unrelated _pp build-progress lines from sibling workers can + # leak into the captured stdout and corrupt the generated C file. + tmpout = "#{out.path}.#{funcname}.mrbcout" + opt = opt.sub(/\s-o-(?=\s|\z)/, %Q[ -o "#{filename tmpout}"]) cmd = %["#{filename @command}" #{opt} #{filename(infiles).map{|f| %["#{f}"]}.join(' ')}] puts cmd if Rake.verbose - IO.popen(cmd, 'r+') do |io| - out.puts io.read - end - # if mrbc execution fail, drop the file - unless $?.success? + unless system(cmd) + rm_f tmpout rm_f out.path fail "Command failed with status (#{$?.exitstatus}): [#{cmd[0,42]}...]" end + out.write File.binread(tmpout) + rm_f tmpout end end diff --git a/lib/mruby/build/load_gems.rb b/lib/mruby/build/load_gems.rb index 3de929bba2..8198abca5a 100644 --- a/lib/mruby/build/load_gems.rb +++ b/lib/mruby/build/load_gems.rb @@ -32,14 +32,21 @@ def gem(gem_src, &block) gemrake = File.join(checkout.full_gemdir, "mrbgem.rake") fail "Can't find #{gemrake}" unless File.exist?(gemrake) + current_build = MRuby::Build.current + build = self.is_a?(MRuby::Build) ? self : MRuby::Build.current + MRuby::Build.current = build Gem.current = nil - load gemrake + begin + load gemrake + ensure + MRuby::Build.current = current_build + end return nil unless Gem.current current = Gem.current # Add it to gems current.dir = checkout.full_gemdir - current.build = self.is_a?(MRuby::Build) ? self : MRuby::Build.current + current.build = build current.build_config_initializer = block gems << current @@ -59,7 +66,7 @@ def initialize(gemdir, repo, branch, commit, canonical, path = nil) @gemdir = gemdir # Working copy of the gem @path = path # Path to gem relative to checkout - @repo = repo # Remote gem repo + @repo = repo # Remote gem repo @branch = branch # Branch to check out @commit = commit # Commit-id to use @@ -75,7 +82,7 @@ def canonical?() return @canonical; end def git?() return !!@repo; end def gemname() return File.basename(@gemdir); end - def hash() + def hash return [@gemdir, @repo, @branch, @commit, @canonical, @path].hash end @@ -194,7 +201,7 @@ def fromGemdir! gem_src = File.expand_path(gem_src, File.dirname(MRuby::GemBox.path)) else # Otherwise, we use the path to the build_config.rb file that - # requested this gem. This path was extracted earlier and + # requested this gem. This path was extracted earlier and # stored in @build_config_dir via the second argument of # 'initialize'. root_dir = @build_config_dir @@ -299,7 +306,7 @@ def fromGit!(url, branch) @build.gem_dir_to_repo_url[repo_dir] = url @build.locks[url] = { 'url' => url, - 'branch' => @build.git.current_branch(repo_dir), + 'branch' => branch || @build.git.current_branch(repo_dir), 'commit' => @build.git.commit_hash(repo_dir), } end @@ -308,10 +315,10 @@ def fromGit!(url, branch) end - # Test if this repo can be skipped. This will happen if it's + # Test if this repo can be skipped. This will happen if it's # already in @gem_checkouts and EITHER it is identical (same # url, branch, commit-ID and subdirectory path) as the current - # checkout OR its "canonical" flag is true. If it's in + # checkout OR its "canonical" flag is true. If it's in # @gem_checkouts and neither of these conditions is true, that's # a fatal error; it means there are multiple incompatible # versions of this gem to be checked out into this directory. @@ -322,7 +329,7 @@ def skip_this?(url, repo_dir, branch, commit) return false unless prev # Canonical declarations must precede all others. - fail("Attempted to re-declare #{prev.gemname} as canonical!\n" + + fail("Attempted to redeclare #{prev.gemname} as canonical!\n" + "('canonical' can only be used on its first declaration.)") if prev && @canonical @@ -340,14 +347,14 @@ def skip_this?(url, repo_dir, branch, commit) return true end - # Otherwise, we have a checkout conflict. This is an error. + # Otherwise, we have a checkout conflict. This is an error. fail "Conflicting gem definitions for '#{repo_dir}':\n" + " #{candidate}\n" + " #{prev}\n" end - # Retrieve a git repo if it's not present. Return + # Retrieve a git repo if it's not present. Return # [path_to_checkout, did_clone] def git_clone_dependency(url, repo_dir, commit, branch) return if diff --git a/lib/mruby/core_ext.rb b/lib/mruby/core_ext.rb index 1ad528c263..ac27376431 100644 --- a/lib/mruby/core_ext.rb +++ b/lib/mruby/core_ext.rb @@ -22,6 +22,27 @@ def relative_path def remove_leading_parents Pathname.new(".#{Pathname.new("/#{self}").cleanpath}").cleanpath.to_s end + + def replace_prefix_by(dirmap) + [self].replace_prefix_by(dirmap)[0] + end +end + +class Array + # Replace the prefix of each string that is a file path that contains in its own array. + # + # dirmap is a hash whose elements are `{ "path/to/old-prefix" => "path/to/new-prefix", ... }`. + # If it does not match any element of dirmap, the file path is not replaced. + def replace_prefix_by(dirmap) + dirmap = dirmap.map { |older, newer| [File.join(older, "/"), File.join(newer, "/")] } + dirmap.sort! + dirmap.reverse! + self.flatten.map do |e| + map = dirmap.find { |older, newer| e.start_with?(older) } + e = e.sub(map[0], map[1]) if map + e + end + end end def install_D(src, dst) diff --git a/lib/mruby/doc.rb b/lib/mruby/doc.rb new file mode 100644 index 0000000000..2f4c42aaee --- /dev/null +++ b/lib/mruby/doc.rb @@ -0,0 +1,51 @@ +autoload :Pathname, 'pathname' + +module MRuby + module Documentation + def Documentation.update_opcode_md + mrubydir = Pathname(MRUBY_ROOT) + path_ops_h = mrubydir + "include/mruby/ops.h" + path_opcode_md = mrubydir + "doc/internal/opcode.md" + + opspecs = { + "Z" => { prefix: "", modified: "-" }, + "B" => { prefix: "\'" }, + "BB" => { prefix: "\"" }, + "BBB" => { prefix: "\"" }, + "BS" => { prefix: "\'" }, + "BSS" => { prefix: "\'" }, + "S" => { prefix: "" }, + "W" => { prefix: "" }, + } + + diff = "" + + spliter = <<~'SPLITER' + | No. | Instruction Name | Operand type | Semantics + | --: | ---------------- | ------------ | --------------- + SPLITER + + diff = path_opcode_md.read.sub(/^#{Regexp.escape spliter}.*?(?=\z|^$\n)/m) do + repl = spliter + + ops = path_ops_h.read + pat = /^\s*OPCODE\s*\(\s*(\w+)\s*,\s*(\w+)\s*\)\s*(?:\/\*\s*(.*?)\s*\*\/\s*)?/ + ops.scan(pat).each_with_index do |(ins, opr, cmt), no| + if cmt + cmt.sub!(/\s*#.*/, "") + cmt.sub!(/\b(?=L_\w+\b)/, "OP_") + cmt.gsub!(/\b(Irep|Pool|R|Syms)\[([^\[\]]+)\]/, "\\1(\\2)") + cmt.gsub!(/[\\\|]/) { |m| "\\#{m}" } # Ruby-2.5 is not support "Numbered block parameter" + end + spec = opspecs[opr] or raise "unknown operand type: #{opr}" + item = format("| %3d | %-16s | %-12s | %s\n", no, "`OP_#{ins}`", "`#{spec[:modified] || opr}`", cmt && "`#{cmt}`") + repl << item + end + + repl + end + + path_opcode_md.binwrite diff + end + end +end diff --git a/lib/mruby/gem.rb b/lib/mruby/gem.rb index 0be0ada351..f1a403a463 100644 --- a/lib/mruby/gem.rb +++ b/lib/mruby/gem.rb @@ -25,6 +25,7 @@ class Specification alias :author= :authors= attr_accessor :rbfiles, :objs + attr_reader :port_objs attr_writer :test_objs, :test_rbfiles attr_accessor :test_args, :test_preload @@ -36,12 +37,14 @@ class Specification attr_accessor :export_include_paths attr_reader :generate_functions + attr_writer :skip_test attr_block MRuby::Build::COMMANDS def initialize(name, &block) @name = name @initializer = block + @post_user_config = nil @version = "0.0.0" @dependencies = [] @conflicts = [] @@ -52,23 +55,41 @@ def setup return if defined?(@bins) # return if already set up MRuby::Gem.current = self - MRuby::Build::COMMANDS.each do |command| - instance_variable_set("@#{command}", @build.send(command).clone) - end - @linker.run_attrs.each(&:clear) + reset_commands # for backward compatibility, reset the commands from the beginning. + @build_settings = nil @rbfiles = Dir.glob("#{@dir}/mrblib/**/*.rb").sort @objs = srcs_to_objs("src") + # Add platform-specific sources from the first matching + # ports// directory. effective_ports is a fallback + # chain: later names act as defaults for gems that don't ship + # a port for the earlier names. These objs are tracked + # separately so List#resolve_external_hal! can drop them when + # an external HAL provider (gem named hal--*) is loaded. + @port_objs = [] + build.effective_ports.each do |port| + port_dir = "#{@dir}/ports/#{port}" + if File.directory?(port_dir) + @port_objs = srcs_to_objs("ports/#{port}") + @objs += @port_objs + break + end + end + @test_preload = nil # 'test/assert.rb' @test_args = {} + @skip_test = false @bins = [] @cdump = true @requirements = [] @export_include_paths = [] - @export_include_paths << "#{dir}/include" if File.directory? "#{dir}/include" + # Headers in include/ are for inter-gem use only + # Headers in include/export/ are exported to external users via mruby-config + export_dir = "#{dir}/include/export" + @export_include_paths << export_dir if File.directory?(export_dir) instance_eval(&@initializer) @@ -82,14 +103,19 @@ def setup build.libmruby_objs << @objs instance_eval(&@build_config_initializer) if @build_config_initializer + instance_eval(&@post_user_config) if @post_user_config repo_url = build.gem_dir_to_repo_url[dir] build.locks[repo_url]['version'] = version if repo_url end + def skip_test? + @skip_test + end + def setup_compilers (core? ? [@cc, *(@cxx if build.cxx_exception_enabled?)] : compilers).each do |compiler| - compiler.define_rules build_dir, @dir, @build.exts.presym_preprocessed if build.presym_enabled? + compiler.define_rules build_dir, @dir, @build.exts.presym_preprocessed compiler.define_rules build_dir, @dir, @build.exts.object compiler.defines << %Q[MRBGEM_#{funcname.upcase}_VERSION=#{version}] compiler.include_paths << "#{@dir}/include" if File.directory? "#{@dir}/include" @@ -102,7 +128,8 @@ def for_windows? if build.kind_of?(MRuby::CrossBuild) return %w(x86_64-w64-mingw32 i686-w64-mingw32).include?(build.host_target) elsif build.kind_of?(MRuby::Build) - return ('A'..'Z').to_a.any? { |vol| Dir.exist?("#{vol}:") } + return ('A'..'Z').to_a.any? { |vol| Dir.exist?("#{vol}:") } || + ('a'..'z').to_a.any? { |vol| Dir.exist?("/#{vol}/") } end return false end @@ -112,7 +139,7 @@ def disable_cdump end def cdump? - build.presym_enabled? && @cdump + @cdump end def core? @@ -170,7 +197,7 @@ def search_package(name, version_query=nil) end def funcname - @funcname ||= @name.gsub('-', '_') + @funcname ||= @name.tr('-', '_') end def compilers @@ -186,6 +213,32 @@ def srcs_to_objs(src_dir_from_gem_dir) end end + # Register a block that runs after the user's `build.gem` block has + # been processed. Intended for gem authors to fill in defaults that + # depend on user-supplied configuration (e.g. auto-detect a library + # only if the user didn't specify which to use). + # + # Initialization order: + # 1. block in `MRuby::Gem::Specification.new` (gem author) + # 2. block in `build.gem` (user's build_config) + # 3. block in `post_user_config` (gem author, this hook) + def post_user_config(&block) + @post_user_config = block + end + + def build_settings(&blk) + @build_settings = blk + end + + def setup_build + if @build_settings + # by this point, build.cc or other commands may have been modified. + # therefore, reset the commands again before calling build_settings. + reset_commands + @build_settings.call(self) + end + end + def define_gem_init_builder file "#{build_dir}/gem_init.c" => [build.mrbcfile, __FILE__] + [rbfiles].flatten do |t| mkdir_p build_dir @@ -209,7 +262,6 @@ def generate_gem_init(fname) f.puts %Q[void mrb_#{funcname}_gem_final(mrb_state *mrb);] f.puts %Q[] f.puts %Q[void GENERATED_TMP_mrb_#{funcname}_gem_init(mrb_state *mrb) {] - f.puts %Q[ int ai = mrb_gc_arena_save(mrb);] f.puts %Q[ gem_mrblib_#{funcname}_proc_init_syms(mrb);] if !rbfiles.empty? && cdump? f.puts %Q[ mrb_#{funcname}_gem_init(mrb);] if objs != [objfile("#{build_dir}/gem_init")] unless rbfiles.empty? @@ -218,16 +270,7 @@ def generate_gem_init(fname) else f.puts %Q[ mrb_load_irep(mrb, gem_mrblib_irep_#{funcname});] end - f.puts %Q[ if (mrb->exc) {] - f.puts %Q[ mrb_print_error(mrb);] - f.puts %Q[ mrb_close(mrb);] - f.puts %Q[ exit(EXIT_FAILURE);] - f.puts %Q[ }] - f.puts %Q[ struct REnv *e = mrb_vm_ci_env(mrb->c->cibase);] - f.puts %Q[ mrb_vm_ci_env_set(mrb->c->cibase, NULL);] - f.puts %Q[ mrb_env_unshare(mrb, e, FALSE);] end - f.puts %Q[ mrb_gc_arena_restore(mrb, ai);] f.puts %Q[}] f.puts %Q[] f.puts %Q[void GENERATED_TMP_mrb_#{funcname}_gem_final(mrb_state *mrb) {] @@ -241,6 +284,8 @@ def print_gem_comment(f) f.puts %Q[ * This file is loading the irep] f.puts %Q[ * Ruby GEM code.] f.puts %Q[ *] + f.puts %Q[ * This file was generated by mruby/#{__FILE__.relative_path_from(MRUBY_ROOT)}.] + f.puts %Q[ *] f.puts %Q[ * IMPORTANT:] f.puts %Q[ * This file was generated!] f.puts %Q[ * All manual changes will get lost.] @@ -292,6 +337,26 @@ def version_ok?(req_versions) end end.all? end + + def each_header_files(&block) + return to_enum(__method__) unless block + + self.export_include_paths.flatten.uniq.compact.each do |dir| + Find.find(dir) do |d| + next unless File.file? d + yield d + end + end + + self + end + + private def reset_commands + MRuby::Build::COMMANDS.each do |command| + instance_variable_set("@#{command}", @build.send(command).clone) + end + @linker.run_attrs.each(&:clear) + end end # Specification class Version @@ -361,6 +426,7 @@ def initialize def each(&b) @ary.each(&b) + self end def [](name) @@ -373,6 +439,7 @@ def <<(gem) else # GEM was already added to this list end + self end def empty? @@ -388,7 +455,43 @@ def default_gem_params dep end end - def generate_gem_table build + def setup(build) + gemset = nil + begin + gemset_prev = gemset + self.each(&:setup) + gemset = self.setup_dependencies(build).keys.sort + end until gemset == gemset_prev + resolve_external_hal! + end + + # A gem named `hal--` is treated as the external + # HAL provider for the gem whose name's last `-`-separated + # segment is (e.g., hal-task-glib provides the HAL for + # mruby-task). The target gem's ports/* sources are dropped + # from its object list -- the matching gem supplies the + # implementation. Two or more matches is a build error. + def resolve_external_hal! + each do |target| + next if target.port_objs.nil? || target.port_objs.empty? + short = target.name.split('-').last + pattern = /\Ahal-#{Regexp.escape(short)}-.+\z/ + overriders = select { |g| g != target && g.name =~ pattern } + next if overriders.empty? + if overriders.size > 1 + fail "Multiple HAL providers for '#{target.name}': " + + overriders.map(&:name).join(", ") + end + target.objs.reject! { |o| target.port_objs.include?(o) } + end + end + + def setup_build + each(&:setup_build) + self + end + + def setup_dependencies(build) gem_table = each_with_object({}) { |spec, h| h[spec.name] = spec } default_gems = {} @@ -411,6 +514,12 @@ def generate_gem_table build end end + gem_table + end + + def generate_gem_table(build) + gem_table = setup_dependencies(build) + each do |g| g.dependencies.each do |dep| name = dep[:gem] @@ -452,8 +561,8 @@ def tsort_dependencies ary, table, all_dependency_listed = false table.instance_variable_set :@root_gems, ary class << table include TSort - def tsort_each_node &b - @root_gems.each &b + def tsort_each_node(&b) + @root_gems.each(&b) end def tsort_each_child(n, &b) @@ -491,6 +600,16 @@ def import_include_paths(g) # as circular dependency has already detected in the caller. import_include_paths(dep_g) + # Add dependency's include/ to compiler paths (for inter-gem use) + dep_include = "#{dep_g.dir}/include" + if File.directory?(dep_include) + g.compilers.each do |compiler| + compiler.include_paths << dep_include + compiler.include_paths.uniq! + end + end + + # Propagate any explicitly set export_include_paths dep_g.export_include_paths.uniq! g.compilers.each do |compiler| compiler.include_paths += dep_g.export_include_paths diff --git a/lib/mruby/presym.rb b/lib/mruby/presym.rb index 016c2b20e7..3ede3b6a8e 100644 --- a/lib/mruby/presym.rb +++ b/lib/mruby/presym.rb @@ -36,6 +36,7 @@ class Presym SYMBOL_TO_MACRO = { # Symbol => Macro # [prefix, suffix] => [prefix, suffix] + ["$" , "" ] => ["GV" , "" ], ["@@" , "" ] => ["CV" , "" ], ["@" , "" ] => ["IV" , "" ], ["" , "!" ] => ["" , "_B" ], @@ -46,6 +47,18 @@ class Presym C_STR_LITERAL_RE = /"(?:[^\\\"]|\\.)*"/ + ESCAPE_SEQUENCE_MAP = { + "a" => "\a", + "b" => "\b", + "e" => "\e", + "f" => "\f", + "n" => "\n", + "r" => "\r", + "t" => "\t", + "v" => "\v", + } + ESCAPE_SEQUENCE_MAP.keys.each { |k| ESCAPE_SEQUENCE_MAP[ESCAPE_SEQUENCE_MAP[k]] = k } + def initialize(build) @build = build end @@ -93,7 +106,18 @@ def write_table_header(presyms) f.puts "};" f.puts f.puts "static const char * const presym_name_table[] = {" - presyms.each{|sym| f.puts %| "#{sym}",|} + presyms.each do |sym| + sym = sym.gsub(/([\x01-\x1f\x7f-\xff])|("|\\)/n) { + case + when $1 + e = ESCAPE_SEQUENCE_MAP[$1] + e ? "\\#{e}" : '\\x%02x""' % $1.ord + when $2 + "\\#$2" + end + } + f.puts %| "#{sym}",| + end f.puts "};" end end @@ -102,7 +126,7 @@ def list_path @list_path ||= "#{@build.build_dir}/presym".freeze end - def header_dir; + def header_dir @header_dir ||= "#{@build.build_dir}/include/mruby/presym".freeze end @@ -119,7 +143,20 @@ def table_header_path def read_preprocessed(presym_hash, path) File.binread(path).scan(/<@! (.*?) !@>/) do |part,| literals = part.scan(C_STR_LITERAL_RE) - presym_hash[literals.map{|l| l[1..-2]}.join] = true unless literals.empty? + unless literals.empty? + literals = literals.map{|l| l[1..-2]} + literals.each do |e| + e.gsub!(/\\x([0-9A-Fa-f]{1,2})|\\(0[0-7]{,3})|\\([abefnrtv])|\\(.)/) do + case + when $1; $1.hex.chr(Encoding::BINARY) + when $2; $2.oct.chr(Encoding::BINARY) + when $3; ESCAPE_SEQUENCE_MAP[$3] + when $4; $4 + end + end + end + presym_hash[literals.join] = true + end end end diff --git a/mrbgems/default.gembox b/mrbgems/default.gembox index ae2de2ac2b..1143b19831 100644 --- a/mrbgems/default.gembox +++ b/mrbgems/default.gembox @@ -8,6 +8,9 @@ MRuby::GemBox.new do |conf| # Generate mrbc command conf.gem :core => "mruby-bin-mrbc" + # Generate mrdb command + conf.gem :core => "mruby-bin-debugger" + # Generate mirb command conf.gem :core => "mruby-bin-mirb" diff --git a/mrbgems/full-core.gembox b/mrbgems/full-core.gembox index e0d008f062..759c13215e 100644 --- a/mrbgems/full-core.gembox +++ b/mrbgems/full-core.gembox @@ -1,6 +1,6 @@ MRuby::GemBox.new do |conf| Dir.glob("#{root}/mrbgems/mruby-*/mrbgem.rake") do |x| - g = File.basename File.dirname x - conf.gem :core => g unless g =~ /^mruby-(?:bin-debugger|test)$/ + g = File.basename(File.dirname(x)) + conf.gem :core => g unless g =~ /^mruby-(?:bin-debugger|test|sleep)$/ end end diff --git a/mrbgems/hw-adc/README.md b/mrbgems/hw-adc/README.md new file mode 100644 index 0000000000..c0d2338409 --- /dev/null +++ b/mrbgems/hw-adc/README.md @@ -0,0 +1,74 @@ +# hw-adc - ADC peripheral interface for mruby + +This gem provides the `ADC` class for reading analog input from +mruby. It is designed for embedded platforms such as ESP32 and +RP2040. + +## Architecture + +Platform-specific HAL implementations are in `ports/` directories: + +- `ports/esp32/` - ESP32 using ESP-IDF ADC oneshot driver +- `ports/rp2040/` - RP2040 using Pico SDK ADC hardware + +## Build Configuration + +```ruby +MRuby::CrossBuild.new('esp32') do |conf| + conf.ports :esp32 + conf.gem core: 'hw-adc' +end +``` + +## Ruby API + +### ADC.new + +```ruby +adc = ADC.new(pin) +``` + +- `pin` - ADC-capable GPIO pin number (Integer) +- Raises `ArgumentError` if the pin is not valid for ADC + +### ADC#read / ADC#read_voltage + +Read the analog value as voltage (Float). + +```ruby +voltage = adc.read # => 1.65 +voltage = adc.read_voltage # same +``` + +### ADC#read_raw + +Read the raw ADC value (Integer, typically 0-4095 for 12-bit). + +```ruby +raw = adc.read_raw # => 2048 +``` + +### ADC#input + +Returns the ADC input channel number assigned during initialization. + +## HAL Interface + +To add support for a new platform, create a `ports//` +directory and implement the following C functions declared in +``: + +```c +int mrb_adc_init(uint8_t pin); +uint32_t mrb_adc_read_raw(uint8_t input); +float mrb_adc_read_voltage(uint8_t input); +``` + +- `mrb_adc_init` returns the input channel number (>= 0) on + success, negative on error +- `input` parameter for read functions is the value returned by + `mrb_adc_init` + +## License + +MIT diff --git a/mrbgems/hw-adc/include/mruby/adc.h b/mrbgems/hw-adc/include/mruby/adc.h new file mode 100644 index 0000000000..cc8d974411 --- /dev/null +++ b/mrbgems/hw-adc/include/mruby/adc.h @@ -0,0 +1,19 @@ +#ifndef MRUBY_ADC_H +#define MRUBY_ADC_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* HAL functions - implemented in ports//adc.c */ +int mrb_adc_init(uint8_t pin); +uint32_t mrb_adc_read_raw(uint8_t input); +float mrb_adc_read_voltage(uint8_t input); + +#ifdef __cplusplus +} +#endif + +#endif /* MRUBY_ADC_H */ diff --git a/mrbgems/hw-adc/mrbgem.rake b/mrbgems/hw-adc/mrbgem.rake new file mode 100644 index 0000000000..b96d7b564a --- /dev/null +++ b/mrbgems/hw-adc/mrbgem.rake @@ -0,0 +1,5 @@ +MRuby::Gem::Specification.new('hw-adc') do |spec| + spec.license = 'MIT' + spec.authors = ['HASUMI Hitoshi', 'mruby developers'] + spec.summary = 'ADC peripheral interface' +end diff --git a/mrbgems/hw-adc/mrblib/adc.rb b/mrbgems/hw-adc/mrblib/adc.rb new file mode 100644 index 0000000000..0c983c0905 --- /dev/null +++ b/mrbgems/hw-adc/mrblib/adc.rb @@ -0,0 +1,7 @@ +class ADC + def initialize(pin) + @input = __init(pin) + end + + attr_reader :input +end diff --git a/mrbgems/hw-adc/ports/esp32/adc.c b/mrbgems/hw-adc/ports/esp32/adc.c new file mode 100644 index 0000000000..3b5b7f08e2 --- /dev/null +++ b/mrbgems/hw-adc/ports/esp32/adc.c @@ -0,0 +1,147 @@ +#include +#include "esp_adc/adc_oneshot.h" +#include + +#define VOLTAGE_MAX 3.3f +#define RESOLUTION 4095 +#define UNIT_NUM 2 + +static adc_oneshot_unit_handle_t adc_handles[UNIT_NUM]; +static bool adc_initialized; + +static adc_unit_t +pin_to_unit(uint8_t pin) +{ + switch (pin) { +#if CONFIG_IDF_TARGET_ESP32C3 + case 0: case 1: case 2: case 3: case 4: + return ADC_UNIT_1; + case 5: + return ADC_UNIT_2; +#elif CONFIG_IDF_TARGET_ESP32 + case 32: case 33: case 34: case 35: case 36: case 39: + return ADC_UNIT_1; + case 0: case 2: case 4: case 12: case 13: case 14: case 15: + case 25: case 26: case 27: + return ADC_UNIT_2; +#elif (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32S2) + case 1: case 2: case 3: case 4: case 5: + case 6: case 7: case 8: case 9: case 10: + return ADC_UNIT_1; + case 11: case 12: case 13: case 14: case 15: + case 16: case 17: case 18: case 19: case 20: + return ADC_UNIT_2; +#endif + } + return -1; +} + +static adc_channel_t +pin_to_channel(uint8_t pin) +{ + switch (pin) { +#if CONFIG_IDF_TARGET_ESP32C3 + case 0: return ADC_CHANNEL_0; + case 1: return ADC_CHANNEL_1; + case 2: return ADC_CHANNEL_2; + case 3: return ADC_CHANNEL_3; + case 4: return ADC_CHANNEL_4; + case 5: return ADC_CHANNEL_0; +#elif CONFIG_IDF_TARGET_ESP32 + case 36: return ADC_CHANNEL_0; + case 39: return ADC_CHANNEL_3; + case 32: return ADC_CHANNEL_4; + case 33: return ADC_CHANNEL_5; + case 34: return ADC_CHANNEL_6; + case 35: return ADC_CHANNEL_7; + case 4: return ADC_CHANNEL_0; + case 0: return ADC_CHANNEL_1; + case 2: return ADC_CHANNEL_2; + case 15: return ADC_CHANNEL_3; + case 13: return ADC_CHANNEL_4; + case 12: return ADC_CHANNEL_5; + case 14: return ADC_CHANNEL_6; + case 27: return ADC_CHANNEL_7; + case 25: return ADC_CHANNEL_8; + case 26: return ADC_CHANNEL_9; +#elif (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32S2) + case 1: return ADC_CHANNEL_0; + case 2: return ADC_CHANNEL_1; + case 3: return ADC_CHANNEL_2; + case 4: return ADC_CHANNEL_3; + case 5: return ADC_CHANNEL_4; + case 6: return ADC_CHANNEL_5; + case 7: return ADC_CHANNEL_6; + case 8: return ADC_CHANNEL_7; + case 9: return ADC_CHANNEL_8; + case 10: return ADC_CHANNEL_9; + case 11: return ADC_CHANNEL_0; + case 12: return ADC_CHANNEL_1; + case 13: return ADC_CHANNEL_2; + case 14: return ADC_CHANNEL_3; + case 15: return ADC_CHANNEL_4; + case 16: return ADC_CHANNEL_5; + case 17: return ADC_CHANNEL_6; + case 18: return ADC_CHANNEL_7; + case 19: return ADC_CHANNEL_8; + case 20: return ADC_CHANNEL_9; +#endif + } + return -1; +} + +static int +init_units(void) +{ + adc_unit_t units[] = { ADC_UNIT_1, ADC_UNIT_2 }; + for (int i = 0; i < UNIT_NUM; i++) { + adc_oneshot_unit_init_cfg_t cfg = { + .unit_id = units[i], + .ulp_mode = ADC_ULP_MODE_DISABLE, + }; + if (adc_oneshot_new_unit(&cfg, &adc_handles[i]) != ESP_OK) + return -1; + } + return 0; +} + +int +mrb_adc_init(uint8_t pin) +{ + if (!adc_initialized) { + if (init_units() != 0) return -1; + adc_initialized = true; + } + + int ch = pin_to_channel(pin); + if (ch < 0) return -1; + + adc_unit_t unit = pin_to_unit(pin); + if (unit < 0 || unit >= UNIT_NUM) return -1; + + adc_oneshot_chan_cfg_t cfg = { + .atten = ADC_ATTEN_DB_12, + .bitwidth = ADC_BITWIDTH_DEFAULT, + }; + if (adc_oneshot_config_channel(adc_handles[unit], ch, &cfg) != ESP_OK) + return -1; + + return (int)pin; +} + +uint32_t +mrb_adc_read_raw(uint8_t input) +{ + adc_unit_t unit = pin_to_unit(input); + int ch = pin_to_channel(input); + int raw = 0; + if (unit >= 0 && unit < UNIT_NUM) + adc_oneshot_read(adc_handles[unit], ch, &raw); + return (uint32_t)raw; +} + +float +mrb_adc_read_voltage(uint8_t input) +{ + return (float)mrb_adc_read_raw(input) * VOLTAGE_MAX / RESOLUTION; +} diff --git a/mrbgems/hw-adc/ports/rp2040/adc.c b/mrbgems/hw-adc/ports/rp2040/adc.c new file mode 100644 index 0000000000..8d317c58c0 --- /dev/null +++ b/mrbgems/hw-adc/ports/rp2040/adc.c @@ -0,0 +1,43 @@ +#include +#include "hardware/adc.h" +#include + +#define VOLTAGE_MAX 3.3f +#define RESOLUTION 4095 +#define TEMP_INPUT 4 + +static bool adc_initialized; + +int +mrb_adc_init(uint8_t pin) +{ + if (!adc_initialized) { + adc_init(); + adc_initialized = true; + } + + uint input; + switch (pin) { + case 26: input = 0; break; + case 27: input = 1; break; + case 28: input = 2; break; + case 29: input = 3; break; + default: return -1; + } + adc_gpio_init(pin); + return (int)input; +} + +uint32_t +mrb_adc_read_raw(uint8_t input) +{ + adc_select_input(input); + return (uint32_t)adc_read(); +} + +float +mrb_adc_read_voltage(uint8_t input) +{ + adc_select_input(input); + return (float)adc_read() * VOLTAGE_MAX / RESOLUTION; +} diff --git a/mrbgems/hw-adc/src/adc.c b/mrbgems/hw-adc/src/adc.c new file mode 100644 index 0000000000..7c12e904d9 --- /dev/null +++ b/mrbgems/hw-adc/src/adc.c @@ -0,0 +1,48 @@ +#include +#include +#include +#include + +/* ADC#__init(pin) */ +static mrb_value +mrb_adc_m_init(mrb_state *mrb, mrb_value self) +{ + mrb_int pin; + mrb_get_args(mrb, "i", &pin); + int input = mrb_adc_init((uint8_t)pin); + if (input < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid ADC pin"); + } + return mrb_fixnum_value(input); +} + +/* ADC#read_raw */ +static mrb_value +mrb_adc_m_read_raw(mrb_state *mrb, mrb_value self) +{ + mrb_int input = mrb_integer(mrb_iv_get(mrb, self, MRB_IVSYM(input))); + return mrb_fixnum_value(mrb_adc_read_raw((uint8_t)input)); +} + +/* ADC#read_voltage (also aliased as read) */ +static mrb_value +mrb_adc_m_read_voltage(mrb_state *mrb, mrb_value self) +{ + mrb_int input = mrb_integer(mrb_iv_get(mrb, self, MRB_IVSYM(input))); + return mrb_float_value(mrb, (mrb_float)mrb_adc_read_voltage((uint8_t)input)); +} + +void +mrb_hw_adc_gem_init(mrb_state *mrb) +{ + struct RClass *cls = mrb_define_class_id(mrb, MRB_SYM(ADC), mrb->object_class); + mrb_define_method_id(mrb, cls, MRB_SYM(__init), mrb_adc_m_init, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, cls, MRB_SYM(read_raw), mrb_adc_m_read_raw, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, cls, MRB_SYM(read_voltage), mrb_adc_m_read_voltage, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, cls, MRB_SYM(read), mrb_adc_m_read_voltage, MRB_ARGS_NONE()); +} + +void +mrb_hw_adc_gem_final(mrb_state *mrb) +{ +} diff --git a/mrbgems/hw-gpio/README.md b/mrbgems/hw-gpio/README.md new file mode 100644 index 0000000000..f464a792f5 --- /dev/null +++ b/mrbgems/hw-gpio/README.md @@ -0,0 +1,144 @@ +# hw-gpio - GPIO peripheral interface for mruby + +This gem provides the `GPIO` class for controlling general-purpose +input/output pins from mruby. +It is designed for embedded platforms such as ESP32 and RP2040. + +## Architecture + +Platform-specific HAL implementations are in `ports/` directories: + +- `ports/esp32/` - ESP32 using ESP-IDF GPIO driver +- `ports/rp2040/` - RP2040 using Pico SDK + +The build system automatically compiles matching port sources based +on `conf.ports` setting. + +## Build Configuration + +```ruby +# For ESP32 +MRuby::CrossBuild.new('esp32') do |conf| + conf.ports :esp32 + conf.gem core: 'hw-gpio' +end + +# For RP2040 +MRuby::CrossBuild.new('rp2040') do |conf| + conf.ports :rp2040 + conf.gem core: 'hw-gpio' +end +``` + +## Ruby API + +### Constants (direction/mode flags) + +These flags can be combined with bitwise OR. + +| Flag | Value | Description | +| ------------------ | ------ | -------------------------- | +| `GPIO::IN` | `0x01` | Input mode | +| `GPIO::OUT` | `0x02` | Output mode | +| `GPIO::HIGH_Z` | `0x04` | High-impedance (tri-state) | +| `GPIO::PULL_UP` | `0x08` | Enable pull-up resistor | +| `GPIO::PULL_DOWN` | `0x10` | Enable pull-down resistor | +| `GPIO::OPEN_DRAIN` | `0x20` | Enable open-drain output | + +### GPIO.new + +```ruby +gpio = GPIO.new(pin, flags) +``` + +- `pin` - GPIO pin number (Integer) +- `flags` - direction and mode flags (combined with `|`) +- Exactly one of `IN`, `OUT`, or `HIGH_Z` must be specified +- `PULL_UP` and `PULL_DOWN` are mutually exclusive +- Raises `ArgumentError` on invalid flag combinations + +```ruby +# Input with pull-up +button = GPIO.new(2, GPIO::IN | GPIO::PULL_UP) + +# Output +led = GPIO.new(25, GPIO::OUT) +``` + +### Instance Methods + +#### GPIO#read + +Read the current pin value. + +```ruby +val = gpio.read # => 0 or 1 +``` + +#### GPIO#write + +Set the pin output value. + +```ruby +gpio.write(1) # set high +gpio.write(0) # set low +``` + +- Raises `ArgumentError` if value is not 0 or 1 + +#### GPIO#high? / GPIO#low? + +```ruby +gpio.high? # => true if read != 0 +gpio.low? # => true if read == 0 +``` + +#### GPIO#pin + +```ruby +gpio.pin # => pin number passed to new +``` + +#### GPIO#setmode + +Reconfigure pin direction and mode flags after initialization. + +```ruby +gpio.setmode(GPIO::OUT) +``` + +### Class Methods + +These operate directly on pin numbers without creating an instance. + +```ruby +GPIO.read_at(pin) # => 0 or 1 +GPIO.write_at(pin, val) # set pin output (0 or 1) +GPIO.set_dir_at(pin, flags) # set direction (IN/OUT/HIGH_Z) +GPIO.pull_up_at(pin) # enable pull-up +GPIO.pull_down_at(pin) # enable pull-down +GPIO.open_drain_at(pin) # enable open-drain +``` + +## HAL Interface + +To add support for a new platform, create a `ports//` +directory and implement the following C functions declared in +``: + +```c +void mrb_gpio_init(uint8_t pin); +void mrb_gpio_set_dir(uint8_t pin, uint8_t flags); +void mrb_gpio_pull_up(uint8_t pin); +void mrb_gpio_pull_down(uint8_t pin); +void mrb_gpio_open_drain(uint8_t pin); +int mrb_gpio_read(uint8_t pin); +void mrb_gpio_write(uint8_t pin, uint8_t val); +``` + +The port sources are compiled automatically when the build +configuration includes a matching `conf.ports` tag. + +## License + +MIT diff --git a/mrbgems/hw-gpio/include/mruby/gpio.h b/mrbgems/hw-gpio/include/mruby/gpio.h new file mode 100644 index 0000000000..ed1e7c10b3 --- /dev/null +++ b/mrbgems/hw-gpio/include/mruby/gpio.h @@ -0,0 +1,31 @@ +#ifndef MRUBY_GPIO_H +#define MRUBY_GPIO_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* direction/mode flags (bitmask) */ +#define MRB_GPIO_IN 0x01 +#define MRB_GPIO_OUT 0x02 +#define MRB_GPIO_HIGH_Z 0x04 +#define MRB_GPIO_PULL_UP 0x08 +#define MRB_GPIO_PULL_DOWN 0x10 +#define MRB_GPIO_OPEN_DRAIN 0x20 + +/* HAL functions - implemented by hw--gpio gems */ +void mrb_gpio_init(uint8_t pin); +void mrb_gpio_set_dir(uint8_t pin, uint8_t flags); +void mrb_gpio_pull_up(uint8_t pin); +void mrb_gpio_pull_down(uint8_t pin); +void mrb_gpio_open_drain(uint8_t pin); +int mrb_gpio_read(uint8_t pin); +void mrb_gpio_write(uint8_t pin, uint8_t val); + +#ifdef __cplusplus +} +#endif + +#endif /* MRUBY_GPIO_H */ diff --git a/mrbgems/hw-gpio/mrbgem.rake b/mrbgems/hw-gpio/mrbgem.rake new file mode 100644 index 0000000000..f117381d7f --- /dev/null +++ b/mrbgems/hw-gpio/mrbgem.rake @@ -0,0 +1,5 @@ +MRuby::Gem::Specification.new('hw-gpio') do |spec| + spec.license = 'MIT' + spec.authors = ['HASUMI Hitoshi', 'mruby developers'] + spec.summary = 'GPIO peripheral interface' +end diff --git a/mrbgems/hw-gpio/mrblib/gpio.rb b/mrbgems/hw-gpio/mrblib/gpio.rb new file mode 100644 index 0000000000..a9fd46418a --- /dev/null +++ b/mrbgems/hw-gpio/mrblib/gpio.rb @@ -0,0 +1,44 @@ +class GPIO + IN = 0x01 + OUT = 0x02 + HIGH_Z = 0x04 + PULL_UP = 0x08 + PULL_DOWN = 0x10 + OPEN_DRAIN = 0x20 + + attr_reader :pin + + def initialize(pin, flags) + @pin = pin + __init(pin) + setmode(flags) + end + + def setmode(flags) + dir = flags & (IN | OUT | HIGH_Z) + n = (flags & IN != 0 ? 1 : 0) + (flags & OUT != 0 ? 1 : 0) + (flags & HIGH_Z != 0 ? 1 : 0) + if n == 0 + raise ArgumentError, "specify one of IN, OUT, or HIGH_Z" + elsif n > 1 + raise ArgumentError, "IN, OUT, and HIGH_Z are exclusive" + end + GPIO.set_dir_at(@pin, dir) + + pull = flags & (PULL_UP | PULL_DOWN) + if pull == (PULL_UP | PULL_DOWN) + raise ArgumentError, "PULL_UP and PULL_DOWN are exclusive" + end + GPIO.pull_up_at(@pin) if pull == PULL_UP + GPIO.pull_down_at(@pin) if pull == PULL_DOWN + GPIO.open_drain_at(@pin) if flags & OPEN_DRAIN != 0 + nil + end + + def high? + read != 0 + end + + def low? + read == 0 + end +end diff --git a/mrbgems/hw-gpio/ports/esp32/gpio.c b/mrbgems/hw-gpio/ports/esp32/gpio.c new file mode 100644 index 0000000000..e3980c2772 --- /dev/null +++ b/mrbgems/hw-gpio/ports/esp32/gpio.c @@ -0,0 +1,50 @@ +#include "driver/gpio.h" +#include + +void +mrb_gpio_init(uint8_t pin) +{ + gpio_reset_pin(pin); +} + +void +mrb_gpio_set_dir(uint8_t pin, uint8_t flags) +{ + if (flags & MRB_GPIO_IN) { + gpio_set_direction(pin, GPIO_MODE_INPUT); + } + else if (flags & MRB_GPIO_OUT) { + gpio_set_direction(pin, GPIO_MODE_OUTPUT); + } + /* HIGH_Z: not yet implemented */ +} + +void +mrb_gpio_pull_up(uint8_t pin) +{ + gpio_pullup_en(pin); +} + +void +mrb_gpio_pull_down(uint8_t pin) +{ + gpio_pulldown_en(pin); +} + +void +mrb_gpio_open_drain(uint8_t pin) +{ + /* not yet implemented */ +} + +int +mrb_gpio_read(uint8_t pin) +{ + return gpio_get_level(pin); +} + +void +mrb_gpio_write(uint8_t pin, uint8_t val) +{ + gpio_set_level(pin, val); +} diff --git a/mrbgems/hw-gpio/ports/rp2040/gpio.c b/mrbgems/hw-gpio/ports/rp2040/gpio.c new file mode 100644 index 0000000000..7b69fa41e4 --- /dev/null +++ b/mrbgems/hw-gpio/ports/rp2040/gpio.c @@ -0,0 +1,51 @@ +#include +#include "hardware/gpio.h" +#include + +void +mrb_gpio_init(uint8_t pin) +{ + gpio_init(pin); +} + +void +mrb_gpio_set_dir(uint8_t pin, uint8_t flags) +{ + if (flags & MRB_GPIO_IN) { + gpio_set_dir(pin, false); + } + else if (flags & MRB_GPIO_OUT) { + gpio_set_dir(pin, true); + } + /* HIGH_Z: not yet implemented */ +} + +void +mrb_gpio_pull_up(uint8_t pin) +{ + gpio_pull_up(pin); +} + +void +mrb_gpio_pull_down(uint8_t pin) +{ + gpio_pull_down(pin); +} + +void +mrb_gpio_open_drain(uint8_t pin) +{ + /* not yet implemented */ +} + +int +mrb_gpio_read(uint8_t pin) +{ + return gpio_get(pin); +} + +void +mrb_gpio_write(uint8_t pin, uint8_t val) +{ + gpio_put(pin, val == 1); +} diff --git a/mrbgems/hw-gpio/src/gpio.c b/mrbgems/hw-gpio/src/gpio.c new file mode 100644 index 0000000000..92661926d5 --- /dev/null +++ b/mrbgems/hw-gpio/src/gpio.c @@ -0,0 +1,119 @@ +#include +#include +#include +#include + +static mrb_value +mrb_gpio_m_init(mrb_state *mrb, mrb_value self) +{ + mrb_int pin; + mrb_get_args(mrb, "i", &pin); + mrb_gpio_init((uint8_t)pin); + return mrb_nil_value(); +} + +/* GPIO.set_dir_at(pin, flags) */ +static mrb_value +mrb_gpio_s_set_dir_at(mrb_state *mrb, mrb_value klass) +{ + mrb_int pin, flags; + mrb_get_args(mrb, "ii", &pin, &flags); + mrb_gpio_set_dir((uint8_t)pin, (uint8_t)flags); + return mrb_nil_value(); +} + +/* GPIO.pull_up_at(pin) */ +static mrb_value +mrb_gpio_s_pull_up_at(mrb_state *mrb, mrb_value klass) +{ + mrb_int pin; + mrb_get_args(mrb, "i", &pin); + mrb_gpio_pull_up((uint8_t)pin); + return mrb_nil_value(); +} + +/* GPIO.pull_down_at(pin) */ +static mrb_value +mrb_gpio_s_pull_down_at(mrb_state *mrb, mrb_value klass) +{ + mrb_int pin; + mrb_get_args(mrb, "i", &pin); + mrb_gpio_pull_down((uint8_t)pin); + return mrb_nil_value(); +} + +/* GPIO.open_drain_at(pin) */ +static mrb_value +mrb_gpio_s_open_drain_at(mrb_state *mrb, mrb_value klass) +{ + mrb_int pin; + mrb_get_args(mrb, "i", &pin); + mrb_gpio_open_drain((uint8_t)pin); + return mrb_nil_value(); +} + +/* GPIO.read_at(pin) */ +static mrb_value +mrb_gpio_s_read_at(mrb_state *mrb, mrb_value klass) +{ + mrb_int pin; + mrb_get_args(mrb, "i", &pin); + return mrb_fixnum_value(mrb_gpio_read((uint8_t)pin)); +} + +/* GPIO.write_at(pin, val) */ +static mrb_value +mrb_gpio_s_write_at(mrb_state *mrb, mrb_value klass) +{ + mrb_int pin, val; + mrb_get_args(mrb, "ii", &pin, &val); + if (val != 0 && val != 1) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "value must be 0 or 1"); + } + mrb_gpio_write((uint8_t)pin, (uint8_t)val); + return mrb_nil_value(); +} + +/* GPIO#read */ +static mrb_value +mrb_gpio_m_read(mrb_state *mrb, mrb_value self) +{ + mrb_int pin = mrb_integer(mrb_iv_get(mrb, self, MRB_IVSYM(pin))); + return mrb_fixnum_value(mrb_gpio_read((uint8_t)pin)); +} + +/* GPIO#write(val) */ +static mrb_value +mrb_gpio_m_write(mrb_state *mrb, mrb_value self) +{ + mrb_int val; + mrb_get_args(mrb, "i", &val); + if (val != 0 && val != 1) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "value must be 0 or 1"); + } + mrb_int pin = mrb_integer(mrb_iv_get(mrb, self, MRB_IVSYM(pin))); + mrb_gpio_write((uint8_t)pin, (uint8_t)val); + return mrb_nil_value(); +} + +void +mrb_hw_gpio_gem_init(mrb_state *mrb) +{ + struct RClass *cls = mrb_define_class_id(mrb, MRB_SYM(GPIO), mrb->object_class); + + mrb_define_method_id(mrb, cls, MRB_SYM(__init), mrb_gpio_m_init, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, cls, MRB_SYM(read), mrb_gpio_m_read, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, cls, MRB_SYM(write), mrb_gpio_m_write, MRB_ARGS_REQ(1)); + + mrb_define_class_method_id(mrb, cls, MRB_SYM(set_dir_at), mrb_gpio_s_set_dir_at, MRB_ARGS_REQ(2)); + mrb_define_class_method_id(mrb, cls, MRB_SYM(pull_up_at), mrb_gpio_s_pull_up_at, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, cls, MRB_SYM(pull_down_at), mrb_gpio_s_pull_down_at, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, cls, MRB_SYM(open_drain_at), mrb_gpio_s_open_drain_at, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, cls, MRB_SYM(read_at), mrb_gpio_s_read_at, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, cls, MRB_SYM(write_at), mrb_gpio_s_write_at, MRB_ARGS_REQ(2)); +} + +void +mrb_hw_gpio_gem_final(mrb_state *mrb) +{ +} diff --git a/mrbgems/hw-i2c/README.md b/mrbgems/hw-i2c/README.md new file mode 100644 index 0000000000..768c07306b --- /dev/null +++ b/mrbgems/hw-i2c/README.md @@ -0,0 +1,162 @@ +# hw-i2c - I2C peripheral interface for mruby + +This gem provides the `I2C` class for communicating with I2C devices from mruby. It is designed for embedded platforms such as ESP32 and RP2040. + +## Architecture + +Platform-specific HAL implementations are in `ports/` directories: + +- `ports/esp32/` - ESP32 using ESP-IDF I2C master driver +- `ports/rp2040/` - RP2040 using Pico SDK + +The build system automatically compiles matching port sources based +on `conf.ports` setting. + +## Build Configuration + +```ruby +# For ESP32 +MRuby::CrossBuild.new('esp32') do |conf| + conf.ports :esp32 + conf.gem core: 'hw-i2c' +end + +# For RP2040 +MRuby::CrossBuild.new('rp2040') do |conf| + conf.ports :rp2040 + conf.gem core: 'hw-i2c' +end +``` + +## Ruby API + +### I2C.new + +```ruby +i2c = I2C.new( + unit: :ESP32_I2C0, # I2C unit name (platform-specific, required) + frequency: 100_000, # bus frequency in Hz (default: 100kHz) + sda_pin: 21, # SDA GPIO pin number (default: -1 for platform default) + scl_pin: 22, # SCL GPIO pin number (default: -1 for platform default) + timeout: 500 # default timeout in ms (default: 500) +) +``` + +#### Unit Names + +| Platform | Available Units | +| -------- | ------------------------------ | +| ESP32 | `:ESP32_I2C0`, `:ESP32_I2C1` | +| RP2040 | `:RP2040_I2C0`, `:RP2040_I2C1` | + +On RP2040, if `sda_pin` or `scl_pin` is -1, the Pico SDK default pins are used. + +### I2C#write + +Write data to an I2C device. + +```ruby +i2c.write(addr, *data, timeout: 500) +``` + +- `addr` - 7-bit I2C device address (Integer) +- `data` - one or more data arguments, each can be: + - **Integer** - a single byte (0-255) + - **Array of Integer** - multiple bytes + - **String** - raw bytes +- `timeout:` - optional timeout in ms (overrides instance default) +- Returns the number of bytes written (Integer) +- Raises `IOError` on failure + +```ruby +# Write a single byte +i2c.write(0x3C, 0x00) + +# Write multiple bytes +i2c.write(0x3C, 0x00, [0xAE, 0xD5, 0x80]) + +# Write a string +i2c.write(0x3C, "hello") + +# Mix data types +i2c.write(0x3C, 0x40, [0x01, 0x02], "data") +``` + +### I2C#read + +Read data from an I2C device. Optionally write data before reading (repeated START). + +```ruby +i2c.read(addr, length, *write_data, timeout: 500) +``` + +- `addr` - 7-bit I2C device address (Integer) +- `length` - number of bytes to read (Integer, must be positive) +- `write_data` - optional data to write before reading (same format as `write`). When provided, the gem performs a write-then-read transaction using I2C repeated START condition. This is the standard way to read from a specific register. +- `timeout:` - optional timeout in ms (overrides instance default) +- Returns the data read (String) +- Raises `IOError` on failure, `ArgumentError` if length <= 0 + +```ruby +# Simple read (2 bytes from device) +data = i2c.read(0x50, 2) + +# Register read: write register address 0x00, then read 2 bytes +data = i2c.read(0x50, 2, 0x00) + +# Multi-byte register address +data = i2c.read(0x50, 4, [0x00, 0x10]) +``` + +### I2C#scan + +Scan the I2C bus for responsive devices. + +```ruby +i2c.scan(timeout: 500) +``` + +- `timeout:` - optional timeout per probe in ms +- Returns an Array of 7-bit addresses (Integer) that responded + +```ruby +found = i2c.scan +# => [0x3C, 0x50, 0x68] +``` + +## HAL Interface + +To add support for a new platform, create a `ports//` +directory and implement the following C functions declared in +``: + +```c +int mrb_i2c_unit_name_to_num(const char *name); +mrb_i2c_status mrb_i2c_init(int unit, uint32_t freq, int8_t sda, int8_t scl); +int mrb_i2c_read(int unit, uint8_t addr, uint8_t *dst, size_t len, + uint32_t timeout_us); +int mrb_i2c_write(int unit, uint8_t addr, const uint8_t *src, size_t len, + uint32_t timeout_us); +int mrb_i2c_write_read(int unit, uint8_t addr, + const uint8_t *src, size_t wlen, + uint8_t *dst, size_t rlen, + uint32_t timeout_us); +``` + +The port sources are compiled automatically when the build +configuration includes a matching `conf.ports` tag. + +### Error Codes + +```c +typedef enum { + MRB_I2C_OK = 0, + MRB_I2C_ERROR_UNIT = -1, /* invalid or uninitialized unit */ + MRB_I2C_ERROR_TIMEOUT = -2, /* communication timeout */ + MRB_I2C_ERROR_NACK = -3, /* device did not acknowledge */ +} mrb_i2c_status; +``` + +## License + +MIT diff --git a/mrbgems/hw-i2c/include/mruby/i2c.h b/mrbgems/hw-i2c/include/mruby/i2c.h new file mode 100644 index 0000000000..d49a55b076 --- /dev/null +++ b/mrbgems/hw-i2c/include/mruby/i2c.h @@ -0,0 +1,31 @@ +#ifndef MRUBY_I2C_H +#define MRUBY_I2C_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + MRB_I2C_OK = 0, + MRB_I2C_ERROR_UNIT = -1, + MRB_I2C_ERROR_TIMEOUT = -2, + MRB_I2C_ERROR_NACK = -3, +} mrb_i2c_status; + +/* HAL functions - implemented by hw--i2c gems */ +mrb_i2c_status mrb_i2c_init(int unit, uint32_t freq, int8_t sda, int8_t scl); +int mrb_i2c_read(int unit, uint8_t addr, uint8_t *dst, size_t len, uint32_t timeout_us); +int mrb_i2c_write(int unit, uint8_t addr, const uint8_t *src, size_t len, uint32_t timeout_us); +int mrb_i2c_write_read(int unit, uint8_t addr, const uint8_t *src, size_t wlen, + uint8_t *dst, size_t rlen, uint32_t timeout_us); +int mrb_i2c_unit_name_to_num(const char *name); + +#ifdef __cplusplus +} +#endif + +#endif /* MRUBY_I2C_H */ diff --git a/mrbgems/hw-i2c/mrbgem.rake b/mrbgems/hw-i2c/mrbgem.rake new file mode 100644 index 0000000000..584e2b2a24 --- /dev/null +++ b/mrbgems/hw-i2c/mrbgem.rake @@ -0,0 +1,5 @@ +MRuby::Gem::Specification.new('hw-i2c') do |spec| + spec.license = 'MIT' + spec.authors = ['HASUMI Hitoshi', 'mruby developers'] + spec.summary = 'I2C peripheral interface' +end diff --git a/mrbgems/hw-i2c/mrblib/i2c.rb b/mrbgems/hw-i2c/mrblib/i2c.rb new file mode 100644 index 0000000000..49e506c216 --- /dev/null +++ b/mrbgems/hw-i2c/mrblib/i2c.rb @@ -0,0 +1,21 @@ +class I2C + DEFAULT_FREQUENCY = 100_000 # Hz + DEFAULT_TIMEOUT = 500 # ms + + def initialize(unit:, frequency: DEFAULT_FREQUENCY, sda_pin: -1, scl_pin: -1, timeout: DEFAULT_TIMEOUT) + @timeout = timeout + @unit_num = __init(unit.to_s, frequency, sda_pin, scl_pin) + end + + def scan(timeout: @timeout) + found = [] + (0x08..0x77).each do |addr| + begin + read(addr, 1, timeout: timeout) + found << addr + rescue IOError + end + end + found + end +end diff --git a/mrbgems/hw-i2c/ports/esp32/i2c.c b/mrbgems/hw-i2c/ports/esp32/i2c.c new file mode 100644 index 0000000000..cfb7aedd6f --- /dev/null +++ b/mrbgems/hw-i2c/ports/esp32/i2c.c @@ -0,0 +1,117 @@ +#include +#include "driver/i2c_master.h" +#include + +typedef struct { + i2c_master_bus_handle_t bus; + uint32_t freq; + bool initialized; +} i2c_ctx; + +/* ESP32 supports up to 2 I2C ports */ +static i2c_ctx ctx[2]; + +static bool +valid_unit(int unit) +{ + return unit >= 0 && unit <= 1 && ctx[unit].initialized; +} + +static uint32_t +us_to_ms(uint32_t timeout_us) +{ + uint32_t ms = (timeout_us + 999) / 1000; + return (ms < 10) ? 10 : ms; +} + +static i2c_master_dev_handle_t +add_device(int unit, uint8_t addr) +{ + i2c_device_config_t cfg = { + .dev_addr_length = I2C_ADDR_BIT_LEN_7, + .device_address = addr, + .scl_speed_hz = ctx[unit].freq, + }; + i2c_master_dev_handle_t dev; + if (i2c_master_bus_add_device(ctx[unit].bus, &cfg, &dev) != ESP_OK) + return NULL; + return dev; +} + +int +mrb_i2c_unit_name_to_num(const char *name) +{ + if (strcmp(name, "ESP32_I2C0") == 0) return 0; + if (strcmp(name, "ESP32_I2C1") == 0) return 1; + return MRB_I2C_ERROR_UNIT; +} + +mrb_i2c_status +mrb_i2c_init(int unit, uint32_t freq, int8_t sda, int8_t scl) +{ + if (unit < 0 || unit > 1) return MRB_I2C_ERROR_UNIT; + + if (ctx[unit].initialized) { + i2c_del_master_bus(ctx[unit].bus); + ctx[unit].initialized = false; + } + + i2c_master_bus_config_t cfg = { + .clk_source = I2C_CLK_SRC_DEFAULT, + .i2c_port = unit, + .scl_io_num = scl, + .sda_io_num = sda, + .glitch_ignore_cnt = 7, + .flags.enable_internal_pullup = true, + }; + + esp_err_t err = i2c_new_master_bus(&cfg, &ctx[unit].bus); + if (err != ESP_OK) return MRB_I2C_ERROR_UNIT; + + ctx[unit].initialized = true; + ctx[unit].freq = freq; + return MRB_I2C_OK; +} + +int +mrb_i2c_read(int unit, uint8_t addr, uint8_t *dst, size_t len, + uint32_t timeout_us) +{ + if (!valid_unit(unit)) return MRB_I2C_ERROR_UNIT; + + i2c_master_dev_handle_t dev = add_device(unit, addr); + if (!dev) return -1; + + esp_err_t err = i2c_master_receive(dev, dst, len, us_to_ms(timeout_us)); + i2c_master_bus_rm_device(dev); + return (err == ESP_OK) ? (int)len : -1; +} + +int +mrb_i2c_write(int unit, uint8_t addr, const uint8_t *src, size_t len, + uint32_t timeout_us) +{ + if (!valid_unit(unit)) return MRB_I2C_ERROR_UNIT; + + i2c_master_dev_handle_t dev = add_device(unit, addr); + if (!dev) return -1; + + esp_err_t err = i2c_master_transmit(dev, src, len, us_to_ms(timeout_us)); + i2c_master_bus_rm_device(dev); + return (err == ESP_OK) ? (int)len : -1; +} + +int +mrb_i2c_write_read(int unit, uint8_t addr, const uint8_t *src, size_t wlen, + uint8_t *dst, size_t rlen, uint32_t timeout_us) +{ + if (!valid_unit(unit)) return MRB_I2C_ERROR_UNIT; + + i2c_master_dev_handle_t dev = add_device(unit, addr); + if (!dev) return -1; + + esp_err_t err = i2c_master_transmit_receive(dev, src, wlen, dst, rlen, + us_to_ms(timeout_us)); + i2c_master_bus_rm_device(dev); + return (err == ESP_OK) ? (int)rlen : -1; +} diff --git a/mrbgems/hw-i2c/ports/rp2040/i2c.c b/mrbgems/hw-i2c/ports/rp2040/i2c.c new file mode 100644 index 0000000000..a0fac95cca --- /dev/null +++ b/mrbgems/hw-i2c/ports/rp2040/i2c.c @@ -0,0 +1,64 @@ +#include +#include "pico/stdlib.h" +#include "hardware/i2c.h" +#include + +#define UNIT_SELECT(u) \ + i2c_inst_t *inst; \ + switch (u) { \ + case 0: inst = i2c0; break; \ + case 1: inst = i2c1; break; \ + default: return MRB_I2C_ERROR_UNIT; \ + } + +int +mrb_i2c_unit_name_to_num(const char *name) +{ + if (strcmp(name, "RP2040_I2C0") == 0) return 0; + if (strcmp(name, "RP2040_I2C1") == 0) return 1; + return MRB_I2C_ERROR_UNIT; +} + +mrb_i2c_status +mrb_i2c_init(int unit, uint32_t freq, int8_t sda, int8_t scl) +{ + UNIT_SELECT(unit); + i2c_init(inst, freq); + + if (sda < 0) sda = PICO_DEFAULT_I2C_SDA_PIN; + if (scl < 0) scl = PICO_DEFAULT_I2C_SCL_PIN; + gpio_set_function(sda, GPIO_FUNC_I2C); + gpio_set_function(scl, GPIO_FUNC_I2C); + gpio_pull_up(sda); + gpio_pull_up(scl); + + return MRB_I2C_OK; +} + +int +mrb_i2c_read(int unit, uint8_t addr, uint8_t *dst, size_t len, + uint32_t timeout_us) +{ + UNIT_SELECT(unit); + return i2c_read_timeout_us(inst, addr, dst, len, false, timeout_us); +} + +int +mrb_i2c_write(int unit, uint8_t addr, const uint8_t *src, size_t len, + uint32_t timeout_us) +{ + UNIT_SELECT(unit); + return i2c_write_timeout_us(inst, addr, src, len, false, timeout_us); +} + +int +mrb_i2c_write_read(int unit, uint8_t addr, const uint8_t *src, size_t wlen, + uint8_t *dst, size_t rlen, uint32_t timeout_us) +{ + UNIT_SELECT(unit); + /* write with nostop=true (no STOP, keeps bus for repeated START) */ + int ret = i2c_write_timeout_us(inst, addr, src, wlen, true, timeout_us); + if (ret < 0) return ret; + /* read with nostop=false (STOP after read) */ + return i2c_read_timeout_us(inst, addr, dst, rlen, false, timeout_us); +} diff --git a/mrbgems/hw-i2c/src/i2c.c b/mrbgems/hw-i2c/src/i2c.c new file mode 100644 index 0000000000..46aab65542 --- /dev/null +++ b/mrbgems/hw-i2c/src/i2c.c @@ -0,0 +1,200 @@ +#include +#include +#include +#include +#include +#include +#include + +#define STACK_BUF_SIZE 256 +#define E_IO_ERROR mrb_exc_get_id(mrb, MRB_SYM(IOError)) + +static size_t +i2c_fill_buf(mrb_state *mrb, uint8_t *buf, mrb_value *args, mrb_int argc) +{ + size_t pos = 0; + for (mrb_int i = 0; i < argc; i++) { + switch (mrb_type(args[i])) { + case MRB_TT_ARRAY: { + mrb_int alen = RARRAY_LEN(args[i]); + const mrb_value *aptr = RARRAY_PTR(args[i]); + for (mrb_int j = 0; j < alen; j++) { + if (!mrb_integer_p(aptr[j])) { + mrb_raise(mrb, E_TYPE_ERROR, "array element must be Integer"); + } + buf[pos++] = (uint8_t)mrb_integer(aptr[j]); + } + break; + } + case MRB_TT_INTEGER: + buf[pos++] = (uint8_t)mrb_integer(args[i]); + break; + case MRB_TT_STRING: + memcpy(&buf[pos], RSTRING_PTR(args[i]), RSTRING_LEN(args[i])); + pos += RSTRING_LEN(args[i]); + break; + default: + break; + } + } + return pos; +} + +static size_t +i2c_calc_size(mrb_state *mrb, mrb_value *args, mrb_int argc) +{ + size_t total = 0; + for (mrb_int i = 0; i < argc; i++) { + switch (mrb_type(args[i])) { + case MRB_TT_ARRAY: + total += RARRAY_LEN(args[i]); + break; + case MRB_TT_INTEGER: + total += 1; + break; + case MRB_TT_STRING: + total += RSTRING_LEN(args[i]); + break; + default: + mrb_raise(mrb, E_TYPE_ERROR, "Integer, Array, or String expected"); + } + } + return total; +} + +/* Allocate write buffer, fill it, return pointer and size. + Caller must free if need_free is set. */ +static uint8_t* +i2c_build_buf(mrb_state *mrb, mrb_value *args, mrb_int argc, + size_t *out_len, uint8_t *sbuf, mrb_bool *need_free) +{ + size_t total = i2c_calc_size(mrb, args, argc); + uint8_t *buf; + if (total <= STACK_BUF_SIZE) { + buf = sbuf; + *need_free = FALSE; + } + else { + buf = (uint8_t*)mrb_malloc(mrb, total); + *need_free = TRUE; + } + i2c_fill_buf(mrb, buf, args, argc); + *out_len = total; + return buf; +} + +static mrb_int +get_timeout(mrb_state *mrb, mrb_value self, mrb_value kw) +{ + if (mrb_undef_p(kw)) { + return mrb_integer(mrb_iv_get(mrb, self, MRB_IVSYM(timeout))); + } + return mrb_integer(kw); +} + +static mrb_value +mrb_i2c_m_write(mrb_state *mrb, mrb_value self) +{ + mrb_value *args; + mrb_int argc, addr; + const mrb_sym kw_names[] = { MRB_SYM(timeout) }; + mrb_value kw_values[1]; + mrb_kwargs kwargs = { 1, 0, kw_names, kw_values, NULL }; + + mrb_get_args(mrb, "i*:", &addr, &args, &argc, &kwargs); + + mrb_int timeout_ms = get_timeout(mrb, self, kw_values[0]); + mrb_int unit = mrb_integer(mrb_iv_get(mrb, self, MRB_IVSYM(unit_num))); + + uint8_t sbuf[STACK_BUF_SIZE]; + size_t wlen; + mrb_bool need_free; + uint8_t *buf = i2c_build_buf(mrb, args, argc, &wlen, sbuf, &need_free); + + int ret = mrb_i2c_write((int)unit, (uint8_t)addr, buf, wlen, + (uint32_t)timeout_ms * 1000); + if (need_free) mrb_free(mrb, buf); + if (ret < 0) { + mrb_raise(mrb, E_IO_ERROR, "I2C write failed"); + } + return mrb_fixnum_value(ret); +} + +static mrb_value +mrb_i2c_m_read(mrb_state *mrb, mrb_value self) +{ + mrb_value *args; + mrb_int argc, addr, len; + const mrb_sym kw_names[] = { MRB_SYM(timeout) }; + mrb_value kw_values[1]; + mrb_kwargs kwargs = { 1, 0, kw_names, kw_values, NULL }; + + mrb_get_args(mrb, "ii*:", &addr, &len, &args, &argc, &kwargs); + + if (len <= 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "read length must be positive"); + } + + mrb_int timeout_ms = get_timeout(mrb, self, kw_values[0]); + mrb_int unit = mrb_integer(mrb_iv_get(mrb, self, MRB_IVSYM(unit_num))); + uint32_t timeout_us = (uint32_t)timeout_ms * 1000; + + uint8_t *rxbuf = (uint8_t*)mrb_malloc(mrb, len); + int ret; + + if (argc > 0) { + /* write-then-read (repeated START) */ + uint8_t sbuf[STACK_BUF_SIZE]; + size_t wlen; + mrb_bool need_free; + uint8_t *wbuf = i2c_build_buf(mrb, args, argc, &wlen, sbuf, &need_free); + + ret = mrb_i2c_write_read((int)unit, (uint8_t)addr, + wbuf, wlen, rxbuf, (size_t)len, timeout_us); + if (need_free) mrb_free(mrb, wbuf); + } + else { + ret = mrb_i2c_read((int)unit, (uint8_t)addr, rxbuf, (size_t)len, timeout_us); + } + + if (ret < 0) { + mrb_free(mrb, rxbuf); + mrb_raise(mrb, E_IO_ERROR, "I2C read failed"); + } + mrb_value str = mrb_str_new(mrb, (const char*)rxbuf, ret); + mrb_free(mrb, rxbuf); + return str; +} + +static mrb_value +mrb_i2c_m_init(mrb_state *mrb, mrb_value self) +{ + const char *unit; + mrb_int freq, sda, scl; + + mrb_get_args(mrb, "ziii", &unit, &freq, &sda, &scl); + + int num = mrb_i2c_unit_name_to_num(unit); + if (num < 0) { + mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown I2C unit: %s", unit); + } + mrb_i2c_status st = mrb_i2c_init(num, (uint32_t)freq, (int8_t)sda, (int8_t)scl); + if (st != MRB_I2C_OK) { + mrb_raise(mrb, E_IO_ERROR, "I2C init failed"); + } + return mrb_fixnum_value(num); +} + +void +mrb_hw_i2c_gem_init(mrb_state *mrb) +{ + struct RClass *cls = mrb_define_class_id(mrb, MRB_SYM(I2C), mrb->object_class); + mrb_define_method_id(mrb, cls, MRB_SYM(__init), mrb_i2c_m_init, MRB_ARGS_REQ(4)); + mrb_define_method_id(mrb, cls, MRB_SYM(write), mrb_i2c_m_write, MRB_ARGS_REQ(1)|MRB_ARGS_REST()|MRB_ARGS_KEY(1, 0)); + mrb_define_method_id(mrb, cls, MRB_SYM(read), mrb_i2c_m_read, MRB_ARGS_REQ(2)|MRB_ARGS_REST()|MRB_ARGS_KEY(1, 0)); +} + +void +mrb_hw_i2c_gem_final(mrb_state *mrb) +{ +} diff --git a/mrbgems/hw-pwm/README.md b/mrbgems/hw-pwm/README.md new file mode 100644 index 0000000000..6a4ff01cea --- /dev/null +++ b/mrbgems/hw-pwm/README.md @@ -0,0 +1,87 @@ +# hw-pwm - PWM peripheral interface for mruby + +This gem provides the `PWM` class for Pulse Width Modulation +output from mruby. It is designed for embedded platforms such as +ESP32 and RP2040. + +## Architecture + +Platform-specific HAL implementations are in `ports/` directories: + +- `ports/esp32/` - ESP32 using LEDC peripheral +- `ports/rp2040/` - RP2040 using Pico SDK PWM hardware + +## Build Configuration + +```ruby +MRuby::CrossBuild.new('esp32') do |conf| + conf.ports :esp32 + conf.gem core: 'hw-pwm' +end +``` + +## Ruby API + +### PWM.new + +```ruby +pwm = PWM.new(pin, frequency: 1000, duty: 50) +``` + +- `pin` - GPIO pin number (Integer) +- `frequency:` - frequency in Hz (default: 0, disabled) +- `duty:` - duty cycle in percent 0-100 (default: 50) + +### PWM#frequency(freq) + +Set frequency in Hz. Returns the frequency. Setting 0 disables +output. + +```ruby +pwm.frequency(1000) # 1 kHz +pwm.frequency(0) # disable +``` + +### PWM#period_us(us) + +Set period in microseconds. Returns the corresponding frequency. + +```ruby +pwm.period_us(1000) # 1ms period = 1 kHz +``` + +### PWM#duty(pct) + +Set duty cycle in percent (0.0-100.0). Clamped to range. + +```ruby +pwm.duty(75.0) +``` + +### PWM#pulse_width_us(us) + +Set pulse width in microseconds. Duty cycle is calculated from +current frequency. + +```ruby +pwm.pulse_width_us(500) # 500us pulse width +``` + +## HAL Interface + +To add support for a new platform, create a `ports//` +directory and implement the following C functions declared in +``: + +```c +void mrb_pwm_init(uint32_t pin); +void mrb_pwm_set_freq_duty(uint32_t pin, float frequency, float duty); +void mrb_pwm_set_enabled(uint32_t pin, bool enabled); +``` + +- `frequency` is in Hz, `duty` is in percent (0-100) +- `mrb_pwm_set_enabled` is called with `false` when frequency is 0 + +## License + +MIT diff --git a/mrbgems/hw-pwm/include/mruby/pwm.h b/mrbgems/hw-pwm/include/mruby/pwm.h new file mode 100644 index 0000000000..f1f25d8004 --- /dev/null +++ b/mrbgems/hw-pwm/include/mruby/pwm.h @@ -0,0 +1,20 @@ +#ifndef MRUBY_PWM_H +#define MRUBY_PWM_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* HAL functions - implemented in ports//pwm.c */ +void mrb_pwm_init(uint32_t pin); +void mrb_pwm_set_freq_duty(uint32_t pin, float frequency, float duty); +void mrb_pwm_set_enabled(uint32_t pin, bool enabled); + +#ifdef __cplusplus +} +#endif + +#endif /* MRUBY_PWM_H */ diff --git a/mrbgems/hw-pwm/mrbgem.rake b/mrbgems/hw-pwm/mrbgem.rake new file mode 100644 index 0000000000..bc3d0f0499 --- /dev/null +++ b/mrbgems/hw-pwm/mrbgem.rake @@ -0,0 +1,5 @@ +MRuby::Gem::Specification.new('hw-pwm') do |spec| + spec.license = 'MIT' + spec.authors = ['HASUMI Hitoshi', 'mruby developers'] + spec.summary = 'PWM peripheral interface' +end diff --git a/mrbgems/hw-pwm/mrblib/pwm.rb b/mrbgems/hw-pwm/mrblib/pwm.rb new file mode 100644 index 0000000000..3e2cf82915 --- /dev/null +++ b/mrbgems/hw-pwm/mrblib/pwm.rb @@ -0,0 +1,9 @@ +class PWM + def initialize(pin, frequency: 0, duty: 50) + @pin = pin + __init(@pin) + @frequency = frequency.to_f + @duty = duty.to_f + frequency(@frequency) + end +end diff --git a/mrbgems/hw-pwm/ports/esp32/pwm.c b/mrbgems/hw-pwm/ports/esp32/pwm.c new file mode 100644 index 0000000000..e91f280fc1 --- /dev/null +++ b/mrbgems/hw-pwm/ports/esp32/pwm.c @@ -0,0 +1,59 @@ +#include "driver/ledc.h" +#include + +#define DUTY_RESOLUTION LEDC_TIMER_14_BIT + +static int8_t channel_for_gpio[GPIO_NUM_MAX]; +static int next_channel; + +void +mrb_pwm_init(uint32_t gpio) +{ + if (gpio >= GPIO_NUM_MAX) return; + if (next_channel >= LEDC_CHANNEL_MAX) return; + + ledc_timer_config_t timer_cfg = { + .speed_mode = LEDC_LOW_SPEED_MODE, + .timer_num = LEDC_TIMER_0, + .duty_resolution = DUTY_RESOLUTION, + .freq_hz = 1000, + .clk_cfg = LEDC_AUTO_CLK, + }; + ledc_timer_config(&timer_cfg); + + ledc_channel_config_t ch_cfg = { + .gpio_num = gpio, + .speed_mode = LEDC_LOW_SPEED_MODE, + .channel = next_channel, + .timer_sel = LEDC_TIMER_0, + .intr_type = LEDC_INTR_DISABLE, + .duty = 0, + .hpoint = 0, + }; + ledc_channel_config(&ch_cfg); + + channel_for_gpio[gpio] = next_channel++; +} + +void +mrb_pwm_set_freq_duty(uint32_t gpio, float frequency, float duty) +{ + if (gpio >= GPIO_NUM_MAX) return; + + ledc_set_freq(LEDC_LOW_SPEED_MODE, LEDC_TIMER_0, (uint32_t)frequency); + + int8_t ch = channel_for_gpio[gpio]; + uint32_t max_duty = (1 << DUTY_RESOLUTION) - 1; + uint32_t d = (uint32_t)(duty * max_duty / 100.0f); + ledc_set_duty(LEDC_LOW_SPEED_MODE, ch, d); + ledc_update_duty(LEDC_LOW_SPEED_MODE, ch); +} + +void +mrb_pwm_set_enabled(uint32_t gpio, bool enabled) +{ + if (!enabled) { + int8_t ch = channel_for_gpio[gpio]; + ledc_stop(LEDC_LOW_SPEED_MODE, ch, 0); + } +} diff --git a/mrbgems/hw-pwm/ports/rp2040/pwm.c b/mrbgems/hw-pwm/ports/rp2040/pwm.c new file mode 100644 index 0000000000..5800df3f4f --- /dev/null +++ b/mrbgems/hw-pwm/ports/rp2040/pwm.c @@ -0,0 +1,33 @@ +#include "pico/stdlib.h" +#include "hardware/pwm.h" +#include + +#define APB_CLK_FREQ 125000000 +#define CLK_DIV 100.0f + +void +mrb_pwm_init(uint32_t pin) +{ + gpio_set_function(pin, GPIO_FUNC_PWM); + uint slice = pwm_gpio_to_slice_num(pin); + pwm_set_clkdiv(slice, CLK_DIV); +} + +void +mrb_pwm_set_freq_duty(uint32_t pin, float frequency, float duty) +{ + uint slice = pwm_gpio_to_slice_num(pin); + uint channel = pwm_gpio_to_channel(pin); + float period = 1.0f / frequency; + uint16_t wrap = (uint16_t)(period * APB_CLK_FREQ / CLK_DIV); + pwm_set_wrap(slice, wrap); + uint16_t level = (uint16_t)(wrap * duty / 100.0f); + pwm_set_chan_level(slice, channel, level); +} + +void +mrb_pwm_set_enabled(uint32_t pin, bool enabled) +{ + uint slice = pwm_gpio_to_slice_num(pin); + pwm_set_enabled(slice, enabled); +} diff --git a/mrbgems/hw-pwm/src/pwm.c b/mrbgems/hw-pwm/src/pwm.c new file mode 100644 index 0000000000..6cae315553 --- /dev/null +++ b/mrbgems/hw-pwm/src/pwm.c @@ -0,0 +1,90 @@ +#include +#include +#include +#include + +static mrb_value +mrb_pwm_m_init(mrb_state *mrb, mrb_value self) +{ + mrb_int pin; + mrb_get_args(mrb, "i", &pin); + mrb_pwm_init((uint32_t)pin); + return mrb_nil_value(); +} + +static void +apply_freq_duty(mrb_state *mrb, mrb_value self) +{ + uint32_t pin = (uint32_t)mrb_integer(mrb_iv_get(mrb, self, MRB_IVSYM(pin))); + mrb_float freq = mrb_as_float(mrb, mrb_iv_get(mrb, self, MRB_IVSYM(frequency))); + mrb_float duty = mrb_as_float(mrb, mrb_iv_get(mrb, self, MRB_IVSYM(duty))); + mrb_pwm_set_freq_duty(pin, (float)freq, (float)duty); + mrb_pwm_set_enabled(pin, freq > 0); +} + +/* PWM#frequency(freq) */ +static mrb_value +mrb_pwm_m_frequency(mrb_state *mrb, mrb_value self) +{ + mrb_float freq; + mrb_get_args(mrb, "f", &freq); + mrb_iv_set(mrb, self, MRB_IVSYM(frequency), mrb_float_value(mrb, freq)); + apply_freq_duty(mrb, self); + return mrb_float_value(mrb, freq); +} + +/* PWM#period_us(us) */ +static mrb_value +mrb_pwm_m_period_us(mrb_state *mrb, mrb_value self) +{ + mrb_int us; + mrb_get_args(mrb, "i", &us); + mrb_float freq = 1000000.0 / us; + mrb_iv_set(mrb, self, MRB_IVSYM(frequency), mrb_float_value(mrb, freq)); + apply_freq_duty(mrb, self); + return mrb_float_value(mrb, freq); +} + +/* PWM#duty(pct) */ +static mrb_value +mrb_pwm_m_duty(mrb_state *mrb, mrb_value self) +{ + mrb_float duty; + mrb_get_args(mrb, "f", &duty); + if (duty < 0.0) duty = 0.0; + if (duty > 100.0) duty = 100.0; + mrb_iv_set(mrb, self, MRB_IVSYM(duty), mrb_float_value(mrb, duty)); + apply_freq_duty(mrb, self); + return mrb_float_value(mrb, duty); +} + +/* PWM#pulse_width_us(us) */ +static mrb_value +mrb_pwm_m_pulse_width_us(mrb_state *mrb, mrb_value self) +{ + mrb_int pw; + mrb_get_args(mrb, "i", &pw); + mrb_float freq = mrb_as_float(mrb, mrb_iv_get(mrb, self, MRB_IVSYM(frequency))); + mrb_float duty = (mrb_float)pw / 10000.0 * freq; + if (duty < 0.0) duty = 0.0; + if (duty > 100.0) duty = 100.0; + mrb_iv_set(mrb, self, MRB_IVSYM(duty), mrb_float_value(mrb, duty)); + apply_freq_duty(mrb, self); + return mrb_float_value(mrb, duty); +} + +void +mrb_hw_pwm_gem_init(mrb_state *mrb) +{ + struct RClass *cls = mrb_define_class_id(mrb, MRB_SYM(PWM), mrb->object_class); + mrb_define_method_id(mrb, cls, MRB_SYM(__init), mrb_pwm_m_init, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, cls, MRB_SYM(frequency), mrb_pwm_m_frequency, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, cls, MRB_SYM(period_us), mrb_pwm_m_period_us, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, cls, MRB_SYM(duty), mrb_pwm_m_duty, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, cls, MRB_SYM(pulse_width_us), mrb_pwm_m_pulse_width_us, MRB_ARGS_REQ(1)); +} + +void +mrb_hw_pwm_gem_final(mrb_state *mrb) +{ +} diff --git a/mrbgems/hw-spi/README.md b/mrbgems/hw-spi/README.md new file mode 100644 index 0000000000..a29403bc58 --- /dev/null +++ b/mrbgems/hw-spi/README.md @@ -0,0 +1,113 @@ +# hw-spi - SPI peripheral interface for mruby + +This gem provides the `SPI` class for Serial Peripheral Interface +communication from mruby. It is designed for embedded platforms +such as ESP32 and RP2040. + +## Architecture + +Platform-specific HAL implementations are in `ports/` directories: + +- `ports/esp32/` - ESP32 using ESP-IDF SPI master driver +- `ports/rp2040/` - RP2040 using Pico SDK + +The build system automatically compiles matching port sources based +on `conf.ports` setting. + +## Build Configuration + +```ruby +MRuby::CrossBuild.new('esp32') do |conf| + conf.ports :esp32 + conf.gem core: 'hw-spi' +end +``` + +## Ruby API + +### SPI.new + +```ruby +spi = SPI.new( + unit: :RP2040_SPI0, # SPI unit name (required) + frequency: 100_000, # clock frequency in Hz (default: 100kHz) + sck_pin: -1, # SCK GPIO pin (default: platform default) + copi_pin: -1, # COPI/MOSI GPIO pin + cipo_pin: -1, # CIPO/MISO GPIO pin + cs_pin: -1, # CS GPIO pin (-1 for manual control) + mode: 0, # SPI mode 0-3 (default: 0) + first_bit: SPI::MSB_FIRST # bit order (default: MSB_FIRST) +) +``` + +#### Unit Names + +| Platform | Available Units | +| -------- | ------------------------------------------ | +| ESP32 | `:ESP32_SPI2_HOST`, `:ESP32_HSPI_HOST`, | +| | `:ESP32_SPI3_HOST`\*, `:ESP32_VSPI_HOST`\* | +| RP2040 | `:RP2040_SPI0`, `:RP2040_SPI1` | + +\*SPI3 availability depends on ESP32 variant. + +### `SPI#write(*data)` + +Write data to the SPI bus. Data can be Integer, Array, or String. + +```ruby +spi.write(0x01, [0x02, 0x03]) +``` + +### SPI#read(len, tx_value = 0) + +Read `len` bytes. Optionally specify the value to transmit during +read. + +```ruby +data = spi.read(4) # transmits 0x00 while reading +data = spi.read(4, 0xFF) # transmits 0xFF while reading +``` + +### `SPI#transfer(*data, additional_read_bytes: 0)` + +Full-duplex transfer. Sends data and returns received bytes. +Use `additional_read_bytes:` to append zero-filled read bytes. + +```ruby +# Send 1 byte command, read 4 bytes response +rx = spi.transfer(0x9F, additional_read_bytes: 4) +``` + +### SPI#select / SPI#deselect + +Manually control the CS pin (when using GPIO-based chip select). + +```ruby +spi.select do |s| + s.write(0x01) + data = s.read(4) +end # CS automatically deasserted +``` + +## HAL Interface + +To add support for a new platform, create a `ports//` +directory and implement the following C functions declared in +``: + +```c +int mrb_spi_unit_name_to_num(const char *name); +mrb_spi_status mrb_spi_init(mrb_spi_info *info); +int mrb_spi_read(mrb_spi_info *info, uint8_t *dst, size_t len, + uint8_t tx_val); +int mrb_spi_write(mrb_spi_info *info, const uint8_t *src, size_t len); +int mrb_spi_transfer(mrb_spi_info *info, const uint8_t *tx, + uint8_t *rx, size_t len); +``` + +The `mrb_spi_info` struct contains all configuration (unit, pins, +frequency, mode, bit order) and is passed to every HAL call. + +## License + +MIT diff --git a/mrbgems/hw-spi/include/mruby/spi.h b/mrbgems/hw-spi/include/mruby/spi.h new file mode 100644 index 0000000000..9be4fc55b2 --- /dev/null +++ b/mrbgems/hw-spi/include/mruby/spi.h @@ -0,0 +1,44 @@ +#ifndef MRUBY_SPI_H +#define MRUBY_SPI_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define MRB_SPI_MSB_FIRST 1 +#define MRB_SPI_LSB_FIRST 0 + +typedef enum { + MRB_SPI_OK = 0, + MRB_SPI_ERROR_UNIT = -1, + MRB_SPI_ERROR_MODE = -2, + MRB_SPI_ERROR_FIRST_BIT = -3, + MRB_SPI_ERROR_INIT = -4, +} mrb_spi_status; + +typedef struct { + uint32_t frequency; + uint8_t unit_num; + int8_t sck_pin; + int8_t copi_pin; + int8_t cipo_pin; + int8_t cs_pin; + uint8_t mode; + uint8_t first_bit; +} mrb_spi_info; + +/* HAL functions - implemented in ports//spi.c */ +int mrb_spi_unit_name_to_num(const char *name); +mrb_spi_status mrb_spi_init(mrb_spi_info *info); +int mrb_spi_read(mrb_spi_info *info, uint8_t *dst, size_t len, uint8_t tx_val); +int mrb_spi_write(mrb_spi_info *info, const uint8_t *src, size_t len); +int mrb_spi_transfer(mrb_spi_info *info, const uint8_t *tx, uint8_t *rx, size_t len); + +#ifdef __cplusplus +} +#endif + +#endif /* MRUBY_SPI_H */ diff --git a/mrbgems/hw-spi/mrbgem.rake b/mrbgems/hw-spi/mrbgem.rake new file mode 100644 index 0000000000..f2f1149adc --- /dev/null +++ b/mrbgems/hw-spi/mrbgem.rake @@ -0,0 +1,5 @@ +MRuby::Gem::Specification.new('hw-spi') do |spec| + spec.license = 'MIT' + spec.authors = ['HASUMI Hitoshi', 'mruby developers'] + spec.summary = 'SPI peripheral interface' +end diff --git a/mrbgems/hw-spi/mrblib/spi.rb b/mrbgems/hw-spi/mrblib/spi.rb new file mode 100644 index 0000000000..d0d6db5192 --- /dev/null +++ b/mrbgems/hw-spi/mrblib/spi.rb @@ -0,0 +1,20 @@ +class SPI + MSB_FIRST = 1 + LSB_FIRST = 0 + DEFAULT_FREQUENCY = 100_000 + + def select + @cs&.write 0 + if block_given? + begin + yield self + ensure + deselect + end + end + end + + def deselect + @cs&.write 1 + end +end diff --git a/mrbgems/hw-spi/ports/esp32/spi.c b/mrbgems/hw-spi/ports/esp32/spi.c new file mode 100644 index 0000000000..9557d6584f --- /dev/null +++ b/mrbgems/hw-spi/ports/esp32/spi.c @@ -0,0 +1,85 @@ +#include +#include "driver/spi_master.h" +#include + +static spi_device_handle_t handles[SPI_HOST_MAX]; + +int +mrb_spi_unit_name_to_num(const char *name) +{ + if (strcmp(name, "ESP32_SPI2_HOST") == 0) return SPI2_HOST; + if (strcmp(name, "ESP32_HSPI_HOST") == 0) return SPI2_HOST; +#if (SOC_SPI_PERIPH_NUM == 3) + if (strcmp(name, "ESP32_SPI3_HOST") == 0) return SPI3_HOST; + if (strcmp(name, "ESP32_VSPI_HOST") == 0) return SPI3_HOST; +#endif + return MRB_SPI_ERROR_UNIT; +} + +mrb_spi_status +mrb_spi_init(mrb_spi_info *info) +{ + if (handles[info->unit_num] != NULL) return MRB_SPI_OK; + + spi_bus_config_t buscfg = { + .mosi_io_num = info->copi_pin, + .miso_io_num = info->cipo_pin, + .sclk_io_num = info->sck_pin, + .quadwp_io_num = -1, + .quadhd_io_num = -1, + }; + + esp_err_t err = spi_bus_initialize(info->unit_num, &buscfg, SPI_DMA_CH_AUTO); + if (err != ESP_OK) return MRB_SPI_ERROR_INIT; + + spi_device_interface_config_t devcfg = { + .clock_speed_hz = info->frequency, + .mode = info->mode, + .spics_io_num = info->cs_pin, + .queue_size = 7, + }; + + err = spi_bus_add_device(info->unit_num, &devcfg, &handles[info->unit_num]); + if (err != ESP_OK) { + spi_bus_free(info->unit_num); + handles[info->unit_num] = NULL; + return MRB_SPI_ERROR_INIT; + } + return MRB_SPI_OK; +} + +int +mrb_spi_read(mrb_spi_info *info, uint8_t *dst, size_t len, uint8_t tx_val) +{ + spi_transaction_t t = { + .length = len * 8, + .tx_buffer = NULL, + .rx_buffer = dst, + }; + esp_err_t err = spi_device_polling_transmit(handles[info->unit_num], &t); + return (err == ESP_OK) ? (int)len : -1; +} + +int +mrb_spi_write(mrb_spi_info *info, const uint8_t *src, size_t len) +{ + spi_transaction_t t = { + .length = len * 8, + .tx_buffer = src, + .rx_buffer = NULL, + }; + esp_err_t err = spi_device_polling_transmit(handles[info->unit_num], &t); + return (err == ESP_OK) ? (int)len : -1; +} + +int +mrb_spi_transfer(mrb_spi_info *info, const uint8_t *tx, uint8_t *rx, size_t len) +{ + spi_transaction_t t = { + .length = len * 8, + .tx_buffer = tx, + .rx_buffer = rx, + }; + esp_err_t err = spi_device_polling_transmit(handles[info->unit_num], &t); + return (err == ESP_OK) ? (int)len : -1; +} diff --git a/mrbgems/hw-spi/ports/rp2040/spi.c b/mrbgems/hw-spi/ports/rp2040/spi.c new file mode 100644 index 0000000000..9210e1c2a7 --- /dev/null +++ b/mrbgems/hw-spi/ports/rp2040/spi.c @@ -0,0 +1,73 @@ +#include +#include "pico/stdlib.h" +#include "hardware/spi.h" +#include + +#define UNIT_SELECT(info) \ + spi_inst_t *inst; \ + switch ((info)->unit_num) { \ + case 0: inst = spi0; break; \ + case 1: inst = spi1; break; \ + default: return MRB_SPI_ERROR_UNIT; \ + } + +int +mrb_spi_unit_name_to_num(const char *name) +{ + if (strcmp(name, "RP2040_SPI0") == 0) return 0; + if (strcmp(name, "RP2040_SPI1") == 0) return 1; + return MRB_SPI_ERROR_UNIT; +} + +mrb_spi_status +mrb_spi_init(mrb_spi_info *info) +{ + UNIT_SELECT(info); + spi_init(inst, info->frequency); + + if (info->sck_pin < 0) info->sck_pin = PICO_DEFAULT_SPI_SCK_PIN; + if (info->cipo_pin < 0) info->cipo_pin = PICO_DEFAULT_SPI_RX_PIN; + if (info->copi_pin < 0) info->copi_pin = PICO_DEFAULT_SPI_TX_PIN; + + gpio_set_function(info->sck_pin, GPIO_FUNC_SPI); + gpio_set_function(info->cipo_pin, GPIO_FUNC_SPI); + gpio_set_function(info->copi_pin, GPIO_FUNC_SPI); + + if (info->first_bit != MRB_SPI_MSB_FIRST) { + return MRB_SPI_ERROR_FIRST_BIT; + } + + spi_cpol_t cpol; + spi_cpha_t cpha; + switch (info->mode) { + case 0: cpol = 0; cpha = 0; break; + case 1: cpol = 0; cpha = 1; break; + case 2: cpol = 1; cpha = 0; break; + case 3: cpol = 1; cpha = 1; break; + default: return MRB_SPI_ERROR_MODE; + } + spi_set_format(inst, 8, cpol, cpha, info->first_bit); + + return MRB_SPI_OK; +} + +int +mrb_spi_read(mrb_spi_info *info, uint8_t *dst, size_t len, uint8_t tx_val) +{ + UNIT_SELECT(info); + return spi_read_blocking(inst, tx_val, dst, len); +} + +int +mrb_spi_write(mrb_spi_info *info, const uint8_t *src, size_t len) +{ + UNIT_SELECT(info); + return spi_write_blocking(inst, src, len); +} + +int +mrb_spi_transfer(mrb_spi_info *info, const uint8_t *tx, uint8_t *rx, size_t len) +{ + UNIT_SELECT(info); + return spi_write_read_blocking(inst, tx, rx, len); +} diff --git a/mrbgems/hw-spi/src/spi.c b/mrbgems/hw-spi/src/spi.c new file mode 100644 index 0000000000..2b75930c87 --- /dev/null +++ b/mrbgems/hw-spi/src/spi.c @@ -0,0 +1,235 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define STACK_BUF_SIZE 256 +#define E_IO_ERROR mrb_exc_get_id(mrb, MRB_SYM(IOError)) + +static void +spi_info_free(mrb_state *mrb, void *ptr) +{ + mrb_free(mrb, ptr); +} + +static const struct mrb_data_type spi_info_type = { "SPI", spi_info_free }; + +#define SPI_INFO(self) \ + ((mrb_spi_info*)mrb_data_get_ptr(mrb, self, &spi_info_type)) + +static size_t +spi_calc_size(mrb_state *mrb, mrb_value *args, mrb_int argc) +{ + size_t total = 0; + for (mrb_int i = 0; i < argc; i++) { + switch (mrb_type(args[i])) { + case MRB_TT_ARRAY: + total += RARRAY_LEN(args[i]); + break; + case MRB_TT_INTEGER: + total += 1; + break; + case MRB_TT_STRING: + total += RSTRING_LEN(args[i]); + break; + default: + mrb_raise(mrb, E_TYPE_ERROR, "Integer, Array, or String expected"); + } + } + return total; +} + +static void +spi_fill_buf(mrb_state *mrb, uint8_t *buf, mrb_value *args, mrb_int argc, + size_t pad) +{ + size_t pos = 0; + for (mrb_int i = 0; i < argc; i++) { + switch (mrb_type(args[i])) { + case MRB_TT_ARRAY: { + mrb_int alen = RARRAY_LEN(args[i]); + const mrb_value *aptr = RARRAY_PTR(args[i]); + for (mrb_int j = 0; j < alen; j++) { + if (!mrb_integer_p(aptr[j])) { + mrb_raise(mrb, E_TYPE_ERROR, "array element must be Integer"); + } + buf[pos++] = (uint8_t)mrb_integer(aptr[j]); + } + break; + } + case MRB_TT_INTEGER: + buf[pos++] = (uint8_t)mrb_integer(args[i]); + break; + case MRB_TT_STRING: + memcpy(&buf[pos], RSTRING_PTR(args[i]), RSTRING_LEN(args[i])); + pos += RSTRING_LEN(args[i]); + break; + default: + break; + } + } + memset(&buf[pos], 0, pad); +} + +/* SPI.new(unit:, frequency:, sck_pin:, cipo_pin:, copi_pin:, + cs_pin:, mode:, first_bit:) */ +static mrb_value +mrb_spi_s_new(mrb_state *mrb, mrb_value klass) +{ + const char *unit_name; + mrb_int freq = 100000, sck = -1, cipo = -1, copi = -1, cs = -1; + mrb_int mode = 0, first_bit = MRB_SPI_MSB_FIRST; + + const mrb_sym kw_names[] = { + MRB_SYM(unit), MRB_SYM(frequency), MRB_SYM(sck_pin), + MRB_SYM(cipo_pin), MRB_SYM(copi_pin), MRB_SYM(cs_pin), + MRB_SYM(mode), MRB_SYM(first_bit) + }; + mrb_value kw_values[8]; + mrb_kwargs kwargs = { 8, 7, kw_names, kw_values, NULL }; + mrb_get_args(mrb, ":", &kwargs); + + /* unit is required */ + if (mrb_undef_p(kw_values[0])) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "unit: is required"); + } + unit_name = mrb_str_to_cstr(mrb, mrb_sym_str(mrb, mrb_symbol(kw_values[0]))); + + if (!mrb_undef_p(kw_values[1])) freq = mrb_integer(kw_values[1]); + if (!mrb_undef_p(kw_values[2])) sck = mrb_integer(kw_values[2]); + if (!mrb_undef_p(kw_values[3])) cipo = mrb_integer(kw_values[3]); + if (!mrb_undef_p(kw_values[4])) copi = mrb_integer(kw_values[4]); + if (!mrb_undef_p(kw_values[5])) cs = mrb_integer(kw_values[5]); + if (!mrb_undef_p(kw_values[6])) mode = mrb_integer(kw_values[6]); + if (!mrb_undef_p(kw_values[7])) first_bit = mrb_integer(kw_values[7]); + + int num = mrb_spi_unit_name_to_num(unit_name); + if (num < 0) { + mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown SPI unit: %s", unit_name); + } + + mrb_spi_info *info = (mrb_spi_info*)mrb_malloc(mrb, sizeof(mrb_spi_info)); + info->unit_num = (uint8_t)num; + info->frequency = (uint32_t)freq; + info->sck_pin = (int8_t)sck; + info->cipo_pin = (int8_t)cipo; + info->copi_pin = (int8_t)copi; + info->cs_pin = (int8_t)cs; + info->mode = (uint8_t)mode; + info->first_bit = (uint8_t)first_bit; + + mrb_value self = mrb_obj_value( + Data_Wrap_Struct(mrb, mrb_class_ptr(klass), &spi_info_type, info)); + + mrb_spi_status st = mrb_spi_init(info); + if (st != MRB_SPI_OK) { + mrb_raise(mrb, E_IO_ERROR, "SPI init failed"); + } + return self; +} + +/* SPI#write(*data) */ +static mrb_value +mrb_spi_m_write(mrb_state *mrb, mrb_value self) +{ + mrb_value *args; + mrb_int argc; + mrb_get_args(mrb, "*", &args, &argc); + + mrb_spi_info *info = SPI_INFO(self); + size_t total = spi_calc_size(mrb, args, argc); + if (total == 0) return mrb_fixnum_value(0); + + uint8_t sbuf[STACK_BUF_SIZE]; + uint8_t *buf = sbuf; + mrb_bool need_free = FALSE; + if (total > STACK_BUF_SIZE) { + buf = (uint8_t*)mrb_malloc(mrb, total); + need_free = TRUE; + } + spi_fill_buf(mrb, buf, args, argc, 0); + + int ret = mrb_spi_write(info, buf, total); + if (need_free) mrb_free(mrb, buf); + if (ret < 0) mrb_raise(mrb, E_IO_ERROR, "SPI write failed"); + return mrb_fixnum_value(ret); +} + +/* SPI#read(len, tx_value=0) */ +static mrb_value +mrb_spi_m_read(mrb_state *mrb, mrb_value self) +{ + mrb_int len, tx_val = 0; + mrb_get_args(mrb, "i|i", &len, &tx_val); + if (len <= 0) mrb_raise(mrb, E_ARGUMENT_ERROR, "length must be positive"); + + mrb_spi_info *info = SPI_INFO(self); + uint8_t *buf = (uint8_t*)mrb_malloc(mrb, len); + int ret = mrb_spi_read(info, buf, (size_t)len, (uint8_t)tx_val); + if (ret < 0) { + mrb_free(mrb, buf); + mrb_raise(mrb, E_IO_ERROR, "SPI read failed"); + } + mrb_value str = mrb_str_new(mrb, (const char*)buf, ret); + mrb_free(mrb, buf); + return str; +} + +/* SPI#transfer(*data, additional_read_bytes: 0) */ +static mrb_value +mrb_spi_m_transfer(mrb_state *mrb, mrb_value self) +{ + mrb_value *args; + mrb_int argc, extra = 0; + const mrb_sym kw_names[] = { MRB_SYM(additional_read_bytes) }; + mrb_value kw_values[1]; + mrb_kwargs kwargs = { 1, 0, kw_names, kw_values, NULL }; + mrb_get_args(mrb, "*:", &args, &argc, &kwargs); + + if (!mrb_undef_p(kw_values[0])) extra = mrb_integer(kw_values[0]); + + mrb_spi_info *info = SPI_INFO(self); + size_t total = spi_calc_size(mrb, args, argc) + (size_t)extra; + if (total == 0) return mrb_str_new(mrb, "", 0); + + uint8_t sbuf_tx[STACK_BUF_SIZE], sbuf_rx[STACK_BUF_SIZE]; + uint8_t *tx = sbuf_tx, *rx = sbuf_rx; + mrb_bool need_free = FALSE; + if (total > STACK_BUF_SIZE) { + tx = (uint8_t*)mrb_malloc(mrb, total * 2); + rx = tx + total; + need_free = TRUE; + } + spi_fill_buf(mrb, tx, args, argc, (size_t)extra); + + int ret = mrb_spi_transfer(info, tx, rx, total); + if (ret < 0) { + if (need_free) mrb_free(mrb, tx); + mrb_raise(mrb, E_IO_ERROR, "SPI transfer failed"); + } + mrb_value str = mrb_str_new(mrb, (const char*)rx, ret); + if (need_free) mrb_free(mrb, tx); + return str; +} + +void +mrb_hw_spi_gem_init(mrb_state *mrb) +{ + struct RClass *cls = mrb_define_class_id(mrb, MRB_SYM(SPI), mrb->object_class); + MRB_SET_INSTANCE_TT(cls, MRB_TT_CDATA); + + mrb_define_class_method_id(mrb, cls, MRB_SYM(new), mrb_spi_s_new, MRB_ARGS_KEY(8, 1)); + mrb_define_method_id(mrb, cls, MRB_SYM(write), mrb_spi_m_write, MRB_ARGS_REST()); + mrb_define_method_id(mrb, cls, MRB_SYM(read), mrb_spi_m_read, MRB_ARGS_ARG(1, 1)); + mrb_define_method_id(mrb, cls, MRB_SYM(transfer), mrb_spi_m_transfer, MRB_ARGS_REST()|MRB_ARGS_KEY(1, 0)); +} + +void +mrb_hw_spi_gem_final(mrb_state *mrb) +{ +} diff --git a/mrbgems/hw-uart/README.md b/mrbgems/hw-uart/README.md new file mode 100644 index 0000000000..23769cb3ae --- /dev/null +++ b/mrbgems/hw-uart/README.md @@ -0,0 +1,181 @@ +# hw-uart - UART peripheral interface for mruby + +This gem provides the `UART` class for serial communication from mruby. +It is designed for embedded platforms such as ESP32 and RP2040. + +## Architecture + +Platform-specific HAL implementations are in `ports/` directories: + +- `ports/esp32/` - ESP32 using ESP-IDF UART driver with FreeRTOS + task for RX +- `ports/rp2040/` - RP2040 using Pico SDK with IRQ-driven RX + +Received data is buffered in a ring buffer (allocated by the common +gem) that the platform HAL populates via interrupt or task. The ring +buffer size must be a power of two. + +## Build Configuration + +```ruby +# For ESP32 +MRuby::CrossBuild.new('esp32') do |conf| + conf.ports :esp32 + conf.gem core: 'hw-uart' +end + +# For RP2040 +MRuby::CrossBuild.new('rp2040') do |conf| + conf.ports :rp2040 + conf.gem core: 'hw-uart' +end +``` + +## Ruby API + +### Constants + +| Constant | Value | Description | +| ---------------------------- | ----- | ------------------ | +| `UART::PARITY_NONE` | `0` | No parity | +| `UART::PARITY_EVEN` | `1` | Even parity | +| `UART::PARITY_ODD` | `2` | Odd parity | +| `UART::FLOW_CONTROL_NONE` | `0` | No flow control | +| `UART::FLOW_CONTROL_RTS_CTS` | `1` | Hardware flow ctrl | + +### UART.new + +```ruby +uart = UART.new( + unit: :ESP32_UART1, # UART unit name (required) + tx_pin: 17, # TX GPIO pin (default: -1) + rx_pin: 16, # RX GPIO pin (default: -1) + baudrate: 9600, # baud rate (default: 9600) + data_bits: 8, # 5-8 (default: 8) + stop_bits: 1, # 1-2 (default: 1) + parity: UART::PARITY_NONE, + flow_control: UART::FLOW_CONTROL_NONE, + rx_buffer_size: 256 # must be power of two (default: 256) +) +``` + +#### Unit Names + +| Platform | Available Units | +| -------- | ------------------------------------------------ | +| ESP32 | `:ESP32_UART0`, `:ESP32_UART1`, `:ESP32_UART2`\* | +| RP2040 | `:RP2040_UART0`, `:RP2040_UART1` | + +\*UART2 availability depends on ESP32 variant. + +### Instance Methods + +#### UART#write(str) + +Write a string to the UART. Returns number of bytes written. + +```ruby +uart.write("Hello\r\n") +``` + +#### UART#read(len = nil) + +Read from the RX buffer. Returns `nil` if no data is available. + +- Without argument: returns all available data +- With `len`: returns exactly `len` bytes, or `nil` if fewer are + available + +```ruby +data = uart.read # all available +data = uart.read(10) # exactly 10 bytes or nil +``` + +#### UART#readpartial(maxlen) + +Read up to `maxlen` bytes from the RX buffer. Returns `nil` if empty. + +```ruby +data = uart.readpartial(64) +``` + +#### UART#gets + +Read a line (up to and including `"\n"`). Returns `nil` if no +complete line is available. + +```ruby +line = uart.gets +``` + +#### UART#bytes_available + +Returns the number of bytes in the RX buffer. + +```ruby +n = uart.bytes_available +``` + +#### UART#puts(str) + +Write string with line ending appended (if not already present). + +```ruby +uart.puts("Hello") # writes "Hello\n" +``` + +#### UART#flush + +Wait for all TX data to be sent. + +#### UART#clear_rx_buffer / UART#clear_tx_buffer + +Discard buffered data. + +#### UART#send_break(duration_ms = 100) + +Send a UART break signal for the specified duration. + +#### UART#setmode(baudrate:, data_bits:, stop_bits:, parity:, flow_control:) + +Reconfigure UART parameters after initialization. All parameters are +optional. + +#### UART#baudrate + +Returns the current baud rate. + +#### UART#line_ending=(ending) + +Set the line ending used by `puts`. Must be `"\n"`, `"\r"`, or +`"\r\n"`. + +## HAL Interface + +To add support for a new platform, create a `ports//` +directory and implement the following C functions declared in +``: + +```c +int mrb_uart_unit_name_to_num(const char *name); +mrb_uart_status mrb_uart_init(int unit, uint32_t tx_pin, uint32_t rx_pin, + mrb_uart_ringbuf *rxbuf); +uint32_t mrb_uart_set_baudrate(int unit, uint32_t baudrate); +void mrb_uart_set_format(int unit, uint32_t data_bits, + uint32_t stop_bits, uint8_t parity); +void mrb_uart_set_flow_control(int unit, bool cts, bool rts); +void mrb_uart_write(int unit, const uint8_t *src, size_t len); +void mrb_uart_flush(int unit); +void mrb_uart_send_break(int unit, uint32_t duration_ms); +void mrb_uart_clear_rx(int unit); +void mrb_uart_clear_tx(int unit); +``` + +The `rxbuf` parameter passed to `mrb_uart_init` is a ring buffer +allocated by the common gem. The platform must arrange for received +bytes to be pushed into it using `mrb_uart_ringbuf_push()` (e.g., +from an interrupt handler or RTOS task). + +## License + +MIT diff --git a/mrbgems/hw-uart/include/mruby/uart.h b/mrbgems/hw-uart/include/mruby/uart.h new file mode 100644 index 0000000000..d38c2052b1 --- /dev/null +++ b/mrbgems/hw-uart/include/mruby/uart.h @@ -0,0 +1,60 @@ +#ifndef MRUBY_UART_H +#define MRUBY_UART_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define MRB_UART_PARITY_NONE 0 +#define MRB_UART_PARITY_EVEN 1 +#define MRB_UART_PARITY_ODD 2 + +#define MRB_UART_FLOW_NONE 0 +#define MRB_UART_FLOW_RTS_CTS 1 + +typedef enum { + MRB_UART_OK = 0, + MRB_UART_ERROR_UNIT = -1, +} mrb_uart_status; + +/* Ring buffer for interrupt-driven RX. + Allocated by common gem, populated by platform interrupt handler. + size must be a power of two. */ +typedef struct { + volatile int head; + volatile int tail; + int mask; + uint8_t data[]; +} mrb_uart_ringbuf; + +/* Ring buffer helpers (implemented in hw-uart/src/ringbuf.c) */ +bool mrb_uart_ringbuf_init(mrb_uart_ringbuf *rb, int size); +bool mrb_uart_ringbuf_push(mrb_uart_ringbuf *rb, uint8_t ch); +int mrb_uart_ringbuf_pop(mrb_uart_ringbuf *rb, uint8_t *dst, int len); +int mrb_uart_ringbuf_available(const mrb_uart_ringbuf *rb); +void mrb_uart_ringbuf_clear(mrb_uart_ringbuf *rb); +int mrb_uart_ringbuf_search(const mrb_uart_ringbuf *rb, uint8_t ch); + +/* HAL functions - implemented by hw--uart gems */ +int mrb_uart_unit_name_to_num(const char *name); +mrb_uart_status mrb_uart_init(int unit, uint32_t tx_pin, uint32_t rx_pin, + mrb_uart_ringbuf *rxbuf); +uint32_t mrb_uart_set_baudrate(int unit, uint32_t baudrate); +void mrb_uart_set_format(int unit, uint32_t data_bits, uint32_t stop_bits, + uint8_t parity); +void mrb_uart_set_flow_control(int unit, bool cts, bool rts); +void mrb_uart_write(int unit, const uint8_t *src, size_t len); +void mrb_uart_flush(int unit); +void mrb_uart_send_break(int unit, uint32_t duration_ms); +void mrb_uart_clear_rx(int unit); +void mrb_uart_clear_tx(int unit); + +#ifdef __cplusplus +} +#endif + +#endif /* MRUBY_UART_H */ diff --git a/mrbgems/hw-uart/mrbgem.rake b/mrbgems/hw-uart/mrbgem.rake new file mode 100644 index 0000000000..fe758f757f --- /dev/null +++ b/mrbgems/hw-uart/mrbgem.rake @@ -0,0 +1,5 @@ +MRuby::Gem::Specification.new('hw-uart') do |spec| + spec.license = 'MIT' + spec.authors = ['HASUMI Hitoshi', 'mruby developers'] + spec.summary = 'UART peripheral interface' +end diff --git a/mrbgems/hw-uart/mrblib/uart.rb b/mrbgems/hw-uart/mrblib/uart.rb new file mode 100644 index 0000000000..34a1b44b48 --- /dev/null +++ b/mrbgems/hw-uart/mrblib/uart.rb @@ -0,0 +1,54 @@ +class UART + PARITY_NONE = 0 + PARITY_EVEN = 1 + PARITY_ODD = 2 + FLOW_CONTROL_NONE = 0 + FLOW_CONTROL_RTS_CTS = 1 + + attr_reader :baudrate + + def initialize(unit:, tx_pin: -1, rx_pin: -1, baudrate: 9600, + data_bits: 8, stop_bits: 1, parity: PARITY_NONE, + flow_control: FLOW_CONTROL_NONE, rx_buffer_size: 256) + __open_rx_buffer(rx_buffer_size) + @unit_num = __open_connection(unit.to_s, tx_pin, rx_pin) + @baudrate = __set_baudrate(baudrate) + __set_format(data_bits, stop_bits, parity) + set_flow_control(flow_control) + @line_ending = "\n" + end + + def setmode(baudrate: nil, data_bits: nil, stop_bits: nil, + parity: nil, flow_control: nil) + @baudrate = __set_baudrate(baudrate) if baudrate + __set_format(data_bits || 8, stop_bits || 1, parity || PARITY_NONE) + set_flow_control(flow_control || FLOW_CONTROL_NONE) + self + end + + def line_ending=(ending) + unless ["\n", "\r", "\r\n"].include?(ending) + raise ArgumentError, "invalid line ending" + end + @line_ending = ending + end + + def puts(str) + write str + write @line_ending unless str.end_with?(@line_ending) + nil + end + + private + + def set_flow_control(mode) + case mode + when FLOW_CONTROL_NONE + __set_flow_control(false, false) + when FLOW_CONTROL_RTS_CTS + __set_flow_control(true, true) + else + raise ArgumentError, "invalid flow control mode" + end + end +end diff --git a/mrbgems/hw-uart/ports/esp32/uart.c b/mrbgems/hw-uart/ports/esp32/uart.c new file mode 100644 index 0000000000..e751f8604b --- /dev/null +++ b/mrbgems/hw-uart/ports/esp32/uart.c @@ -0,0 +1,147 @@ +#include +#include +#include "freertos/FreeRTOS.h" +#include "driver/uart.h" +#include + +#define RX_TASK_BUF_SIZE 128 +#define QUEUE_LENGTH 20 +#define TASK_STACK_SIZE 4096 +#define TASK_PRIORITY 12 + +typedef struct { + int unit; + QueueHandle_t queue; + mrb_uart_ringbuf *rxbuf; +} uart_ctx; + +static uart_ctx ctx[UART_NUM_MAX]; + +static void +rx_task(void *arg) +{ + uart_ctx *c = (uart_ctx*)arg; + uart_event_t event; + uint8_t buf[RX_TASK_BUF_SIZE]; + + for (;;) { + if (xQueueReceive(c->queue, &event, portMAX_DELAY)) { + if (event.type == UART_DATA) { + size_t n = event.size > RX_TASK_BUF_SIZE ? RX_TASK_BUF_SIZE : event.size; + uart_read_bytes(c->unit, buf, n, portMAX_DELAY); + for (size_t i = 0; i < n; i++) { + mrb_uart_ringbuf_push(c->rxbuf, buf[i]); + } + } + } + } +} + +int +mrb_uart_unit_name_to_num(const char *name) +{ + if (strcmp(name, "ESP32_UART0") == 0) return UART_NUM_0; + if (strcmp(name, "ESP32_UART1") == 0) return UART_NUM_1; +#ifdef UART_NUM_2 + if (strcmp(name, "ESP32_UART2") == 0) return UART_NUM_2; +#endif + return MRB_UART_ERROR_UNIT; +} + +mrb_uart_status +mrb_uart_init(int unit, uint32_t tx_pin, uint32_t rx_pin, + mrb_uart_ringbuf *rxbuf) +{ + if (unit < 0 || unit >= UART_NUM_MAX) return MRB_UART_ERROR_UNIT; + + uart_config_t cfg = { + .baud_rate = 9600, + .data_bits = UART_DATA_8_BITS, + .parity = UART_PARITY_DISABLE, + .stop_bits = UART_STOP_BITS_1, + .flow_ctrl = UART_HW_FLOWCTRL_DISABLE, + .source_clk = UART_SCLK_DEFAULT, + }; + + int bufsize = (rxbuf->mask + 1); + uart_driver_install(unit, bufsize, 0, QUEUE_LENGTH, &ctx[unit].queue, 0); + uart_param_config(unit, &cfg); + uart_set_pin(unit, tx_pin, rx_pin, UART_PIN_NO_CHANGE, UART_PIN_NO_CHANGE); + + ctx[unit].unit = unit; + ctx[unit].rxbuf = rxbuf; + + char name[32]; + snprintf(name, sizeof(name), "uart_rx_%d", unit); + xTaskCreate(rx_task, name, TASK_STACK_SIZE, &ctx[unit], TASK_PRIORITY, NULL); + + return MRB_UART_OK; +} + +uint32_t +mrb_uart_set_baudrate(int unit, uint32_t baudrate) +{ + uart_set_baudrate(unit, baudrate); + return baudrate; +} + +void +mrb_uart_set_format(int unit, uint32_t data_bits, uint32_t stop_bits, + uint8_t parity) +{ + static const uart_word_length_t wl[] = { + UART_DATA_5_BITS, UART_DATA_6_BITS, UART_DATA_7_BITS, UART_DATA_8_BITS + }; + static const uart_stop_bits_t sb[] = { + UART_STOP_BITS_1, UART_STOP_BITS_2 + }; + static const uart_parity_t pr[] = { + UART_PARITY_DISABLE, UART_PARITY_EVEN, UART_PARITY_ODD + }; + if (data_bits >= 5 && data_bits <= 8) + uart_set_word_length(unit, wl[data_bits - 5]); + if (stop_bits >= 1 && stop_bits <= 2) + uart_set_stop_bits(unit, sb[stop_bits - 1]); + if (parity <= 2) + uart_set_parity(unit, pr[parity]); +} + +void +mrb_uart_set_flow_control(int unit, bool cts, bool rts) +{ + uart_hw_flowcontrol_t mode = UART_HW_FLOWCTRL_DISABLE; + if (cts && rts) mode = UART_HW_FLOWCTRL_CTS_RTS; + else if (cts) mode = UART_HW_FLOWCTRL_CTS; + else if (rts) mode = UART_HW_FLOWCTRL_RTS; + uart_set_hw_flow_ctrl(unit, mode, 122); +} + +void +mrb_uart_write(int unit, const uint8_t *src, size_t len) +{ + uart_write_bytes(unit, (const char*)src, len); +} + +void +mrb_uart_flush(int unit) +{ + uart_wait_tx_done(unit, 100); +} + +void +mrb_uart_send_break(int unit, uint32_t duration_ms) +{ + uart_write_bytes_with_break(unit, NULL, 0, duration_ms); +} + +void +mrb_uart_clear_rx(int unit) +{ + uart_flush_input(unit); +} + +void +mrb_uart_clear_tx(int unit) +{ + /* not supported on ESP-IDF */ +} diff --git a/mrbgems/hw-uart/ports/rp2040/uart.c b/mrbgems/hw-uart/ports/rp2040/uart.c new file mode 100644 index 0000000000..8c2ba17ea3 --- /dev/null +++ b/mrbgems/hw-uart/ports/rp2040/uart.c @@ -0,0 +1,136 @@ +#include +#include "pico/stdlib.h" +#include "hardware/gpio.h" +#include "hardware/uart.h" +#include "hardware/irq.h" +#include + +#define UNIT_SELECT(u) \ + uart_inst_t *inst; \ + switch (u) { \ + case 0: inst = uart0; break; \ + case 1: inst = uart1; break; \ + default: return MRB_UART_ERROR_UNIT; \ + } + +/* void-returning variant for functions that can't return error */ +#define UNIT_SELECT_V(u) \ + uart_inst_t *inst; \ + switch (u) { \ + case 0: inst = uart0; break; \ + case 1: inst = uart1; break; \ + default: return; \ + } + +static mrb_uart_ringbuf *rx_bufs[2]; + +static void +on_uart0_rx(void) +{ + while (uart_is_readable(uart0)) { + mrb_uart_ringbuf_push(rx_bufs[0], uart_getc(uart0)); + } +} + +static void +on_uart1_rx(void) +{ + while (uart_is_readable(uart1)) { + mrb_uart_ringbuf_push(rx_bufs[1], uart_getc(uart1)); + } +} + +int +mrb_uart_unit_name_to_num(const char *name) +{ + if (strcmp(name, "RP2040_UART0") == 0) return 0; + if (strcmp(name, "RP2040_UART1") == 0) return 1; + return MRB_UART_ERROR_UNIT; +} + +mrb_uart_status +mrb_uart_init(int unit, uint32_t tx_pin, uint32_t rx_pin, + mrb_uart_ringbuf *rxbuf) +{ + UNIT_SELECT(unit); + uart_init(inst, 9600); + + gpio_set_function(tx_pin, GPIO_FUNC_UART); + gpio_set_function(rx_pin, GPIO_FUNC_UART); + + rx_bufs[unit] = rxbuf; + + uint irq; + if (unit == 0) { + irq = UART0_IRQ; + irq_set_exclusive_handler(irq, on_uart0_rx); + } + else { + irq = UART1_IRQ; + irq_set_exclusive_handler(irq, on_uart1_rx); + } + irq_set_enabled(irq, true); + uart_set_irq_enables(inst, true, false); + + return MRB_UART_OK; +} + +uint32_t +mrb_uart_set_baudrate(int unit, uint32_t baudrate) +{ + UNIT_SELECT(unit); + return uart_set_baudrate(inst, baudrate); +} + +void +mrb_uart_set_format(int unit, uint32_t data_bits, uint32_t stop_bits, + uint8_t parity) +{ + UNIT_SELECT_V(unit); + uart_set_format(inst, data_bits, stop_bits, (uart_parity_t)parity); +} + +void +mrb_uart_set_flow_control(int unit, bool cts, bool rts) +{ + UNIT_SELECT_V(unit); + uart_set_hw_flow(inst, cts, rts); +} + +void +mrb_uart_write(int unit, const uint8_t *src, size_t len) +{ + UNIT_SELECT_V(unit); + uart_write_blocking(inst, src, len); +} + +void +mrb_uart_flush(int unit) +{ + UNIT_SELECT_V(unit); + uart_tx_wait_blocking(inst); +} + +void +mrb_uart_send_break(int unit, uint32_t duration_ms) +{ + UNIT_SELECT_V(unit); + uart_set_break(inst, true); + sleep_ms(duration_ms); + uart_set_break(inst, false); +} + +void +mrb_uart_clear_rx(int unit) +{ + UNIT_SELECT_V(unit); + while (uart_is_readable(inst)) { + uart_getc(inst); + } +} + +void +mrb_uart_clear_tx(int unit) +{ + /* not supported on RP2040 */ +} diff --git a/mrbgems/hw-uart/src/ringbuf.c b/mrbgems/hw-uart/src/ringbuf.c new file mode 100644 index 0000000000..a67e324e61 --- /dev/null +++ b/mrbgems/hw-uart/src/ringbuf.c @@ -0,0 +1,59 @@ +#include + +bool +mrb_uart_ringbuf_init(mrb_uart_ringbuf *rb, int size) +{ + /* size must be a power of two */ + if (size <= 0 || (size & (size - 1)) != 0) return false; + rb->head = 0; + rb->tail = 0; + rb->mask = size - 1; + return true; +} + +bool +mrb_uart_ringbuf_push(mrb_uart_ringbuf *rb, uint8_t ch) +{ + int next = (rb->head + 1) & rb->mask; + if (next == rb->tail) return false; /* full */ + rb->data[rb->head] = ch; + rb->head = next; + return true; +} + +int +mrb_uart_ringbuf_pop(mrb_uart_ringbuf *rb, uint8_t *dst, int len) +{ + int i; + for (i = 0; i < len; i++) { + if (rb->tail == rb->head) break; /* empty */ + dst[i] = rb->data[rb->tail]; + rb->tail = (rb->tail + 1) & rb->mask; + } + return i; +} + +int +mrb_uart_ringbuf_available(const mrb_uart_ringbuf *rb) +{ + return (rb->head - rb->tail) & rb->mask; +} + +void +mrb_uart_ringbuf_clear(mrb_uart_ringbuf *rb) +{ + rb->tail = rb->head; +} + +int +mrb_uart_ringbuf_search(const mrb_uart_ringbuf *rb, uint8_t ch) +{ + int pos = rb->tail; + int i = 0; + while (pos != rb->head) { + if (rb->data[pos] == ch) return i; + pos = (pos + 1) & rb->mask; + i++; + } + return -1; +} diff --git a/mrbgems/hw-uart/src/uart.c b/mrbgems/hw-uart/src/uart.c new file mode 100644 index 0000000000..139479a557 --- /dev/null +++ b/mrbgems/hw-uart/src/uart.c @@ -0,0 +1,236 @@ +#include +#include +#include +#include +#include +#include +#include + +#define E_IO_ERROR mrb_exc_get_id(mrb, MRB_SYM(IOError)) + +#define DEFAULT_RX_BUF_SIZE 256 + +static void +rxbuf_free(mrb_state *mrb, void *ptr) +{ + mrb_free(mrb, ptr); +} + +static const struct mrb_data_type rxbuf_type = { "UART", rxbuf_free }; + +/* UART#__open_rx_buffer(size) */ +static mrb_value +mrb_uart_m_open_rxbuf(mrb_state *mrb, mrb_value self) +{ + mrb_int size; + mrb_get_args(mrb, "i", &size); + if (size <= 0) size = DEFAULT_RX_BUF_SIZE; + + mrb_uart_ringbuf *rb = (mrb_uart_ringbuf*)mrb_malloc(mrb, + sizeof(mrb_uart_ringbuf) + sizeof(uint8_t) * size); + if (!mrb_uart_ringbuf_init(rb, (int)size)) { + mrb_free(mrb, rb); + mrb_raise(mrb, E_ARGUMENT_ERROR, "rx_buffer_size must be a power of two"); + } + DATA_PTR(self) = rb; + DATA_TYPE(self) = &rxbuf_type; + return mrb_nil_value(); +} + +/* UART#__open_connection(unit_name, tx_pin, rx_pin) */ +static mrb_value +mrb_uart_m_open_conn(mrb_state *mrb, mrb_value self) +{ + const char *name; + mrb_int tx_pin, rx_pin; + mrb_get_args(mrb, "zii", &name, &tx_pin, &rx_pin); + + int num = mrb_uart_unit_name_to_num(name); + if (num < 0) { + mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown UART unit: %s", name); + } + + mrb_uart_ringbuf *rb = (mrb_uart_ringbuf*)mrb_data_get_ptr(mrb, self, &rxbuf_type); + mrb_uart_status st = mrb_uart_init(num, (uint32_t)tx_pin, (uint32_t)rx_pin, rb); + if (st != MRB_UART_OK) { + mrb_raise(mrb, E_IO_ERROR, "UART init failed"); + } + return mrb_fixnum_value(num); +} + +/* UART#__set_baudrate(baud) */ +static mrb_value +mrb_uart_m_set_baudrate(mrb_state *mrb, mrb_value self) +{ + mrb_int baud; + mrb_get_args(mrb, "i", &baud); + mrb_int unit = mrb_integer(mrb_iv_get(mrb, self, MRB_IVSYM(unit_num))); + uint32_t actual = mrb_uart_set_baudrate((int)unit, (uint32_t)baud); + return mrb_fixnum_value(actual); +} + +/* UART#__set_format(data_bits, stop_bits, parity) */ +static mrb_value +mrb_uart_m_set_format(mrb_state *mrb, mrb_value self) +{ + mrb_int data_bits, stop_bits, parity; + mrb_get_args(mrb, "iii", &data_bits, &stop_bits, &parity); + mrb_int unit = mrb_integer(mrb_iv_get(mrb, self, MRB_IVSYM(unit_num))); + mrb_uart_set_format((int)unit, (uint32_t)data_bits, (uint32_t)stop_bits, (uint8_t)parity); + return mrb_nil_value(); +} + +/* UART#__set_flow_control(cts, rts) */ +static mrb_value +mrb_uart_m_set_flow(mrb_state *mrb, mrb_value self) +{ + mrb_bool cts, rts; + mrb_get_args(mrb, "bb", &cts, &rts); + mrb_int unit = mrb_integer(mrb_iv_get(mrb, self, MRB_IVSYM(unit_num))); + mrb_uart_set_flow_control((int)unit, cts, rts); + return mrb_nil_value(); +} + +/* UART#write(str) */ +static mrb_value +mrb_uart_m_write(mrb_state *mrb, mrb_value self) +{ + mrb_value str; + mrb_get_args(mrb, "S", &str); + mrb_int unit = mrb_integer(mrb_iv_get(mrb, self, MRB_IVSYM(unit_num))); + size_t len = RSTRING_LEN(str); + mrb_uart_write((int)unit, (const uint8_t*)RSTRING_PTR(str), len); + return mrb_fixnum_value(len); +} + +/* UART#read(len=nil) */ +static mrb_value +mrb_uart_m_read(mrb_state *mrb, mrb_value self) +{ + mrb_int len = -1; + mrb_get_args(mrb, "|i", &len); + + mrb_uart_ringbuf *rb = (mrb_uart_ringbuf*)mrb_data_get_ptr(mrb, self, &rxbuf_type); + int avail = mrb_uart_ringbuf_available(rb); + if (avail == 0) return mrb_nil_value(); + + if (len >= 0) { + if (avail < len) return mrb_nil_value(); + avail = (int)len; + } + + uint8_t *buf = (uint8_t*)mrb_malloc(mrb, avail); + int n = mrb_uart_ringbuf_pop(rb, buf, avail); + mrb_value str = mrb_str_new(mrb, (const char*)buf, n); + mrb_free(mrb, buf); + return str; +} + +/* UART#readpartial(maxlen) */ +static mrb_value +mrb_uart_m_readpartial(mrb_state *mrb, mrb_value self) +{ + mrb_int maxlen; + mrb_get_args(mrb, "i", &maxlen); + + mrb_uart_ringbuf *rb = (mrb_uart_ringbuf*)mrb_data_get_ptr(mrb, self, &rxbuf_type); + int avail = mrb_uart_ringbuf_available(rb); + if (avail == 0) return mrb_nil_value(); + if (avail > maxlen) avail = (int)maxlen; + + uint8_t *buf = (uint8_t*)mrb_malloc(mrb, avail); + int n = mrb_uart_ringbuf_pop(rb, buf, avail); + mrb_value str = mrb_str_new(mrb, (const char*)buf, n); + mrb_free(mrb, buf); + return str; +} + +/* UART#bytes_available */ +static mrb_value +mrb_uart_m_bytes_available(mrb_state *mrb, mrb_value self) +{ + mrb_uart_ringbuf *rb = (mrb_uart_ringbuf*)mrb_data_get_ptr(mrb, self, &rxbuf_type); + return mrb_fixnum_value(mrb_uart_ringbuf_available(rb)); +} + +/* UART#gets */ +static mrb_value +mrb_uart_m_gets(mrb_state *mrb, mrb_value self) +{ + mrb_uart_ringbuf *rb = (mrb_uart_ringbuf*)mrb_data_get_ptr(mrb, self, &rxbuf_type); + int pos = mrb_uart_ringbuf_search(rb, (uint8_t)'\n'); + if (pos < 0) return mrb_nil_value(); + int len = pos + 1; + uint8_t *buf = (uint8_t*)mrb_malloc(mrb, len); + mrb_uart_ringbuf_pop(rb, buf, len); + mrb_value str = mrb_str_new(mrb, (const char*)buf, len); + mrb_free(mrb, buf); + return str; +} + +/* UART#flush */ +static mrb_value +mrb_uart_m_flush(mrb_state *mrb, mrb_value self) +{ + mrb_int unit = mrb_integer(mrb_iv_get(mrb, self, MRB_IVSYM(unit_num))); + mrb_uart_flush((int)unit); + return self; +} + +/* UART#clear_tx_buffer */ +static mrb_value +mrb_uart_m_clear_tx(mrb_state *mrb, mrb_value self) +{ + mrb_int unit = mrb_integer(mrb_iv_get(mrb, self, MRB_IVSYM(unit_num))); + mrb_uart_clear_tx((int)unit); + return self; +} + +/* UART#clear_rx_buffer */ +static mrb_value +mrb_uart_m_clear_rx(mrb_state *mrb, mrb_value self) +{ + mrb_uart_ringbuf *rb = (mrb_uart_ringbuf*)mrb_data_get_ptr(mrb, self, &rxbuf_type); + mrb_uart_ringbuf_clear(rb); + mrb_int unit = mrb_integer(mrb_iv_get(mrb, self, MRB_IVSYM(unit_num))); + mrb_uart_clear_rx((int)unit); + return self; +} + +/* UART#send_break(duration_ms=100) */ +static mrb_value +mrb_uart_m_send_break(mrb_state *mrb, mrb_value self) +{ + mrb_int ms = 100; + mrb_get_args(mrb, "|i", &ms); + mrb_int unit = mrb_integer(mrb_iv_get(mrb, self, MRB_IVSYM(unit_num))); + mrb_uart_send_break((int)unit, (uint32_t)ms); + return self; +} + +void +mrb_hw_uart_gem_init(mrb_state *mrb) +{ + struct RClass *cls = mrb_define_class_id(mrb, MRB_SYM(UART), mrb->object_class); + MRB_SET_INSTANCE_TT(cls, MRB_TT_CDATA); + + mrb_define_method_id(mrb, cls, MRB_SYM(__open_rx_buffer), mrb_uart_m_open_rxbuf, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, cls, MRB_SYM(__open_connection), mrb_uart_m_open_conn, MRB_ARGS_REQ(3)); + mrb_define_method_id(mrb, cls, MRB_SYM(__set_baudrate), mrb_uart_m_set_baudrate, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, cls, MRB_SYM(__set_format), mrb_uart_m_set_format, MRB_ARGS_REQ(3)); + mrb_define_method_id(mrb, cls, MRB_SYM(__set_flow_control), mrb_uart_m_set_flow, MRB_ARGS_REQ(2)); + mrb_define_method_id(mrb, cls, MRB_SYM(write), mrb_uart_m_write, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, cls, MRB_SYM(read), mrb_uart_m_read, MRB_ARGS_OPT(1)); + mrb_define_method_id(mrb, cls, MRB_SYM(readpartial), mrb_uart_m_readpartial, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, cls, MRB_SYM(bytes_available), mrb_uart_m_bytes_available, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, cls, MRB_SYM(gets), mrb_uart_m_gets, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, cls, MRB_SYM(flush), mrb_uart_m_flush, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, cls, MRB_SYM(clear_tx_buffer), mrb_uart_m_clear_tx, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, cls, MRB_SYM(clear_rx_buffer), mrb_uart_m_clear_rx, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, cls, MRB_SYM(send_break), mrb_uart_m_send_break, MRB_ARGS_OPT(1)); +} + +void +mrb_hw_uart_gem_final(mrb_state *mrb) +{ +} diff --git a/mrbgems/math.gembox b/mrbgems/math.gembox index d2947314e3..b07842481a 100644 --- a/mrbgems/math.gembox +++ b/mrbgems/math.gembox @@ -7,8 +7,6 @@ MRuby::GemBox.new do |conf| # Use Rational/Complex numbers conf.gem :core => "mruby-rational" conf.gem :core => "mruby-complex" - # Use Complex math functions in CMath module - conf.gem :core => "mruby-cmath" # Use Multi-precision Integer conf.gem :core => "mruby-bigint" end diff --git a/mrbgems/metaprog.gembox b/mrbgems/metaprog.gembox index db35d307c6..315c817bfb 100644 --- a/mrbgems/metaprog.gembox +++ b/mrbgems/metaprog.gembox @@ -10,6 +10,12 @@ MRuby::GemBox.new do |conf| # Use eval() conf.gem :core => "mruby-eval" + # Use binding() + conf.gem :core => "mruby-binding" + + # Use Proc#binding() + conf.gem :core => "mruby-proc-binding" + # Use mruby-compiler to build other mrbgems conf.gem :core => "mruby-compiler" end diff --git a/mrbgems/mruby-array-ext/README.md b/mrbgems/mruby-array-ext/README.md new file mode 100644 index 0000000000..d565018103 --- /dev/null +++ b/mrbgems/mruby-array-ext/README.md @@ -0,0 +1,87 @@ +# mruby-array-ext + +This mrbgem extends the `Array` class in mruby, providing a rich set of additional methods for array manipulation. These extensions enhance mruby's built-in array capabilities, offering functionalities commonly found in standard Ruby. + +## Functionality + +The `mruby-array-ext` gem adds the following methods to the `Array` class: + +### Set Operations + +- `uniq`: Returns a new array by removing duplicate values. +- `uniq!`: Removes duplicate elements from self. Returns `nil` if no changes are made. +- `-` (difference): Returns a new array that is a copy of the original array, removing any items that also appear in another array. +- `|` (union): Returns a new array by joining this array with another array, removing duplicates. +- `&` (intersection): Returns a new array containing elements common to two arrays, with no duplicates. +- `difference`: Returns a new array that is a copy of the original array, removing all occurrences of any item that also appear in the specified other arrays. +- `union`: Returns a new array by joining this array with other arrays, removing duplicates. +- `intersection`: Returns a new array containing elements common to this array and specified other arrays, removing duplicates. +- `intersect?`: Returns `true` if the array and another array have at least one element in common. + +### Element Manipulation and Access + +- `flatten`: Returns a new array that is a one-dimensional flattening of self. +- `flatten!`: Flattens self in place. Returns `nil` if no modifications were made. +- `fetch`: Tries to return the element at a given position. Throws an `IndexError` if the index is out of bounds, unless a default value or block is provided. +- `fill`: Sets selected elements of self to a given object or the result of a block. +- `compact`: Returns a copy of self with all `nil` elements removed. +- `compact!`: Removes `nil` elements from self. Returns `nil` if no changes were made. +- `rotate(count=1)`: Returns a new array by rotating self so that the element at `count` is the first element. +- `rotate!(count=1)`: Rotates self in place. +- `insert(index, obj...)`: Inserts the given values before the element with the given index. +- `slice!(index)` / `slice!(start, length)` / `slice!(range)`: Deletes element(s) given by an index or range and returns the deleted object(s). +- `at(index)`: Returns the element at `index`. Returns `nil` if the index is out of range. +- `dig(idx, ...)`: Extracts a nested value specified by a sequence of indices. +- `fetch_values(idx, ...)`: Returns an array containing the values associated with the given indexes. Raises `IndexError` if an index is not found, unless a block is provided. +- `values_at(selector, ...)`: Returns an array containing the elements corresponding to the given selector(s). + +### Searching and Comparison + +- `assoc(obj)`: Searches through an array of arrays, comparing `obj` with the first element of each contained array. Returns the first matching contained array or `nil`. +- `rassoc(obj)`: Searches through an array of arrays, comparing `obj` with the second element of each contained array. Returns the first matching contained array or `nil`. +- `bsearch { |x| block }`: Finds a value from the array which meets the given condition using binary search. +- `bsearch_index { |x| block }`: Finds the index of a value from the array which meets the given condition using binary search. + +### Iterators and Combinatorics + +- `reverse_each`: Calls the given block for each element in reverse order. +- `permutation(n=self.size)`: Yields all permutations of length `n` of the elements of the array. +- `combination(n)`: Yields all combinations of length `n` of elements from the array. +- `product(*arys)`: Returns an array of all combinations of elements from self and the given arrays. +- `repeated_combination(n)`: Yields all repeated combinations of length `n`. +- `repeated_permutation(n)`: Yields all repeated permutations of length `n`. + +### Conversions and Other Utilities + +- `to_h`: Returns the result of interpreting the array as an array of `[key, value]` pairs. +- `transpose`: Assumes that self is an array of arrays and transposes the rows and columns. + +### Filtering + +- `delete_if { |item| block }`: Deletes every element of self for which the block evaluates to `true`. +- `reject! { |item| block }`: Equivalent to `delete_if`, but returns `nil` if no changes were made. +- `keep_if { |item| block }`: Deletes every element of self for which the given block evaluates to `false`. +- `select! { |item| block }`: Deletes elements for which the block returns a `false` value. Returns `self` if changes were made, otherwise `nil`. + +### Aliases + +- `append` (alias for `push`) +- `prepend` (alias for `unshift`) +- `filter!` (alias for `select!`) + +## Usage + +To use this gem, add it to your `build_config.rb` or `mrbgem.rake` file. For detailed examples of each method, please refer to the comments in the source code (`mrblib/array.rb` and `src/array.c`). + +Example: + +```ruby +a = [1, 2, 2, 3, nil, 4] +p a.compact #=> [1, 2, 2, 3, 4] +p a.uniq #=> [1, 2, 3, nil, 4] + +b = ["a", "b", "c"] +p b.rotate #=> ["b", "c", "a"] +``` + +This gem is part of the mruby project. diff --git a/mrbgems/mruby-array-ext/mrblib/array.rb b/mrbgems/mruby-array-ext/mrblib/array.rb index 1722a3631a..35abf95b40 100644 --- a/mrbgems/mruby-array-ext/mrblib/array.rb +++ b/mrbgems/mruby-array-ext/mrblib/array.rb @@ -4,8 +4,8 @@ class Array # ary.uniq! -> ary or nil # ary.uniq! { |item| ... } -> ary or nil # - # Removes duplicate elements from +self+. - # Returns nil if no changes are made (that is, no + # Removes duplicate elements from `self`. + # Returns `nil` if no changes are made (that is, no # duplicates are found). # # a = [ "a", "a", "b", "b", "c" ] @@ -16,24 +16,24 @@ class Array # c.uniq! { |s| s.first } # => [["student", "sam"], ["teacher", "matz"]] # def uniq!(&block) - hash = {} if block + hash = {} + result = [] self.each do |val| key = block.call(val) - hash[key] = val unless hash.key?(key) + unless hash.key?(key) + hash[key] = true + result << val + end end - result = hash.values - else - hash = {} - self.each do |val| - hash[val] = val + + if result.size == self.size + nil + else + self.replace(result) end - result = hash.keys - end - if result.size == self.size - nil else - self.replace(result) + __uniq! end end @@ -42,7 +42,7 @@ def uniq!(&block) # ary.uniq -> new_ary # ary.uniq { |item| ... } -> new_ary # - # Returns a new array by removing duplicate values in +self+. + # Returns a new array by removing duplicate values in `self`. # # a = [ "a", "a", "b", "b", "c" ] # a.uniq #=> ["a", "b", "c"] @@ -51,252 +51,18 @@ def uniq!(&block) # b.uniq { |s| s.first } # => [["student", "sam"], ["teacher", "matz"]] # def uniq(&block) - ary = self[0..-1] - ary.uniq!(&block) - ary - end - - ## - # call-seq: - # ary - other_ary -> new_ary - # - # Array Difference---Returns a new array that is a copy of - # the original array, removing any items that also appear in - # other_ary. (If you need set-like behavior, see the - # library class Set.) - # - # [ 1, 1, 2, 2, 3, 3, 4, 5 ] - [ 1, 2, 4 ] #=> [ 3, 3, 5 ] - # - def -(elem) - raise TypeError, "can't convert #{elem.class} into Array" unless elem.class == Array - - hash = {} - array = [] - idx = 0 - len = elem.size - while idx < len - hash[elem[idx]] = true - idx += 1 - end - idx = 0 - len = size - while idx < len - v = self[idx] - array << v unless hash[v] - idx += 1 - end - array - end - - ## - # call-seq: - # ary.difference(other_ary1, other_ary2, ...) -> new_ary - # - # Returns a new array that is a copy of the original array, removing all - # occurrences of any item that also appear in +other_ary+. The order is - # preserved from the original array. - # - def difference(*args) - ary = self - args.each do |x| - ary = ary - x - end - ary - end - - ## - # call-seq: - # ary | other_ary -> new_ary - # - # Set Union---Returns a new array by joining this array with - # other_ary, removing duplicates. - # - # [ "a", "b", "c" ] | [ "c", "d", "a" ] - # #=> [ "a", "b", "c", "d" ] - # - def |(elem) - raise TypeError, "can't convert #{elem.class} into Array" unless elem.class == Array - - ary = self + elem - ary.uniq! or ary - end - - ## - # call-seq: - # ary.union(other_ary,...) -> new_ary - # - # Set Union---Returns a new array by joining this array with - # other_ary, removing duplicates. - # - # ["a", "b", "c"].union(["c", "d", "a"], ["a", "c", "e"]) - # #=> ["a", "b", "c", "d", "e"] - # - def union(*args) - ary = self.dup - args.each do |x| - ary.concat(x) - ary.uniq! - end - ary - end - - ## - # call-seq: - # ary & other_ary -> new_ary - # - # Set Intersection---Returns a new array - # containing elements common to the two arrays, with no duplicates. - # - # [ 1, 1, 3, 5 ] & [ 1, 2, 3 ] #=> [ 1, 3 ] - # - def &(elem) - raise TypeError, "cannot convert #{elem.class} into Array" unless elem.class == Array - - hash = {} - array = [] - idx = 0 - len = elem.size - while idx < len - hash[elem[idx]] = true - idx += 1 - end - idx = 0 - len = size - while idx < len - v = self[idx] - if hash[v] - array << v - hash.delete v - end - idx += 1 - end - array - end - - ## - # call-seq: - # ary.intersection(other_ary,...) -> new_ary - # - # Set Intersection---Returns a new array containing elements common to - # this array and other_arys, removing duplicates. The order is - # preserved from the original array. - # - # [1, 2, 3].intersection([3, 4, 1], [1, 3, 5]) #=> [1, 3] - # - def intersection(*args) - ary = self - args.each do |x| - ary = ary & x - end - ary - end - - ## - # call-seq: - # ary.intersect?(other_ary) -> true or false - # - # Returns +true+ if the array and +other_ary+ have at least one element in - # common, otherwise returns +false+. - # - # a = [ 1, 2, 3 ] - # b = [ 3, 4, 5 ] - # c = [ 5, 6, 7 ] - # a.intersect?(b) #=> true - # a.intersect?(c) #=> false - def intersect?(ary) - raise TypeError, "cannot convert #{ary.class} into Array" unless ary.class == Array - - hash = {} - if self.length > ary.length - shorter = ary - longer = self - else - shorter = self - longer = ary - end - idx = 0 - len = shorter.size - while idx < len - hash[shorter[idx]] = true - idx += 1 - end - idx = 0 - len = size - while idx < len - v = longer[idx] - if hash[v] - return true - end - idx += 1 - end - false - end - - ## - # call-seq: - # ary.flatten -> new_ary - # ary.flatten(level) -> new_ary - # - # Returns a new array that is a one-dimensional flattening of this - # array (recursively). That is, for every element that is an array, - # extract its elements into the new array. If the optional - # level argument determines the level of recursion to flatten. - # - # s = [ 1, 2, 3 ] #=> [1, 2, 3] - # t = [ 4, 5, 6, [7, 8] ] #=> [4, 5, 6, [7, 8]] - # a = [ s, t, 9, 10 ] #=> [[1, 2, 3], [4, 5, 6, [7, 8]], 9, 10] - # a.flatten #=> [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - # a = [ 1, 2, [3, [4, 5] ] ] - # a.flatten(1) #=> [1, 2, 3, [4, 5]] - # - def flatten(depth=nil) - res = Array.new(self) - res.flatten! depth - res - end - - ## - # call-seq: - # ary.flatten! -> ary or nil - # ary.flatten!(level) -> array or nil - # - # Flattens +self+ in place. - # Returns nil if no modifications were made (i.e., - # ary contains no subarrays.) If the optional level - # argument determines the level of recursion to flatten. - # - # a = [ 1, 2, [3, [4, 5] ] ] - # a.flatten! #=> [1, 2, 3, 4, 5] - # a.flatten! #=> nil - # a #=> [1, 2, 3, 4, 5] - # a = [ 1, 2, [3, [4, 5] ] ] - # a.flatten!(1) #=> [1, 2, 3, [4, 5]] - # - def flatten!(depth=nil) - modified = false - ar = [] - idx = 0 - len = size - while idx < len - e = self[idx] - if e.is_a?(Array) && (depth.nil? || depth > 0) - ar += e.flatten(depth.nil? ? nil : depth - 1) - modified = true - else - ar << e - end - idx += 1 - end - if modified - self.replace(ar) + if block + ary = self.dup + ary.uniq!(&block) + ary else - nil + __uniq end end # for efficiency def reverse_each(&block) - return to_enum :reverse_each unless block + return to_enum(:reverse_each) unless block i = self.size - 1 while i>=0 @@ -312,15 +78,15 @@ def reverse_each(&block) # ary.fetch(index, default) -> obj # ary.fetch(index) { |index| block } -> obj # - # Tries to return the element at position +index+, but throws an IndexError - # exception if the referenced +index+ lies outside of the array bounds. This + # Tries to return the element at position `index`, but throws an IndexError + # exception if the referenced `index` lies outside of the array bounds. This # error can be prevented by supplying a second argument, which will act as a - # +default+ value. + # `default` value. # # Alternatively, if a block is given it will only be executed when an - # invalid +index+ is referenced. + # invalid `index` is referenced. # - # Negative values of +index+ count from the end of the array. + # Negative values of `index` count from the end of the array. # # a = [ 11, 22, 33, 44 ] # a.fetch(1) #=> 22 @@ -333,18 +99,18 @@ def reverse_each(&block) def fetch(n, ifnone=NONE, &block) #warn "block supersedes default value argument" if !n.nil? && ifnone != NONE && block - idx = n - if idx < 0 - idx += size - end - if idx < 0 || size <= idx - return block.call(n) if block - if ifnone == NONE - raise IndexError, "index #{n} outside of array bounds: #{-size}...#{size}" + if block + # Block case: use shared index helper + Ruby block handling + normalized_index = __normalize_index(n) + if normalized_index + self[normalized_index] + else + block.call(n) end - return ifnone + else + # Fast C implementation for non-block cases + __fetch(n, ifnone, NONE) end - self[idx] end ## @@ -356,17 +122,17 @@ def fetch(n, ifnone=NONE, &block) # ary.fill(start [, length] ) { |index| block } -> ary # ary.fill(range) { |index| block } -> ary # - # The first three forms set the selected elements of +self+ (which - # may be the entire array) to +obj+. + # The first three forms set the selected elements of `self` (which + # may be the entire array) to `obj`. # - # A +start+ of +nil+ is equivalent to zero. + # A `start` of `nil` is equivalent to zero. # - # A +length+ of +nil+ is equivalent to the length of the array. + # A `length` of `nil` is equivalent to the length of the array. # # The last three forms fill the array with the value of the given block, # which is passed the absolute index of each element to be filled. # - # Negative values of +start+ count from the end of the array, where +-1+ is + # Negative values of `start` count from the end of the array, where +-1+ is # the last element. # # a = [ "a", "b", "c", "d" ] @@ -385,65 +151,21 @@ def fill(arg0=nil, arg1=nil, arg2=nil, &block) raise ArgumentError, "wrong number of arguments (given 0, expected 1..3)" end - beg = len = 0 - if block - if arg0.nil? && arg1.nil? && arg2.nil? - # ary.fill { |index| block } -> ary - beg = 0 - len = self.size - elsif !arg0.nil? && arg0.kind_of?(Range) - # ary.fill(range) { |index| block } -> ary - beg = arg0.begin - beg += self.size if beg < 0 - len = arg0.end - len += self.size if len < 0 - len += 1 unless arg0.exclude_end? - elsif !arg0.nil? - # ary.fill(start [, length] ) { |index| block } -> ary - beg = arg0 - beg += self.size if beg < 0 - if arg1.nil? - len = self.size - else - len = arg0 + arg1 - end - end - else - if !arg0.nil? && arg1.nil? && arg2.nil? - # ary.fill(obj) -> ary - beg = 0 - len = self.size - elsif !arg0.nil? && !arg1.nil? && arg1.kind_of?(Range) - # ary.fill(obj, range ) -> ary - beg = arg1.begin - beg += self.size if beg < 0 - len = arg1.end - len += self.size if len < 0 - len += 1 unless arg1.exclude_end? - elsif !arg0.nil? && !arg1.nil? - # ary.fill(obj, start [, length]) -> ary - beg = arg1 - beg += self.size if beg < 0 - if arg2.nil? - len = self.size - else - len = beg + arg2 - end - end - end + # Use shared C argument parser for all cases + start, length = __fill_parse_arg(arg0, arg1, arg2, &block) - i = beg if block - while i < len + # Block-based filling in Ruby + i = start + while i < start + length self[i] = block.call(i) i += 1 end else - while i < len - self[i] = arg0 - i += 1 - end + # Use fast C implementation for value filling + __fill_exec(start, length, arg0) end + self end @@ -452,7 +174,7 @@ def fill(arg0=nil, arg1=nil, arg2=nil, &block) # ary.delete_if { |item| block } -> ary # ary.delete_if -> Enumerator # - # Deletes every element of +self+ for which block evaluates to +true+. + # Deletes every element of `self` for which block evaluates to `true`. # # The array is changed instantly every time the block is called, not after # the iteration is over. @@ -465,17 +187,18 @@ def fill(arg0=nil, arg1=nil, arg2=nil, &block) # scores.delete_if {|score| score < 80 } #=> [97] def delete_if(&block) - return to_enum :delete_if unless block + return to_enum(:delete_if) unless block + result = [] idx = 0 - while idx < self.size do - if block.call(self[idx]) - self.delete_at(idx) - else - idx += 1 - end + len = size + while idx < len + elem = self[idx] + result << elem unless block.call(elem) + idx += 1 end - self + + self.replace(result) end ## @@ -483,8 +206,8 @@ def delete_if(&block) # ary.reject! { |item| block } -> ary or nil # ary.reject! -> Enumerator # - # Equivalent to Array#delete_if, deleting elements from +self+ for which the - # block evaluates to +true+, but returns +nil+ if no changes were made. + # Equivalent to Array#delete_if, deleting elements from `self` for which the + # block evaluates to `true`, but returns `nil` if no changes were made. # # The array is changed instantly every time the block is called, not after # the iteration is over. @@ -494,43 +217,24 @@ def delete_if(&block) # If no block is given, an Enumerator is returned instead. def reject!(&block) - return to_enum :reject! unless block + return to_enum(:reject!) unless block - len = self.size + result = [] idx = 0 - while idx < self.size do - if block.call(self[idx]) - self.delete_at(idx) - else - idx += 1 - end - end - if self.size == len - nil - else - self + len = size + while idx < len + elem = self[idx] + result << elem unless block.call(elem) + idx += 1 end - end - ## - # call-seq: - # ary.insert(index, obj...) -> ary - # - # Inserts the given values before the element with the given +index+. - # - # Negative indices count backwards from the end of the array, where +-1+ is - # the last element. - # - # a = %w{ a b c d } - # a.insert(2, 99) #=> ["a", "b", 99, "c", "d"] - # a.insert(-2, 1, 2, 3) #=> ["a", "b", 99, "c", 1, 2, 3, "d"] + return nil if len == result.size - def insert(idx, *args) - idx += self.size + 1 if idx < 0 - self[idx, 0] = args - self + self.replace(result) end + + ## # call-seq: # ary.bsearch {|x| block } -> elem @@ -539,7 +243,7 @@ def insert(idx, *args) # the given condition in O(log n) where n is the size of the array. # # You can use this method in two use cases: a find-minimum mode and - # a find-any mode. In either case, the elements of the array must be + # a find-any mode. In either case, the elements of the array must be # monotone (or sorted) with respect to the block. # # In find-minimum mode (this is a good choice for typical use case), @@ -551,7 +255,7 @@ def insert(idx, *args) # - the block returns true for any element whose index is greater # than or equal to i. # - # This method returns the i-th element. If i is equal to ary.size, + # This method returns the i-th element. If i is equal to ary.size, # it returns nil. # # ary = [0, 4, 7, 10, 12] @@ -570,7 +274,7 @@ def insert(idx, *args) # j <= k < ary.size. # # Under this condition, this method returns any element whose index - # is within i...j. If i is equal to j (i.e., there is no element + # is within i...j. If i is equal to j (i.e., there is no element # that satisfies the block), this method returns nil. # # ary = [0, 4, 7, 10, 12] @@ -580,11 +284,11 @@ def insert(idx, *args) # ary.bsearch {|x| 4 - (x / 2).truncate } #=> nil # # You must not mix the two modes at a time; the block must always - # return either true/false, or always return a number. It is + # return either true/false, or always return a number. It is # undefined which value is actually picked up at each iteration. def bsearch(&block) - return to_enum :bsearch unless block + return to_enum(:bsearch) unless block if idx = bsearch_index(&block) self[idx] @@ -606,7 +310,7 @@ def bsearch(&block) # element itself. For more details consult the documentation for #bsearch. def bsearch_index(&block) - return to_enum :bsearch_index unless block + return to_enum(:bsearch_index) unless block low = 0 high = size @@ -614,7 +318,7 @@ def bsearch_index(&block) while low < high mid = ((low+high)/2).truncate - res = block.call self[mid] + res = block.call(self[mid]) case res when 0 # find-any mode: Found! @@ -645,8 +349,8 @@ def bsearch_index(&block) # ary.keep_if { |item| block } -> ary # ary.keep_if -> Enumerator # - # Deletes every element of +self+ for which the given block evaluates to - # +false+. + # Deletes every element of `self` for which the given block evaluates to + # `false`. # # See also Array#select! # @@ -656,17 +360,18 @@ def bsearch_index(&block) # a.keep_if { |val| val > 3 } #=> [4, 5] def keep_if(&block) - return to_enum :keep_if unless block + return to_enum(:keep_if) unless block + result = [] idx = 0 - while idx < self.size do - if block.call(self[idx]) - idx += 1 - else - self.delete_at(idx) - end + len = size + while idx < len + elem = self[idx] + result << elem if block.call(elem) + idx += 1 end - self + + self.replace(result) end ## @@ -674,17 +379,17 @@ def keep_if(&block) # ary.select! {|item| block } -> ary or nil # ary.select! -> Enumerator # - # Invokes the given block passing in successive elements from +self+, - # deleting elements for which the block returns a +false+ value. + # Invokes the given block passing in successive elements from `self`, + # deleting elements for which the block returns a `false` value. # - # If changes were made, it will return +self+, otherwise it returns +nil+. + # If changes were made, it will return `self`, otherwise it returns `nil`. # # See also Array#keep_if # # If no block is given, an Enumerator is returned instead. def select!(&block) - return to_enum :select! unless block + return to_enum(:select!) unless block result = [] idx = 0 @@ -694,48 +399,22 @@ def select!(&block) result << elem if block.call(elem) idx += 1 end - return nil if len == result.size - self.replace(result) - end - ## - # call-seq: - # ary.index(val) -> int or nil - # ary.index {|item| block } -> int or nil - # - # Returns the _index_ of the first object in +ary+ such that the object is - # == to +obj+. - # - # If a block is given instead of an argument, returns the _index_ of the - # first object for which the block returns +true+. Returns +nil+ if no - # match is found. - # - # ISO 15.2.12.5.14 - def index(val=NONE, &block) - return to_enum(:find_index, val) if !block && val == NONE + return nil if len == result.size - if block - idx = 0 - len = size - while idx < len - return idx if block.call self[idx] - idx += 1 - end - else - return self.__ary_index(val) - end - nil + self.replace(result) end ## # call-seq: # ary.dig(idx, ...) -> object # - # Extracts the nested value specified by the sequence of idx - # objects by calling +dig+ at each step, returning +nil+ if any - # intermediate step is +nil+. + # Extracts the nested value specified by the sequence of *idx* + # objects by calling `dig` at each step, returning `nil` if any + # intermediate step is `nil`. # def dig(idx,*args) + idx = idx.__to_int n = self[idx] if args.size > 0 n&.dig(*args) @@ -751,10 +430,10 @@ def dig(idx,*args) # ary.permutation(n) { |p| block } -> ary # ary.permutation(n) -> Enumerator # - # When invoked with a block, yield all permutations of length +n+ of the + # When invoked with a block, yield all permutations of length `n` of the # elements of the array, then return the array itself. # - # If +n+ is not specified, yield all permutations of all elements. + # If `n` is not specified, yield all permutations of all elements. # # The implementation makes no guarantees about the order in which the # permutations are yielded. @@ -771,26 +450,7 @@ def dig(idx,*args) # a.permutation(0).to_a #=> [[]] # one permutation of length 0 # a.permutation(4).to_a #=> [] # no permutations of length 4 def permutation(n=self.size, &block) - return to_enum(:permutation, n) unless block - size = self.size - if n == 0 - yield [] - elsif 0 < n && n <= size - i = 0 - while i 0 - ary = self[0...i] + self[i+1..-1] - ary.permutation(n-1) do |c| - yield result + c - end - else - yield result - end - i += 1 - end - end - self + __combination(:permutation, n, &block) end ## @@ -798,7 +458,7 @@ def permutation(n=self.size, &block) # ary.combination(n) { |c| block } -> ary # ary.combination(n) -> Enumerator # - # When invoked with a block, yields all combinations of length +n+ of elements + # When invoked with a block, yields all combinations of length `n` of elements # from the array and then returns the array itself. # # The implementation makes no guarantees about the order in which the @@ -817,27 +477,7 @@ def permutation(n=self.size, &block) # a.combination(5).to_a #=> [] # no combinations of length 5 def combination(n, &block) - return to_enum(:combination, n) unless block - size = self.size - if n == 0 - yield [] - elsif n == 1 - i = 0 - while i Hash # ary.to_h{|item| ... } -> Hash # - # Returns the result of interpreting array as an array of - # [key, value] pairs. If a block is given, it should - # return [key, value] pairs to construct a hash. + # Returns the result of interpreting *array* as an array of + # `[key, value]` pairs. If a block is given, it should + # return `[key, value]` pairs to construct a hash. # # [[:foo, :bar], [1, 2]].to_h # # => {:foo => :bar, 1 => 2} @@ -899,121 +539,172 @@ def to_h(&blk) ## # call-seq: - # ary.product(*arys) -> array - # ary.product(*arys) { |item| ... } -> self - def product(*arys, &block) - size = arys.size - i = size - while i > 0 - i -= 1 - unless arys[i].kind_of?(Array) - raise TypeError, "no implicit conversion into Array" - end - end - - i = size - total = self.size - total *= arys[i -= 1].size while i > 0 - + # ary.fetch_values(idx, ...) -> array + # ary.fetch_values(idx, ...) { |i| block } -> array + # + # Returns an array containing the values associated with the given indexes. + # but also raises `IndexError` when one of indexes can't be found. + # Also see `Array#values_at` and `Array#fetch`. + # + # a = ["cat", "dog", "cow"] + # + # a.fetch_values(2, 0) #=> ["cow", "cat"] + # a.fetch_values(2, 5) # raises KeyError + # a.fetch_values(2, 5) {|i| "BIRD" } #=> ["cow", "BIRD"] + # + def fetch_values(*idx, &block) if block - result = self - list = ->(*, e) { block.call e } - class << list; alias []= call; end + idx.map do |i| + self.fetch(i, &block) + end else - result = [nil] * total - list = result + # Fast path: use C implementation for non-block cases + idx.map do |i| + __fetch(i, NONE, NONE) + end end + end - i = 0 - while i < total - group = [nil] * (size + 1) - j = size - n = i - while j > 0 - j -= 1 - a = arys[j] - b = a.size - group[j + 1] = a[n % b] - n /= b + ## + # call-seq: + # ary.product(*arys) -> array + # ary.product(*arys) { |item| ... } -> self + def product(*arys, &block) + gen = __product_generate(arys, &block) + return gen unless block + + if gen + while group = __product_next(arys, gen) + yield group end - group[0] = self[n] - list[i] = group - i += 1 end - - result + self end ## # call-seq: - # ary.repeated_combination(n) { |combination| ... } -> self - # ary.repeated_combination(n) -> enumerator + # ary.repeated_combination(n) { |combination| ... } -> ary + # ary.repeated_combination(n) -> Enumerator + # + # When invoked with a block, yields all length `n` combinations of elements + # from the array, with replacement, and then returns the array itself. + # + # This means that, unlike `combination`, elements can be chosen more than once. + # + # The implementation makes no guarantees about the order in which the + # combinations are yielded. # - # A +combination+ method that contains the same elements. + # If no block is given, an Enumerator is returned instead. + # + # Examples: + # + # a = [1, 2, 3] + # a.repeated_combination(2).to_a #=> [[1,1],[1,2],[1,3],[2,2],[2,3],[3,3]] def repeated_combination(n, &block) - raise TypeError, "no implicit conversion into Integer" unless 0 <=> n - return to_enum(:repeated_combination, n) unless block - __repeated_combination(n, false, &block) + __combination(:repeated_combination, n, &block) end ## # call-seq: - # ary.repeated_permutation(n) { |permutation| ... } -> self - # ary.repeated_permutation(n) -> enumerator + # ary.repeated_permutation(n) { |permutation| ... } -> ary + # ary.repeated_permutation(n) -> Enumerator + # + # When invoked with a block, yields all length `n` permutations of elements + # from the array, with replacement, and then returns the array itself. + # + # This means that, unlike `permutation`, elements can be chosen more than once. + # + # The implementation makes no guarantees about the order in which the + # permutations are yielded. + # + # If no block is given, an Enumerator is returned instead. + # + # Examples: # - # A +permutation+ method that contains the same elements. + # a = [1, 2] + # a.repeated_permutation(2).to_a #=> [[1,1],[1,2],[2,1],[2,2]] def repeated_permutation(n, &block) - raise TypeError, "no implicit conversion into Integer" unless 0 <=> n - return to_enum(:repeated_permutation, n) unless block - __repeated_combination(n, true, &block) + __combination(:repeated_permutation, n, &block) end - def __repeated_combination(n, permutation, &block) - case n + def __combination(mode, k, &block) + k = k.__to_int + return to_enum(mode, k) unless block + + case k when 0 yield [] when 1 + # Keep fast Ruby path for k=1 i = 0 while i < self.size yield [self[i]] i += 1 end else - if n > 0 - v = [0] * n - while true - tmp = [nil] * n - i = 0 - while i < n - tmp[i] = self[v[i]] - i += 1 - end - + if state = __combination_init(mode, k) + # Use C iterator for complex cases + while tmp = __combination_next(state) yield tmp - - tmp = self.size - i = n - 1 - while i >= 0 - v[i] += 1 - break if v[i] < tmp - i -= 1 - end - break unless v[0] < tmp - i = 1 - while i < n - unless v[i] < tmp - if permutation - v[i] = 0 - else - v[i] = v[i - 1] - end - end - i += 1 - end end end end self end + + ## + # call-seq: + # ary.find(ifnone = nil) { |elem| block } -> obj or nil + # ary.find(ifnone = nil) -> Enumerator + # + # Returns the first element for which the block returns a true value. + # If no element matches and +ifnone+ is given, calls +ifnone+ and + # returns its result. Otherwise returns +nil+. + # + # This is an optimized version of Enumerable#find for arrays. + # + # [1, 2, 3, 4].find { |x| x > 2 } #=> 3 + # [1, 2, 3, 4].find { |x| x > 10 } #=> nil + # [1, 2, 3, 4].find(->{0}) { |x| x > 10 } #=> 0 + # + def find(ifnone=nil, &block) + return to_enum(:find, ifnone) unless block + + idx = 0 + len = self.size + while idx < len + elem = self[idx] + return elem if block.call(elem) + idx += 1 + end + ifnone&.call + end + + ## + # call-seq: + # ary.rfind(ifnone = nil) { |elem| block } -> obj or nil + # ary.rfind(ifnone = nil) -> Enumerator + # + # Returns the last element for which the block returns a true value. + # Searches from the end of the array to the beginning. + # If no element matches and +ifnone+ is given, calls +ifnone+ and + # returns its result. Otherwise returns +nil+. + # + # [1, 2, 3, 4, 3].rfind { |x| x == 3 } #=> 3 (the last one) + # [1, 2, 3, 4].rfind { |x| x > 2 } #=> 4 + # [1, 2, 3, 4].rfind { |x| x > 10 } #=> nil + # [1, 2, 3, 4].rfind(->{0}) { |x| x > 10 } #=> 0 + # + def rfind(ifnone=nil, &block) + return to_enum(:rfind, ifnone) unless block + + idx = self.size - 1 + while idx >= 0 + elem = self[idx] + return elem if block.call(elem) + idx -= 1 + end + ifnone&.call + end end diff --git a/mrbgems/mruby-array-ext/src/array.c b/mrbgems/mruby-array-ext/src/array.c index 97a3946919..62a731c363 100644 --- a/mrbgems/mruby-array-ext/src/array.c +++ b/mrbgems/mruby-array-ext/src/array.c @@ -1,10 +1,56 @@ #include +#include #include #include #include #include +#include +#include +#include #include -#include +#include +#include + +/* khash set for temporary array operations */ +static inline khint_t +ary_set_hash_func(mrb_state *mrb, mrb_value key) +{ + return (khint_t)mrb_obj_hash_code(mrb, key); +} + +static inline mrb_bool +ary_set_equal_func(mrb_state *mrb, mrb_value a, mrb_value b) +{ + return mrb_eql(mrb, a, b); +} + +KHASH_DECLARE(ary_set, mrb_value, char, 0) +KHASH_DEFINE(ary_set, mrb_value, char, 0, ary_set_hash_func, ary_set_equal_func) + +typedef khash_t(ary_set) ary_set_t; + +/* Combination state structure for repeated_combination optimization */ +struct mrb_combination_state { + mrb_int *indices; + mrb_int n, k; /* nPk, nCk */ + int mode; +}; + +static void +mrb_combination_state_free(mrb_state *mrb, void *ptr) +{ + struct mrb_combination_state *state = (struct mrb_combination_state*)ptr; + if (state) { + if (state->indices) { + mrb_free(mrb, state->indices); + } + mrb_free(mrb, state); + } +} + +static struct mrb_data_type mrb_combination_state_type = { + "CombinationState", mrb_combination_state_free +}; /* * call-seq: @@ -15,8 +61,8 @@ * using obj.==. * Returns the first contained array that matches (that * is, the first associated array), - * or +nil+ if no match is found. - * See also Array#rassoc. + * or `nil` if no match is found. + * See also `Array#rassoc`. * * s1 = [ "colors", "red", "blue", "green" ] * s2 = [ "letters", "a", "b", "c" ] @@ -27,17 +73,18 @@ */ static mrb_value -mrb_ary_assoc(mrb_state *mrb, mrb_value ary) +ary_assoc(mrb_state *mrb, mrb_value ary) { - mrb_int i; - mrb_value v; mrb_value k = mrb_get_arg1(mrb); - for (i = 0; i < RARRAY_LEN(ary); ++i) { - v = mrb_check_array_type(mrb, RARRAY_PTR(ary)[i]); + int ai = mrb_gc_arena_save(mrb); + for (mrb_int i = 0; i < RARRAY_LEN(ary); i++) { + mrb_value v = mrb_check_array_type(mrb, RARRAY_PTR(ary)[i]); + mrb_gc_protect(mrb, v); // v may be removed from ary by mrb_equal() if (!mrb_nil_p(v) && RARRAY_LEN(v) > 0 && mrb_equal(mrb, RARRAY_PTR(v)[0], k)) return v; + mrb_gc_arena_restore(mrb, ai); } return mrb_nil_value(); } @@ -48,8 +95,8 @@ mrb_ary_assoc(mrb_state *mrb, mrb_value ary) * * Searches through the array whose elements are also arrays. Compares * _obj_ with the second element of each contained array using - * ==. Returns the first contained array that matches. See - * also Array#assoc. + * `==`. Returns the first contained array that matches. See + * also `Array#assoc`. * * a = [ [ 1, "one"], [2, "two"], [3, "three"], ["ii", "two"] ] * a.rassoc("two") #=> [2, "two"] @@ -57,18 +104,19 @@ mrb_ary_assoc(mrb_state *mrb, mrb_value ary) */ static mrb_value -mrb_ary_rassoc(mrb_state *mrb, mrb_value ary) +ary_rassoc(mrb_state *mrb, mrb_value ary) { - mrb_int i; - mrb_value v; mrb_value value = mrb_get_arg1(mrb); - for (i = 0; i < RARRAY_LEN(ary); ++i) { - v = RARRAY_PTR(ary)[i]; + int ai = mrb_gc_arena_save(mrb); + for (mrb_int i = 0; i < RARRAY_LEN(ary); i++) { + mrb_value v = RARRAY_PTR(ary)[i]; + mrb_gc_protect(mrb, v); // v may be removed from ary by mrb_equal() if (mrb_array_p(v) && RARRAY_LEN(v) > 1 && mrb_equal(mrb, RARRAY_PTR(v)[1], value)) return v; + mrb_gc_arena_restore(mrb, ai); } return mrb_nil_value(); } @@ -78,8 +126,8 @@ mrb_ary_rassoc(mrb_state *mrb, mrb_value ary) * ary.at(index) -> obj or nil * * Returns the element at _index_. A - * negative index counts from the end of +self+. Returns +nil+ - * if the index is out of range. See also Array#[]. + * negative index counts from the end of `self`. Returns `nil` + * if the index is out of range. See also `Array#[]`. * * a = [ "a", "b", "c", "d", "e" ] * a.at(0) #=> "a" @@ -87,27 +135,39 @@ mrb_ary_rassoc(mrb_state *mrb, mrb_value ary) */ static mrb_value -mrb_ary_at(mrb_state *mrb, mrb_value ary) +ary_at(mrb_state *mrb, mrb_value ary) { - mrb_int pos; - mrb_get_args(mrb, "i", &pos); + mrb_int pos = mrb_as_int(mrb, mrb_get_arg1(mrb)); return mrb_ary_entry(ary, pos); } +/* Helper function for values_at - returns element at index n */ static mrb_value ary_ref(mrb_state *mrb, mrb_value ary, mrb_int n) { return mrb_ary_entry(ary, n); } +/* + * call-seq: + * ary.values_at(selector, ...) -> new_ary + * + * Returns an array containing the elements in `self` corresponding to the + * given `selector`(s). The selectors may be either integer indices or ranges. + * + * a = %w{ a b c d e f } + * a.values_at(1, 3, 5) # => ["b", "d", "f"] + * a.values_at(1, 3, 5, 7) # => ["b", "d", "f", nil] + * a.values_at(-1, -2, -2, -7) # => ["f", "e", "e", nil] + * a.values_at(4..6, 3...6) # => ["e", "f", nil, "d", "e", "f"] + */ + static mrb_value -mrb_ary_values_at(mrb_state *mrb, mrb_value self) +ary_values_at(mrb_state *mrb, mrb_value self) { - mrb_int argc; - const mrb_value *argv; - - mrb_get_args(mrb, "*", &argv, &argc); + mrb_int argc = mrb_get_argc(mrb); + const mrb_value *argv = mrb_get_argv(mrb); return mrb_get_values_at(mrb, self, RARRAY_LEN(self), argc, argv, ary_ref); } @@ -120,10 +180,10 @@ mrb_value mrb_ary_delete_at(mrb_state *mrb, mrb_value self); * ary.slice!(start, length) -> new_ary or nil * ary.slice!(range) -> new_ary or nil * - * Deletes the element(s) given by an +index+ (optionally up to +length+ - * elements) or by a +range+. + * Deletes the element(s) given by an `index` (optionally up to `length` + * elements) or by a `range`. * - * Returns the deleted object (or objects), or +nil+ if the +index+ is out of + * Returns the deleted object (or objects), or `nil` if the `index` is out of * range. * * a = [ "a", "b", "c" ] @@ -136,12 +196,10 @@ mrb_value mrb_ary_delete_at(mrb_state *mrb, mrb_value self); */ static mrb_value -mrb_ary_slice_bang(mrb_state *mrb, mrb_value self) +ary_slice_bang(mrb_state *mrb, mrb_value self) { struct RArray *a = mrb_ary_ptr(self); - mrb_int i, j, k, len, alen; - mrb_value *ptr; - mrb_value ary; + mrb_int i, len; mrb_ary_modify(mrb, a); @@ -149,105 +207,99 @@ mrb_ary_slice_bang(mrb_state *mrb, mrb_value self) mrb_value index = mrb_get_arg1(mrb); if (mrb_type(index) == MRB_TT_RANGE) { - if (mrb_range_beg_len(mrb, index, &i, &len, ARY_LEN(a), TRUE) == MRB_RANGE_OK) { - goto delete_pos_len; + if (mrb_range_beg_len(mrb, index, &i, &len, ARY_LEN(a), TRUE) != MRB_RANGE_OK) { + return mrb_nil_value(); } - return mrb_nil_value(); } - return mrb_ary_delete_at(mrb, self); + else { + return mrb_ary_delete_at(mrb, self); + } + } + else { + mrb_get_args(mrb, "ii", &i, &len); } - mrb_get_args(mrb, "ii", &i, &len); - delete_pos_len: - alen = ARY_LEN(a); + mrb_int alen = ARY_LEN(a); if (i < 0) i += alen; if (i < 0 || alen < i) return mrb_nil_value(); if (len < 0) return mrb_nil_value(); if (alen == i) return mrb_ary_new(mrb); if (len > alen - i) len = alen - i; - ary = mrb_ary_new_capa(mrb, len); - ptr = ARY_PTR(a); - for (j = i, k = 0; k < len; ++j, ++k) { - mrb_ary_push(mrb, ary, ptr[j]); - } + mrb_value ary = mrb_ary_new_from_values(mrb, len, ARY_PTR(a) + i); + + /* refresh pointer after mrb_ary_new_from_values */ + a = mrb_ary_ptr(self); - ptr += i; - for (j = i; j < alen - len; ++j) { - *ptr = *(ptr+len); - ++ptr; + for (int j = i; j < alen - len; j++) { + ARY_PTR(a)[j] = ARY_PTR(a)[j+len]; } mrb_ary_resize(mrb, self, alen - len); return ary; } -/* - * call-seq: - * ary.compact -> new_ary - * - * Returns a copy of +self+ with all +nil+ elements removed. - * - * [ "a", nil, "b", nil, "c", nil ].compact - * #=> [ "a", "b", "c" ] - */ - -static mrb_value -mrb_ary_compact(mrb_state *mrb, mrb_value self) -{ - mrb_value ary = mrb_ary_new(mrb); - mrb_int len = RARRAY_LEN(self); - mrb_value *p = RARRAY_PTR(self); - - for (mrb_int i = 0; i < len; ++i) { - if (!mrb_nil_p(p[i])) { - mrb_ary_push(mrb, ary, p[i]); - } - } - return ary; -} - /* * call-seq: * ary.compact! -> ary or nil * - * Removes +nil+ elements from the array. - * Returns +nil+ if no changes were made, otherwise returns - * ary. + * Removes `nil` elements from the array. + * Returns `nil` if no changes were made, otherwise returns + * *ary*. * * [ "a", nil, "b", nil, "c" ].compact! #=> [ "a", "b", "c" ] * [ "a", "b", "c" ].compact! #=> nil */ static mrb_value -mrb_ary_compact_bang(mrb_state *mrb, mrb_value self) +ary_compact_bang(mrb_state *mrb, mrb_value self) { struct RArray *a = mrb_ary_ptr(self); mrb_int i, j = 0; mrb_int len = ARY_LEN(a); - mrb_value *p = ARY_PTR(a); mrb_ary_modify(mrb, a); - for (i = 0; i < len; ++i) { - if (!mrb_nil_p(p[i])) { - if (i != j) p[j] = p[i]; + /* a is still valid here, as mrb_ary_modify only modifies the RArray struct, not reallocates it */ + /* Hoist pointer retrieval outside loop to avoid repeated conditionals */ + mrb_value *ptr = RARRAY_PTR(self); + for (i = 0; i < len; i++) { + if (!mrb_nil_p(ptr[i])) { + if (i != j) ptr[j] = ptr[i]; j++; } } if (i == j) return mrb_nil_value(); - if (j < len) ARY_SET_LEN(RARRAY(self), j); + ARY_SET_LEN(RARRAY(self), j); return self; } +/* + * call-seq: + * ary.compact -> new_ary + * + * Returns a copy of `self` with all `nil` elements removed. + * + * [ "a", nil, "b", nil, "c", nil ].compact + * #=> [ "a", "b", "c" ] + */ + +static mrb_value +ary_compact(mrb_state *mrb, mrb_value self) +{ + mrb_value ary = mrb_ary_dup(mrb, self); + ary_compact_bang(mrb, ary); + return ary; +} + /* * call-seq: * ary.rotate(count=1) -> new_ary * - * Returns a new array by rotating +self+ so that the element at +count+ is + * Returns a new array by rotating `self` so that the element at `count` is * the first element of the new array. * - * If +count+ is negative then it rotates in the opposite direction, starting - * from the end of +self+ where +-1+ is the last element. + * If `count` is negative then it rotates in the opposite direction, starting + * from the end of `self` where +-1+ is the last element. * * a = [ "a", "b", "c", "d" ] * a.rotate #=> ["b", "c", "d", "a"] @@ -256,14 +308,13 @@ mrb_ary_compact_bang(mrb_state *mrb, mrb_value self) * a.rotate(-3) #=> ["b", "c", "d", "a"] */ static mrb_value -mrb_ary_rotate(mrb_state *mrb, mrb_value self) +ary_rotate(mrb_state *mrb, mrb_value self) { mrb_int count=1; mrb_get_args(mrb, "|i", &count); mrb_value ary = mrb_ary_new(mrb); mrb_int len = RARRAY_LEN(self); - mrb_value *p = RARRAY_PTR(self); mrb_int idx; if (len <= 0) return ary; @@ -273,13 +324,16 @@ mrb_ary_rotate(mrb_state *mrb, mrb_value self) else { idx = count % len; } + /* Hoist pointer retrieval outside loop */ + mrb_value *ptr = RARRAY_PTR(self); for (mrb_int i = 0; i ary * - * Rotates +self+ in place so that the element at +count+ comes first, and - * returns +self+. + * Rotates `self` in place so that the element at `count` comes first, and + * returns `self`. * - * If +count+ is negative then it rotates in the opposite direction, starting - * from the end of the array where +-1+ is the last element. + * If `count` is negative then it rotates in the opposite direction, starting + * from the end of the array where `-1` is the last element. * * a = [ "a", "b", "c", "d" ] * a.rotate! #=> ["b", "c", "d", "a"] @@ -307,17 +361,17 @@ rev(mrb_value *p, mrb_int beg, mrb_int end) * a.rotate!(-3) #=> ["a", "b", "c", "d"] */ static mrb_value -mrb_ary_rotate_bang(mrb_state *mrb, mrb_value self) +ary_rotate_bang(mrb_state *mrb, mrb_value self) { mrb_int count=1; mrb_get_args(mrb, "|i", &count); struct RArray *a = mrb_ary_ptr(self); mrb_int len = ARY_LEN(a); - mrb_value *p = ARY_PTR(a); mrb_int idx; mrb_ary_modify(mrb, a); + mrb_value *p = ARY_PTR(a); if (len == 0 || count == 0) return self; if (count == 1) { mrb_value v = p[0]; @@ -348,20 +402,1349 @@ mrb_ary_rotate_bang(mrb_state *mrb, mrb_value self) return self; } +#define SET_OP_HASH_THRESHOLD 32 + +/* Helper functions for temporary khash sets */ +static void +ary_init_temp_set(mrb_state *mrb, ary_set_t *set, mrb_int capacity) +{ + kh_init_data(ary_set, mrb, set, (khint_t)(capacity > 0 ? capacity : 8)); +} + +static void +ary_populate_temp_set(mrb_state *mrb, ary_set_t *set, mrb_value ary) +{ + int ai = mrb_gc_arena_save(mrb); + for (mrb_int i = 0; i < RARRAY_LEN(ary); i++) { + mrb_value p = RARRAY_PTR(ary)[i]; + mrb_gc_protect(mrb, p); // p may be removed from ary by kh_put(ary_set, ...) + kh_put(ary_set, mrb, set, p); + mrb_gc_arena_restore(mrb, ai); + } +} + +static void +ary_destroy_temp_set(mrb_state *mrb, ary_set_t *set) +{ + if (set) { + kh_destroy_data(ary_set, mrb, set); + } +} + + +static mrb_int +ary_get_array_args(mrb_state *mrb, mrb_int argc, const mrb_value **argv_ptr) +{ + mrb_int total_len = 0; + const mrb_value *argv = *argv_ptr; + mrb_value *converted_argv = (mrb_value *)mrb_alloca(mrb, sizeof(mrb_value) * argc); + + for (mrb_int i = 0; i < argc; i++) { + mrb_value other = mrb_check_array_type(mrb, argv[i]); + if (mrb_nil_p(other)) { + mrb_raise(mrb, E_TYPE_ERROR, "can't convert passed argument to Array"); + } + converted_argv[i] = other; + total_len += RARRAY_LEN(other); + } + *argv_ptr = converted_argv; + return total_len; +} + +struct ary_subtract_ctx { + ary_set_t *set; + mrb_value self; + mrb_value result; + const mrb_value *argv; + mrb_int argc; +}; + +static mrb_value +ary_subtract_body(mrb_state *mrb, void *data) +{ + struct ary_subtract_ctx *ctx = (struct ary_subtract_ctx *)data; + + for (mrb_int i = 0; i < ctx->argc; i++) { + ary_populate_temp_set(mrb, ctx->set, ctx->argv[i]); + } + + int ai = mrb_gc_arena_save(mrb); + for (mrb_int i = 0; i < RARRAY_LEN(ctx->self); i++) { + mrb_value p = RARRAY_PTR(ctx->self)[i]; + mrb_gc_protect(mrb, p); // p may be removed from self by kh_get(ary_set, ...) + khiter_t k = kh_get(ary_set, mrb, ctx->set, p); + if (kh_is_end(ctx->set, k)) { /* key doesn't exist in any ary */ + mrb_ary_push(mrb, ctx->result, p); + } + mrb_gc_arena_restore(mrb, ai); + } + + return ctx->result; +} + +static mrb_value +ary_subtract_internal(mrb_state *mrb, mrb_value self, mrb_int argc, const mrb_value *argv) +{ + if (argc == 0) { + return mrb_ary_dup(mrb, self); + } + + mrb_int total_len = ary_get_array_args(mrb, argc, &argv); + + mrb_value result = mrb_ary_new(mrb); + + if (total_len > SET_OP_HASH_THRESHOLD) { + /* Create shared copies to protect elements during khash operations */ + mrb_value *argv_copies = (mrb_value *)mrb_alloca(mrb, sizeof(mrb_value) * argc); + for (mrb_int i = 0; i < argc; i++) { + argv_copies[i] = mrb_ary_make_shared_copy(mrb, argv[i]); + } + + ary_set_t set_struct; + ary_set_t *set = &set_struct; + ary_init_temp_set(mrb, set, total_len); + + struct ary_subtract_ctx ctx = { set, self, result, argv_copies, argc }; + MRB_ENSURE(mrb, result, ary_subtract_body, &ctx) { + ary_destroy_temp_set(mrb, set); + } + } + else { + int ai = mrb_gc_arena_save(mrb); + for (mrb_int i = 0; i < RARRAY_LEN(self); i++) { + mrb_value p = RARRAY_PTR(self)[i]; + mrb_gc_protect(mrb, p); // p may be removed from self by mrb_equal() + mrb_bool found = FALSE; + for (mrb_int j = 0; j < argc; j++) { + for (mrb_int k = 0; k < RARRAY_LEN(argv[j]); k++) { + if (mrb_equal(mrb, p, RARRAY_PTR(argv[j])[k])) { + found = TRUE; + break; + } + } + if (found) break; + } + if (!found) { + mrb_ary_push(mrb, result, p); + } + mrb_gc_arena_restore(mrb, ai); + } + } + + return result; +} + +/* + * call-seq: + * ary - other_ary -> new_ary + * + * Returns a new array that is a copy of the original array, with any items + * that also appear in `other_ary` removed. + * + * [ 1, 1, 2, 2, 3, 3, 4, 5 ] - [ 1, 2, 4 ] #=> [ 3, 3, 5 ] + */ + +static mrb_value +ary_sub(mrb_state *mrb, mrb_value self) +{ + mrb_value other; + mrb_get_args(mrb, "A", &other); + return ary_subtract_internal(mrb, self, 1, &other); +} + +/* + * call-seq: + * ary.difference(other_ary, ...) -> new_ary + * + * Returns a new array that is a copy of the original array, removing all + * occurrences of any item that also appear in any of the `other_ary`s. + * The order is preserved from the original array. + * + * [1, 2, 3, 4, 5].difference([2, 4], [1, 5]) #=> [3] + */ +static mrb_value +ary_difference(mrb_state *mrb, mrb_value self) +{ + const mrb_value *argv; + mrb_int argc; + mrb_get_args(mrb, "*", &argv, &argc); + return ary_subtract_internal(mrb, self, argc, argv); +} + +static void +add_uniq(mrb_state *mrb, mrb_value item, mrb_value result) +{ + for (mrb_int i = 0; i < RARRAY_LEN(result); i++) { + if (mrb_eql(mrb, item, RARRAY_PTR(result)[i])) { + return; + } + } + mrb_ary_push(mrb, result, item); +} + +struct ary_union_ctx { + ary_set_t *set; + mrb_value self_copy; + mrb_value result; + const mrb_value *argv; + mrb_int argc; +}; + +static mrb_value +ary_union_body(mrb_state *mrb, void *data) +{ + struct ary_union_ctx *ctx = (struct ary_union_ctx *)data; + int ai = mrb_gc_arena_save(mrb); + + /* Add unique elements from self */ + for (mrb_int i = 0; i < RARRAY_LEN(ctx->self_copy); i++) { + mrb_value elem = RARRAY_PTR(ctx->self_copy)[i]; + mrb_gc_protect(mrb, elem); // elem may be removed from self_copy by kh_get(ary_set, ...) + khiter_t k = kh_get(ary_set, mrb, ctx->set, elem); + if (kh_is_end(ctx->set, k)) { + kh_put(ary_set, mrb, ctx->set, elem); + mrb_ary_push(mrb, ctx->result, elem); + } + mrb_gc_arena_restore(mrb, ai); + } + + /* Add unique elements from others */ + for (mrb_int i = 0; i < ctx->argc; i++) { + mrb_value other = ctx->argv[i]; + for (mrb_int j = 0; j < RARRAY_LEN(other); j++) { + mrb_value elem = RARRAY_PTR(other)[j]; + mrb_gc_protect(mrb, elem); // elem may be removed from other by kh_get(ary_set, ...) + khiter_t k = kh_get(ary_set, mrb, ctx->set, elem); + if (kh_is_end(ctx->set, k)) { + kh_put(ary_set, mrb, ctx->set, elem); + mrb_ary_push(mrb, ctx->result, elem); + } + mrb_gc_arena_restore(mrb, ai); + } + } + + return ctx->result; +} + +static mrb_value +ary_union_internal(mrb_state *mrb, mrb_value self, mrb_int argc, const mrb_value *argv) +{ + mrb_int total_len = ary_get_array_args(mrb, argc, &argv) + RARRAY_LEN(self); + + mrb_value result = mrb_ary_new(mrb); + + if (total_len > SET_OP_HASH_THRESHOLD) { + /* Create shared copies to protect elements during khash operations */ + mrb_value self_copy = mrb_ary_make_shared_copy(mrb, self); + mrb_value *argv_copies = (mrb_value *)mrb_alloca(mrb, sizeof(mrb_value) * argc); + for (mrb_int i = 0; i < argc; i++) { + argv_copies[i] = mrb_ary_make_shared_copy(mrb, argv[i]); + } + + ary_set_t set_struct; + ary_set_t *set = &set_struct; + ary_init_temp_set(mrb, set, total_len); + + struct ary_union_ctx ctx = { set, self_copy, result, argv_copies, argc }; + MRB_ENSURE(mrb, result, ary_union_body, &ctx) { + ary_destroy_temp_set(mrb, set); + } + } + else { + int ai = mrb_gc_arena_save(mrb); + + /* Use linear search for small arrays */ + /* Add unique elements from self */ + for (mrb_int i = 0; i < RARRAY_LEN(self); i++) { + mrb_value p = RARRAY_PTR(self)[i]; + mrb_gc_protect(mrb, p); // p may be removed from self by add_uniq() + add_uniq(mrb, p, result); + mrb_gc_arena_restore(mrb, ai); + } + + /* Add unique elements from others */ + for (mrb_int i = 0; i < argc; i++) { + mrb_value other = argv[i]; + for (mrb_int j = 0; j < RARRAY_LEN(other); j++) { + mrb_value p = RARRAY_PTR(other)[j]; + mrb_gc_protect(mrb, p); // p may be removed from other by add_uniq() + add_uniq(mrb, p, result); + mrb_gc_arena_restore(mrb, ai); + } + } + } + + return result; +} + +/* + * call-seq: + * ary | other_ary -> new_ary + * + * Set Union---Returns a new array by joining this array with + * `other_ary`, removing duplicates. + * + * [ "a", "b", "c" ] | [ "c", "d", "a" ] + * #=> [ "a", "b", "c", "d" ] + */ + +static mrb_value +ary_union(mrb_state *mrb, mrb_value self) +{ + mrb_value other; + mrb_get_args(mrb, "A", &other); + return ary_union_internal(mrb, self, 1, &other); +} + +/* + * call-seq: + * ary.union(other_ary,...) -> new_ary + * + * Set Union---Returns a new array by joining this array with + * `other_ary`s, removing duplicates. + * + * ["a", "b", "c"].union(["c", "d", "a"], ["a", "c", "e"]) + * #=> ["a", "b", "c", "d", "e"] + */ +static mrb_value +ary_union_multi(mrb_state *mrb, mrb_value self) +{ + const mrb_value *argv; + mrb_int argc; + mrb_get_args(mrb, "*", &argv, &argc); + return ary_union_internal(mrb, self, argc, argv); +} + +struct ary_intersection_ctx { + ary_set_t *set; + mrb_value self; + mrb_value result; + const mrb_value *argv; + mrb_int argc; +}; + +static mrb_value +ary_intersection_body(mrb_state *mrb, void *data) +{ + struct ary_intersection_ctx *ctx = (struct ary_intersection_ctx *)data; + + for (mrb_int i = 0; i < ctx->argc; i++) { + ary_populate_temp_set(mrb, ctx->set, ctx->argv[i]); + } + + int ai = mrb_gc_arena_save(mrb); + for (mrb_int i = 0; i < RARRAY_LEN(ctx->self); i++) { + mrb_value p = RARRAY_PTR(ctx->self)[i]; + mrb_gc_protect(mrb, p); // p may be removed from self by kh_get(ary_set, ...) + khiter_t k = kh_get(ary_set, mrb, ctx->set, p); + if (!kh_is_end(ctx->set, k)) { + mrb_ary_push(mrb, ctx->result, p); + kh_del(ary_set, mrb, ctx->set, k); + } + mrb_gc_arena_restore(mrb, ai); + } + + return ctx->result; +} + +static mrb_value +ary_intersection_internal(mrb_state *mrb, mrb_value self, mrb_int argc, const mrb_value *argv) +{ + if (argc == 0) { + return mrb_ary_new(mrb); + } + + mrb_int total_len = ary_get_array_args(mrb, argc, &argv); + + mrb_value result = mrb_ary_new(mrb); + + if (total_len > SET_OP_HASH_THRESHOLD) { + /* Create shared copies to protect elements during khash operations */ + mrb_value *argv_copies = (mrb_value *)mrb_alloca(mrb, sizeof(mrb_value) * argc); + for (mrb_int i = 0; i < argc; i++) { + argv_copies[i] = mrb_ary_make_shared_copy(mrb, argv[i]); + } + + ary_set_t set_struct; + ary_set_t *set = &set_struct; + ary_init_temp_set(mrb, set, total_len); + + struct ary_intersection_ctx ctx = { set, self, result, argv_copies, argc }; + MRB_ENSURE(mrb, result, ary_intersection_body, &ctx) { + ary_destroy_temp_set(mrb, set); + } + } + else { + int ai = mrb_gc_arena_save(mrb); + for (mrb_int i = 0; i < RARRAY_LEN(self); i++) { + mrb_value p = RARRAY_PTR(self)[i]; + mrb_gc_protect(mrb, p); // p may be removed from self by mrb_equal() + mrb_bool found_in_all = TRUE; + + for (mrb_int j = 0; j < argc; j++) { + mrb_bool found_in_current_other = FALSE; + for (mrb_int k = 0; k < RARRAY_LEN(argv[j]); k++) { + if (mrb_equal(mrb, p, RARRAY_PTR(argv[j])[k])) { + found_in_current_other = TRUE; + break; + } + } + if (!found_in_current_other) { + found_in_all = FALSE; + break; + } + } + + if (found_in_all) { + mrb_bool already_added = FALSE; + for (mrb_int j = 0; j < RARRAY_LEN(result); j++) { + if (mrb_equal(mrb, p, RARRAY_PTR(result)[j])) { + already_added = TRUE; + break; + } + } + if (!already_added) { + mrb_ary_push(mrb, result, p); + } + } + mrb_gc_arena_restore(mrb, ai); + } + } + return result; +} + +/* + * call-seq: + * ary & other_ary -> new_ary + * + * Set Intersection---Returns a new array + * containing elements common to the two arrays, with no duplicates. + * + * [ 1, 1, 3, 5 ] & [ 1, 2, 3 ] #=> [ 1, 3 ] + */ + +static mrb_value +ary_intersection(mrb_state *mrb, mrb_value self) +{ + mrb_value other; + mrb_get_args(mrb, "A", &other); + return ary_intersection_internal(mrb, self, 1, &other); +} + +/* + * call-seq: + * ary.intersection(other_ary,...) -> new_ary + * + * Set Intersection---Returns a new array containing elements common to + * this array and `other_ary`s, removing duplicates. The order is + * preserved from the original array. + * + * [1, 2, 3].intersection([3, 4, 1], [1, 3, 5]) #=> [1, 3] + */ +static mrb_value +ary_intersection_multi(mrb_state *mrb, mrb_value self) +{ + const mrb_value *argv; + mrb_int argc; + mrb_get_args(mrb, "*", &argv, &argc); + return ary_intersection_internal(mrb, self, argc, argv); +} + +/* + * call-seq: + * ary.intersect?(other_ary) -> true or false + * + * Returns `true` if the array and `other_ary` have at least one element in + * common, otherwise returns `false`. + * + * a = [ 1, 2, 3 ] + * b = [ 3, 4, 5 ] + * c = [ 5, 6, 7 ] + * a.intersect?(b) #=> true + * a.intersect?(c) #=> false + */ + +struct ary_intersect_p_ctx { + ary_set_t *set; + mrb_value shorter_ary_copy; + mrb_value longer_ary; + mrb_bool *found; +}; + +static mrb_value +ary_intersect_p_body(mrb_state *mrb, void *data) +{ + struct ary_intersect_p_ctx *ctx = (struct ary_intersect_p_ctx *)data; + + ary_populate_temp_set(mrb, ctx->set, ctx->shorter_ary_copy); + + int ai = mrb_gc_arena_save(mrb); + for (mrb_int i = 0; i < RARRAY_LEN(ctx->longer_ary); i++) { + mrb_value p = RARRAY_PTR(ctx->longer_ary)[i]; + mrb_gc_protect(mrb, p); // p may be removed from longer_ary by kh_get(ary_set, ...) + khiter_t k = kh_get(ary_set, mrb, ctx->set, p); + mrb_gc_arena_restore(mrb, ai); + if (!kh_is_end(ctx->set, k)) { + *ctx->found = TRUE; + break; + } + } + + return mrb_nil_value(); +} + +static mrb_value +ary_intersect_p(mrb_state *mrb, mrb_value self) +{ + mrb_value other; + mrb_get_args(mrb, "A", &other); + + mrb_value shorter_ary, longer_ary; + if (RARRAY_LEN(self) > RARRAY_LEN(other)) { + shorter_ary = other; + longer_ary = self; + } + else { + shorter_ary = self; + longer_ary = other; + } + + if (RARRAY_LEN(shorter_ary) == 0 || RARRAY_LEN(longer_ary) == 0) { + return mrb_false_value(); + } + + if (RARRAY_LEN(shorter_ary) > SET_OP_HASH_THRESHOLD) { + mrb_value shorter_ary_copy = mrb_ary_make_shared_copy(mrb, shorter_ary); + + ary_set_t set_struct; + ary_set_t *set = &set_struct; + ary_init_temp_set(mrb, set, RARRAY_LEN(shorter_ary_copy)); + + mrb_bool found = FALSE; + + struct ary_intersect_p_ctx ctx = { set, shorter_ary_copy, longer_ary, &found }; + mrb_value result; + MRB_ENSURE(mrb, result, ary_intersect_p_body, &ctx) { + ary_destroy_temp_set(mrb, set); + } + + if (found) { + return mrb_true_value(); + } + } + else { + int ai = mrb_gc_arena_save(mrb); + for (mrb_int i = 0; i < RARRAY_LEN(longer_ary); i++) { + mrb_value p = RARRAY_PTR(longer_ary)[i]; + mrb_gc_protect(mrb, p); // p may be removed from longer_ary by mrb_equal() + for (mrb_int j = 0; j < RARRAY_LEN(shorter_ary); j++) { + if (mrb_equal(mrb, p, RARRAY_PTR(shorter_ary)[j])) { + return mrb_true_value(); + } + } + mrb_gc_arena_restore(mrb, ai); + } + } + + return mrb_false_value(); +} + +/* + * Internal helper for Array#fill that handles all the complex + * argument parsing logic including ranges, negative indices, etc. + * Returns normalized [start, length] array for use by ary_fill_exec. + */ + +static mrb_value +ary_fill_parse_arg(mrb_state *mrb, mrb_value self) +{ + mrb_value arg0 = mrb_nil_value(), arg1 = mrb_nil_value(), arg2 = mrb_nil_value(); + mrb_value block = mrb_nil_value(); + mrb_int argc = mrb_get_args(mrb, "|ooo&", &arg0, &arg1, &arg2, &block); + + struct RArray *ary = mrb_ary_ptr(self); + mrb_int ary_len = ARY_LEN(ary); + mrb_int start = 0, length = 0; + + if (!mrb_nil_p(block)) { + if (argc == 0 || (argc >= 1 && mrb_nil_p(arg0))) { + /* fill { |index| block } */ + start = 0; + length = ary_len; + } + else if (argc >= 1 && mrb_range_p(arg0)) { + /* fill(range) { |index| block } */ + mrb_int range_beg, range_end; + + if (mrb_range_beg_len(mrb, arg0, &range_beg, &range_end, ary_len, 1)) { + start = range_beg; + length = range_end; + } + } + else if (argc >= 1 && !mrb_nil_p(arg0)) { + /* fill(start [, length]) { |index| block } */ + start = mrb_int(mrb, arg0); + if (start < 0) start += ary_len; + if (start < 0) start = 0; + + if (argc == 1 || mrb_nil_p(arg1)) { + length = ary_len - start; + } + else { + length = mrb_int(mrb, arg1); + if (length < 0) length = 0; + } + } + } + else { + if (argc >= 1 && !mrb_nil_p(arg0)) { + if (argc == 1 || (argc >= 2 && mrb_nil_p(arg1) && mrb_nil_p(arg2))) { + /* fill(obj) */ + start = 0; + length = ary_len; + } + else if (argc >= 2 && mrb_range_p(arg1)) { + /* fill(obj, range) */ + mrb_int range_beg, range_end; + + if (mrb_range_beg_len(mrb, arg1, &range_beg, &range_end, ary_len, 1)) { + start = range_beg; + length = range_end; + } + } + else if (argc >= 2 && !mrb_nil_p(arg1)) { + /* fill(obj, start [, length]) */ + start = mrb_int(mrb, arg1); + if (start < 0) start += ary_len; + if (start < 0) start = 0; + + if (argc == 2 || mrb_nil_p(arg2)) { + length = ary_len - start; + } + else { + length = mrb_int(mrb, arg2); + if (length < 0) length = 0; + } + } + } + } + + /* Return [start, length] array */ + mrb_value result = mrb_ary_new_capa(mrb, 2); + mrb_ary_push(mrb, result, mrb_fixnum_value(start)); + mrb_ary_push(mrb, result, mrb_fixnum_value(length)); + return result; +} + +/* + * Internal helper that fills a specific range of the array + * with the given object. Handles array extension if necessary. + * Used by Ruby-level Array#fill method. + */ + +static mrb_value +ary_fill_exec(mrb_state *mrb, mrb_value self) +{ + mrb_value obj; + mrb_int start, length; + + mrb_get_args(mrb, "iio", &start, &length, &obj); + + if (start < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "negative start index"); + } + if (length < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "negative length"); + } + + struct RArray *ary = mrb_ary_ptr(self); + mrb_int ary_len = ARY_LEN(ary); + + /* Extend array if necessary */ + if (start + length > ary_len) { + mrb_ary_resize(mrb, self, start + length); + ary = mrb_ary_ptr(self); /* refresh pointer after resize */ + } + + /* Ensure we don't go beyond array bounds */ + if (start >= ARY_LEN(ary) || length <= 0) return self; + if (start + length > ARY_LEN(ary)) { + length = ARY_LEN(ary) - start; + } + + /* Fill the array */ + mrb_ary_modify(mrb, ary); + mrb_value *ptr = ARY_PTR(ary) + start; + for (mrb_int i = 0; i < length; i++) { + ptr[i] = obj; + } + + return self; +} + +/* + * Internal helper for Array#uniq! without blocks. + * Modifies array in-place, returns nil if no changes. + */ +struct ary_uniq_bang_ctx { + ary_set_t *set; + mrb_value self_copy; + mrb_value self; + mrb_int *write_pos; +}; + +static mrb_value +ary_uniq_bang_body(mrb_state *mrb, void *data) +{ + struct ary_uniq_bang_ctx *ctx = (struct ary_uniq_bang_ctx *)data; + + ary_populate_temp_set(mrb, ctx->set, ctx->self_copy); + + int ai = mrb_gc_arena_save(mrb); + for (mrb_int read_pos = 0; read_pos < RARRAY_LEN(ctx->self); read_pos++) { + mrb_value elem = RARRAY_PTR(ctx->self)[read_pos]; + mrb_gc_protect(mrb, elem); // elem may be removed from self by kh_get(ary_set, ...) + khiter_t k = kh_get(ary_set, mrb, ctx->set, elem); + if (!kh_is_end(ctx->set, k)) { + if (*ctx->write_pos != read_pos && *ctx->write_pos < RARRAY_LEN(ctx->self)) { + mrb_ary_modify(mrb, mrb_ary_ptr(ctx->self)); + RARRAY_PTR(ctx->self)[*ctx->write_pos] = elem; + } + (*ctx->write_pos)++; + kh_del(ary_set, mrb, ctx->set, k); + mrb_gc_arena_restore(mrb, ai); + } + } + + return mrb_nil_value(); +} + +static mrb_value +ary_uniq_bang(mrb_state *mrb, mrb_value self) +{ + mrb_int len = RARRAY_LEN(self); + + if (len <= 1) { + return mrb_nil_value(); + } + + mrb_ary_modify(mrb, mrb_ary_ptr(self)); + mrb_int write_pos = 0; + + if (len > SET_OP_HASH_THRESHOLD) { + /* Create shared copy to protect elements during khash operations */ + mrb_value self_copy = mrb_ary_make_shared_copy(mrb, self); + + ary_set_t set_struct; + ary_set_t *set = &set_struct; + ary_init_temp_set(mrb, set, len); + + struct ary_uniq_bang_ctx ctx = { set, self_copy, self, &write_pos }; + mrb_value result; + MRB_ENSURE(mrb, result, ary_uniq_bang_body, &ctx) { + ary_destroy_temp_set(mrb, set); + } + } + else { + int ai = mrb_gc_arena_save(mrb); + for (mrb_int read_pos = 0; read_pos < RARRAY_LEN(self); read_pos++) { + mrb_value elem = RARRAY_PTR(self)[read_pos]; + mrb_gc_protect(mrb, elem); // elem may be removed from self by mrb_equal() + mrb_bool found = FALSE; + for (mrb_int j = 0; j < write_pos && j < RARRAY_LEN(self); j++) { + if (mrb_equal(mrb, elem, RARRAY_PTR(self)[j])) { + found = TRUE; + break; + } + } + if (!found) { + if (write_pos != read_pos && write_pos < RARRAY_LEN(self)) { + mrb_ary_modify(mrb, mrb_ary_ptr(self)); + RARRAY_PTR(self)[write_pos] = elem; + } + write_pos++; + } + mrb_gc_arena_restore(mrb, ai); + } + } + + if (write_pos == len) { + return mrb_nil_value(); + } + + mrb_ary_resize(mrb, self, write_pos); + return self; +} + +/* + * Internal helper for Array#uniq without blocks. + * Uses hash-based deduplication for large arrays, + * linear search for small arrays. + */ +static mrb_value +ary_uniq(mrb_state *mrb, mrb_value self) +{ + mrb_value ary = mrb_ary_dup(mrb, self); + ary_uniq_bang(mrb, ary); + return ary; +} + +/* Internal helper for flatten operations using iterative stack-based approach */ +static mrb_value +flatten_internal(mrb_state *mrb, mrb_value self, mrb_int level, mrb_bool *modified) +{ + *modified = FALSE; + mrb_value result = mrb_ary_new(mrb); + mrb_value stack = mrb_ary_new(mrb); + mrb_ary_push(mrb, stack, self); + mrb_ary_push(mrb, stack, mrb_fixnum_value(0)); // index + mrb_ary_push(mrb, stack, mrb_fixnum_value(1)); // depth + + while (RARRAY_LEN(stack) > 0) { + mrb_int depth = mrb_fixnum(mrb_ary_pop(mrb, stack)); + mrb_int idx = mrb_fixnum(mrb_ary_pop(mrb, stack)); + mrb_value ary = mrb_ary_pop(mrb, stack); + + while (idx < RARRAY_LEN(ary)) { + mrb_value e = mrb_ary_entry(ary, idx); + idx++; + + if (mrb_array_p(e) && (level < 0 || depth <= level)) { + *modified = TRUE; + // Push current state back + mrb_ary_push(mrb, stack, ary); + mrb_ary_push(mrb, stack, mrb_fixnum_value(idx)); + mrb_ary_push(mrb, stack, mrb_fixnum_value(depth)); + + // Push new array to process + ary = e; + idx = 0; + depth++; + } + else { + mrb_ary_push(mrb, result, e); + } + } + } + return result; +} + +/* + * call-seq: + * ary.flatten -> new_ary + * ary.flatten(level) -> new_ary + * + * Returns a new array that is a one-dimensional flattening of this + * array (recursively). That is, for every element that is an array, + * extract its elements into the new array. If the optional + * `level` argument determines the level of recursion to flatten. + * + * s = [ 1, 2, 3 ] #=> [1, 2, 3] + * t = [ 4, 5, 6, [7, 8] ] #=> [4, 5, 6, [7, 8]] + * a = [ s, t, 9, 10 ] #=> [[1, 2, 3], [4, 5, 6, [7, 8]], 9, 10] + * a.flatten #=> [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + * a = [ 1, 2, [3, [4, 5] ] ] + * a.flatten(1) #=> [1, 2, 3, [4, 5]] + */ +static mrb_value +ary_flatten(mrb_state *mrb, mrb_value self) +{ + mrb_int level = -1; + mrb_get_args(mrb, "|i", &level); + mrb_bool modified; // dummy + return flatten_internal(mrb, self, level, &modified); +} + +/* + * Internal helper for index normalization and bounds checking. + * Returns normalized index if in bounds, nil if out of bounds. + * Used by Ruby-level array methods. + */ + +static mrb_value +ary_normalize_index(mrb_state *mrb, mrb_value self) +{ + mrb_value index_val; + mrb_get_args(mrb, "o", &index_val); + + mrb_int index = mrb_as_int(mrb, index_val); + struct RArray *ary = mrb_ary_ptr(self); + mrb_int len = ARY_LEN(ary); + + // Handle negative indices + if (index < 0) { + index += len; + } + + // Check bounds + if (index >= 0 && index < len) { + return mrb_fixnum_value(index); + } + else { + return mrb_nil_value(); + } +} + +/* + * Internal helper for Array#fetch without blocks. + * Returns the element at index, or default if out of bounds. + * Raises IndexError if out of bounds and default equals none. + */ + +static mrb_value +ary_fetch(mrb_state *mrb, mrb_value self) +{ + mrb_value index_val, default_val, none; + mrb_get_args(mrb, "ooo", &index_val, &default_val, &none); + + // Convert index to integer + mrb_int index = mrb_as_int(mrb, index_val); + mrb_int original_index = index; // Keep original for error message + + struct RArray *ary = mrb_ary_ptr(self); + mrb_int len = ARY_LEN(ary); + + // Handle negative indices + if (index < 0) { + index += len; + } + + // Check bounds + if (index < 0 || index >= len) { + // Check if default is the NONE sentinel (means no default provided) + if (mrb_obj_equal(mrb, default_val, none)) { + // No default provided - raise IndexError + mrb_raisef(mrb, E_INDEX_ERROR, + "index %i outside of array bounds: %i...%i", + original_index, -len, len); + } + return default_val; + } + + // Return element at index + return ARY_PTR(ary)[index]; +} + +/* + * call-seq: + * ary.flatten! -> ary or nil + * ary.flatten!(level) -> array or nil + * + * Flattens `self` in place. Returns `nil` if no modifications were made + * (i.e., *ary* contains no subarrays.) If the optional `level` argument + * determines the level of recursion to flatten. + * + * a = [ 1, 2, [3, [4, 5] ] ] + * a.flatten! #=> [1, 2, 3, 4, 5] + * a.flatten! #=> nil + * a #=> [1, 2, 3, 4, 5] + * a = [ 1, 2, [3, [4, 5] ] ] + * a.flatten!(1) #=> [1, 2, 3, [4, 5]] + */ +static mrb_value +ary_flatten_bang(mrb_state *mrb, mrb_value self) +{ + mrb_int level = -1; + mrb_get_args(mrb, "|i", &level); + + mrb_ary_modify(mrb, mrb_ary_ptr(self)); + mrb_bool modified; + mrb_value result = flatten_internal(mrb, self, level, &modified); + + if (!modified) { + return mrb_nil_value(); + } + mrb_ary_replace(mrb, self, result); + return self; +} + +/* + * call-seq: + * ary.insert(index, obj...) -> ary + * + * Inserts the given values before the element with the given index. + * + * Negative indices count backwards from the end of the array, where -1 + * is the last element. If a negative index is used, the elements are + * inserted after that element. + * + * If the index is greater than the length of the array, the array is + * extended with nil elements. + * + * a = %w{ a b c d } + * a.insert(2, 99) #=> ["a", "b", 99, "c", "d"] + * a.insert(-2, 1, 2, 3) #=> ["a", "b", 99, "c", 1, 2, 3, "d"] + */ +static mrb_value +ary_insert(mrb_state *mrb, mrb_value self) +{ + mrb_int idx; + const mrb_value *argv; + mrb_int argc; + + mrb_get_args(mrb, "i*", &idx, &argv, &argc); + + if (argc == 0) { + return self; + } + + mrb_int len = RARRAY_LEN(self); + + if (idx < 0) { + idx += len + 1; + if (idx < 0) { + mrb_raisef(mrb, E_INDEX_ERROR, "index %i outside of array bounds", idx - (len + 1)); + } + } + + mrb_ary_modify(mrb, mrb_ary_ptr(self)); + + mrb_int new_len = (idx > len ? idx : len) + argc; + mrb_ary_resize(mrb, self, new_len); + + if (idx < len) { + memmove(RARRAY_PTR(self) + idx + argc, RARRAY_PTR(self) + idx, (len - idx) * sizeof(mrb_value)); + } + + for (mrb_int i = 0; i < argc; i++) { + mrb_ary_set(mrb, self, idx + i, argv[i]); + } + + return self; +} + +struct ary_product_generator { + mrb_int total; + mrb_int cursor; +}; + +static struct mrb_data_type ary_product_generator_type = { "ary_product_generator", mrb_free }; + +/* + * Internal helper for Array#product to construct a group array. + * Takes the base array (self), the array of other arrays (arys), + * the current iteration index (current_i), and the desired length + * of the group array (group_len). + */ +static mrb_value +ary_product_fetch(mrb_state *mrb, mrb_value self_ary, mrb_value arys_ary, mrb_int n) +{ + mrb_int j = RARRAY_LEN(arys_ary); // Corresponds to 'size' in Ruby + mrb_value group = mrb_ary_new_capa(mrb, j + 1 /* self_ary */); + + while (j-- > 0) { + mrb_value a = RARRAY_PTR(arys_ary)[j]; // arys[j] + mrb_check_type(mrb, a, MRB_TT_ARRAY); + mrb_int b = RARRAY_LEN(a); // a.size + if (b <= 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "cannot compute product with an empty array"); + } + mrb_ary_set(mrb, group, j + 1, RARRAY_PTR(a)[n % b]); + n /= b; + } + if (n >= RARRAY_LEN(self_ary)) { + mrb_raise(mrb, E_INDEX_ERROR, "index out of range"); + } + mrb_ary_set(mrb, group, 0, RARRAY_PTR(self_ary)[n]); + + return group; +} + +static mrb_value +ary_product_generate(mrb_state *mrb, mrb_value self) +{ + mrb_value arys_ary, block; + mrb_get_args(mrb, "A&", &arys_ary, &block); + + mrb_int total = RARRAY_LEN(self); + for (mrb_int i = 0; i < RARRAY_LEN(arys_ary); i++) { + mrb_value a = RARRAY_PTR(arys_ary)[i]; + mrb_check_type(mrb, a, MRB_TT_ARRAY); + mrb_int n = RARRAY_LEN(a); + if (n == 0) { + total = 0; + break; + } + if (mrb_int_mul_overflow(total, n, &total)) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "result too big"); + } + } + + if (mrb_nil_p(block)) { + mrb_value result = mrb_ary_new_capa(mrb, total); + for (mrb_int i = 0; i < total; i++) { + mrb_value group = ary_product_fetch(mrb, self, arys_ary, i); + mrb_ary_push(mrb, result, group); + } + return result; + } + else if (total > 0) { + struct RData *d; + struct ary_product_generator *g; + Data_Make_Struct(mrb, mrb->object_class, struct ary_product_generator, + &ary_product_generator_type, g, d); + g->total = total; + g->cursor = 0; + return mrb_obj_value(d); + } + else { + return mrb_nil_value(); + } +} + +static mrb_value +ary_product_next(mrb_state *mrb, mrb_value self) +{ + mrb_value arys; + struct ary_product_generator *g; + mrb_get_args(mrb, "Ad", &arys, &g, &ary_product_generator_type); + + if (g->cursor >= g->total) { + return mrb_nil_value(); + } + + return ary_product_fetch(mrb, self, arys, g->cursor++); +} + +/* + * call-seq: + * ary.deconstruct -> ary + * + * Returns the array itself for pattern matching. + * + * This method is used by pattern matching to deconstruct arrays. + * It simply returns the array itself, allowing pattern matching + * to work with array elements. + * + * a = [1, 2, 3] + * a.deconstruct #=> [1, 2, 3] + * + * Pattern matching usage: + * case [1, 2, 3] + * in [x, y, z] + * # x=1, y=2, z=3 + * end + */ +static mrb_value +ary_deconstruct(mrb_state *mrb, mrb_value ary) +{ + return ary; +} + +enum { + comb_finished = 0, + comb_repeated_permutation = 1, + comb_repeated_combination = 2, + comb_permutation = 3, + comb_combination = 4 +}; + +/* + * Internal method to initialize combination state. + * Returns opaque state object for use by __combination_next. + */ +static mrb_value +ary_combination_init(mrb_state *mrb, mrb_value self) +{ + mrb_int k; + mrb_sym mode_sym; + + mrb_get_args(mrb, "ni", &mode_sym, &k); +#if MRB_INT_MAX > SIZE_MAX + if (k > SIZE_MAX) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "number too large"); + } +#endif + + if (k < 1 || RARRAY_LEN(self) < 1) { + return mrb_nil_value(); + } + + int mode; + switch (mode_sym) { + case MRB_SYM(repeated_permutation): + mode = comb_repeated_permutation; + break; + case MRB_SYM(repeated_combination): + mode = comb_repeated_combination; + break; + case MRB_SYM(permutation): + if (k > RARRAY_LEN(self)) { + return mrb_nil_value(); + } + mode = comb_permutation; + break; + case MRB_SYM(combination): + if (k > RARRAY_LEN(self)) { + return mrb_nil_value(); + } + mode = comb_combination; + break; + default: + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong mode"); + } + + struct RData *d; + struct mrb_combination_state *state; + Data_Make_Struct(mrb, mrb->object_class, struct mrb_combination_state, + &mrb_combination_state_type, state, d); + + state->k = k; + state->n = RARRAY_LEN(self); + state->mode = mode; + state->indices = (mrb_int*)mrb_calloc(mrb, k, sizeof(mrb_int)); + + if (mode == comb_permutation || mode == comb_combination) { + for (mrb_int i = 0; i < k; i++) { + state->indices[i] = i; + } + } + + return mrb_obj_value(d); +} + +static void +adjust_next_permutation_index(struct mrb_combination_state *state, mrb_int i) +{ + for (mrb_int j = i - 1; j >= 0; j--) { + if (state->indices[i] == state->indices[j]) { + state->indices[i]++; + j = i; + } + } +} + +/* + * Internal method to get next combination as index array. + * Returns array of indices or nil when iteration is complete. + */ +static mrb_value +ary_combination_next(mrb_state *mrb, mrb_value self) +{ + struct mrb_combination_state *state; + mrb_get_args(mrb, "d", &state, &mrb_combination_state_type); + + /* Check if iteration is complete */ + if (state->mode == comb_finished) return mrb_nil_value(); + + /* Validate array hasn't been modified during iteration */ + if (RARRAY_LEN(self) != state->n) { + mrb_raise(mrb, E_RUNTIME_ERROR, "array modified during iteration"); + } + + /* Validate current indices are still in bounds */ + for (mrb_int i = 0; i < state->k; i++) { + if (state->indices[i] >= state->n) { + state->mode = comb_finished; + mrb_free(mrb, state->indices); + state->indices = NULL; + return mrb_nil_value(); + } + } + + /* Build current combination */ + mrb_value result = mrb_ary_new_capa(mrb, state->k); + const mrb_value *p = RARRAY_PTR(self); + for (mrb_int i = 0; i < state->k; i++) { + mrb_ary_push(mrb, result, p[state->indices[i]]); + } + + switch (state->mode) { + case comb_repeated_permutation: + case comb_repeated_combination: + for (mrb_int i = state->k - 1; i >= 0; i--) { + state->indices[i]++; + if (state->indices[i] < state->n) { + /* Reset dependent indices */ + mrb_int reset = (state->mode == comb_repeated_permutation) ? 0 : state->indices[i]; + for (i++; i < state->k; i++) { + state->indices[i] = reset; + } + return result; + } + } + break; + case comb_permutation: + for (mrb_int i = state->k - 1; i >= 0; i--) { + state->indices[i]++; + + // adjust so that it does not overlap with the leading index + adjust_next_permutation_index(state, i); + + if (state->indices[i] < state->n) { + // adjust all trailing indexes to complete the function + for (i++; i < state->k; i++) { + state->indices[i] = 0; + adjust_next_permutation_index(state, i); + } + return result; + } + } + break; + case comb_combination: + for (mrb_int i = state->k - 1; i >= 0; i--) { + state->indices[i]++; + + if (state->indices[i] <= state->n - state->k + i) { + // replace each overflowed indices with an index incremented by 1 from the previous one + for (i++; i < state->k; i++) { + state->indices[i] = state->indices[i - 1] + 1; + } + return result; + } + } + break; + default: // it probably won’t happen, but just in case + result = mrb_nil_value(); + break; + } + + state->mode = comb_finished; + mrb_free(mrb, state->indices); + state->indices = NULL; + return result; +} + +/* ---------------------------*/ +static const mrb_mt_entry array_ext_rom_entries[] = { + MRB_MT_ENTRY(ary_assoc, MRB_SYM(assoc), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(ary_at, MRB_SYM(at), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(ary_rassoc, MRB_SYM(rassoc), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(ary_values_at, MRB_SYM(values_at), MRB_ARGS_ANY()), + MRB_MT_ENTRY(ary_slice_bang, MRB_SYM_B(slice), MRB_ARGS_ARG(1,1)), + MRB_MT_ENTRY(ary_compact, MRB_SYM(compact), MRB_ARGS_NONE()), + MRB_MT_ENTRY(ary_compact_bang, MRB_SYM_B(compact), MRB_ARGS_NONE()), + MRB_MT_ENTRY(ary_rotate, MRB_SYM(rotate), MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(ary_rotate_bang, MRB_SYM_B(rotate), MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(ary_sub, MRB_OPSYM(sub), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(ary_difference, MRB_SYM(difference), MRB_ARGS_ANY()), + MRB_MT_ENTRY(ary_union, MRB_OPSYM(or), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(ary_union_multi, MRB_SYM(union), MRB_ARGS_ANY()), + MRB_MT_ENTRY(ary_intersection, MRB_OPSYM(and), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(ary_intersection_multi, MRB_SYM(intersection), MRB_ARGS_ANY()), + MRB_MT_ENTRY(ary_intersect_p, MRB_SYM_Q(intersect), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(ary_fill_parse_arg, MRB_SYM(__fill_parse_arg), MRB_ARGS_ARG(0,4)), + MRB_MT_ENTRY(ary_fill_exec, MRB_SYM(__fill_exec), MRB_ARGS_REQ(3)), + MRB_MT_ENTRY(ary_uniq, MRB_SYM(__uniq), MRB_ARGS_NONE()), + MRB_MT_ENTRY(ary_uniq_bang, MRB_SYM_B(__uniq), MRB_ARGS_NONE()), + MRB_MT_ENTRY(ary_flatten, MRB_SYM(flatten), MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(ary_flatten_bang, MRB_SYM_B(flatten), MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(ary_normalize_index, MRB_SYM(__normalize_index), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(ary_fetch, MRB_SYM(__fetch), MRB_ARGS_REQ(3)), + MRB_MT_ENTRY(ary_insert, MRB_SYM(insert), MRB_ARGS_ARG(1,-1)), + MRB_MT_ENTRY(ary_deconstruct, MRB_SYM(deconstruct), MRB_ARGS_NONE()), + MRB_MT_ENTRY(ary_product_generate, MRB_SYM(__product_generate), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(ary_product_next, MRB_SYM(__product_next), MRB_ARGS_REQ(2)), + MRB_MT_ENTRY(ary_combination_init, MRB_SYM(__combination_init), MRB_ARGS_REQ(2)), + MRB_MT_ENTRY(ary_combination_next, MRB_SYM(__combination_next), MRB_ARGS_REQ(1)), +}; + void mrb_mruby_array_ext_gem_init(mrb_state* mrb) { struct RClass * a = mrb->array_class; - mrb_define_method(mrb, a, "assoc", mrb_ary_assoc, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, a, "at", mrb_ary_at, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, a, "rassoc", mrb_ary_rassoc, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, a, "values_at", mrb_ary_values_at, MRB_ARGS_ANY()); - mrb_define_method(mrb, a, "slice!", mrb_ary_slice_bang, MRB_ARGS_ARG(1,1)); - mrb_define_method(mrb, a, "compact", mrb_ary_compact, MRB_ARGS_NONE()); - mrb_define_method(mrb, a, "compact!", mrb_ary_compact_bang, MRB_ARGS_NONE()); - mrb_define_method(mrb, a, "rotate", mrb_ary_rotate, MRB_ARGS_OPT(1)); - mrb_define_method(mrb, a, "rotate!", mrb_ary_rotate_bang, MRB_ARGS_OPT(1)); + MRB_MT_INIT_ROM(mrb, a, array_ext_rom_entries); } void diff --git a/mrbgems/mruby-array-ext/test/array.rb b/mrbgems/mruby-array-ext/test/array.rb index bd7a2dc458..fc175b8630 100644 --- a/mrbgems/mruby-array-ext/test/array.rb +++ b/mrbgems/mruby-array-ext/test/array.rb @@ -74,6 +74,7 @@ def assert_repeated_combination(exp, receiver, *args) end assert("Array#-") do + # Test basic functionality a = [1, 2, 3, 1] b = [1] c = 1 @@ -81,6 +82,50 @@ def assert_repeated_combination(exp, receiver, *args) assert_raise(TypeError) { a - c } assert_equal [2, 3], (a - b) assert_equal [1, 2, 3, 1], a + + # Test hash-based implementation (other_ary length > 32) + a = (1..50).to_a + b = (15..50).to_a # 36 elements > 32, triggers hash approach + result = a - b + expected = (1..14).to_a + + assert_equal expected, result + assert_equal 14, result.size + + # Test with larger removal set + a = (1..60).to_a + b = (20..55).to_a # 36 elements > 32, triggers hash approach + result = a - b + expected = (1..19).to_a + (56..60).to_a + + assert_equal expected, result + assert_equal 24, result.size + + # Test removing all elements + a = (1..20).to_a + b = (1..20).to_a + result = a - b + expected = [] + + assert_equal expected, result + assert_equal 0, result.size + + # Test removing no elements + a = (1..20).to_a + b = (30..50).to_a # 21 elements > 16, triggers hash approach + result = a - b + expected = (1..20).to_a + + assert_equal expected, result + assert_equal 20, result.size + + # Ensure original arrays are unchanged + original_a = (1..30).to_a + original_b = (10..25).to_a + result = original_a - original_b + assert_equal [1, 2, 3, 4, 5, 6, 7, 8, 9, 26, 27, 28, 29, 30], result + assert_equal (1..30).to_a, original_a + assert_equal (10..25).to_a, original_b end assert("Array#|") do @@ -93,6 +138,34 @@ def assert_repeated_combination(exp, receiver, *args) assert_equal [1, 2, 3, 1], a end +assert("Array#| with large arrays") do + # Test hash-based implementation (total length > 32) + a = (1..25).to_a + b = (20..45).to_a # total = 51 > 32, triggers hash approach + result = a | b + expected = (1..45).to_a + + assert_equal expected, result + assert_equal 45, result.size + + # Test with overlapping ranges + a = (1..20).to_a + b = (15..35).to_a # total = 41 > 32, triggers hash approach + result = a | b + expected = (1..35).to_a + + assert_equal expected, result + assert_equal 35, result.size + + # Ensure original arrays are unchanged + original_a = (1..20).to_a + original_b = (18..50).to_a + result = original_a | original_b + assert_equal (1..50).to_a, result + assert_equal (1..20).to_a, original_a + assert_equal (18..50).to_a, original_b +end + assert("Array#union") do a = [1, 2, 3, 1] b = [1, 4] @@ -119,6 +192,61 @@ def assert_repeated_combination(exp, receiver, *args) assert_equal [1, 2, 3, 1], a end +assert("Array#& with large arrays") do + # Test hash-based implementation (other_ary length > 32) + a = (1..50).to_a + b = (20..55).to_a # 36 elements > 32, triggers hash approach + result = a & b + expected = (20..50).to_a + + assert_equal expected, result + assert_equal 31, result.size + + # Test with larger intersection set + a = (1..60).to_a + b = (25..60).to_a # 36 elements > 32, triggers hash approach + result = a & b + expected = (25..60).to_a + + assert_equal expected, result + assert_equal 36, result.size + + # Test with duplicates in first array + a = [1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10] + b = (5..25).to_a # 21 elements > 16, triggers hash approach + result = a & b + expected = [5, 6, 7, 8, 9, 10] # no duplicates in result + + assert_equal expected, result + assert_equal 6, result.size + + # Test no intersection + a = (1..20).to_a + b = (30..50).to_a # 21 elements > 16, triggers hash approach + result = a & b + expected = [] + + assert_equal expected, result + assert_equal 0, result.size + + # Test complete intersection + a = (1..20).to_a + b = (1..20).to_a + result = a & b + expected = (1..20).to_a + + assert_equal expected, result + assert_equal 20, result.size + + # Ensure original arrays are unchanged + original_a = (1..30).to_a + original_b = (10..25).to_a + result = original_a & original_b + assert_equal (10..25).to_a, result + assert_equal (1..30).to_a, original_a + assert_equal (10..25).to_a, original_b +end + assert("Array#intersection") do a = [1, 2, 3, 1, 8, 6, 7, 8] b = [1, 4, 6, 8] @@ -135,6 +263,63 @@ def assert_repeated_combination(exp, receiver, *args) assert_false(a.intersect?(c)) end +assert("Array#intersect? with large arrays") do + # Test hash-based implementation (shorter array > 32) + a = (1..50).to_a + b = (40..75).to_a # 36 elements > 32, but a is longer so b is shorter + result = a.intersect?(b) + assert_true(result) # should find intersection at 40-50 + + # Test with larger arrays, no intersection + a = (1..30).to_a + b = (50..85).to_a # 36 elements > 32, triggers hash approach + result = a.intersect?(b) + assert_false(result) # no intersection + + # Test with first element matching (early termination) + a = (1..30).to_a + b = [1] + (50..70).to_a # 22 elements > 16, first element matches + result = a.intersect?(b) + assert_true(result) # should terminate early on first element + + # Test with last element matching + a = (1..30).to_a + b = (50..70).to_a + [30] # 22 elements > 16, last element matches + result = a.intersect?(b) + assert_true(result) # should find match at the end + + # Test empty arrays + a = [] + b = (1..20).to_a + result = a.intersect?(b) + assert_false(result) # empty array intersects with nothing + + a = (1..20).to_a + b = [] + result = a.intersect?(b) + assert_false(result) # intersecting with empty array + + # Test array size optimization (shorter array used for hash) + a = (1..5).to_a # shorter + b = (3..30).to_a # longer, 28 elements > 16 + result = a.intersect?(b) + assert_true(result) # should use a (shorter) for hash, find 3,4,5 + + # Test with duplicates + a = [1, 1, 2, 2, 3, 3] * 5 # 30 elements with duplicates + b = (25..50).to_a # 26 elements > 16, no intersection + result = a.intersect?(b) + assert_false(result) + + # Ensure original arrays are unchanged + original_a = (1..30).to_a + original_b = (25..50).to_a + result = original_a.intersect?(original_b) + assert_true(result) + assert_equal (1..30).to_a, original_a + assert_equal (25..50).to_a, original_b +end + assert("Array#flatten") do assert_equal [1, 2, "3", {4=>5}, :'6'], [1, 2, "3", {4=>5}, :'6'].flatten assert_equal [1, 2, 3, 4, 5, 6], [1, 2, [3, 4, 5], 6].flatten @@ -172,6 +357,26 @@ def assert_repeated_combination(exp, receiver, *args) a.fetch(100) { |i| ret = i } assert_equal 100, ret assert_raise(IndexError) { a.fetch(100) } + + # Additional edge cases + assert_equal "default", [].fetch(0, "default") + assert_equal "missing 5", ["a"].fetch(5) { |i| "missing #{i}" } + assert_equal "from block", ["a"].fetch(5, "default") { "from block" } + + # Error message format + begin + ["a", "b"].fetch(5) + assert_false true + rescue IndexError => e + assert_true e.message.include?("index 5 outside of array bounds: -2...2") + end +end + +assert("Array#fetch_values") do + a = [ 11, 22, 33, 44 ] + assert_equal([33, 11], a.fetch_values(2, 0)) + assert_raise(IndexError) { a.fetch_values(2, 5) } + assert_equal([33, 55], a.fetch_values(2, 5) { |i| i*11 }) end assert("Array#fill") do @@ -202,8 +407,14 @@ def assert_repeated_combination(exp, receiver, *args) assert_equal [1, 2, 3, 4, 5, 6], [1, 2, 3, 4, 5, 6].fill('x', -2...-2) assert_equal [1, 2, 3, 4, 'x', 6], [1, 2, 3, 4, 5, 6].fill('x', -2..-2) assert_equal [1, 2, 3, 4, 5, 6], [1, 2, 3, 4, 5, 6].fill('x', -2..0) + + # Test extending array + a = [1, 2] + assert_equal [1, 2, nil, nil, "x"], a.fill("x", 4, 1) end + + assert("Array#reverse_each") do a = [ "a", "b", "c", "d" ] b = [] @@ -262,12 +473,63 @@ def assert_repeated_combination(exp, receiver, *args) end assert("Array#insert") do - a = ["a", "b", "c", "d"] - assert_equal ["a", "b", 99, "c", "d"], a.insert(2, 99) - assert_equal ["a", "b", 99, "c", 1, 2, 3, "d"], a.insert(-2, 1, 2, 3) + # Basic insertion + a = [1, 2, 3] + assert_same a, a.insert(1, 99) + assert_equal [1, 99, 2, 3], a + + # Multiple elements + a = [1, 2, 3] + a.insert(2, 'a', 'b') + assert_equal [1, 2, 'a', 'b', 3], a - b = ["a", "b", "c", "d"] - assert_equal ["a", "b", "c", "d", nil, nil, 99], b.insert(6, 99) + # Negative index + a = [1, 2, 3, 4] + a.insert(-2, 99) + assert_equal [1, 2, 3, 99, 4], a + + # Negative index out of bounds + a = [1, 2, 3] + assert_raise(IndexError) { a.insert(-5, 99) } + assert_equal [1, 2, 3], a + + # Insertion beyond bounds (creates nils) + a = [1, 2] + a.insert(5, 99) + assert_equal [1, 2, nil, nil, nil, 99], a + + # Insertion at the end + a = [1, 2, 3] + a.insert(3, 99) + assert_equal [1, 2, 3, 99], a + + # Insertion into an empty array + a = [] + a.insert(0, 1, 2) + assert_equal [1, 2], a + + # Insertion into an empty array at a non-zero index + a = [] + a.insert(2, 99) + assert_equal [nil, nil, 99], a + + # No-op (inserting zero elements) + a = [1, 2, 3] + a.insert(1) + assert_equal [1, 2, 3], a + + # Return value is self + a = [1, 2, 3] + b = a.insert(1, 99) + assert_same a, b + + # Large array insertion + a = (0...1000).to_a + a.insert(500, "x") + assert_equal 1001, a.size + assert_equal "x", a[500] + assert_equal 499, a[499] + assert_equal 500, a[501] end assert("Array#bsearch") do @@ -361,11 +623,6 @@ def between(lo, x, hi) assert_raise(ArgumentError) { [[1]].to_h } end -assert("Array#index (block)") do - assert_nil (1..10).to_a.index { |i| i % 5 == 0 and i % 7 == 0 } - assert_equal 34, (1..100).to_a.index { |i| i % 5 == 0 and i % 7 == 0 } -end - assert("Array#dig") do h = [[[1]], 0] assert_equal(1, h.dig(0, 0, 0)) @@ -499,3 +756,94 @@ def between(lo, x, hi) assert_repeated_permutation([[]], a, 0) assert_repeated_permutation([], a, -1) end + +assert("Array#deconstruct") do + # Basic functionality - returns self + a = [1, 2, 3] + result = a.deconstruct + assert_equal([1, 2, 3], result) + assert_true(result.equal?(a)) + + # Empty array + b = [] + result_empty = b.deconstruct + assert_equal([], result_empty) + assert_true(result_empty.equal?(b)) + + # Mixed types + c = [1, "hello", :symbol, nil, true] + result_mixed = c.deconstruct + assert_equal([1, "hello", :symbol, nil, true], result_mixed) + assert_true(result_mixed.equal?(c)) + + # Nested arrays + d = [[1, 2], [3, 4], [5]] + result_nested = d.deconstruct + assert_equal([[1, 2], [3, 4], [5]], result_nested) + assert_true(result_nested.equal?(d)) +end + +assert("Array#find") do + # Basic find + assert_equal 3, [1, 2, 3, 4, 5].find { |x| x > 2 } + assert_equal 1, [1, 2, 3, 4, 5].find { |x| x < 2 } + + # No match returns nil + assert_nil [1, 2, 3].find { |x| x > 10 } + + # Empty array + assert_nil [].find { |x| x > 0 } + + # With ifnone callable + assert_equal 0, [1, 2, 3].find(->{ 0 }) { |x| x > 10 } + assert_equal "default", [1, 2, 3].find(->{ "default" }) { |x| x > 10 } + + # ifnone not called when match found + called = false + [1, 2, 3].find(->{ called = true; 0 }) { |x| x == 2 } + assert_false called + + # Returns first match + assert_equal 2, [1, 2, 2, 3].find { |x| x == 2 } + + # Works with different types + assert_equal "b", ["a", "b", "c"].find { |x| x == "b" } + assert_equal :bar, [:foo, :bar, :baz].find { |x| x == :bar } +end + +assert("Array#rfind") do + # Basic rfind - finds from end (first match scanning backwards) + assert_equal 5, [1, 2, 3, 4, 5].rfind { |x| x > 2 } # 5 is first match from end + assert_equal 5, [1, 2, 3, 4, 5].rfind { |x| x > 0 } # 5 is first match from end + + # Returns last occurrence when duplicates exist + a = [1, 2, 3, 2, 1] + assert_equal 2, a.rfind { |x| x == 2 } # finds the 2 at index 3 + + # No match returns nil + assert_nil [1, 2, 3].rfind { |x| x > 10 } + + # Empty array + assert_nil [].rfind { |x| x > 0 } + + # With ifnone callable + assert_equal 0, [1, 2, 3].rfind(->{ 0 }) { |x| x > 10 } + assert_equal "default", [1, 2, 3].rfind(->{ "default" }) { |x| x > 10 } + + # ifnone not called when match found + called = false + [1, 2, 3].rfind(->{ called = true; 0 }) { |x| x == 2 } + assert_false called + + # Compare find vs rfind - same result for unique match + arr = [1, 2, 3, 4, 3, 2, 1] + assert_equal 3, arr.find { |x| x == 3 } # first 3 (index 2) + assert_equal 3, arr.rfind { |x| x == 3 } # last 3 (index 4), same value + + # Different results with inequality - rfind scans from end + assert_equal 3, arr.find { |x| x >= 3 } # first >= 3 is 3 (at index 2) + assert_equal 3, arr.rfind { |x| x >= 3 } # scanning from end: 1,2,3 - 3 matches first + + # Works with different types + assert_equal "b", ["a", "b", "c", "b", "a"].rfind { |x| x > "a" } # scanning from end: a,b - b matches +end diff --git a/mrbgems/mruby-benchmark/README.md b/mrbgems/mruby-benchmark/README.md new file mode 100644 index 0000000000..35fa2a4331 --- /dev/null +++ b/mrbgems/mruby-benchmark/README.md @@ -0,0 +1,257 @@ +# mruby-benchmark + +Benchmarking and profiling tools for mruby. + +## Overview + +The `mruby-benchmark` gem provides simple and lightweight benchmarking capabilities for measuring execution time and memory usage in mruby applications. It is designed for embedded systems and resource-constrained environments. + +## Installation + +Add the following line to your `build_config.rb`: + +```ruby +conf.gem :core => 'mruby-benchmark' +``` + +## API + +### Benchmark Module + +The main interface for benchmarking operations. + +#### `Benchmark.measure { block }` → Benchmark::Tms + +Measures the execution time of the given block and returns a `Benchmark::Tms` object containing timing information. + +```ruby +result = Benchmark.measure do + # code to benchmark + 1000.times { "string interpolation: #{42}" } +end + +puts result # Prints formatted timing information +``` + +#### `Benchmark.realtime { block }` → Float + +Returns only the real (wall-clock) time in seconds as a floating-point number. + +```ruby +time = Benchmark.realtime do + sleep(0.1) +end + +puts "Took #{time} seconds" # => "Took 0.100... seconds" +``` + +#### `Benchmark.bm(label_width = 0) { |x| ... }` + +Performs formatted benchmark comparisons with aligned output. + +```ruby +Benchmark.bm(10) do |x| + x.report("array:") { 1000.times { [1, 2, 3, 4, 5] } } + x.report("hash:") { 1000.times { {a: 1, b: 2, c: 3} } } + x.report("string:") { 1000.times { "hello" * 100 } } +end +``` + +Output example: + +``` + user system total real +array: 0.010000 0.000000 0.010000 ( 0.012345) +hash: 0.015000 0.000000 0.015000 ( 0.016789) +string: 0.008000 0.000000 0.008000 ( 0.009012) +``` + +#### `Benchmark.measure(memory: true) { block }` → Benchmark::Tms + +Measures both execution time and memory allocation when `memory: true` is specified. + +```ruby +result = Benchmark.measure(memory: true) do + array = [] + 1000.times { |i| array << i } +end + +puts "Objects allocated: #{result.objects}" +puts "Memory used: #{result.memory} bytes" +``` + +### Benchmark::Tms Class + +Holds timing measurement results. Provides methods to access individual timing components. + +#### Attributes + +- `utime` - User CPU time in seconds (Float) +- `stime` - System CPU time in seconds (Float) +- `cutime` - User CPU time of child processes (Float, usually 0 in mruby) +- `cstime` - System CPU time of child processes (Float, usually 0 in mruby) +- `real` - Real (wall-clock) time in seconds (Float) +- `objects` - Number of objects allocated (Integer, when memory tracking enabled) +- `memory` - Memory allocated in bytes (Integer, when memory tracking enabled) + +#### Methods + +##### `total` → Float + +Returns the total CPU time (user + system). + +```ruby +result = Benchmark.measure { heavy_computation } +puts "Total CPU time: #{result.total} seconds" +``` + +##### `to_s` → String + +Returns formatted string representation of timing results. + +```ruby +result = Benchmark.measure { sleep(0.1) } +puts result.to_s +# => " 0.000000 0.000000 0.000000 ( 0.100123)" +``` + +##### `format(format_str)` → String + +Returns timing results formatted according to the format string. + +Format specifiers: + +- `%u` - User CPU time +- `%s` - System CPU time +- `%t` - Total CPU time +- `%r` - Real time +- `%o` - Objects allocated (if memory tracking enabled) +- `%m` - Memory allocated (if memory tracking enabled) +- `%n` - Label name + +```ruby +result = Benchmark.measure { computation } +puts result.format("Real: %rs, CPU: %ts") +# => "Real: 0.123s, CPU: 0.100s" +``` + +### Benchmark::Report Class + +Used within `Benchmark.bm` for formatted reporting. + +#### `report(label = "") { block }` + +Executes and reports on a single benchmark within a `bm` block. + +```ruby +Benchmark.bm do |x| + x.report("first test") { code1 } + x.report("second test") { code2 } +end +``` + +## Usage Examples + +### Basic Timing + +```ruby +require 'benchmark' + +# Simple timing +time = Benchmark.realtime do + sum = 0 + 1000000.times { |i| sum += i } +end +puts "Calculation took #{time} seconds" + +# Detailed timing +result = Benchmark.measure do + arr = (1..10000).to_a + arr.sort! +end +puts result +``` + +### Comparing Implementations + +```ruby +require 'benchmark' + +Benchmark.bm(15) do |x| + x.report("Array#each:") do + arr = (1..1000).to_a + sum = 0 + arr.each { |n| sum += n } + end + + x.report("Array#inject:") do + arr = (1..1000).to_a + arr.inject(0) { |sum, n| sum + n } + end + + x.report("Numeric#times:") do + sum = 0 + 1000.times { |n| sum += n } + end +end +``` + +### Memory Profiling + +```ruby +require 'benchmark' + +# Track memory allocation +result = Benchmark.measure(memory: true) do + strings = [] + 1000.times { |i| strings << "string_#{i}" } +end + +puts "Execution time: #{result.real}s" +puts "Objects created: #{result.objects}" +puts "Memory allocated: #{result.memory} bytes" +``` + +### Performance Testing in Tests + +```ruby +# In test files +assert('String concatenation performance') do + time = Benchmark.realtime do + 1000.times { "hello" + "world" } + end + + # Assert it completes within reasonable time + assert_true time < 0.1, "String concat should be fast" +end +``` + +## Implementation Notes + +### Time Measurement + +mruby-benchmark uses `Process.clock_gettime` (via mruby-time) for high-resolution timing when available. User and system CPU times are measured using platform-specific APIs where available, otherwise both are set to 0. + +### Memory Tracking + +Memory profiling uses `ObjectSpace.count_objects` (via mruby-objectspace) to track object allocation. Memory size estimation is based on typical object overhead and may not be exact for all platforms. + +### Limitations + +- Child process timing (`cutime`, `cstime`) is not supported in most mruby environments and always returns 0 +- System CPU time may not be available on all platforms +- Memory measurements are estimates and may not reflect actual heap usage +- GC activity during benchmarking may affect timing results + +## Dependencies + +- **mruby-time** - Required for timing measurements +- **mruby-objectspace** - Required for memory profiling + +## License + +MIT License + +## Authors + +mruby developers diff --git a/mrbgems/mruby-benchmark/mrbgem.rake b/mrbgems/mruby-benchmark/mrbgem.rake new file mode 100644 index 0000000000..56cfc6b16d --- /dev/null +++ b/mrbgems/mruby-benchmark/mrbgem.rake @@ -0,0 +1,10 @@ +MRuby::Gem::Specification.new('mruby-benchmark') do |spec| + spec.license = 'MIT' + spec.author = 'mruby developers' + spec.summary = 'benchmarking and profiling tools' + + spec.add_dependency('mruby-time', :core => 'mruby-time') + spec.add_dependency('mruby-objectspace', :core => 'mruby-objectspace') + spec.add_dependency('mruby-sprintf', :core => 'mruby-sprintf') + spec.add_dependency('mruby-io', :core => 'mruby-io') +end diff --git a/mrbgems/mruby-benchmark/mrblib/benchmark.rb b/mrbgems/mruby-benchmark/mrblib/benchmark.rb new file mode 100644 index 0000000000..ee3efea114 --- /dev/null +++ b/mrbgems/mruby-benchmark/mrblib/benchmark.rb @@ -0,0 +1,131 @@ +module Benchmark + # Timing measurement result + class Tms + attr_reader :utime, :stime, :cutime, :cstime, :real + attr_reader :objects, :memory + + def initialize(utime, stime, cutime, cstime, real, label = nil, objects = nil, memory = nil) + @utime = utime + @stime = stime + @cutime = cutime + @cstime = cstime + @real = real + @label = label + @objects = objects + @memory = memory + end + + def total + @utime + @stime + @cutime + @cstime + end + + def to_s + "%10.6f %10.6f %10.6f (%10.6f)\n" % [@utime, @stime, total, @real] + end + + def format(format_str) + str = format_str.dup + str.gsub!('%u', @utime.to_s) + str.gsub!('%s', @stime.to_s) + str.gsub!('%t', total.to_s) + str.gsub!('%r', @real.to_s) + str.gsub!('%o', @objects.to_s) if @objects + str.gsub!('%m', @memory.to_s) if @memory + str.gsub!('%n', @label.to_s) if @label + str + end + end + + # Report class for formatted benchmark output + class Report + def initialize(width = 0) + @width = width + @results = [] + end + + def report(label = "") + tms = Benchmark.measure { yield } + # Create new Tms with label set + tms = Benchmark::Tms.new(tms.utime, tms.stime, tms.cutime, tms.cstime, + tms.real, label, tms.objects, tms.memory) + + label_str = label.to_s + if label_str.length < @width + label_str = label_str + " " * (@width - label_str.length) + end + + if $stdout + $stdout.print label_str + $stdout.print tms.to_s + end + + @results << tms + tms + end + + def results + @results + end + end + + # Measure execution time of a block + def self.measure(memory: false) + start_time = Time.now + start_objects = nil + start_count = nil + + if memory + if Object.const_defined?(:ObjectSpace) + start_count = ObjectSpace.count_objects + start_objects = start_count.values.inject(0) {|sum, n| sum + n } + end + end + + yield + + end_time = Time.now + real = end_time - start_time + + objects_allocated = nil + memory_allocated = nil + + if memory && start_count + end_count = ObjectSpace.count_objects + end_objects = end_count.values.inject(0) {|sum, n| sum + n } + objects_allocated = end_objects - start_objects + + # Estimate memory based on object count + # Average object overhead in mruby (approximate) + memory_allocated = objects_allocated * 40 + end + + # mruby typically doesn't have per-process CPU time + # Set user/system times to 0 + Tms.new(0.0, 0.0, 0.0, 0.0, real, nil, objects_allocated, memory_allocated) + end + + # Return only real time as a float + def self.realtime + start_time = Time.now + yield + end_time = Time.now + end_time - start_time + end + + # Formatted benchmark with labeled reports + def self.bm(label_width = 0) + report = Report.new(label_width) + + # Print header + if $stdout + if label_width > 0 + $stdout.print " " * label_width + end + $stdout.puts " user system total real" + end + + yield report + + report + end +end diff --git a/mrbgems/mruby-benchmark/test/benchmark.rb b/mrbgems/mruby-benchmark/test/benchmark.rb new file mode 100644 index 0000000000..f87f516a39 --- /dev/null +++ b/mrbgems/mruby-benchmark/test/benchmark.rb @@ -0,0 +1,244 @@ +## +# Benchmark Test + +assert('Benchmark') do + assert_equal(Module, Benchmark.class) +end + +assert('Benchmark::Tms') do + assert_equal(Class, Benchmark::Tms.class) +end + +assert('Benchmark::Report') do + assert_equal(Class, Benchmark::Report.class) +end + +assert('Benchmark.measure') do + result = Benchmark.measure do + sum = 0 + 100.times { |i| sum += i } + end + + assert_kind_of(Benchmark::Tms, result) + assert_kind_of(Float, result.utime) + assert_kind_of(Float, result.stime) + assert_kind_of(Float, result.cutime) + assert_kind_of(Float, result.cstime) + assert_kind_of(Float, result.real) + + # For mruby, CPU times are typically 0 + assert_equal(0.0, result.utime) + assert_equal(0.0, result.stime) + assert_equal(0.0, result.cutime) + assert_equal(0.0, result.cstime) + + # Real time should be positive + assert_true(result.real >= 0) +end + +assert('Benchmark.measure with actual delay') do + result = Benchmark.measure do + # Create some objects to ensure measurable time + arr = [] + 1000.times { |i| arr << i } + end + + # Real time should be measurable (greater than 0) + assert_true(result.real > 0) +end + +assert('Benchmark.realtime') do + time = Benchmark.realtime do + sum = 0 + 100.times { |i| sum += i } + end + + assert_kind_of(Float, time) + assert_true(time >= 0) +end + +assert('Benchmark.realtime with actual delay') do + time = Benchmark.realtime do + arr = [] + 1000.times { |i| arr << i } + end + + assert_true(time > 0) +end + +assert('Benchmark::Tms#total') do + tms = Benchmark::Tms.new(1.0, 2.0, 3.0, 4.0, 5.0) + assert_equal(10.0, tms.total) +end + +assert('Benchmark::Tms#to_s') do + tms = Benchmark::Tms.new(1.234567, 2.345678, 3.456789, 4.567890, 5.678901) + str = tms.to_s + + assert_kind_of(String, str) + # Should contain formatted numbers + assert_true(str.include?('1.234567')) + assert_true(str.include?('2.345678')) +end + +assert('Benchmark::Tms#format') do + tms = Benchmark::Tms.new(1.5, 2.5, 0.0, 0.0, 10.0) + + result = tms.format("user: %u, system: %s, total: %t, real: %r") + assert_equal("user: 1.5, system: 2.5, total: 4.0, real: 10.0", result) +end + +assert('Benchmark::Tms attributes') do + tms = Benchmark::Tms.new(1.0, 2.0, 3.0, 4.0, 5.0) + + assert_equal(1.0, tms.utime) + assert_equal(2.0, tms.stime) + assert_equal(3.0, tms.cutime) + assert_equal(4.0, tms.cstime) + assert_equal(5.0, tms.real) +end + +assert('Benchmark.bm') do + # Suppress output during test + old_stdout = $stdout + $stdout = nil + + results = [] + + report = Benchmark.bm(10) do |x| + results << x.report("test1") { 100.times { 1 + 1 } } + results << x.report("test2") { 100.times { 2 * 2 } } + end + + $stdout = old_stdout + + assert_kind_of(Benchmark::Report, report) + assert_equal(2, results.length) + assert_kind_of(Benchmark::Tms, results[0]) + assert_kind_of(Benchmark::Tms, results[1]) + + # Verify results were stored in report + assert_equal(2, report.results.length) +end + +assert('Benchmark.bm without label width') do + # Suppress output during test + old_stdout = $stdout + $stdout = nil + + report = Benchmark.bm do |x| + x.report { 50.times { 1 + 1 } } + end + + $stdout = old_stdout + + assert_kind_of(Benchmark::Report, report) + assert_equal(1, report.results.length) +end + +assert('Benchmark::Report#report') do + # Suppress output during test + old_stdout = $stdout + $stdout = nil + + report = Benchmark::Report.new(5) + result = report.report("test") { 100.times { 1 + 1 } } + + $stdout = old_stdout + + assert_kind_of(Benchmark::Tms, result) + assert_equal(1, report.results.length) +end + +assert('Benchmark.measure with memory tracking') do + skip unless Object.const_defined?(:ObjectSpace) + + result = Benchmark.measure(memory: true) do + arr = [] + # Create actual objects (strings) that will be tracked + 100.times { |i| arr << "string_#{i}" } + end + + assert_kind_of(Benchmark::Tms, result) + assert_kind_of(Integer, result.objects) + assert_kind_of(Integer, result.memory) + + # Memory tracking is best-effort, just verify the attributes exist + # and have reasonable values (non-negative) + assert_true(result.objects >= 0) + assert_true(result.memory >= 0) +end + +assert('Benchmark.measure without memory tracking') do + result = Benchmark.measure(memory: false) do + arr = [] + 100.times { |i| arr << i } + end + + assert_kind_of(Benchmark::Tms, result) + assert_nil(result.objects) + assert_nil(result.memory) +end + +assert('Benchmark::Tms with memory attributes') do + tms = Benchmark::Tms.new(1.0, 2.0, 3.0, 4.0, 5.0, "label", 1000, 40000) + + assert_equal(1000, tms.objects) + assert_equal(40000, tms.memory) +end + +assert('Benchmark::Tms#format with memory') do + tms = Benchmark::Tms.new(1.0, 2.0, 0.0, 0.0, 5.0, "test", 100, 4000) + + result = tms.format("Objects: %o, Memory: %m bytes, Label: %n") + assert_equal("Objects: 100, Memory: 4000 bytes, Label: test", result) +end + +assert('Benchmark comparison example') do + # Suppress output during test + old_stdout = $stdout + $stdout = nil + + results = [] + + Benchmark.bm(15) do |x| + results << x.report("array creation") do + 100.times { [1, 2, 3, 4, 5] } + end + + results << x.report("hash creation") do + 100.times { {a: 1, b: 2, c: 3} } + end + end + + $stdout = old_stdout + + assert_equal(2, results.length) + # Both should have measurable real time + assert_true(results[0].real >= 0) + assert_true(results[1].real >= 0) +end + +assert('Benchmark.measure consistency') do + # Measure the same operation twice + result1 = Benchmark.measure { 500.times { 1 + 1 } } + result2 = Benchmark.measure { 500.times { 1 + 1 } } + + # Both should return valid Tms objects + assert_kind_of(Benchmark::Tms, result1) + assert_kind_of(Benchmark::Tms, result2) + + # Real times should be positive + assert_true(result1.real >= 0) + assert_true(result2.real >= 0) +end + +assert('Benchmark.realtime consistency') do + time1 = Benchmark.realtime { 500.times { 1 + 1 } } + time2 = Benchmark.realtime { 500.times { 1 + 1 } } + + assert_kind_of(Float, time1) + assert_kind_of(Float, time2) + assert_true(time1 >= 0) + assert_true(time2 >= 0) +end diff --git a/mrbgems/mruby-bigint/README-fgmp.md b/mrbgems/mruby-bigint/README-fgmp.md index 3e5400dd54..de0adfa691 100644 --- a/mrbgems/mruby-bigint/README-fgmp.md +++ b/mrbgems/mruby-bigint/README-fgmp.md @@ -10,7 +10,7 @@ with the same API. For instance, you can link the following trivial program with either this code, or libgmp.a and get the same results. -``` C +```c #include #include "gmp.h" main() @@ -46,17 +46,17 @@ Mark Henderson # This is the fifth BETA release. 1.0b5 -I hearby place this file and all of FGMP in the public domain. +I hereby place this file and all of FGMP in the public domain. Thanks to Paul Rouse for changes to get fgmp to work on a 286 MS-DOS compiler, the functions mpz_sqrt and -mpz_sqrtrem, plus other general bug fixes. +mpz_sqrtrem, plus other general bugfixes. Thanks also to Erick Gallesio for a fix to mpz_init_set_str -Define B64 if your "long" type is 64 bits. Otherwise we assume 32 -bit longs. (The 64 bit version hasn't been tested enough) +Define B64 if your "long" type is 64 bits. Otherwise, we assume 32 +bit longs. (The 64-bit version hasn't been tested enough) ``` Platforms: @@ -73,25 +73,25 @@ MS-DOS 286 C compiler (see credits above) 1. fgmp is considerably slower than gmp 2. fgmp does not implement the following: - all mpq_* - internal mpn_* functions - mpz_perfect_square_p - mpz_inp_raw, mpz_out_raw - mp_set_memory_functions, mpz_out_str, mpz_inp_str + - all mpq\_\* + - internal mpn\_\* functions + - mpz_perfect_square_p + - mpz_inp_raw, mpz_out_raw + - mp_set_memory_functions, mpz_out_str, mpz_inp_str 3. fgmp implements the following in addition to the routines in GNU gmp. - `int mpz_jacobi(MP_INT *a, MP_INT *b)` - - finds the jacobi symbol (a/b) + `int mpz_jacobi(MP_INT *a, MP_INT *b)` + - finds the jacobi symbol (a/b) 4. mpz_sizeinbase often overestimates the exact value 5. To convert your gmp based program to fgmp (subject to the -above) + above) - recompile your source. Make sure to include the gmp.h file included with fgmp rather than that included with gmp. (The point is to recompile all files which include gmp.h) - link with gmp.o instead of libgmp.a -Here's a complete sorted list of function implemented in fgmp: +Here's a completely sorted list of functions implemented in fgmp: ``` _mpz_realloc diff --git a/mrbgems/mruby-bigint/README.md b/mrbgems/mruby-bigint/README.md index 3b1e1d9101..a1138e8de3 100644 --- a/mrbgems/mruby-bigint/README.md +++ b/mrbgems/mruby-bigint/README.md @@ -1,5 +1,44 @@ -# Multi-precision Integer extension for mruby +# mruby-bigint + +mruby-bigint is an mrbgem that provides multi-precision integer (BigInt) support for mruby. It allows you to work with integers that are larger than the standard Integer type can handle. This extension uses fgmp, which is a public domain implementation of a subset of the GNU gmp library by Mark Henderson . But it's heavily modified to fit with mruby. You can get the original source code from . You can read the original README for fgmp in [README-fgmp.md](README-fgmp.md). + +If you want to create your own Multi-precision Integer GEM, see [examples/mrbgems/mruby-YOUR-bigint/TODO-HINT.md](../../examples/mrbgems/mruby-YOUR-bigint/TODO-HINT.md). + +## Features + +- Basic arithmetic operations: `+`, `-`, `*`, `/`, `%` +- Power operation: `**` +- Modular exponentiation +- Bitwise operations: `&`, `|`, `^`, `<<`, `>>` +- Comparison: `<=>` +- Conversion to and from strings: `to_s`, `String#to_i` (with base) +- Square root +- Greatest Common Divisor (GCD) (available if `MRB_USE_RATIONAL` is defined) + +## Usage + +Here are some simple examples of how to use mruby-bigint: + +```ruby +# Creating BigInts +a = BigInt(12345678901234567890) +b = "98765432109876543210".to_i(10) # Specify base 10 for string conversion + +# Arithmetic operations +c = a + b +puts c.to_s # Output: 111111111011111111100 + +d = a * 2 +puts d.to_s # Output: 24691357802469135780 + +# Comparison +puts a <=> b # Output: -1 +``` + +## fgmp Dependency + +mruby-bigint depends on fgmp. For more information about fgmp, please see [README-fgmp.md](README-fgmp.md). diff --git a/mrbgems/mruby-bigint/core/bigint.c b/mrbgems/mruby-bigint/core/bigint.c index dd6dbd7d2b..839798b2e0 100644 --- a/mrbgems/mruby-bigint/core/bigint.c +++ b/mrbgems/mruby-bigint/core/bigint.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include "bigint.h" @@ -24,99 +25,321 @@ #define imin(x,y) (((x)<(y))?(x):(y)) #define dg(x,i) (((size_t)i < (x)->sz)?(x)->p[i]:0) +#ifndef MRB_BIGINT_POOL_SIZE +#define MRB_BIGINT_POOL_SIZE 512 /* 2KB on 32-bit, 4KB on 64-bit */ +#endif + +/* Scoped Memory Pool Infrastructure */ +#if MRB_BIGINT_POOL_SIZE == 0 +#define mpz_ctx_t mrb_state +#define MPZ_MRB(ctx) (ctx) +#define MPZ_HAS_POOL(ctx) (0) +#define MPZ_CTX_INIT(mrb_ptr, ctx, pool_ptr) mrb_state *ctx = (mrb_ptr); +#define pool_save(ctx) 0 +#define pool_restore(ctx, state) (void)state +#define pool_alloc(pool, limbs) NULL +#else +typedef struct mpz_pool { + mp_limb data[MRB_BIGINT_POOL_SIZE]; + size_t used; +} mpz_pool_t; + +/* MPZ Context Architecture - unified parameter for mrb_state and optional pool */ +typedef struct mpz_context { + mrb_state *mrb; + mpz_pool_t *pool; /* NULL for heap-only operations */ +} mpz_ctx_t; + +/* Convenience macros for context creation. + * Uses positional aggregate initialization instead of a C99 compound + * literal with designated initializers, so the file compiles as C++ + * on legacy toolchains (pre-C++20). Member order must match the + * mpz_context struct declaration above. */ +#define MPZ_CTX_INIT(mrb_ptr, ctx, pool_ptr) \ + mpz_pool_t pool ## _storage = {{0}};\ + mpz_pool_t *pool_ptr = &pool ## _storage;\ + mpz_ctx_t ctx ## _struct = { (mrb_ptr), (pool_ptr) }; \ + mpz_ctx_t *ctx = &(ctx ## _struct); + +/* Access macros for readability */ +#define MPZ_MRB(ctx) ((ctx)->mrb) +#define MPZ_POOL(ctx) ((ctx)->pool) +#define MPZ_HAS_POOL(ctx) ((ctx)->pool != NULL) + +static size_t +pool_save(mpz_ctx_t *ctx) +{ + mpz_pool_t *pool = MPZ_POOL(ctx); + return pool ? pool->used : 0; +} + +static void +pool_restore(mpz_ctx_t *ctx, size_t state) +{ + mpz_pool_t *pool = MPZ_POOL(ctx); + if (pool) { + pool->used = state; + } +} + +/* Forward declarations */ +static void mpz_mul_2exp(mpz_ctx_t *ctx, mpz_t *z, mpz_t *x, mrb_int e); +static void mpz_div_2exp(mpz_ctx_t *ctx, mpz_t *z, mpz_t *x, mrb_int e); +static void mpz_mod_2exp(mpz_ctx_t *ctx, mpz_t *z, mpz_t *x, mrb_int e); +static void mpz_set_int(mpz_ctx_t *ctx, mpz_t *y, mrb_int v); +static void mpz_mul(mpz_ctx_t *ctx, mpz_t *ww, mpz_t *u, mpz_t *v); + +static mp_limb* +pool_alloc(mpz_pool_t *pool, size_t limbs) +{ + if (!pool || pool->used + limbs > MRB_BIGINT_POOL_SIZE) { + return NULL; /* Force fallback to heap */ + } + + mp_limb *ptr = &pool->data[pool->used]; + pool->used += limbs; + return ptr; +} +#endif + +/* Zero n limbs at p */ +static inline void +mpn_zero(mp_limb *p, size_t n) +{ + memset(p, 0, n * sizeof(mp_limb)); +} + static void -mpz_init(mrb_state *mrb, mpz_t *s) +mpz_init(mpz_ctx_t *ctx, mpz_t *s) { s->p = NULL; - s->sn=0; - s->sz=0; + s->sn = 0; + s->sz = 0; +} + +/* Heap-preferred allocation */ +static void +mpz_init_heap(mpz_ctx_t *ctx, mpz_t *s, size_t hint) +{ + s->sn = 0; + if (hint > 0) { + /* Check for overflow in size calculation (same check as mpz_realloc) */ + if (hint > SIZE_MAX / sizeof(mp_limb)) { + mrb_state *mrb = MPZ_MRB(ctx); + mrb_raise(mrb, E_RUNTIME_ERROR, "bigint size too large"); + } + s->p = (mp_limb*)mrb_malloc(MPZ_MRB(ctx), hint * sizeof(mp_limb)); + mpn_zero(s->p, hint); + s->sz = hint; + } + else { + s->p = NULL; /* Lazy allocation via mpz_realloc later */ + s->sz = 0; + } +} + +#if MRB_BIGINT_POOL_SIZE > 0 +/* Pool-preferred allocation (future: mpz_init_temp) */ +static void +mpz_init_temp(mpz_ctx_t *ctx, mpz_t *s, size_t hint) +{ + s->sn = 0; + + + if (hint > 0 && MPZ_HAS_POOL(ctx)) { + mp_limb *pool_ptr = pool_alloc(MPZ_POOL(ctx), hint); + if (pool_ptr) { + s->p = pool_ptr; + s->sz = hint; + return; + } + } + /* Fallback to heap allocation */ + mpz_init_heap(ctx, s, hint); +} +#else +#define mpz_init_temp(ctx, s, hint) mpz_init_heap(ctx, s, hint) +#endif + +/* Check if mpz_t uses pool memory */ +#if MRB_BIGINT_POOL_SIZE > 0 +static int +is_pool_memory(mpz_t *z, mpz_pool_t *pool) +{ + if (!pool || !z->p) return 0; + uintptr_t ptr_addr = (uintptr_t)z->p; + uintptr_t pool_start = (uintptr_t)pool->data; + uintptr_t pool_end = pool_start + sizeof(pool->data); + return ptr_addr >= pool_start && ptr_addr < pool_end; } +#endif static void -mpz_realloc(mrb_state *mrb, mpz_t *x, size_t size) +mpz_realloc(mpz_ctx_t *ctx, mpz_t *x, size_t size) { if (x->sz < size) { - x->p=(mp_limb*)mrb_realloc(mrb, x->p, size*sizeof(mp_limb)); - for (size_t i=x->sz; ip[i] = 0; + /* Check for overflow in size calculation */ + if (size > SIZE_MAX / sizeof(mp_limb)) { + mrb_state *mrb = MPZ_MRB(ctx); + mrb_raise(mrb, E_RUNTIME_ERROR, "bigint size too large"); + } + + size_t old_sz = x->sz; + +#if MRB_BIGINT_POOL_SIZE > 0 + /* Pool memory cannot be reallocated - must use heap */ + if (MPZ_HAS_POOL(ctx) && is_pool_memory(x, MPZ_POOL(ctx))) { + /* Allocate new heap memory and copy from pool */ + mp_limb *new_p = (mp_limb*)mrb_malloc(MPZ_MRB(ctx), size * sizeof(mp_limb)); + if (x->p) { + memcpy(new_p, x->p, old_sz * sizeof(mp_limb)); + } + x->p = new_p; + } + else { +#endif + /* Regular heap reallocation */ + x->p = (mp_limb*)mrb_realloc(MPZ_MRB(ctx), x->p, size * sizeof(mp_limb)); +#if MRB_BIGINT_POOL_SIZE > 0 + } +#endif + + /* Zero-initialize new limbs */ + mpn_zero(x->p + old_sz, size - old_sz); x->sz = size; } } static void -mpz_set(mrb_state *mrb, mpz_t *y, mpz_t *x) +mpz_set(mpz_ctx_t *ctx, mpz_t *y, mpz_t *x) { size_t i, k = x->sz; - mpz_realloc(mrb, y, k); + mpz_realloc(ctx, y, k); for (i=0;i < k; i++) y->p[i] = x->p[i]; - for (;isz;i++) - y->p[i] = 0; - + y->sz = k; y->sn = x->sn; } static void -mpz_init_set(mrb_state *mrb, mpz_t *s, mpz_t *t) +mpz_init_set(mpz_ctx_t *ctx, mpz_t *s, mpz_t *t) { - mpz_init(mrb, s); - mpz_set(mrb, s, t); + mpz_init(ctx, s); + mpz_set(ctx, s, t); } static void -mpz_set_int(mrb_state *mrb, mpz_t *y, mrb_int v) +mpz_set_int(mpz_ctx_t *ctx, mpz_t *y, mrb_int v) { mrb_uint u; if (v == 0) { - y->sn=0; - u = 0; + y->sn = 0; + y->sz = 0; + return; } else if (v > 0) { y->sn = 1; u = v; } - if (v < 0) { + else /* if (v < 0) */ { y->sn = -1; if (v == MRB_INT_MIN) u = v; else u = -v; } #if MRB_INT_BIT > DIG_SIZE if ((u & ~DIG_MASK) != 0) { - mpz_realloc(mrb, y, 2); + mpz_realloc(ctx, y, 2); y->p[1] = (mp_limb)HIGH(u); y->p[0] = (mp_limb)LOW(u); + y->sz = 2; + return; } - else #endif - { - mpz_realloc(mrb, y, 1); - y->p[0] = (mp_limb)u; + mpz_realloc(ctx, y, 1); + y->p[0] = (mp_limb)u; + y->sz = 1; +} + + +static void +mpz_set_uint64(mpz_ctx_t *ctx, mpz_t *y, uint64_t u) +{ + size_t len = 0; + + for (uint64_t u0=u; u0; u0>>=DIG_SIZE,len++) + ; + y->sn = (u != 0); + mpz_realloc(ctx, y, len); + y->sz = len; + for (size_t i=0; ip[i] = (mp_limb)LOW(u); + u >>= DIG_SIZE; + } +} + +#ifdef MRB_INT32 +static void +mpz_set_int64(mpz_ctx_t *ctx, mpz_t *y, int64_t v) +{ + uint64_t u; + + if (v < 0) { + if (v == INT64_MIN) u = v; + else u = -v; + } + else { + u = v; + } + mpz_set_uint64(ctx, y, u); + if (v < 0) { + y->sn = -1; } } +#endif static void -mpz_init_set_int(mrb_state *mrb, mpz_t *y, mrb_int v) +mpz_init_set_int(mpz_ctx_t *ctx, mpz_t *y, mrb_int v) { - mpz_init(mrb, y); - mpz_set_int(mrb, y, v); + mpz_init(ctx, y); + mpz_set_int(ctx, y, v); } static void -mpz_clear(mrb_state *mrb, mpz_t *s) +mpz_clear(mpz_ctx_t *ctx, mpz_t *s) { - if (s->p) mrb_free(mrb, s->p); - s->p = NULL; + if (s->p) { +#if MRB_BIGINT_POOL_SIZE > 0 + if (MPZ_HAS_POOL(ctx) && is_pool_memory(s, MPZ_POOL(ctx))) { + /* Pool memory - don't free, just mark as unused */ + } + else { + mrb_free(MPZ_MRB(ctx), s->p); + } +#else + mrb_free(MPZ_MRB(ctx), s->p); +#endif + s->p = NULL; + } s->sn = 0; s->sz = 0; } static void -mpz_move(mrb_state *mrb, mpz_t *y, mpz_t *x) +mpz_move(mpz_ctx_t *ctx, mpz_t *y, mpz_t *x) { - mpz_clear(mrb, y); +#if MRB_BIGINT_POOL_SIZE > 0 + if (MPZ_HAS_POOL(ctx) && is_pool_memory(x, MPZ_POOL(ctx))) { + /* Source is pool memory - use deep copy instead of pointer transfer */ + mpz_set(ctx, y, x); + mpz_clear(ctx, x); + return; + } +#endif + /* Normal move: transfer ownership */ + mpz_clear(ctx, y); y->sn = x->sn; y->sz = x->sz; y->p = x->p; @@ -125,14 +348,22 @@ mpz_move(mrb_state *mrb, mpz_t *y, mpz_t *x) x->sz = 0; } +static inline void +mpz_swap(mpz_t *a, mpz_t *b) +{ + mpz_t tmp = *a; + *a = *b; + *b = tmp; +} + static size_t digits(mpz_t *x) { size_t i; if (x->sz == 0) return 0; - for (i = x->sz - 1; x->p[i] == 0 ; i--) - if (i == 0) break; + for (i = x->sz - 1; x->p[i] == 0 && i > 0; i--) + ; return i+1; } @@ -142,58 +373,59 @@ trim(mpz_t *x) while (x->sz && x->p[x->sz-1] == 0) { x->sz--; } + /* Maintain invariant: sz == 0 implies sn == 0 (zero is canonical). */ + if (x->sz == 0) x->sn = 0; } /* z = x + y, without regard for sign */ +/* Core addition algorithm for unsigned operands */ +/* Note: mpn_add/mpn_sub are defined later in the file, forward declare here */ +static mp_limb mpn_add(mp_limb*, const mp_limb*, size_t, const mp_limb*, size_t); +static mp_limb mpn_sub(mp_limb*, const mp_limb*, size_t, const mp_limb*, size_t); + static void -uadd(mrb_state *mrb, mpz_t *z, mpz_t *x, mpz_t *y) +uadd(mpz_t *z, mpz_t *x, mpz_t *y) { - if (y->sz < x->sz) { - mpz_t *t; /* swap x,y */ - t=x; x=y; y=t; - } + mp_limb carry; + size_t max_sz = (x->sz > y->sz) ? x->sz : y->sz; - /* now y->sz >= x->sz */ - mpz_realloc(mrb, z, y->sz+1); - - mp_dbl_limb c = 0; - size_t i; - for (i=0; isz; i++) { - c += (mp_dbl_limb)y->p[i] + (mp_dbl_limb)x->p[i]; - z->p[i] = LOW(c); - c >>= DIG_SIZE; + /* Ensure larger array is first argument to mpn_add */ + if (x->sz >= y->sz) { + carry = mpn_add(z->p, x->p, x->sz, y->p, y->sz); } - for (;isz; i++) { - c += y->p[i]; - z->p[i] = LOW(c); - c >>= DIG_SIZE; + else { + carry = mpn_add(z->p, y->p, y->sz, x->p, x->sz); } - z->p[y->sz] = (mp_limb)c; - trim(z); + + /* Store final carry */ + z->p[max_sz] = carry; } /* z = y - x, ignoring sign */ /* precondition: abs(y) >= abs(x) */ +/* Core subtraction algorithm for unsigned operands */ static void -usub(mrb_state *mrb, mpz_t *z, mpz_t *y, mpz_t *x) +usub(mpz_t *z, mpz_t *y, mpz_t *x) { - mpz_realloc(mrb, z, (size_t)(y->sz)); - mp_dbl_limb_signed b = 0; - size_t i; - for (i=0;isz;i++) { - b += (mp_dbl_limb_signed)y->p[i]; - b -= (mp_dbl_limb_signed)x->p[i]; - z->p[i] = LOW(b); - b = HIGH(b); - } - for (;isz; i++) { - b += y->p[i]; - z->p[i] = LOW(b); - b = HIGH(b); - } + /* y->sz >= x->sz is guaranteed by precondition */ + mpn_sub(z->p, y->p, y->sz, x->p, x->sz); + + /* Normalize result size */ z->sz = digits(z); } +/* Compare two same-length limb arrays: returns <0, 0, or >0 */ +static inline int +mpn_cmp(const mp_limb *ap, const mp_limb *bp, size_t n) +{ + while (n-- > 0) { + if (ap[n] != bp[n]) { + return (ap[n] > bp[n]) ? 1 : -1; + } + } + return 0; +} + /* compare abs(x) and abs(y) */ static int ucmp(mpz_t *y, mpz_t *x) @@ -201,22 +433,21 @@ ucmp(mpz_t *y, mpz_t *x) if (y->sz < x->sz) return -1; if (y->sz > x->sz) return 1; if (x->sz == 0) return 0; - for (size_t i=x->sz-1;; i--) { - mp_limb a = y->p[i]; - mp_limb b = x->p[i]; - if (a > b) return 1; - if (a < b) return -1; - if (i == 0) break; - } - return 0; + return mpn_cmp(y->p, x->p, x->sz); } +#define zero_p(x) ((x)->sn == 0) + +/* check if all digits are zero */ static int -uzero(mpz_t *x) +uzero_p(mpz_t *x) { - for (size_t i=0; i < x->sz; i++) + if (x->sz == 0) return 1; + for (size_t i=x->sz-1;; i--) { if (x->p[i] != 0) return 0; + if (i == 0) break; + } return 1; } @@ -235,752 +466,4922 @@ zero(mpz_t *x) /* z = x + y */ static void -mpz_add(mrb_state *mrb, mpz_t *zz, mpz_t *x, mpz_t *y) +mpz_add(mpz_ctx_t *ctx, mpz_t *zz, mpz_t *x, mpz_t *y) { - int mg; - mpz_t z; + if (zero_p(x)) { + mpz_set(ctx, zz, y); + trim(zz); + return; + } + if (zero_p(y)) { + mpz_set(ctx, zz, x); + trim(zz); + return; + } - if (x->sn == 0) { - mpz_set(mrb, zz, y); + /* Fast path: single-limb + multi-limb */ + if (y->sz == 1 && x->sz > 1) { + mp_limb y_limb = y->p[0]; + mpz_t z; + mpz_init_heap(ctx, &z, x->sz + 1); + + if ((x->sn > 0 && y->sn > 0) || (x->sn < 0 && y->sn < 0)) { + /* Same signs: addition */ + mp_dbl_limb carry = y_limb; + carry += x->p[0]; + z.p[0] = (mp_limb)carry; + carry >>= DIG_SIZE; + + /* Propagate carry through remaining limbs */ + for (size_t i = 1; i < x->sz; i++) { + carry += x->p[i]; + z.p[i] = (mp_limb)carry; + carry >>= DIG_SIZE; + } + z.p[x->sz] = (mp_limb)carry; + z.sn = x->sn; + } + else { + /* Different signs: subtraction */ + if (x->sz == 1 && y_limb == x->p[0]) { + /* Equal magnitude: result is zero */ + zero(&z); + } + else if (x->sz == 1 && x->p[0] > y_limb) { + /* |x| > |y|: result has sign of x */ + z.p[0] = x->p[0] - y_limb; + z.p[1] = 0; + z.sn = x->sn; + } + else { + /* |x| > |y|: subtract y from x */ + mp_dbl_limb borrow = y_limb; + if (x->p[0] >= borrow) { + z.p[0] = x->p[0] - (mp_limb)borrow; + borrow = 0; + } + else { + z.p[0] = (mp_limb)(((mp_dbl_limb)1 << DIG_SIZE) + x->p[0] - (mp_limb)borrow); + borrow = 1; + } + + /* Propagate borrow through remaining limbs */ + for (size_t i = 1; i < x->sz; i++) { + if (x->p[i] >= borrow) { + z.p[i] = x->p[i] - (mp_limb)borrow; + borrow = 0; + } + else { + z.p[i] = (mp_limb)(((mp_dbl_limb)1 << DIG_SIZE) + x->p[i] - (mp_limb)borrow); + borrow = 1; + } + } + z.sn = x->sn; + } + } + trim(&z); + mpz_move(ctx, zz, &z); return; } - if (y->sn == 0) { - mpz_set(mrb, zz, x); + + if (x->sz == 1 && y->sz > 1) { + /* Swap and use the same fast path */ + mpz_add(ctx, zz, y, x); return; } - mpz_init(mrb, &z); + + mpz_t z; + size_t estimated_size = ((x->sz > y->sz) ? x->sz : y->sz) + 1; + mpz_init_heap(ctx, &z, estimated_size); if (x->sn > 0 && y->sn > 0) { - uadd(mrb, &z, x, y); + uadd(&z, x, y); z.sn = 1; } else if (x->sn < 0 && y->sn < 0) { - uadd(mrb, &z, x, y); + uadd(&z, x, y); z.sn = -1; } else { + int mg; + /* signs differ */ if ((mg = ucmp(x,y)) == 0) { zero(&z); } else if (mg > 0) { /* abs(y) < abs(x) */ - usub(mrb, &z, x, y); + usub(&z, x, y); z.sn = (x->sn > 0 && y->sn < 0) ? 1 : (-1); } else { /* abs(y) > abs(x) */ - usub(mrb, &z, y, x); + usub(&z, y, x); z.sn = (x->sn < 0 && y->sn > 0) ? 1 : (-1); } } trim(&z); - mpz_move(mrb, zz, &z); + mpz_move(ctx, zz, &z); } -/* z = x - y -- just use mpz_add - I'm lazy */ +/* x += n */ +/* ignores sign of x */ +/* assumes n is positive and small (fits in mp_limb) */ static void -mpz_sub(mrb_state *mrb, mpz_t *z, mpz_t *x, mpz_t *y) +mpz_add_int(mpz_ctx_t *ctx, mpz_t *x, mrb_int n) { - mpz_t u; + // If n is zero, no operation is needed + if (n == 0) return; + + // Assume x is positive and n is a small positive integer + mp_dbl_limb carry = n; // Initialize carry with n + for (size_t i = 0; i < x->sz && carry; i++) { + carry += (mp_dbl_limb)x->p[i]; // Add current limb and carry + x->p[i] = LOW(carry); // Store lower 32 bits in current limb + carry = HIGH(carry); // Update carry with higher bits + } - mpz_init(mrb, &u); - mpz_set(mrb, &u, y); - u.sn = -(u.sn); - mpz_add(mrb, z, x, &u); - mpz_clear(mrb, &u); + if (carry != 0) { + mpz_realloc(ctx, x, x->sz + 1); + x->p[x->sz-1] = (mp_limb)carry; + x->sn = 1; + } + trim(x); } -/* x = y - n */ +/* z = x - y */ static void -mpz_sub_int(mrb_state *mrb, mpz_t *x, mpz_t *y, mrb_int n) +mpz_sub(mpz_ctx_t *ctx, mpz_t *z, mpz_t *x, mpz_t *y) { - mpz_t z; + /* In-place optimization: z == x, both positive, x >= y */ + if (z == x && x->sn > 0 && y->sn > 0 && ucmp(x, y) >= 0) { + mpn_sub(x->p, x->p, x->sz, y->p, y->sz); + x->sz = digits(x); + if (x->sz == 0) x->sn = 0; + return; + } - mpz_init_set_int(mrb, &z, n); - mpz_sub(mrb, x, y, &z); - mpz_clear(mrb, &z); + /* General case: create view of y with negated sign and use mpz_add */ + mpz_t u; + u.p = y->p; + u.sz = y->sz; + u.sn = -(y->sn); + mpz_add(ctx, z, x, &u); } -/* w = u * v */ +/* x -= n */ +/* ignores sign of x */ +/* assumes n is positive and small (fits in mp_limb) */ static void -mpz_mul(mrb_state *mrb, mpz_t *ww, mpz_t *u, mpz_t *v) +mpz_sub_int(mpz_ctx_t *ctx, mpz_t *x, mrb_int n) { - size_t i, j; - mpz_t w; + // If n is zero, no operation is needed + if (n == 0) return; - if (uzero(u) || uzero(v)) { - mpz_set_int(mrb, ww, 0); + // If x is zero, set x to n + if (zero_p(x) || x->sz == 0) { + mpz_set_int(ctx, x, n); return; } - mpz_init(mrb, &w); - mpz_realloc(mrb, &w, u->sz + v->sz); - for (j=0; j < u->sz; j++) { - mp_dbl_limb cc = (mp_limb)0; - mp_limb u0 = u->p[j]; - if (u0 == 0) continue; - for (i=0; i < v->sz; i++) { - mp_limb v0 = v->p[i]; - if (v0 == 0) continue; - cc += (mp_dbl_limb)w.p[i+j] + (mp_dbl_limb)u0 * (mp_dbl_limb)v0; - w.p[i+j] = LOW(cc); - cc = HIGH(cc); - } - if (cc) { - w.p[i+j] = (mp_limb)cc; - } + + // Initialize borrow and start decrement + mp_dbl_limb_signed borrow = (mp_limb)n; + size_t i = 0; + + // Subtract 1 from the least significant limb and propagate if necessary + borrow = (mp_dbl_limb_signed)x->p[i] - borrow; + x->p[i] = LOW(borrow); + borrow = (borrow < 0) ? 1 : 0; + + // Continue through limbs while there is a borrow + for (i = 1; i < x->sz && borrow; i++) { + borrow = (mp_dbl_limb_signed)x->p[i] - borrow; + x->p[i] = LOW(borrow); + borrow = (borrow < 0) ? 1 : 0; } - w.sn = u->sn * v->sn; - trim(&w); - mpz_move(mrb, ww, &w); + + // Trim any unnecessary leading zeros + trim(x); } -static void -mpz_mul_int(mrb_state *mrb, mpz_t *x, mpz_t *y, mrb_int n) +/* Multiply-and-add: rp[0..n-1] += s1p[0..n-1] * limb; return carry (high limb) */ +static inline mp_limb +mpn_addmul_1(mp_limb *rp, const mp_limb *s1p, size_t n, mp_limb limb) { - if (n == 0) { - zero(x); - return; - } +#if defined(__SIZEOF_INT128__) && (__SIZEOF_INT128__ == 16) + /* Use 128-bit arithmetic with 8x unrolling for maximum efficiency */ + unsigned __int128 acc = 0; + size_t i; - mpz_t z; + /* 8x unrolled loop for large operands */ + for (i = 0; i + 8 <= n; i += 8) { + acc += (unsigned __int128)rp[i] + (unsigned __int128)s1p[i] * (unsigned __int128)limb; + rp[i] = (mp_limb)acc; + acc >>= DIG_SIZE; - mpz_init_set_int(mrb, &z, n); - mpz_mul(mrb, x, y, &z); - mpz_clear(mrb, &z); -} + acc += (unsigned __int128)rp[i+1] + (unsigned __int128)s1p[i+1] * (unsigned __int128)limb; + rp[i+1] = (mp_limb)acc; + acc >>= DIG_SIZE; -/* number of leading zero bits in digit */ -static int -lzb(mp_limb x) -{ - if (x == 0) return 0; -#if (defined(__GNUC__) || __has_builtin(__builtin_clz)) - if (sizeof(mp_limb) == sizeof(int64_t)) - return __builtin_clzll(x); - else if (sizeof(mp_limb) == sizeof(int32_t)) - return __builtin_clz(x); -#endif + acc += (unsigned __int128)rp[i+2] + (unsigned __int128)s1p[i+2] * (unsigned __int128)limb; + rp[i+2] = (mp_limb)acc; + acc >>= DIG_SIZE; - int j=0; + acc += (unsigned __int128)rp[i+3] + (unsigned __int128)s1p[i+3] * (unsigned __int128)limb; + rp[i+3] = (mp_limb)acc; + acc >>= DIG_SIZE; - for (mp_limb i = ((mp_limb)1 << (DIG_SIZE-1)); i && !(x&i) ; j++,i>>=1) - ; - return j; -} + acc += (unsigned __int128)rp[i+4] + (unsigned __int128)s1p[i+4] * (unsigned __int128)limb; + rp[i+4] = (mp_limb)acc; + acc >>= DIG_SIZE; -/* c1 = a>>n */ -/* n must be < DIG_SIZE */ -static void -urshift(mrb_state *mrb, mpz_t *c1, mpz_t *a, size_t n) -{ - mrb_assert(n < DIG_SIZE); + acc += (unsigned __int128)rp[i+5] + (unsigned __int128)s1p[i+5] * (unsigned __int128)limb; + rp[i+5] = (mp_limb)acc; + acc >>= DIG_SIZE; + + acc += (unsigned __int128)rp[i+6] + (unsigned __int128)s1p[i+6] * (unsigned __int128)limb; + rp[i+6] = (mp_limb)acc; + acc >>= DIG_SIZE; - if (n == 0) - mpz_set(mrb, c1, a); - else if (uzero(a)) { - mpz_set_int(mrb, c1, 0); + acc += (unsigned __int128)rp[i+7] + (unsigned __int128)s1p[i+7] * (unsigned __int128)limb; + rp[i+7] = (mp_limb)acc; + acc >>= DIG_SIZE; } - else { - mpz_t c; - mp_limb cc = 0; - mp_dbl_limb rm = (((mp_dbl_limb)1<sz); - for (size_t i=a->sz-1;; i--) { - c.p[i] = ((a->p[i] >> n) | cc) & DIG_MASK; - cc = (a->p[i] & rm) << (DIG_SIZE - n); - if (i == 0) break; - } - trim(&c); - mpz_move(mrb, c1, &c); + /* 4x unrolled loop for medium operands */ + for (; i + 4 <= n; i += 4) { + acc += (unsigned __int128)rp[i] + (unsigned __int128)s1p[i] * (unsigned __int128)limb; + rp[i] = (mp_limb)acc; + acc >>= DIG_SIZE; + + acc += (unsigned __int128)rp[i+1] + (unsigned __int128)s1p[i+1] * (unsigned __int128)limb; + rp[i+1] = (mp_limb)acc; + acc >>= DIG_SIZE; + + acc += (unsigned __int128)rp[i+2] + (unsigned __int128)s1p[i+2] * (unsigned __int128)limb; + rp[i+2] = (mp_limb)acc; + acc >>= DIG_SIZE; + + acc += (unsigned __int128)rp[i+3] + (unsigned __int128)s1p[i+3] * (unsigned __int128)limb; + rp[i+3] = (mp_limb)acc; + acc >>= DIG_SIZE; } -} -/* c1 = a<>= DIG_SIZE; } - else { - mp_limb cc = 0; - mpz_t c; - mp_limb rm = (((mp_dbl_limb)1<sz+1); + return (mp_limb)acc; - size_t i; - for (i=0; isz; i++) { - c.p[i] = ((a->p[i] << n) | cc) & DIG_MASK; - cc = (a->p[i] & rm) >> (DIG_SIZE-n); - } - c.p[i] = cc; - trim(&c); - mpz_move(mrb, c1, &c); +#else + /* Portable double-limb path with 4x unrolling */ + mp_dbl_limb acc = 0; + size_t i; + + /* 4x unrolled loop for better performance */ + for (i = 0; i + 4 <= n; i += 4) { + acc += (mp_dbl_limb)rp[i] + (mp_dbl_limb)s1p[i] * (mp_dbl_limb)limb; + rp[i] = LOW(acc); + acc = HIGH(acc); + + acc += (mp_dbl_limb)rp[i+1] + (mp_dbl_limb)s1p[i+1] * (mp_dbl_limb)limb; + rp[i+1] = LOW(acc); + acc = HIGH(acc); + + acc += (mp_dbl_limb)rp[i+2] + (mp_dbl_limb)s1p[i+2] * (mp_dbl_limb)limb; + rp[i+2] = LOW(acc); + acc = HIGH(acc); + + acc += (mp_dbl_limb)rp[i+3] + (mp_dbl_limb)s1p[i+3] * (mp_dbl_limb)limb; + rp[i+3] = LOW(acc); + acc = HIGH(acc); + } + + /* Handle remaining elements */ + for (; i < n; i++) { + acc += (mp_dbl_limb)rp[i] + (mp_dbl_limb)s1p[i] * (mp_dbl_limb)limb; + rp[i] = LOW(acc); + acc = HIGH(acc); } + + return (mp_limb)acc; +#endif } -/* internal routine to compute x/y and x%y ignoring signs */ -/* qq = xx/yy; rr = xx%yy */ +/* w = u * v (optimized schoolbook using mpn_addmul_1) */ static void -udiv(mrb_state *mrb, mpz_t *qq, mpz_t *rr, mpz_t *xx, mpz_t *yy) +mpz_mul_basic(mpz_ctx_t *ctx, mpz_t *ww, mpz_t *u, mpz_t *v) { - /* simple cases */ - int cmp = ucmp(xx, yy); - if (cmp == 0) { - mpz_set_int(mrb, qq, 1); - zero(rr); + if (zero_p(u) || zero_p(v)) { + zero(ww); return; } - else if (cmp < 0) { - zero(qq); - mpz_set(mrb, rr, xx); - return; + + /* Ensure outer loop iterates over the shorter operand for better cache use */ + mpz_t *a, *b; + if (v->sz > u->sz) { + a = v; b = u; + } + else { + a = u; b = v; } - mpz_t q, x, y; - size_t i; + /* Fast path: single-limb * multi-limb */ + if (b->sz == 1) { + mp_limb scalar = b->p[0]; + mpz_t w; + mpz_init_heap(ctx, &w, a->sz + 1); + mpn_zero(w.p, a->sz + 1); - mrb_assert(!uzero(yy)); /* divided by zero */ - mpz_init(mrb, &q); - mpz_init(mrb, &x); - mpz_init(mrb, &y); - mpz_realloc(mrb, &x, xx->sz+1); - size_t yd = digits(yy); - size_t ns = lzb(yy->p[yd-1]); - ulshift(mrb, &x, xx, ns); - ulshift(mrb, &y, yy, ns); - size_t xd = digits(&x); - mpz_realloc(mrb, &q, xd); - mp_dbl_limb z = y.p[yd-1]; - for (size_t j=xd-yd;; j--) { - mp_dbl_limb_signed b=0; - mp_dbl_limb qhat; + mp_limb carry = mpn_addmul_1(w.p, a->p, a->sz, scalar); + w.p[a->sz] = carry; - if (j+yd == xd) - qhat = x.p[j+yd-1] / z; - else - qhat = (((mp_dbl_limb)x.p[j+yd] << DIG_SIZE) + x.p[j+yd-1]) / z; - if (qhat) { - for (i=0; isn * b->sn; + trim(&w); + mpz_move(ctx, ww, &w); + return; + } + + mpz_t w; + mpz_init_heap(ctx, &w, a->sz + b->sz); + mpn_zero(w.p, a->sz + b->sz); + + for (size_t j = 0; j < a->sz; j++) { + mp_limb a_limb = a->p[j]; + if (a_limb == 0) continue; + + mp_limb carry = mpn_addmul_1(w.p + j, b->p, b->sz, a_limb); + + /* Properly handle carry propagation to avoid overflow */ + size_t k = j + b->sz; + while (carry && k < a->sz + b->sz) { + mp_dbl_limb sum = (mp_dbl_limb)w.p[k] + (mp_dbl_limb)carry; + w.p[k] = LOW(sum); + carry = HIGH(sum); + k++; } - q.p[j] = (mp_limb)qhat; - if (j == 0) break; } - x.sz = yy->sz; - urshift(mrb, rr, &x, ns); - trim(&q); - mpz_move(mrb, qq, &q); - mpz_clear(mrb, &x); - mpz_clear(mrb, &y); + + w.sn = a->sn * b->sn; + trim(&w); + mpz_move(ctx, ww, &w); } +/* Allocation-free multiplication helper functions */ + +/* Copy limbs forward: dest[0..n-1] = src[0..n-1] */ static void -mpz_mdiv(mrb_state *mrb, mpz_t *q, mpz_t *x, mpz_t *y) +mpn_copyi(mp_limb *dest, const mp_limb *src, size_t n) { - mpz_t r; - short sn1 = x->sn, sn2 = y->sn, qsign; + if (n > 0) { + memcpy(dest, src, n * sizeof(mp_limb)); + } +} - if (uzero(x)) { - mpz_init_set_int(mrb, q, 0); - return; +/* Add limbs at offset: dest[offset..offset+n-1] += src[0..n-1] */ +static void +limb_add_at(mp_limb *dest, size_t dest_len, const mp_limb *src, size_t n, size_t offset) +{ + mp_limb carry = 0; + size_t i = 0; + for (i = 0; i < n; i++) { + mp_dbl_limb sum = (mp_dbl_limb)dest[offset + i] + (mp_dbl_limb)src[i] + carry; + dest[offset + i] = LOW(sum); + carry = HIGH(sum); + } + /* Propagate final carry */ + i = offset + n; + while (carry && i < dest_len) { + mp_dbl_limb sum = (mp_dbl_limb)dest[i] + carry; + dest[i] = LOW(sum); + carry = HIGH(sum); + i++; } - mpz_init(mrb, &r); - udiv(mrb, q, &r, x, y); - qsign = q->sn = sn1*sn2; - if (uzero(q)) - q->sn = 0; - /* now if r != 0 and q < 0 we need to round q towards -inf */ - if (!uzero(&r) && qsign < 0) - mpz_sub_int(mrb, q, q, 1); - mpz_clear(mrb, &r); } +/* Basic multiplication for small operands */ static void -mpz_mmod(mrb_state *mrb, mpz_t *r, mpz_t *x, mpz_t *y) +mpz_mul_basic_limbs(mp_limb *result, const mp_limb *x, size_t x_len, + const mp_limb *y, size_t y_len) { - mpz_t q; - short sn1 = x->sn, sn2 = y->sn, sn3; + mpn_zero(result, x_len + y_len); - mpz_init(mrb, &q); - if (sn1 == 0) { - zero(r); - return; + for (size_t i = 0; i < x_len; i++) { + if (x[i] == 0) continue; + mp_limb carry = mpn_addmul_1(result + i, y, y_len, x[i]); + if (i + y_len < x_len + y_len) { + result[i + y_len] += carry; + } } - udiv(mrb, &q, r, x, y); - mpz_clear(mrb, &q); - if (uzero(r)) { +} + +/* + * Schoolbook squaring - exploits symmetry for ~1.5x speedup. + * + * For x = [x0, x1, x2, ...], x^2 has terms: + * - Diagonal: xi^2 (computed once) + * - Off-diagonal: 2*xi*xj for i> DIG_SIZE) + HIGH(sq); + + for (size_t k = 2*i + 1; acc && k < result_len; k++) { + acc += result[k]; + result[k] = LOW(acc); + acc >>= DIG_SIZE; + } + } +} + +/* + * Karatsuba Multiplication + * + * Splits inputs into 2 parts: A = A1*B^half + A0, B = B1*B^half + B0 + * Computes: z0 = A0*B0, z2 = A1*B1, z1 = (A0+A1)*(B0+B1) - z0 - z2 + * Result: z2*B^(2*half) + z1*B^half + z0 + * + * Complexity: O(n^1.585) - trades 4 multiplications for 3 plus additions + */ + +#define KARATSUBA_THRESHOLD 32 + +static inline mrb_bool +should_use_karatsuba(size_t n) +{ + return n >= KARATSUBA_THRESHOLD; +} + +/* Calculate scratch space needed for Karatsuba */ +static size_t +karatsuba_scratch_size(size_t n) +{ + if (n < KARATSUBA_THRESHOLD) { + return 0; + } + + size_t half = (n + 1) / 2; + + /* + * Per level storage: + * - 2 evaluation results: (A0+A1), (B0+B1), each up to (half+1) limbs + * - 3 products: z0, z1, z2, each up to 2*(half+1) limbs + */ + size_t eval_len = half + 1; + size_t prod_len = 2 * eval_len; + + size_t eval_size = 2 * eval_len; /* 2 evaluation temps */ + size_t prod_size = 3 * prod_len; /* 3 products */ + size_t current_level = eval_size + prod_size; + + /* Recursive scratch - sequential calls reuse same buffer */ + size_t sub_scratch = karatsuba_scratch_size(eval_len); + + return current_level + sub_scratch + 8; /* +8 safety margin */ +} + +/* Forward declaration for recursive calls */ +static void +mpz_mul_karatsuba_limbs(mp_limb *result, + const mp_limb *x, size_t x_len, + const mp_limb *y, size_t y_len, + mp_limb *scratch); + +/* + * Karatsuba multiplication on raw limb arrays. + * + * result must have space for x_len + y_len limbs. + * scratch must have karatsuba_scratch_size(max(x_len, y_len)) limbs. + */ +static void +mpz_mul_karatsuba_limbs(mp_limb *result, + const mp_limb *x, size_t x_len, + const mp_limb *y, size_t y_len, + mp_limb *scratch) +{ + size_t min_len = (x_len < y_len) ? x_len : y_len; + size_t max_len = (x_len > y_len) ? x_len : y_len; + + /* Base case - use schoolbook */ + if (!should_use_karatsuba(min_len)) { + mpz_mul_basic_limbs(result, x, x_len, y, y_len); + return; + } + + /* + * Split: x = x1*B^half + x0, y = y1*B^half + y0 + * where B = base^half + */ + size_t half = (max_len + 1) / 2; + + /* Determine actual lengths of each part */ + size_t x0_len = (x_len > half) ? half : x_len; + size_t x1_len = (x_len > half) ? x_len - half : 0; + size_t y0_len = (y_len > half) ? half : y_len; + size_t y1_len = (y_len > half) ? y_len - half : 0; + + const mp_limb *x0 = x; + const mp_limb *x1 = x + half; + const mp_limb *y0 = y; + const mp_limb *y1 = y + half; + + /* Allocate scratch space */ + size_t eval_len = half + 1; + size_t prod_len = 2 * eval_len; + + size_t offset = 0; + mp_limb *sum_x = scratch + offset; offset += eval_len; /* x0 + x1 */ + mp_limb *sum_y = scratch + offset; offset += eval_len; /* y0 + y1 */ + mp_limb *z0 = scratch + offset; offset += prod_len; /* x0 * y0 */ + mp_limb *z2 = scratch + offset; offset += prod_len; /* x1 * y1 */ + mp_limb *z1 = scratch + offset; offset += prod_len; /* (x0+x1)*(y0+y1) */ + mp_limb *recursive_scratch = scratch + offset; + + /* Compute sum_x = x0 + x1 */ + mpn_zero(sum_x, eval_len); + mpn_copyi(sum_x, x0, x0_len); + if (x1_len > 0) { + mpn_add(sum_x, sum_x, eval_len, x1, x1_len); + } + size_t sum_x_len = eval_len; + while (sum_x_len > 1 && sum_x[sum_x_len - 1] == 0) sum_x_len--; + + /* Compute sum_y = y0 + y1 */ + mpn_zero(sum_y, eval_len); + mpn_copyi(sum_y, y0, y0_len); + if (y1_len > 0) { + mpn_add(sum_y, sum_y, eval_len, y1, y1_len); + } + size_t sum_y_len = eval_len; + while (sum_y_len > 1 && sum_y[sum_y_len - 1] == 0) sum_y_len--; + + /* z0 = x0 * y0 */ + mpn_zero(z0, prod_len); + if (x0_len > 0 && y0_len > 0) { + mpz_mul_karatsuba_limbs(z0, x0, x0_len, y0, y0_len, recursive_scratch); + } + + /* z2 = x1 * y1 */ + mpn_zero(z2, prod_len); + if (x1_len > 0 && y1_len > 0) { + mpz_mul_karatsuba_limbs(z2, x1, x1_len, y1, y1_len, recursive_scratch); + } + + /* z1 = (x0 + x1) * (y0 + y1) */ + mpn_zero(z1, prod_len); + mpz_mul_karatsuba_limbs(z1, sum_x, sum_x_len, sum_y, sum_y_len, recursive_scratch); + + /* z1 = z1 - z0 - z2 */ + mpn_sub(z1, z1, prod_len, z0, prod_len); + mpn_sub(z1, z1, prod_len, z2, prod_len); + + /* + * Combine: result = z2*B^(2*half) + z1*B^half + z0 + */ + size_t result_len = x_len + y_len; + mpn_zero(result, result_len); + + /* Add z0 at position 0 */ + size_t z0_actual_len = prod_len; + while (z0_actual_len > 0 && z0[z0_actual_len - 1] == 0) z0_actual_len--; + if (z0_actual_len > 0) { + mpn_copyi(result, z0, z0_actual_len); + } + + /* Add z1 at position half */ + size_t z1_actual_len = prod_len; + while (z1_actual_len > 0 && z1[z1_actual_len - 1] == 0) z1_actual_len--; + if (z1_actual_len > 0) { + limb_add_at(result, result_len, z1, z1_actual_len, half); + } + + /* Add z2 at position 2*half */ + size_t z2_actual_len = prod_len; + while (z2_actual_len > 0 && z2[z2_actual_len - 1] == 0) z2_actual_len--; + if (z2_actual_len > 0 && 2 * half < result_len) { + limb_add_at(result, result_len, z2, z2_actual_len, 2 * half); + } +} + +/* + * Toom-3 (Toom-Cook 3-way) Multiplication + * + * Splits inputs into 3 parts and evaluates at 5 points: 0, 1, -1, 2, ∞ + * Complexity: O(n^1.465) vs Karatsuba's O(n^1.585) + * + * For A = a2*B^2 + a1*B + a0 and similarly B: + * - Evaluate polynomials at 5 points + * - Multiply at each point (5 recursive calls) + * - Interpolate to recover result coefficients + */ + +#define TOOM3_THRESHOLD 100 + +static inline mrb_bool +should_use_toom3(size_t n) +{ + return n >= TOOM3_THRESHOLD; +} + +/* Calculate scratch space needed for Toom-3 (including Karatsuba at base) */ +static size_t +toom3_scratch_size(size_t n) +{ + if (!should_use_toom3(n)) { + /* For Karatsuba range, return Karatsuba scratch size */ + if (should_use_karatsuba(n)) { + return karatsuba_scratch_size(n); + } + return 0; + } + + size_t third = n / 3; + + /* + * Per level storage: + * - 6 evaluation results: v1_x, v1_y, vm1_x, vm1_y, v2_x, v2_y + * Each up to (third + 3) limbs for carries + * - 5 product results: w0, w1, wm1, w2, winf + * Each up to 2*(third + 3) + 16 limbs (recursive Toom-3 needs extra margin) + * - 4 interpolation temps: t4, t5, t6, r2_tmp + * Each up to 2*(third + 3) + 16 limbs (in recursive_scratch area) + */ + size_t eval_len = third + 3; + size_t prod_len = 2 * eval_len + 16; + + size_t eval_size = 6 * eval_len; /* 6 evaluation temps */ + size_t prod_size = 5 * prod_len; /* 5 products */ + size_t interp_size = 4 * prod_len; /* 4 interpolation temps */ + size_t current_level = eval_size + prod_size; + + /* Recursive scratch (sequential calls, reuse same buffer) */ + /* Must be at least interp_size for interpolation temps */ + size_t sub_n = eval_len; /* largest sub-multiplication size */ + size_t sub_scratch = toom3_scratch_size(sub_n); + if (sub_scratch < interp_size) { + sub_scratch = interp_size; + } + + return current_level + sub_scratch + 8; /* +8 safety margin */ +} + +/* + * Divide limb array by 2 (right shift by 1 bit). + * Returns the shifted-out bit (0 or 1). + */ +static mp_limb +mpn_rshift1(mp_limb *rp, const mp_limb *ap, size_t n) +{ + mp_limb carry = 0; + for (size_t i = n; i > 0; i--) { + mp_limb a = ap[i-1]; + rp[i-1] = (a >> 1) | (carry << (DIG_SIZE - 1)); + carry = a & 1; + } + return carry; +} + +/* + * Divide limb array by 3 (exact division). + * Precondition: the value is divisible by 3. + */ +static void +mpn_divexact_3(mp_limb *rp, const mp_limb *ap, size_t n) +{ + /* + * Division by 3 using multiplicative inverse. + * For mod 2^32: inverse of 3 is 0xAAAAAAAB + * For mod 2^64: inverse of 3 is 0xAAAAAAAAAAAAAAAB + * x / 3 = x * inverse (mod 2^bits), with borrow propagation + */ +#if DIG_SIZE == 32 + const mp_limb inv3 = 0xAAAAAAABUL; +#else + const mp_limb inv3 = 0xAAAAAAAAAAAAAAABULL; +#endif + + mp_limb borrow = 0; + for (size_t i = 0; i < n; i++) { + mp_limb old_a = ap[i]; /* save before potential in-place overwrite */ + mp_limb a = old_a - borrow; + mp_limb q = a * inv3; + rp[i] = q; + /* borrow = (q * 3 > a) ? ceil((q*3 - a) / 2^DIG_SIZE) : 0 */ + /* Simplified: borrow for next iteration */ + mp_dbl_limb prod = (mp_dbl_limb)q * 3; + borrow = (mp_limb)(prod >> DIG_SIZE); + if ((mp_limb)prod > old_a) borrow++; /* use saved value for in-place safety */ + } +} + +/* + * Add with carry, handling different sizes. + * rp[0..rn-1] = ap[0..an-1] + bp[0..bn-1] + * rn must be >= max(an, bn) + * Returns final carry. + */ +static mp_limb +mpn_add_var(mp_limb *rp, const mp_limb *ap, size_t an, + const mp_limb *bp, size_t bn, size_t rn) +{ + mp_limb carry = 0; + size_t i; + + /* Add common part */ + size_t min_n = (an < bn) ? an : bn; + for (i = 0; i < min_n; i++) { + mp_dbl_limb sum = (mp_dbl_limb)ap[i] + bp[i] + carry; + rp[i] = LOW(sum); + carry = HIGH(sum); + } + + /* Copy and propagate carry through longer operand */ + if (an > bn) { + for (; i < an; i++) { + mp_dbl_limb sum = (mp_dbl_limb)ap[i] + carry; + rp[i] = LOW(sum); + carry = HIGH(sum); + } + } + else { + for (; i < bn; i++) { + mp_dbl_limb sum = (mp_dbl_limb)bp[i] + carry; + rp[i] = LOW(sum); + carry = HIGH(sum); + } + } + + /* Zero-fill remainder and propagate final carry */ + for (; i < rn; i++) { + rp[i] = carry; + carry = 0; + } + + return carry; +} + +/* + * Subtract with borrow, result may be negative. + * rp[0..n-1] = ap[0..n-1] - bp[0..n-1] + * Returns 1 if result is negative (borrow out), 0 otherwise. + */ +static mp_limb +mpn_sub_var(mp_limb *rp, const mp_limb *ap, size_t an, + const mp_limb *bp, size_t bn, size_t n) +{ + mp_dbl_limb_signed borrow = 0; + size_t i; + + for (i = 0; i < n; i++) { + mp_limb a = (i < an) ? ap[i] : 0; + mp_limb b = (i < bn) ? bp[i] : 0; + borrow += (mp_dbl_limb_signed)a - (mp_dbl_limb_signed)b; + rp[i] = LOW(borrow); + borrow = HIGH(borrow); + } + + return (borrow < 0) ? 1 : 0; +} + +/* + * Negate a limb array (two's complement). + * rp[0..n-1] = -ap[0..n-1] + */ +static void +mpn_neg(mp_limb *rp, const mp_limb *ap, size_t n) +{ + mp_limb carry = 1; + for (size_t i = 0; i < n; i++) { + mp_dbl_limb sum = (mp_dbl_limb)(~ap[i]) + carry; + rp[i] = LOW(sum); + carry = HIGH(sum); + } +} + +/* Pool-aware Toom-3 multiplication */ +static void +mpz_mul_toom3(mpz_ctx_t *ctx, mp_limb *result, + const mp_limb *x, size_t x_len, + const mp_limb *y, size_t y_len, + mp_limb *scratch) +{ + /* + * Base case - use Karatsuba or schoolbook. + * Toom-3 requires both operands to be large enough to avoid + * buffer overflow when writing at offset 4*third. + */ + size_t min_len = (x_len < y_len) ? x_len : y_len; + size_t n = (x_len > y_len) ? x_len : y_len; + if (!should_use_toom3(min_len)) { + if (should_use_karatsuba(min_len)) { + mpz_mul_karatsuba_limbs(result, x, x_len, y, y_len, scratch); + } + else { + mpz_mul_basic_limbs(result, x, x_len, y, y_len); + } + return; + } + + /* + * Split: x = x2*B^2 + x1*B + x0, y = y2*B^2 + y1*B + y0 + * where B = base^third + */ + size_t third = n / 3; + size_t x0_len = (x_len > third) ? third : x_len; + size_t x1_len = (x_len > 2*third) ? third : ((x_len > third) ? x_len - third : 0); + size_t x2_len = (x_len > 2*third) ? x_len - 2*third : 0; + size_t y0_len = (y_len > third) ? third : y_len; + size_t y1_len = (y_len > 2*third) ? third : ((y_len > third) ? y_len - third : 0); + size_t y2_len = (y_len > 2*third) ? y_len - 2*third : 0; + + const mp_limb *x0 = x; + const mp_limb *x1 = x + third; + const mp_limb *x2 = x + 2*third; + const mp_limb *y0 = y; + const mp_limb *y1 = y + third; + const mp_limb *y2 = y + 2*third; + + /* Allocate scratch space */ + size_t eval_len = third + 3; /* max size after evaluation with carries */ + /* + * Product buffers need extra space because recursive Toom-3 calls + * write x_len + y_len + 16 limbs. With x_len, y_len <= eval_len, + * maximum is 2*eval_len + 16. + */ + size_t prod_len = 2 * eval_len + 16; + + size_t offset = 0; + mp_limb *v1_x = scratch + offset; offset += eval_len; + mp_limb *v1_y = scratch + offset; offset += eval_len; + mp_limb *vm1_x = scratch + offset; offset += eval_len; + mp_limb *vm1_y = scratch + offset; offset += eval_len; + mp_limb *v2_x = scratch + offset; offset += eval_len; + mp_limb *v2_y = scratch + offset; offset += eval_len; + + mp_limb *w0 = scratch + offset; offset += prod_len; + mp_limb *w1 = scratch + offset; offset += prod_len; + mp_limb *wm1 = scratch + offset; offset += prod_len; + mp_limb *w2 = scratch + offset; offset += prod_len; + mp_limb *winf = scratch + offset; offset += prod_len; + + mp_limb *recursive_scratch = scratch + offset; + + /* + * Evaluation at 5 points: + * v0 = x0, y0 (reuse input) + * v1 = x0 + x1 + x2, y0 + y1 + y2 + * vm1 = x0 - x1 + x2, y0 - y1 + y2 + * v2 = x0 + 2*x1 + 4*x2, y0 + 2*y1 + 4*y2 + * vinf = x2, y2 (reuse input) + */ + + /* v1 = x0 + x1 + x2 */ + mpn_zero(v1_x, eval_len); + mpn_copyi(v1_x, x0, x0_len); + if (x1_len > 0) mpn_add(v1_x, v1_x, eval_len, x1, x1_len); + if (x2_len > 0) mpn_add(v1_x, v1_x, eval_len, x2, x2_len); + size_t v1_x_len = eval_len; + while (v1_x_len > 0 && v1_x[v1_x_len-1] == 0) v1_x_len--; + if (v1_x_len == 0) v1_x_len = 1; + + mpn_zero(v1_y, eval_len); + mpn_copyi(v1_y, y0, y0_len); + if (y1_len > 0) mpn_add(v1_y, v1_y, eval_len, y1, y1_len); + if (y2_len > 0) mpn_add(v1_y, v1_y, eval_len, y2, y2_len); + size_t v1_y_len = eval_len; + while (v1_y_len > 0 && v1_y[v1_y_len-1] == 0) v1_y_len--; + if (v1_y_len == 0) v1_y_len = 1; + + /* vm1 = x0 - x1 + x2 (may be negative, track sign) */ + mp_limb vm1_x_neg = 0, vm1_y_neg = 0; + { + /* t = x0 + x2 */ + mpn_zero(vm1_x, eval_len); + mpn_copyi(vm1_x, x0, x0_len); + if (x2_len > 0) mpn_add(vm1_x, vm1_x, eval_len, x2, x2_len); + /* vm1_x = t - x1 */ + if (x1_len > 0) { + vm1_x_neg = mpn_sub_var(vm1_x, vm1_x, eval_len, x1, x1_len, eval_len); + if (vm1_x_neg) mpn_neg(vm1_x, vm1_x, eval_len); + } + } + size_t vm1_x_len = eval_len; + while (vm1_x_len > 0 && vm1_x[vm1_x_len-1] == 0) vm1_x_len--; + if (vm1_x_len == 0) vm1_x_len = 1; + + { + mpn_zero(vm1_y, eval_len); + mpn_copyi(vm1_y, y0, y0_len); + if (y2_len > 0) mpn_add(vm1_y, vm1_y, eval_len, y2, y2_len); + if (y1_len > 0) { + vm1_y_neg = mpn_sub_var(vm1_y, vm1_y, eval_len, y1, y1_len, eval_len); + if (vm1_y_neg) mpn_neg(vm1_y, vm1_y, eval_len); + } + } + size_t vm1_y_len = eval_len; + while (vm1_y_len > 0 && vm1_y[vm1_y_len-1] == 0) vm1_y_len--; + if (vm1_y_len == 0) vm1_y_len = 1; + + /* v2 = x0 + 2*x1 + 4*x2 */ + { + mpn_zero(v2_x, eval_len); + mpn_copyi(v2_x, x0, x0_len); + /* Add 2*x1 */ + if (x1_len > 0) { + mp_limb carry = 0; + for (size_t i = 0; i < x1_len; i++) { + mp_dbl_limb val = (mp_dbl_limb)v2_x[i] + ((mp_dbl_limb)x1[i] << 1) + carry; + v2_x[i] = LOW(val); + carry = HIGH(val); + } + for (size_t i = x1_len; carry && i < eval_len; i++) { + mp_dbl_limb val = (mp_dbl_limb)v2_x[i] + carry; + v2_x[i] = LOW(val); + carry = HIGH(val); + } + } + /* Add 4*x2 */ + if (x2_len > 0) { + mp_limb carry = 0; + for (size_t i = 0; i < x2_len; i++) { + mp_dbl_limb val = (mp_dbl_limb)v2_x[i] + ((mp_dbl_limb)x2[i] << 2) + carry; + v2_x[i] = LOW(val); + carry = HIGH(val); + } + for (size_t i = x2_len; carry && i < eval_len; i++) { + mp_dbl_limb val = (mp_dbl_limb)v2_x[i] + carry; + v2_x[i] = LOW(val); + carry = HIGH(val); + } + } + } + size_t v2_x_len = eval_len; + while (v2_x_len > 0 && v2_x[v2_x_len-1] == 0) v2_x_len--; + if (v2_x_len == 0) v2_x_len = 1; + + { + mpn_zero(v2_y, eval_len); + mpn_copyi(v2_y, y0, y0_len); + if (y1_len > 0) { + mp_limb carry = 0; + for (size_t i = 0; i < y1_len; i++) { + mp_dbl_limb val = (mp_dbl_limb)v2_y[i] + ((mp_dbl_limb)y1[i] << 1) + carry; + v2_y[i] = LOW(val); + carry = HIGH(val); + } + for (size_t i = y1_len; carry && i < eval_len; i++) { + mp_dbl_limb val = (mp_dbl_limb)v2_y[i] + carry; + v2_y[i] = LOW(val); + carry = HIGH(val); + } + } + if (y2_len > 0) { + mp_limb carry = 0; + for (size_t i = 0; i < y2_len; i++) { + mp_dbl_limb val = (mp_dbl_limb)v2_y[i] + ((mp_dbl_limb)y2[i] << 2) + carry; + v2_y[i] = LOW(val); + carry = HIGH(val); + } + for (size_t i = y2_len; carry && i < eval_len; i++) { + mp_dbl_limb val = (mp_dbl_limb)v2_y[i] + carry; + v2_y[i] = LOW(val); + carry = HIGH(val); + } + } + } + size_t v2_y_len = eval_len; + while (v2_y_len > 0 && v2_y[v2_y_len-1] == 0) v2_y_len--; + if (v2_y_len == 0) v2_y_len = 1; + + /* + * Pointwise multiplication (5 recursive calls) + */ + mpn_zero(w0, prod_len); + mpn_zero(w1, prod_len); + mpn_zero(wm1, prod_len); + mpn_zero(w2, prod_len); + mpn_zero(winf, prod_len); + + /* w0 = v0_x * v0_y = x0 * y0 */ + mpz_mul_toom3(ctx, w0, x0, x0_len, y0, y0_len, recursive_scratch); + + /* w1 = v1_x * v1_y */ + mpz_mul_toom3(ctx, w1, v1_x, v1_x_len, v1_y, v1_y_len, recursive_scratch); + + /* wm1 = vm1_x * vm1_y (sign = vm1_x_neg XOR vm1_y_neg) */ + mp_limb wm1_neg = vm1_x_neg ^ vm1_y_neg; + mpz_mul_toom3(ctx, wm1, vm1_x, vm1_x_len, vm1_y, vm1_y_len, recursive_scratch); + + /* w2 = v2_x * v2_y */ + mpz_mul_toom3(ctx, w2, v2_x, v2_x_len, v2_y, v2_y_len, recursive_scratch); + + /* winf = vinf_x * vinf_y = x2 * y2 */ + if (x2_len > 0 && y2_len > 0) { + mpz_mul_toom3(ctx, winf, x2, x2_len, y2, y2_len, recursive_scratch); + } + + /* + * Interpolation to recover r0, r1, r2, r3, r4 where: + * result = r0 + r1*B + r2*B^2 + r3*B^3 + r4*B^4 + * + * Using the sequence: + * 1. r0 = w0 + * 2. r4 = winf + * 3. t1 = w1 - r0 - r4 (= r1 + r2 + r3) + * 4. t2 = wm1 - r0 - r4 (= -r1 + r2 - r3, may need sign adjustment) + * 5. r2 = (t1 + t2) / 2 + * 6. t3 = w2 - r0 - 16*r4 (= 2r1 + 4r2 + 8r3) + * 7. t4 = t3 / 2 (= r1 + 2r2 + 4r3) + * 8. t5 = (t1 - t2) / 2 (= r1 + r3) + * 9. t6 = t4 - 2*r2 (= r1 + 4r3) + * 10. r3 = (t6 - t5) / 3 + * 11. r1 = t5 - r3 + */ + + /* + * Reuse product buffers for interpolation (prod_len sized): + * - t1 reuses w1 (we compute t1 = w1 - ...) + * - t2 reuses wm1 (we compute t2 = wm1 - ...) + * - t3 reuses w2 (we compute t3 = w2 - ...) + * - t4, t5, t6 need separate space (use recursive_scratch area) + * We keep w0 and winf for final assembly. + */ + size_t w_len = prod_len; + + mp_limb *t1 = w1; /* reuse w1 */ + mp_limb *t2 = wm1; /* reuse wm1 */ + mp_limb *t3 = w2; /* reuse w2 */ + /* t4, t5, t6, r2_tmp use recursive_scratch area */ + mp_limb *t4 = recursive_scratch; + mp_limb *t5 = recursive_scratch + w_len; + mp_limb *t6 = recursive_scratch + 2 * w_len; + mp_limb *r2_tmp = recursive_scratch + 3 * w_len; + + /* t1 = w1 - w0 - winf (computed in-place in w1) */ + mpn_sub(t1, t1, w_len, w0, w_len); + mpn_sub(t1, t1, w_len, winf, w_len); + + /* t2 = wm1 - w0 - winf (with sign handling, computed in-place in wm1) */ + if (wm1_neg) { + /* t2 = -wm1 - w0 - winf = -(wm1 + w0 + winf) */ + mpn_add(t2, t2, w_len, w0, w_len); + mpn_add(t2, t2, w_len, winf, w_len); + mpn_neg(t2, t2, w_len); + } + else { + mpn_sub(t2, t2, w_len, w0, w_len); + mpn_sub(t2, t2, w_len, winf, w_len); + } + + /* r2 = (t1 + t2) / 2 */ + mpn_add_var(r2_tmp, t1, w_len, t2, w_len, w_len); + mpn_rshift1(r2_tmp, r2_tmp, w_len); + + /* t3 = w2 - w0 - 16*winf (computed in-place in w2) */ + mpn_sub(t3, t3, w_len, w0, w_len); + /* Subtract 16*winf */ + { + mp_limb borrow = 0; + for (size_t i = 0; i < w_len; i++) { + mp_dbl_limb_signed diff = (mp_dbl_limb_signed)t3[i] + - ((mp_dbl_limb_signed)winf[i] << 4) - borrow; + t3[i] = LOW(diff); + borrow = (diff < 0) ? (mp_limb)(-(diff >> DIG_SIZE)) : 0; + } + } + + /* t4 = t3 / 2 */ + mpn_rshift1(t4, t3, w_len); + + /* t5 = (t1 - t2) / 2 */ + mpn_sub_var(t5, t1, w_len, t2, w_len, w_len); + mpn_rshift1(t5, t5, w_len); + + /* t6 = t4 - 2*r2 */ + { + mp_limb borrow = 0; + for (size_t i = 0; i < w_len; i++) { + mp_dbl_limb_signed diff = (mp_dbl_limb_signed)t4[i] + - ((mp_dbl_limb_signed)r2_tmp[i] << 1) - borrow; + t6[i] = LOW(diff); + borrow = (diff < 0) ? (mp_limb)(-(diff >> DIG_SIZE)) : 0; + } + } + + /* r3 = (t6 - t5) / 3 */ + mp_limb *r3 = t4; /* reuse t4 */ + mpn_sub(r3, t6, w_len, t5, w_len); + mpn_divexact_3(r3, r3, w_len); + + /* r1 = t5 - r3 */ + mp_limb *r1 = t5; /* in-place */ + mpn_sub(r1, t5, w_len, r3, w_len); + + /* + * Final assembly: result = r0 + r1*B + r2*B^2 + r3*B^3 + r4*B^4 + * where B = base^third + * + * Maximum write position is 4*third + w_len. + * With w_len = 2*(third+3) + 16 = 2*n/3 + 22 (for recursion margin), + * max position = 4*n/3 + 2*n/3 + 22 = 2*n + 22. + * Use 2*n (not x_len + y_len) because third is based on n = max. + */ + size_t result_len = 2 * n + 24; + mpn_zero(result, result_len); + + /* r0 at offset 0 */ + mpn_copyi(result, w0, (2*third < result_len) ? 2*third : result_len); + + /* r1 at offset third */ + limb_add_at(result, result_len, r1, w_len, third); + + /* r2 at offset 2*third */ + limb_add_at(result, result_len, r2_tmp, w_len, 2*third); + + /* r3 at offset 3*third */ + limb_add_at(result, result_len, r3, w_len, 3*third); + + /* r4 at offset 4*third */ + limb_add_at(result, result_len, winf, w_len, 4*third); +} + +/* + * Toom-3 squaring scratch size (smaller than multiplication since no y evaluation). + */ +static size_t +toom3_sqr_scratch_size(size_t n) +{ + if (!should_use_toom3(n)) { + return 0; + } + + size_t third = n / 3; + + /* + * Per level storage for squaring: + * - 3 evaluation results: v1_x, vm1_x, v2_x (no y needed) + * Each up to (third + 3) limbs for carries + * - 5 product results: w0, w1, wm1, w2, winf + * Each up to 2*(third + 3) + 16 limbs + * - 4 interpolation temps: t4, t5, t6, r2_tmp + * Each up to 2*(third + 3) + 16 limbs + */ + size_t eval_len = third + 3; + size_t prod_len = 2 * eval_len + 16; + size_t eval_space = 3 * eval_len; /* v1_x, vm1_x, v2_x */ + size_t prod_space = 5 * prod_len; /* w0, w1, wm1, w2, winf */ + size_t current_level = eval_space + prod_space; + + /* Interpolation temps (4 * prod_len) are reused with recursive scratch */ + size_t interp_size = 4 * prod_len; + + /* Recursively calculate sub-squaring scratch size */ + size_t sub_n = eval_len; + size_t sub_scratch = toom3_sqr_scratch_size(sub_n); + if (sub_scratch < interp_size) { + sub_scratch = interp_size; + } + + return current_level + sub_scratch + 8; +} + +/* Toom-3 squaring: optimized squaring for large numbers */ +static void +mpz_sqr_toom3(mpz_ctx_t *ctx, mp_limb *result, + const mp_limb *x, size_t x_len, + mp_limb *scratch) +{ + /* + * Base case - use schoolbook squaring. + */ + if (!should_use_toom3(x_len)) { + mpz_sqr_basic_limbs(result, x, x_len); + return; + } + + /* + * Split: x = x2*B^2 + x1*B + x0 + * where B = base^third + */ + size_t n = x_len; + size_t third = n / 3; + size_t x0_len = (x_len > third) ? third : x_len; + size_t x1_len = (x_len > 2*third) ? third : ((x_len > third) ? x_len - third : 0); + size_t x2_len = (x_len > 2*third) ? x_len - 2*third : 0; + + const mp_limb *x0 = x; + const mp_limb *x1 = x + third; + const mp_limb *x2 = x + 2*third; + + /* Allocate scratch space (fewer buffers than multiplication) */ + size_t eval_len = third + 3; + size_t prod_len = 2 * eval_len + 16; + + size_t offset = 0; + mp_limb *v1_x = scratch + offset; offset += eval_len; + mp_limb *vm1_x = scratch + offset; offset += eval_len; + mp_limb *v2_x = scratch + offset; offset += eval_len; + + mp_limb *w0 = scratch + offset; offset += prod_len; + mp_limb *w1 = scratch + offset; offset += prod_len; + mp_limb *wm1 = scratch + offset; offset += prod_len; + mp_limb *w2 = scratch + offset; offset += prod_len; + mp_limb *winf = scratch + offset; offset += prod_len; + + mp_limb *recursive_scratch = scratch + offset; + + /* + * Evaluation at 5 points (only x, no y): + * v0 = x0 (reuse input) + * v1 = x0 + x1 + x2 + * vm1 = x0 - x1 + x2 + * v2 = x0 + 2*x1 + 4*x2 + * vinf = x2 (reuse input) + */ + + /* v1 = x0 + x1 + x2 */ + mpn_zero(v1_x, eval_len); + mpn_copyi(v1_x, x0, x0_len); + if (x1_len > 0) mpn_add(v1_x, v1_x, eval_len, x1, x1_len); + if (x2_len > 0) mpn_add(v1_x, v1_x, eval_len, x2, x2_len); + size_t v1_x_len = eval_len; + while (v1_x_len > 0 && v1_x[v1_x_len-1] == 0) v1_x_len--; + if (v1_x_len == 0) v1_x_len = 1; + + /* vm1 = x0 - x1 + x2 (may be negative, track sign) */ + mp_limb vm1_x_neg = 0; + { + mpn_zero(vm1_x, eval_len); + mpn_copyi(vm1_x, x0, x0_len); + if (x2_len > 0) mpn_add(vm1_x, vm1_x, eval_len, x2, x2_len); + if (x1_len > 0) { + vm1_x_neg = mpn_sub_var(vm1_x, vm1_x, eval_len, x1, x1_len, eval_len); + if (vm1_x_neg) mpn_neg(vm1_x, vm1_x, eval_len); + } + } + size_t vm1_x_len = eval_len; + while (vm1_x_len > 0 && vm1_x[vm1_x_len-1] == 0) vm1_x_len--; + if (vm1_x_len == 0) vm1_x_len = 1; + + /* v2 = x0 + 2*x1 + 4*x2 */ + { + mpn_zero(v2_x, eval_len); + mpn_copyi(v2_x, x0, x0_len); + if (x1_len > 0) { + mp_limb carry = 0; + for (size_t i = 0; i < x1_len; i++) { + mp_dbl_limb val = (mp_dbl_limb)v2_x[i] + ((mp_dbl_limb)x1[i] << 1) + carry; + v2_x[i] = LOW(val); + carry = HIGH(val); + } + for (size_t i = x1_len; carry && i < eval_len; i++) { + mp_dbl_limb val = (mp_dbl_limb)v2_x[i] + carry; + v2_x[i] = LOW(val); + carry = HIGH(val); + } + } + if (x2_len > 0) { + mp_limb carry = 0; + for (size_t i = 0; i < x2_len; i++) { + mp_dbl_limb val = (mp_dbl_limb)v2_x[i] + ((mp_dbl_limb)x2[i] << 2) + carry; + v2_x[i] = LOW(val); + carry = HIGH(val); + } + for (size_t i = x2_len; carry && i < eval_len; i++) { + mp_dbl_limb val = (mp_dbl_limb)v2_x[i] + carry; + v2_x[i] = LOW(val); + carry = HIGH(val); + } + } + } + size_t v2_x_len = eval_len; + while (v2_x_len > 0 && v2_x[v2_x_len-1] == 0) v2_x_len--; + if (v2_x_len == 0) v2_x_len = 1; + + /* + * Pointwise squaring (5 recursive calls) + * Key difference from multiplication: squaring instead of multiplication + */ + mpn_zero(w0, prod_len); + mpn_zero(w1, prod_len); + mpn_zero(wm1, prod_len); + mpn_zero(w2, prod_len); + mpn_zero(winf, prod_len); + + /* w0 = v0^2 = x0^2 */ + mpz_sqr_toom3(ctx, w0, x0, x0_len, recursive_scratch); + + /* w1 = v1^2 */ + mpz_sqr_toom3(ctx, w1, v1_x, v1_x_len, recursive_scratch); + + /* wm1 = vm1^2 (sign is always positive for squaring!) */ + (void)vm1_x_neg; /* squaring ignores sign */ + mpz_sqr_toom3(ctx, wm1, vm1_x, vm1_x_len, recursive_scratch); + + /* w2 = v2^2 */ + mpz_sqr_toom3(ctx, w2, v2_x, v2_x_len, recursive_scratch); + + /* winf = vinf^2 = x2^2 */ + if (x2_len > 0) { + mpz_sqr_toom3(ctx, winf, x2, x2_len, recursive_scratch); + } + + /* + * Interpolation (same as multiplication) + * Recover r0, r1, r2, r3, r4 where: + * result = r0 + r1*B + r2*B^2 + r3*B^3 + r4*B^4 + * + * For squaring, wm1 is always positive, simplifying step 4. + */ + size_t w_len = prod_len; + + mp_limb *t1 = w1; /* reuse w1 */ + mp_limb *t2 = wm1; /* reuse wm1 */ + mp_limb *t3 = w2; /* reuse w2 */ + mp_limb *t4 = recursive_scratch; + mp_limb *t5 = recursive_scratch + w_len; + mp_limb *t6 = recursive_scratch + 2 * w_len; + mp_limb *r2_tmp = recursive_scratch + 3 * w_len; + + /* t1 = w1 - w0 - winf */ + mpn_sub(t1, t1, w_len, w0, w_len); + mpn_sub(t1, t1, w_len, winf, w_len); + + /* t2 = wm1 - w0 - winf (no sign handling for squaring!) */ + mpn_sub(t2, t2, w_len, w0, w_len); + mpn_sub(t2, t2, w_len, winf, w_len); + + /* r2 = (t1 + t2) / 2 */ + mpn_add_var(r2_tmp, t1, w_len, t2, w_len, w_len); + mpn_rshift1(r2_tmp, r2_tmp, w_len); + + /* t3 = w2 - w0 - 16*winf */ + mpn_sub(t3, t3, w_len, w0, w_len); + { + mp_limb borrow = 0; + for (size_t i = 0; i < w_len; i++) { + mp_dbl_limb_signed diff = (mp_dbl_limb_signed)t3[i] + - ((mp_dbl_limb_signed)winf[i] << 4) - borrow; + t3[i] = LOW(diff); + borrow = (diff < 0) ? (mp_limb)(-(diff >> DIG_SIZE)) : 0; + } + } + + /* t4 = t3 / 2 */ + mpn_rshift1(t4, t3, w_len); + + /* t5 = (t1 - t2) / 2 */ + mpn_sub_var(t5, t1, w_len, t2, w_len, w_len); + mpn_rshift1(t5, t5, w_len); + + /* t6 = t4 - 2*r2 */ + { + mp_limb borrow = 0; + for (size_t i = 0; i < w_len; i++) { + mp_dbl_limb_signed diff = (mp_dbl_limb_signed)t4[i] + - ((mp_dbl_limb_signed)r2_tmp[i] << 1) - borrow; + t6[i] = LOW(diff); + borrow = (diff < 0) ? (mp_limb)(-(diff >> DIG_SIZE)) : 0; + } + } + + /* r3 = (t6 - t5) / 3 */ + mp_limb *r3 = t4; + mpn_sub(r3, t6, w_len, t5, w_len); + mpn_divexact_3(r3, r3, w_len); + + /* r1 = t5 - r3 */ + mp_limb *r1 = t5; + mpn_sub(r1, t5, w_len, r3, w_len); + + /* + * Final assembly: result = r0 + r1*B + r2*B^2 + r3*B^3 + r4*B^4 + */ + size_t result_len = 2 * n + 24; + mpn_zero(result, result_len); + + mpn_copyi(result, w0, (2*third < result_len) ? 2*third : result_len); + limb_add_at(result, result_len, r1, w_len, third); + limb_add_at(result, result_len, r2_tmp, w_len, 2*third); + limb_add_at(result, result_len, r3, w_len, 3*third); + limb_add_at(result, result_len, winf, w_len, 4*third); +} + +/* + * Check if mpz is "all ones" pattern (2^n - 1). + * For such a number: + * - All limbs except possibly the top one equal DIG_MASK + * - The top limb equals (1 << k) - 1 for some k in 1..DIG_SIZE + * Returns the bit count n if all-ones, 0 otherwise. + */ +static size_t +mpz_all_ones_p(mpz_t *x) +{ + if (x->sn <= 0 || x->sz == 0) return 0; + + /* Check all but top limb */ + for (size_t i = 0; i + 1 < x->sz; i++) { + if (x->p[i] != DIG_MASK) return 0; + } + + /* Check top limb: must be (1 << k) - 1 for some k */ + mp_limb top = x->p[x->sz - 1]; + if (top == 0) return 0; + + /* Check if top is all-ones pattern: (top & (top + 1)) == 0 */ + if ((top & (top + 1)) != 0) return 0; + + /* Count bits in top limb */ + size_t top_bits = 0; + while (top) { top_bits++; top >>= 1; } + + return (x->sz - 1) * DIG_SIZE + top_bits; +} + +/* + * Check if x is a power of 2 (2^n). + * Returns n if x = 2^n, or 0 otherwise. + * This is the "mostly-zero" pattern common in fuzzing tests. + */ +static size_t +mpz_power_of_2_exp(mpz_t *x) +{ + if (x->sn <= 0 || x->sz == 0) return 0; + + /* All limbs except top must be zero */ + for (size_t i = 0; i + 1 < x->sz; i++) { + if (x->p[i] != 0) return 0; + } + + /* Top limb must be a power of 2: (v & (v - 1)) == 0 */ + mp_limb top = x->p[x->sz - 1]; + if (top == 0 || (top & (top - 1)) != 0) return 0; + + /* Count trailing zeros in top limb to get bit position */ + size_t bit_pos = 0; + while ((top & 1) == 0) { bit_pos++; top >>= 1; } + + return (x->sz - 1) * DIG_SIZE + bit_pos; +} + +/* Count set bits in a limb */ +static inline int +limb_popcount(mp_limb x) +{ +#if defined(__GNUC__) || __has_builtin(__builtin_popcount) + if (sizeof(mp_limb) == sizeof(unsigned long long)) + return __builtin_popcountll(x); + else + return __builtin_popcount(x); +#else + int count = 0; + while (x) { + count++; + x &= x - 1; /* Clear lowest set bit */ + } + return count; +#endif +} + +/* + * Count total set bits in mpz. + * Returns popcount, or max_count+1 if exceeded (for early exit). + */ +static size_t +mpz_popcount(mpz_t *x, size_t max_count) +{ + if (x->sn <= 0 || x->sz == 0) return 0; + + size_t count = 0; + for (size_t i = 0; i < x->sz && count <= max_count; i++) { + count += limb_popcount(x->p[i]); + } + return count; +} + +/* Maximum bits for sparse multiplication optimization */ +#define SPARSE_MAX_BITS 8 + +/* + * Check if x is sparse (few bits set) and worth optimizing. + * Only worthwhile for large numbers where Karatsuba would be used. + * Returns popcount if sparse and optimizable, 0 otherwise. + */ +static size_t +mpz_sparse_p(mpz_t *x) +{ + if (x->sn <= 0 || x->sz < TOOM3_THRESHOLD) return 0; + + size_t popcount = mpz_popcount(x, SPARSE_MAX_BITS); + if (popcount > SPARSE_MAX_BITS) return 0; + + return popcount; +} + +/* + * Multiply sparse number by dense number using shift-add. + * sparse * dense = sum of (dense << bit_position) for each set bit + * + * O(k * n) where k = popcount, much faster than Karatsuba when k is small. + */ +struct mpz_mul_sparse_data { + mpz_ctx_t *ctx; + mpz_t *w; + mpz_t *sparse; + mpz_t *dense; + mpz_t shifted, temp; /* cleanup targets */ +}; + +static mrb_value +mpz_mul_sparse_body(mrb_state *mrb, void *userdata) +{ + struct mpz_mul_sparse_data *d = (struct mpz_mul_sparse_data *)userdata; + mpz_ctx_t *ctx = d->ctx; + mpz_t *w = d->w; + mpz_t *sparse = d->sparse; + mpz_t *dense = d->dense; + + mpz_init(ctx, &d->shifted); + mpz_init(ctx, &d->temp); + zero(w); + + for (size_t i = 0; i < sparse->sz; i++) { + mp_limb limb = sparse->p[i]; + size_t base_bit = i * DIG_SIZE; + + while (limb) { + /* Find position of lowest set bit */ + int bit = 0; +#if defined(__GNUC__) || __has_builtin(__builtin_ctz) + if (sizeof(mp_limb) == sizeof(unsigned long long)) + bit = __builtin_ctzll(limb); + else + bit = __builtin_ctz(limb); +#else + while ((limb & ((mp_limb)1 << bit)) == 0) bit++; +#endif + + /* Add dense << (base_bit + bit) to result */ + mpz_mul_2exp(ctx, &d->shifted, dense, base_bit + bit); + mpz_add(ctx, &d->temp, w, &d->shifted); + mpz_set(ctx, w, &d->temp); + + /* Clear this bit */ + limb &= limb - 1; + } + } + + /* Handle sign */ + if (sparse->sn < 0) w->sn = -w->sn; + + return mrb_nil_value(); +} + +static void +mpz_mul_sparse(mpz_ctx_t *ctx, mpz_t *w, mpz_t *sparse, mpz_t *dense) +{ + struct mpz_mul_sparse_data d = {ctx, w, sparse, dense, {0,0,0}, {0,0,0}}; + mrb_value exc; + MRB_ENSURE(MPZ_MRB(ctx), exc, mpz_mul_sparse_body, &d) { + /* Cleanup always runs (mpz_clear is safe on zero-initialized mpz_t) */ + mpz_clear(ctx, &d.shifted); + mpz_clear(ctx, &d.temp); + } +} + +/* + * Multiply two "all ones" numbers using algebraic identity: + * (2^n - 1) * (2^m - 1) = 2^(n+m) - 2^n - 2^m + 1 + * + * For squaring (n == m): + * (2^n - 1)^2 = 2^(2n) - 2^(n+1) + 1 + * + * This is O(n) instead of O(n^1.585) for Karatsuba. + */ +struct mpz_mul_all_ones_data { + mpz_ctx_t *ctx; + mpz_t *w; + size_t n, m; + mpz_t a, b; /* cleanup targets */ +}; + +static mrb_value +mpz_mul_all_ones_body(mrb_state *mrb, void *userdata) +{ + struct mpz_mul_all_ones_data *d = (struct mpz_mul_all_ones_data *)userdata; + mpz_ctx_t *ctx = d->ctx; + mpz_t *w = d->w; + size_t n = d->n, m = d->m; + + if (n == m) { + /* Squaring: (2^n - 1)^2 = 2^(2n) - 2^(n+1) + 1 */ + /* Start with 2^(2n) */ + mpz_init(ctx, &d->a); + mpz_set_int(ctx, &d->a, 1); + mpz_mul_2exp(ctx, w, &d->a, 2*n); + + /* Subtract 2^(n+1) */ + mpz_set_int(ctx, &d->a, 1); + mpz_mul_2exp(ctx, &d->a, &d->a, n+1); + mpz_sub(ctx, w, w, &d->a); + + /* Add 1 */ + mpz_add_int(ctx, w, 1); + } + else { + /* General: (2^n - 1) * (2^m - 1) = 2^(n+m) - 2^n - 2^m + 1 */ + mpz_init(ctx, &d->a); + mpz_init(ctx, &d->b); + + /* Start with 2^(n+m) */ + mpz_set_int(ctx, &d->a, 1); + mpz_mul_2exp(ctx, w, &d->a, n+m); + + /* Subtract 2^n */ + mpz_set_int(ctx, &d->a, 1); + mpz_mul_2exp(ctx, &d->a, &d->a, n); + mpz_sub(ctx, w, w, &d->a); + + /* Subtract 2^m */ + mpz_set_int(ctx, &d->b, 1); + mpz_mul_2exp(ctx, &d->b, &d->b, m); + mpz_sub(ctx, w, w, &d->b); + + /* Add 1 */ + mpz_add_int(ctx, w, 1); + } + + return mrb_nil_value(); +} + +static void +mpz_mul_all_ones(mpz_ctx_t *ctx, mpz_t *w, size_t n, size_t m) +{ + struct mpz_mul_all_ones_data d = {ctx, w, n, m, {0,0,0}, {0,0,0}}; + mrb_value exc; + MRB_ENSURE(MPZ_MRB(ctx), exc, mpz_mul_all_ones_body, &d) { + /* Cleanup always runs (mpz_clear is safe on zero-initialized mpz_t) */ + mpz_clear(ctx, &d.a); + mpz_clear(ctx, &d.b); + } +} + +/* w = u^2 (squaring - faster than general multiplication) */ +static void +mpz_sqr(mpz_ctx_t *ctx, mpz_t *ww, mpz_t *u) +{ + if (zero_p(u)) { + zero(ww); + return; + } + + /* Fast path for power of 2: (2^n)^2 = 2^(2n) */ + size_t u_pow2 = mpz_power_of_2_exp(u); + if (u_pow2) { + mpz_t one; + mpz_init(ctx, &one); + mpz_set_int(ctx, &one, 1); + mpz_mul_2exp(ctx, ww, &one, 2 * u_pow2); + mpz_clear(ctx, &one); + return; + } + + /* Fast path for all-ones: (2^n - 1)^2 = 2^(2n) - 2^(n+1) + 1 */ + size_t u_ones = mpz_all_ones_p(u); + if (u_ones) { + mpz_mul_all_ones(ctx, ww, u_ones, u_ones); + return; + } + + /* Use schoolbook squaring for small numbers */ + if (!should_use_toom3(u->sz)) { + mpz_t w; + mpz_init_heap(ctx, &w, 2 * u->sz); + mpz_sqr_basic_limbs(w.p, u->p, u->sz); + w.sz = 2 * u->sz; + w.sn = 1; /* Square is always positive */ + trim(&w); + mpz_move(ctx, ww, &w); + return; + } + + /* + * Toom-3 squaring: use optimized squaring for large numbers. + * Toom-3 writes at offset 4*third with products up to 2*(third+3)+16 limbs. + * Maximum write position: 4*n/3 + 2*n/3 + 22 = 2*n + 22. + * Add extra space to prevent buffer overflow. + */ + size_t result_size = 2 * u->sz + 24; + mpz_realloc(ctx, ww, result_size); + + size_t scratch_size = toom3_sqr_scratch_size(u->sz); + scratch_size += (scratch_size >> 3) + 16; + size_t pool_state = pool_save(ctx); + mp_limb *scratch = NULL; + + if (MPZ_HAS_POOL(ctx)) { + scratch = pool_alloc(MPZ_POOL(ctx), scratch_size); + } + + if (scratch) { + mpz_sqr_toom3(ctx, ww->p, u->p, u->sz, scratch); + pool_restore(ctx, pool_state); + } + else { + scratch = (mp_limb*)mrb_malloc(MPZ_MRB(ctx), scratch_size * sizeof(mp_limb)); + mpz_sqr_toom3(ctx, ww->p, u->p, u->sz, scratch); + mrb_free(MPZ_MRB(ctx), scratch); + } + + ww->sz = result_size; + ww->sn = 1; /* Square is always positive */ + trim(ww); +} + +/* + * Balance multiplication for asymmetric operands. + * When one operand is much larger than the other (max >= 2*min), + * split the larger into chunks of size equal to the smaller, + * multiply each chunk, and combine with shifts. + */ +static void +mpz_mul_balance(mpz_ctx_t *ctx, mpz_t *ww, mpz_t *a, mpz_t *b) +{ + /* Ensure 'a' is the larger operand */ + if (a->sz < b->sz) { + mpz_t *t = a; a = b; b = t; + } + + size_t bsize = b->sz; /* chunk size = smaller operand size */ + size_t nblocks = a->sz / bsize; /* number of full chunks */ + + size_t pool_state = pool_save(ctx); + mpz_t chunk, tmp, result; + mpz_init(ctx, &result); + mpz_init_heap(ctx, &chunk, bsize); + mpz_init(ctx, &tmp); + + size_t j = 0; + for (size_t i = 0; i < nblocks; i++) { + /* Copy chunk from a */ + memcpy(chunk.p, a->p + j, bsize * sizeof(mp_limb)); + chunk.sz = bsize; + chunk.sn = 1; + trim(&chunk); + j += bsize; + + if (!zero_p(&chunk)) { + /* Multiply chunk * b */ + mpz_mul(ctx, &tmp, &chunk, b); + + /* Shift tmp left by (i * bsize) limbs */ + if (i > 0) { + size_t shift_limbs = i * bsize; + size_t old_sz = tmp.sz; + size_t new_sz = old_sz + shift_limbs; + mpz_realloc(ctx, &tmp, new_sz); + memmove(tmp.p + shift_limbs, tmp.p, old_sz * sizeof(mp_limb)); + memset(tmp.p, 0, shift_limbs * sizeof(mp_limb)); + tmp.sz = new_sz; + } + + /* Add to result */ + mpz_add(ctx, &result, &result, &tmp); + } + } + + /* Handle leftover (remaining limbs after full chunks) */ + if (j < a->sz) { + size_t remaining = a->sz - j; + mpz_realloc(ctx, &chunk, remaining); + memcpy(chunk.p, a->p + j, remaining * sizeof(mp_limb)); + chunk.sz = remaining; + chunk.sn = 1; + trim(&chunk); + + if (!zero_p(&chunk)) { + mpz_mul(ctx, &tmp, &chunk, b); + + /* Shift by j limbs */ + if (j > 0) { + size_t old_sz = tmp.sz; + size_t new_sz = old_sz + j; + mpz_realloc(ctx, &tmp, new_sz); + memmove(tmp.p + j, tmp.p, old_sz * sizeof(mp_limb)); + memset(tmp.p, 0, j * sizeof(mp_limb)); + tmp.sz = new_sz; + } + + mpz_add(ctx, &result, &result, &tmp); + } + } + + /* Apply sign: result sign = a->sn * b->sn */ + result.sn = a->sn * b->sn; + + mpz_move(ctx, ww, &result); + mpz_clear(ctx, &chunk); + mpz_clear(ctx, &tmp); + pool_restore(ctx, pool_state); +} + +/* w = u * v */ +static void +mpz_mul(mpz_ctx_t *ctx, mpz_t *ww, mpz_t *u, mpz_t *v) +{ + if (zero_p(u) || zero_p(v)) { + zero(ww); + return; + } + + /* Fast path for squaring: u * u uses optimized squaring algorithm */ + if (u == v) { + mpz_sqr(ctx, ww, u); + return; + } + + /* Fast path for "all ones" numbers (2^n - 1) */ + size_t u_ones = mpz_all_ones_p(u); + size_t v_ones = mpz_all_ones_p(v); + if (u_ones && v_ones) { + mpz_mul_all_ones(ctx, ww, u_ones, v_ones); + return; + } + + /* Fast path: (2^n - 1) * y = (y << n) - y */ + if (u_ones && u_ones >= TOOM3_THRESHOLD * DIG_SIZE) { + mpz_t shifted; + mpz_init(ctx, &shifted); + mpz_mul_2exp(ctx, &shifted, v, u_ones); + mpz_sub(ctx, ww, &shifted, v); + mpz_clear(ctx, &shifted); + return; + } + if (v_ones && v_ones >= TOOM3_THRESHOLD * DIG_SIZE) { + mpz_t shifted; + mpz_init(ctx, &shifted); + mpz_mul_2exp(ctx, &shifted, u, v_ones); + mpz_sub(ctx, ww, &shifted, u); + mpz_clear(ctx, &shifted); + return; + } + + /* Fast path for power of 2: x * 2^n = x << n */ + size_t u_pow2 = mpz_power_of_2_exp(u); + if (u_pow2) { + mpz_mul_2exp(ctx, ww, v, u_pow2); + return; + } + size_t v_pow2 = mpz_power_of_2_exp(v); + if (v_pow2) { + mpz_mul_2exp(ctx, ww, u, v_pow2); + return; + } + + /* Fast path for sparse numbers (few bits set): use shift-add */ + size_t u_sparse = mpz_sparse_p(u); + if (u_sparse) { + mpz_mul_sparse(ctx, ww, u, v); + return; + } + size_t v_sparse = mpz_sparse_p(v); + if (v_sparse) { + mpz_mul_sparse(ctx, ww, v, u); + return; + } + + size_t min_sz = (u->sz < v->sz) ? u->sz : v->sz; + size_t max_sz = (u->sz > v->sz) ? u->sz : v->sz; + + /* + * Balance multiplication for highly asymmetric operands. + * When max >= 2*min and the smaller is above Toom-3 threshold, + * split the larger operand into chunks for better efficiency. + */ + if (max_sz >= 2 * min_sz && should_use_toom3(min_sz)) { + mpz_mul_balance(ctx, ww, u, v); + return; + } + + /* + * Use schoolbook for small operands below Karatsuba threshold. + */ + if (!should_use_karatsuba(min_sz)) { + mpz_mul_basic(ctx, ww, u, v); + return; + } + + /* + * Karatsuba for medium-sized operands (KARATSUBA_THRESHOLD <= min_sz < TOOM3_THRESHOLD). + */ + if (!should_use_toom3(min_sz)) { + size_t result_size = u->sz + v->sz; + mpz_realloc(ctx, ww, result_size); + + size_t scratch_size = karatsuba_scratch_size(max_sz); + scratch_size += (scratch_size >> 3) + 16; /* safety margin */ + size_t pool_state = pool_save(ctx); + mp_limb *scratch = NULL; + + if (MPZ_HAS_POOL(ctx)) { + scratch = pool_alloc(MPZ_POOL(ctx), scratch_size); + } + + if (scratch) { + mpz_mul_karatsuba_limbs(ww->p, u->p, u->sz, v->p, v->sz, scratch); + pool_restore(ctx, pool_state); + } + else { + scratch = (mp_limb*)mrb_malloc(MPZ_MRB(ctx), scratch_size * sizeof(mp_limb)); + mpz_mul_karatsuba_limbs(ww->p, u->p, u->sz, v->p, v->sz, scratch); + mrb_free(MPZ_MRB(ctx), scratch); + } + + ww->sz = result_size; + ww->sn = u->sn * v->sn; + trim(ww); + return; + } + + /* + * Toom-3 writes at offset 4*third with products up to 2*(third+3)+16 limbs. + * Maximum write position: 4*n/3 + 2*n/3 + 22 = 2*n + 22, where n = max size. + * Use 2*max_sz (not u->sz + v->sz) because third is based on max. + */ + size_t result_size = 2 * max_sz + 24; + mpz_realloc(ctx, ww, result_size); + + size_t scratch_size = toom3_scratch_size(max_sz); + /* Add safety margin proportional to scratch size. */ + scratch_size += (scratch_size >> 3) + 16; + size_t pool_state = pool_save(ctx); + mp_limb *scratch = NULL; + + if (MPZ_HAS_POOL(ctx)) { + scratch = pool_alloc(MPZ_POOL(ctx), scratch_size); + } + + if (scratch) { + mpz_mul_toom3(ctx, ww->p, u->p, u->sz, v->p, v->sz, scratch); + pool_restore(ctx, pool_state); + } + else { + /* Fallback to heap allocation for scratch space if pool fails */ + scratch = (mp_limb*)mrb_malloc(MPZ_MRB(ctx), scratch_size * sizeof(mp_limb)); + mpz_mul_toom3(ctx, ww->p, u->p, u->sz, v->p, v->sz, scratch); + mrb_free(MPZ_MRB(ctx), scratch); + } + + ww->sz = result_size; + ww->sn = u->sn * v->sn; + trim(ww); +} + +/* number of leading zero bits in digit */ +static int +lzb(mp_limb x) +{ + if (x == 0) return 0; +#if (defined(__GNUC__) || __has_builtin(__builtin_clz)) + if (sizeof(mp_limb) == sizeof(int64_t)) + return __builtin_clzll(x); + else if (sizeof(mp_limb) == sizeof(int32_t)) + return __builtin_clz(x); +#endif + + int j=0; + + for (mp_limb i = ((mp_limb)1 << (DIG_SIZE-1)); i && !(x&i); j++,i>>=1) + ; + return j; +} + +/* + * mpn-style low-level shift functions. + * These operate directly on limb arrays without mpz_t overhead. + */ + +/* + * Right shift limb array by cnt bits (0 < cnt < DIG_SIZE). + * rp[0..n-1] = ap[0..n-1] >> cnt + * Returns the bits shifted out from the low end. + * Supports in-place operation (rp == ap). + * + * Processing order: LOW to HIGH + * - rp[i] depends on ap[i] and ap[i+1] + * - Writing rp[i] doesn't affect ap[i+1], so in-place is safe + */ +static mp_limb +mpn_rshift(mp_limb *rp, const mp_limb *ap, size_t n, unsigned int cnt) +{ + mp_limb shifted_out; + size_t i; + + mrb_assert(cnt > 0 && cnt < DIG_SIZE); + + shifted_out = (ap[0] << (DIG_SIZE - cnt)) & DIG_MASK; + + for (i = 0; i < n - 1; i++) { + rp[i] = ((ap[i] >> cnt) | (ap[i + 1] << (DIG_SIZE - cnt))) & DIG_MASK; + } + rp[n - 1] = (ap[n - 1] >> cnt) & DIG_MASK; + + return shifted_out; +} + +/* + * Left shift limb array by cnt bits (0 < cnt < DIG_SIZE). + * rp[0..n-1] = ap[0..n-1] << cnt (lower n limbs) + * Returns the bits shifted out from the high end (carry). + * Supports in-place operation (rp == ap). + * + * Processing order: HIGH to LOW + * - rp[i] depends on ap[i] and ap[i-1] + * - Writing rp[i] doesn't affect ap[i-1], so in-place is safe + */ +static mp_limb +mpn_lshift(mp_limb *rp, const mp_limb *ap, size_t n, unsigned int cnt) +{ + mp_limb carry; + size_t i; + + mrb_assert(cnt > 0 && cnt < DIG_SIZE); + + carry = (ap[n - 1] >> (DIG_SIZE - cnt)) & DIG_MASK; + + for (i = n - 1; i > 0; i--) { + rp[i] = ((ap[i] << cnt) | (ap[i - 1] >> (DIG_SIZE - cnt))) & DIG_MASK; + } + rp[0] = (ap[0] << cnt) & DIG_MASK; + + return carry; +} + +/* + * Add two limb arrays of the same size. + * rp[0..n-1] = ap[0..n-1] + bp[0..n-1] + * Returns carry (0 or 1). + * Supports in-place operation (rp == ap or rp == bp). + */ +static inline mp_limb +mpn_add_n(mp_limb *rp, const mp_limb *ap, const mp_limb *bp, size_t n) +{ + mp_dbl_limb carry = 0; + + for (size_t i = 0; i < n; i++) { + carry += (mp_dbl_limb)ap[i] + (mp_dbl_limb)bp[i]; + rp[i] = LOW(carry); + carry = HIGH(carry); + } + return (mp_limb)carry; +} + +/* + * Add two limb arrays of different sizes. + * rp[0..an-1] = ap[0..an-1] + bp[0..bn-1] + * Precondition: an >= bn + * Returns carry (0 or 1). + * Supports in-place operation (rp == ap). + */ +static mp_limb +mpn_add(mp_limb *rp, const mp_limb *ap, size_t an, const mp_limb *bp, size_t bn) +{ + mp_dbl_limb carry; + + mrb_assert(an >= bn); + + /* Add overlapping part */ + carry = mpn_add_n(rp, ap, bp, bn); + + /* Propagate carry through remaining limbs of ap */ + for (size_t i = bn; i < an; i++) { + carry += (mp_dbl_limb)ap[i]; + rp[i] = LOW(carry); + carry = HIGH(carry); + } + return (mp_limb)carry; +} + +/* + * Subtract two limb arrays of the same size. + * rp[0..n-1] = ap[0..n-1] - bp[0..n-1] + * Returns borrow (0 or 1). + * Supports in-place operation (rp == ap). + */ +static inline mp_limb +mpn_sub_n(mp_limb *rp, const mp_limb *ap, const mp_limb *bp, size_t n) +{ + mp_dbl_limb_signed borrow = 0; + + for (size_t i = 0; i < n; i++) { + borrow += (mp_dbl_limb_signed)ap[i] - (mp_dbl_limb_signed)bp[i]; + rp[i] = LOW(borrow); + borrow = HIGH(borrow); + } + return (mp_limb)(-borrow); +} + +/* + * Subtract two limb arrays of different sizes. + * rp[0..an-1] = ap[0..an-1] - bp[0..bn-1] + * Precondition: an >= bn and ap >= bp (result is non-negative) + * Returns borrow (0 or 1, should be 0 if precondition met). + * Supports in-place operation (rp == ap). + */ +static mp_limb +mpn_sub(mp_limb *rp, const mp_limb *ap, size_t an, const mp_limb *bp, size_t bn) +{ + mp_dbl_limb_signed borrow; + + mrb_assert(an >= bn); + + /* Subtract overlapping part */ + borrow = -(mp_dbl_limb_signed)mpn_sub_n(rp, ap, bp, bn); + + /* Propagate borrow through remaining limbs of ap */ + for (size_t i = bn; i < an; i++) { + borrow += (mp_dbl_limb_signed)ap[i]; + rp[i] = LOW(borrow); + borrow = HIGH(borrow); + } + return (mp_limb)(-borrow); +} + +/* c1 = a>>n */ +/* n must be < DIG_SIZE */ +static void +urshift(mpz_ctx_t *ctx, mpz_t *c1, mpz_t *a, size_t n) +{ + mrb_assert(n < DIG_SIZE); + + if (n == 0) { + mpz_set(ctx, c1, a); + trim(c1); + } + else if (uzero_p(a)) { + zero(c1); + } + else { + mpz_realloc(ctx, c1, a->sz); + mpn_rshift(c1->p, a->p, a->sz, (unsigned int)n); + c1->sz = a->sz; + trim(c1); + } +} + +/* c1 = a<sz; + + mpz_realloc(ctx, c1, old_sz + 1); + carry = mpn_lshift(c1->p, c1->p, old_sz, (unsigned int)n); + c1->p[old_sz] = carry; + c1->sz = old_sz + 1; + trim(c1); + } + else { + mpz_t c; + mp_limb carry; + + mpz_init_heap(ctx, &c, a->sz + 1); + carry = mpn_lshift(c.p, a->p, a->sz, (unsigned int)n); + c.p[a->sz] = carry; + c.sz = a->sz + 1; + trim(&c); + mpz_move(ctx, c1, &c); + } +} + +/* Fast division by single limb */ +static void +div_limb(mpz_ctx_t *ctx, mpz_t *q, mpz_t *r, mpz_t *x, mp_limb d) +{ + mrb_state *mrb = MPZ_MRB(ctx); + size_t pool_state = pool_save(ctx); + mpz_t temp_q, temp_r; + size_t n; + mp_dbl_limb remainder; + + if (zero_p(x)) { + zero(q); + zero(r); + goto cleanup; + } + + if (d == 0) { + mrb_raise(mrb, E_ZERODIV_ERROR, "divided by 0"); + } + + /* Power-of-2 divisor optimization */ + if ((d & (d - 1)) == 0) { + /* d is power of 2, use bit operations */ + int shift = 0; + mp_limb temp = d; + while (temp > 1) { + temp >>= 1; + shift++; + } + + /* Quotient = x >> shift */ + if (shift == 0) { + mpz_init(ctx, &temp_q); + mpz_init(ctx, &temp_r); + mpz_set(ctx, &temp_q, x); + } + else { + /* Manual right shift implementation */ + size_t limb_shift = shift / DIG_SIZE; + size_t bit_shift = shift % DIG_SIZE; + + if (limb_shift >= x->sz) { + mpz_init(ctx, &temp_q); + mpz_init(ctx, &temp_r); + zero(&temp_q); + } + else { + size_t new_size = x->sz - limb_shift; + mpz_init_temp(ctx, &temp_q, new_size); + mpz_init(ctx, &temp_r); + + if (bit_shift == 0) { + /* Simple limb copy */ + for (size_t i = 0; i < new_size; i++) { + temp_q.p[i] = x->p[i + limb_shift]; + } + } + else { + /* Bit shift within limbs */ + mp_limb carry = 0; + for (size_t i = new_size; i > 0; i--) { + mp_limb current = x->p[i - 1 + limb_shift]; + temp_q.p[i - 1] = (current >> bit_shift) | carry; + carry = (current << (DIG_SIZE - bit_shift)) & DIG_MASK; + } + } + temp_q.sz = new_size; + trim(&temp_q); + temp_q.sn = (temp_q.sz == 0) ? 0 : 1; + } + } + + /* Remainder = x & (d - 1) */ + /* temp_r is already initialized in all code paths above */ + mpz_realloc(ctx, &temp_r, 1); + temp_r.p[0] = x->p[0] & (d - 1); + temp_r.sz = (temp_r.p[0] == 0) ? 0 : 1; + temp_r.sn = (temp_r.sz == 0) ? 0 : 1; + mpz_move(ctx, q, &temp_q); + mpz_move(ctx, r, &temp_r); + goto cleanup; + } + + /* General single-limb division */ + if (x->sz == 1) { + /* Both dividend and divisor are single limb */ + mpz_init_temp(ctx, &temp_q, 1); + mpz_init_temp(ctx, &temp_r, 1); + + temp_q.p[0] = x->p[0] / d; + temp_r.p[0] = x->p[0] % d; + + temp_q.sz = (temp_q.p[0] == 0) ? 0 : 1; + temp_q.sn = (temp_q.sz == 0) ? 0 : 1; + + temp_r.sz = (temp_r.p[0] == 0) ? 0 : 1; + temp_r.sn = (temp_r.sz == 0) ? 0 : 1; + mpz_move(ctx, q, &temp_q); + mpz_move(ctx, r, &temp_r); + goto cleanup; + } + + /* Multi-limb dividend, single-limb divisor */ + n = x->sz; + mpz_init_temp(ctx, &temp_q, n); + mpz_init_temp(ctx, &temp_r, 1); + + remainder = 0; + + /* Process from most significant limb to least significant */ + for (size_t i = n; i > 0; i--) { + remainder = (remainder << DIG_SIZE) + x->p[i-1]; + temp_q.p[i-1] = (mp_limb)(remainder / d); + remainder = remainder % d; + } + + /* Set remainder */ + temp_r.p[0] = (mp_limb)remainder; + temp_r.sz = (remainder == 0) ? 0 : 1; + temp_r.sn = (temp_r.sz == 0) ? 0 : 1; + + /* Trim leading zeros from quotient */ + trim(&temp_q); + temp_q.sn = (temp_q.sz == 0) ? 0 : 1; + + /* Copy results to avoid pool/heap mixing */ + mpz_move(ctx, q, &temp_q); + mpz_move(ctx, r, &temp_r); + +cleanup: + pool_restore(ctx, pool_state); +} + +static void +udiv(mpz_ctx_t *ctx, mpz_t *qq, mpz_t *rr, mpz_t *xx, mpz_t *yy) +{ + /* Handle simple cases */ + int cmp = ucmp(xx, yy); + if (cmp == 0) { + mpz_set_int(ctx, qq, 1); + zero(rr); + return; + } + else if (cmp < 0) { + zero(qq); + mpz_set(ctx, rr, xx); + return; + } + + /* Fast path for single-limb divisor */ + if (yy->sz == 1) { + div_limb(ctx, qq, rr, xx, yy->p[0]); + return; + } + + mrb_assert(yy->sn != 0); /* divided by zero */ + mrb_assert(yy->sz > 0); /* divided by zero */ + mrb_assert(!uzero_p(yy)); /* divided by zero */ + + /* Pre-check size constraints to avoid memory leaks on exception. + * Check both dividend and divisor sizes as ulshift uses size+1 for both. + * Fail early before any allocation to prevent leaks. */ + size_t max_mpz_size = SIZE_MAX / sizeof(mp_limb); + if (xx->sz + 1 > max_mpz_size || yy->sz + 1 > max_mpz_size) { + mrb_state *mrb = MPZ_MRB(ctx); + mrb_raise(mrb, E_RUNTIME_ERROR, "bigint size too large"); + } + + /* Use new context architecture with automatic pool/heap management */ + size_t pool_state = pool_save(ctx); + mpz_t q, x, y; + mpz_init_temp(ctx, &q, xx->sz - yy->sz + 1); /* Quotient size estimate */ + mpz_init_temp(ctx, &x, xx->sz + 1); /* Dividend with potential carry */ + mpz_init_temp(ctx, &y, yy->sz); /* Divisor copy */ + mpz_realloc(ctx, &x, xx->sz+1); + size_t yd = digits(yy); + size_t ns = lzb(yy->p[yd-1]); + ulshift(ctx, &x, xx, ns); + ulshift(ctx, &y, yy, ns); + trim(&y); /* Trim after shift to remove any zero limbs */ + size_t xd = digits(&x); + yd = digits(&y); + + /* Handle edge case: divisor became zero after normalization */ + if (yd == 0 || y.p[yd-1] == 0) { + /* This should not happen with valid inputs, but handle gracefully */ + zero(qq); + zero(rr); + mpz_clear(ctx, &q); + mpz_clear(ctx, &x); + mpz_clear(ctx, &y); + pool_restore(ctx, pool_state); + return; + } + + mpz_realloc(ctx, &q, xd-yd+1); // Quotient has xd-yd+1 digits maximum + + /* Core Knuth Algorithm D division loop */ + mp_dbl_limb z = y.p[yd-1]; + mrb_assert(z != 0); /* Divisor high limb must be non-zero after normalization */ + + if (xd >= yd) { + for (size_t j = xd - yd;; j--) { + mp_dbl_limb qhat; + mp_dbl_limb rhat; + + if (j + yd == xd) { + /* Only one high limb available */ + mp_dbl_limb dividend_val = (((mp_dbl_limb)0 << DIG_SIZE) + x.p[j+yd-1]); + qhat = dividend_val / z; + rhat = dividend_val % z; + } + else { + /* Two limbs available - standard Knuth estimation */ + mp_dbl_limb dividend_val = ((mp_dbl_limb)x.p[j+yd] << DIG_SIZE) + x.p[j+yd-1]; + qhat = dividend_val / z; + rhat = dividend_val % z; + } + + /* Standard Knuth Algorithm D qhat refinement (2-limb check) */ + if (yd >= 2) { + mp_dbl_limb y_second = y.p[yd-2]; + mp_dbl_limb x_third = (j+yd-2 < x.sz) ? x.p[j+yd-2] : 0; + mp_dbl_limb left_side = qhat * y_second; + mp_dbl_limb right_side = (rhat << DIG_SIZE) + x_third; + + while (qhat >= ((mp_dbl_limb)1 << DIG_SIZE) || (left_side > right_side)) { + qhat--; + rhat += z; + if (rhat >= ((mp_dbl_limb)1 << DIG_SIZE)) break; + left_side -= y_second; + right_side = (rhat << DIG_SIZE) + x_third; + } + } + + if (qhat > 0) { + /* Subtract qhat * divisor from dividend */ + /* Use pointers to avoid repeated i+j index calculation */ + mp_dbl_limb_signed borrow = 0; + mp_limb *xp = x.p + j; + mp_limb *xp_end = x.p + x.sz; + const mp_limb *yp = y.p; + + for (size_t i = 0; i < yd; i++) { + mp_dbl_limb product = qhat * *yp++; + mp_dbl_limb_signed diff = (mp_dbl_limb_signed)*xp - (mp_dbl_limb_signed)LOW(product) + borrow; + *xp++ = LOW(diff); + borrow = HIGH(diff) - (mp_dbl_limb_signed)HIGH(product); + } + + /* Handle final borrow propagation */ + if (xp < xp_end) { + borrow += (mp_dbl_limb_signed)*xp; + *xp = LOW(borrow); + borrow = HIGH(borrow); + } + + /* Correction: if borrow is negative, qhat was too large, add back */ + if (borrow < 0) { + qhat--; + mp_dbl_limb carry = 0; + xp = x.p + j; + yp = y.p; + for (size_t i = 0; i < yd; i++) { + carry += (mp_dbl_limb)*xp + (mp_dbl_limb)*yp++; + *xp++ = LOW(carry); + carry = HIGH(carry); + } + if (xp < xp_end && carry > 0) { + *xp += (mp_limb)carry; + } + } + } + + q.p[j] = (mp_limb)qhat; + if (j == 0) break; + } + } + x.sz = yd; + urshift(ctx, rr, &x, ns); + trim(&q); + mpz_move(ctx, qq, &q); + mpz_clear(ctx, &q); + mpz_clear(ctx, &x); + mpz_clear(ctx, &y); + pool_restore(ctx, pool_state); +} + +static void +mpz_mdiv(mpz_ctx_t *ctx, mpz_t *q, mpz_t *x, mpz_t *y) +{ + mpz_t r; + short sn1 = x->sn, sn2 = y->sn, qsign; + + if (zero_p(x)) { + mpz_init_set_int(ctx, q, 0); + return; + } + mpz_init(ctx, &r); + udiv(ctx, q, &r, x, y); + qsign = q->sn = sn1 * sn2; + if (uzero_p(q)) + q->sn = 0; + /* now if r != 0 and q < 0 we need to round q towards -inf */ + if (!uzero_p(&r) && qsign < 0) { + /* add 1 to magnitude */ + mpz_add_int(ctx, q, 1); + /* force negative sign in case the value of q was zero before rounding */ + q->sn = -1; + } + mpz_clear(ctx, &r); +} + +static void +mpz_mmod(mpz_ctx_t *ctx, mpz_t *r, mpz_t *x, mpz_t *y) +{ + mpz_t q; + short sn1 = x->sn, sn2 = y->sn, sn3; + + mpz_init(ctx, &q); + if (sn1 == 0) { + zero(r); + return; + } + udiv(ctx, &q, r, x, y); + mpz_clear(ctx, &q); + if (uzero_p(r)) { + r->sn = 0; + return; + } + sn3 = sn1 * sn2; + if (sn3 > 0) + r->sn = sn1; + else if (sn1 < 0 && sn2 > 0) { + r->sn = 1; + mpz_sub(ctx, r, y, r); + } + else { + r->sn = 1; + mpz_add(ctx, r, y, r); + } +} + +static void +mpz_mdivmod(mpz_ctx_t *ctx, mpz_t *q, mpz_t *r, mpz_t *x, mpz_t *y) +{ + short sn1 = x->sn, sn2 = y->sn, qsign; + + if (sn1 == 0) { + zero(q); + zero(r); + return; + } + udiv(ctx, q, r, x, y); + qsign = q->sn = sn1 * sn2; + if (uzero_p(r)) { + /* q != 0, since q=r=0 would mean x=0, which was tested above */ + r->sn = 0; + return; + } + if (q->sn > 0) + r->sn = sn1; + else if (sn1 < 0 && sn2 > 0) { + r->sn = 1; + mpz_sub(ctx, r, y, r); + } + else { + r->sn = 1; + mpz_add(ctx, r, y, r); + } + if (uzero_p(q)) + q->sn = 0; + /* now if r != 0 and q < 0 we need to round q towards -inf */ + if (!uzero_p(r) && qsign < 0) { + /* add 1 to magnitude */ + mpz_add_int(ctx, q, 1); + /* force negative sign in case the value of q was zero before rounding */ + q->sn = -1; + } +} + +/* Fast modular reduction for single-limb modulus */ +static void +mpz_mod_limb(mpz_ctx_t *ctx, mpz_t *r, mpz_t *x, mp_limb m) +{ + if (zero_p(x)) { + zero(r); + return; + } + + if (x->sz == 1) { + /* Single limb case - simple modulo */ + mp_limb result = x->p[0] % m; + mpz_set_int(ctx, r, result); + if (result == 0) + r->sn = 0; + else + r->sn = x->sn; + return; + } + + /* Multi-limb case - use repeated division */ + mp_dbl_limb remainder = 0; + for (size_t i = x->sz; i > 0; i--) { + remainder = (remainder << DIG_SIZE) | x->p[i-1]; + remainder %= m; + } + + mpz_set_int(ctx, r, (mp_limb)remainder); + r->sn = x->sn; + if (remainder == 0) + r->sn = 0; +} + +/* Forward declarations for Barrett reduction functions */ +static void mpz_barrett_mu(mpz_ctx_t *ctx, mpz_t *mu, mpz_t *m); +static void mpz_barrett_reduce(mpz_ctx_t *ctx, mpz_t *r, mpz_t *x, mpz_t *m, mpz_t *mu); + +static void +mpz_mod(mpz_ctx_t *ctx, mpz_t *r, mpz_t *x, mpz_t *y) +{ + short sn = x->sn; + + if (zero_p(x)) { + zero(r); + return; + } + + /* Fast path for single-limb modulus */ + if (y->sz == 1) { + mpz_mod_limb(ctx, r, x, y->p[0]); + if (y->sn < 0) r->sn = -r->sn; + return; + } + + /* Barrett reduction for moderate-sized moduli (>= 4 limbs where setup is worthwhile). + * Barrett's precondition is x < 2^(2*bits(m)); inputs beyond ~2*m.sz limbs + * violate it and the algorithm silently truncates high limbs. Fall through + * to general division for those. */ + if (y->sz >= 4 && y->sz <= 16 && x->sz >= y->sz + 2 && x->sz <= 2 * y->sz) { + mpz_t mu; + mpz_init_temp(ctx, &mu, y->sz + 1); + mpz_barrett_mu(ctx, &mu, y); + mpz_realloc(ctx, r, y->sz); + mpz_barrett_reduce(ctx, r, x, y, &mu); + r->sn = sn; + if (uzero_p(r)) + r->sn = 0; + mpz_clear(ctx, &mu); + return; + } + + /* General division fallback */ + mpz_t q; + mpz_init_temp(ctx, &q, x->sz); + mpz_realloc(ctx, r, y->sz); + udiv(ctx, &q, r, x, y); + r->sn = sn; + if (uzero_p(r)) r->sn = 0; + mpz_clear(ctx, &q); +} + +static mrb_int +mpz_cmp(mpz_ctx_t *ctx, mpz_t *x, mpz_t *y) +{ + if (x->sn < 0 && y->sn > 0) + return (-1); + if (x->sn > 0 && y->sn < 0) + return 1; + int abscmp=ucmp(x, y); + if (x->sn >=0 && y->sn >=0) + return abscmp; + return (-abscmp); // if (x->sn <=0 && y->sn <=0) +} + +/* 2<=base<=36 - this overestimates the optimal value, which is OK */ +static size_t +mpz_sizeinbase(mpz_t *x, mrb_int base) +{ + size_t i, j; + + size_t bits = digits(x) * DIG_SIZE; + mrb_assert(2 <= base && base <= 36); + + if (zero_p(x) || x->sz == 0) return 0; + for (j=0,i=1; i<=(size_t)base; i*=2,j++) + ; + return bits/(j-1)+1; +} + +/* x = y * n (only called from mpz_init_set_str) */ +/* assumes x and n are positive or zero */ +/* assumes n is small (fits in mp_limb) */ +static void +mpz_mul_int(mpz_ctx_t *ctx, mpz_t *x, mrb_int n) +{ + if (n == 0 || zero_p(x)) { + zero(x); + return; + } + + size_t x_sz = x->sz; + size_t new_sz = x_sz + 1; // Maximum possible size after multiplication + + // Reallocate x if necessary + mpz_realloc(ctx, x, new_sz); + + mp_dbl_limb cc = 0; + mp_limb n_limb = (mp_limb)n; + + for (size_t i = 0; i < x_sz; i++) { + // Multiply each limb and add carry + cc += (mp_dbl_limb)x->p[i] * n_limb; + x->p[i] = LOW(cc); + cc = HIGH(cc); + } + + if (cc) { + // If there is a remaining carry, store it and update size + x->p[x_sz] = (mp_limb)cc; + x->sz = x_sz + 1; + } + else { + x->sz = x_sz; + } + + x->sn = 1; + trim(x); +} + +/* Forward declarations and constants for decimal base conversion */ +#ifdef MRB_NO_MPZ64BIT +#define DECIMAL_BASE_CONV 10000UL /* 10^4 for 16-bit limbs */ +#define DECIMAL_DIGITS_CONV 4 +#else +#define DECIMAL_BASE_CONV 1000000000UL /* 10^9 for 32-bit limbs */ +#define DECIMAL_DIGITS_CONV 9 +#endif + +static size_t mpz_str_to_decimal(const char *s, mrb_int len, mp_limb *decimal_out, mrb_int *effective_len); +static size_t mpz_decimal_to_binary(const mp_limb *decimal, size_t decimal_size, mp_limb *limbs_out); + +static int +mpz_init_set_str(mpz_ctx_t *ctx, mpz_t *x, const char *s, mrb_int len, mrb_int base) +{ + int retval = 0; + short sn; + uint8_t k; + + mpz_init(ctx, x); + if (*s == '-') { + sn = -1; s++; len--; + } + else if (*s == '+') { + sn = 1; s++; len--; + } + else + sn = 1; + + if (base == 10) { + /* Use optimized decimal parsing: parse 9 digits at a time */ + /* First validate that all characters are valid decimal digits */ + for (mrb_int i = 0; i < len; i++) { + if (s[i] == '_') continue; + if (s[i] < '0' || s[i] > '9') { + retval = (-1); + break; + } + } + + if (retval == 0) { + /* Estimate size: ceil(len / DECIMAL_DIGITS_CONV) decimal chunks */ + size_t decimal_alloc = (size_t)((len + DECIMAL_DIGITS_CONV - 1) / DECIMAL_DIGITS_CONV) + 1; + + /* Try pool first (no Karatsuba in this path), fall back to malloc */ + size_t pool_state = pool_save(ctx); + mp_limb *decimal = pool_alloc(MPZ_POOL(ctx), decimal_alloc); + mrb_bool use_heap = (decimal == NULL); + if (use_heap) { + decimal = (mp_limb*)mrb_malloc(MPZ_MRB(ctx), decimal_alloc * sizeof(mp_limb)); + } + + mrb_int effective_len; + size_t decimal_size = mpz_str_to_decimal(s, len, decimal, &effective_len); + + if (decimal_size > 0) { + /* Estimate binary limbs needed: roughly (effective_len * 10) / (32 * 3) limbs */ + size_t limb_alloc = (size_t)((effective_len * 4 + 9) / 10) + 2; + /* Use realloc to reuse x->p buffer, avoiding extra malloc+free */ + x->p = (mp_limb*)mrb_realloc(MPZ_MRB(ctx), x->p, limb_alloc * sizeof(mp_limb)); + memset(x->p, 0, limb_alloc * sizeof(mp_limb)); + + size_t limb_size = mpz_decimal_to_binary(decimal, decimal_size, x->p); + + x->sz = (mrb_int)limb_size; + x->sn = limb_size == 0 ? 0 : sn; + } + + if (use_heap) { + mrb_free(MPZ_MRB(ctx), decimal); + } + else { + pool_restore(ctx, pool_state); + } + } + } + else { + /* Use schoolbook algorithm for other bases */ + for (mrb_int i = 0; i < len; i++) { + if (s[i]=='_') continue; + if (s[i] >= '0' && s[i] <= '9') + k = (uint8_t)s[i] - (uint8_t)'0'; + else if (s[i] >= 'A' && s[i] <= 'Z') + k = (uint8_t)s[i] - (uint8_t)'A'+10; + else if (s[i] >= 'a' && s[i] <= 'z') + k = (uint8_t)s[i] - (uint8_t)'a'+10; + else { + retval = (-1); + break; + } + if (k >= base) { + retval = (-1); + break; + } + mpz_mul_int(ctx, x, base); + mpz_add_int(ctx, x, k); + } + x->sn = x->sz == 0 ? 0 : sn; + } + + return retval; +} + +/* power of base no bigger than DIG_BASE */ +/* power of 2 is handled differently */ +static const mp_limb base_limit[34*2] = { +#ifdef MRB_NO_MPZ64BIT + 59049, // 3^10 + 0, // 4^8 (skip) + 15625, // 5^6 + 46656, // 6^6 + 16807, // 7^5 + 0, // 8^5 (skip) + 59049, // 9^5 + 10000, // 10^4 + 14641, // 11^4 + 20736, // 12^4 + 28561, // 13^4 + 38416, // 14^4 + 50625, // 15^4 + 0, // 16^4 (skip) + 4913, // 17^3 + 5832, // 18^3 + 6859, // 19^3 + 8000, // 20^3 + 9261, // 21^3 + 10648, // 22^3 + 12167, // 23^3 + 13824, // 24^3 + 15625, // 25^3 + 17576, // 26^3 + 19683, // 27^3 + 21952, // 28^3 + 24389, // 29^3 + 27000, // 30^3 + 29791, // 31^3 + 0, // 32^3 (skip) + 35937, // 33^3 + 39304, // 34^3 + 42875, // 35^3 + 46656, // 36^3 +#else + 3486784401UL, // 3^20 + 0, // 4^16 (skip) + 1220703125UL, // 5^13 + 2176782336UL, // 6^12 + 1977326743UL, // 7^11 + 0, // 8^10 (skip) + 3486784401UL, // 9^10 + 1000000000UL, // 10^9 + 2357947691UL, // 11^9 + 429981696UL, // 12^8 + 815730721UL, // 13^8 + 1475789056UL, // 14^8 + 2562890625UL, // 15^8 + 0, // 16^8 (skip) + 410338673UL, // 17^7 + 612220032UL, // 18^7 + 893871739UL, // 19^7 + 1280000000UL, // 20^7 + 1801088541UL, // 21^7 + 2494357888UL, // 22^7 + 3404825447UL, // 23^7 + 191102976UL, // 24^6 + 244140625UL, // 25^6 + 308915776UL, // 26^6 + 387420489UL, // 27^6 + 481890304UL, // 28^6 + 594823321UL, // 29^6 + 729000000UL, // 30^6 + 887503681UL, // 31^6 + 0, // 32^6 (skip) + 1291467969UL, // 33^6 + 1544804416UL, // 34^6 + 1838265625UL, // 35^6 + 2176782336UL, // 36^6 +#endif +}; + +/* + * Divide-and-conquer decimal string conversion. + * For numbers with > DC_GET_STR_THRESHOLD digits, this is O(n log^2 n) + * instead of O(n^2) for the simple algorithm. + */ +#define DC_GET_STR_THRESHOLD 700 + +/* + * Scratch buffer for D&C get_str to avoid repeated allocations. + * Contains preallocated work areas that are resized as needed. + * + * The lo_stack provides a separate buffer for each recursion depth, + * preventing overwrites between parent and child calls. Each entry + * is allocated on first use with appropriate size for that depth. + */ +#define DC_MAX_DEPTH 32 /* Covers up to 2^32 * 1000 digits */ + +typedef struct { + mpz_t lo_stack[DC_MAX_DEPTH]; /* Depth-indexed lo buffers */ + mrb_bool lo_init[DC_MAX_DEPTH]; /* Initialized flags per depth */ + mpz_t q5; /* Quotient from division by 5^k */ + mpz_t r5; /* Remainder from division by 5^k */ + mpz_t q5_low; /* Low bits of q5 for mod 2^k */ + mpz_t tmp; /* Temporary for base case (in-place division) */ + mrb_bool initialized; +} dc_get_str_scratch_t; + +static void +dc_scratch_init(mpz_ctx_t *ctx, dc_get_str_scratch_t *scratch, size_t max_limbs) +{ + if (scratch->initialized) return; + + /* lo_stack entries are initialized on demand per depth, not here */ + for (size_t i = 0; i < DC_MAX_DEPTH; i++) { + scratch->lo_init[i] = FALSE; + } + + /* Preallocate other scratch buffers with estimated sizes */ + mpz_init_heap(ctx, &scratch->q5, max_limbs); + mpz_init_heap(ctx, &scratch->r5, max_limbs); + mpz_init_heap(ctx, &scratch->q5_low, max_limbs); + mpz_init_heap(ctx, &scratch->tmp, max_limbs); + scratch->initialized = TRUE; +} + +static void +dc_scratch_clear(mpz_ctx_t *ctx, dc_get_str_scratch_t *scratch) +{ + if (!scratch->initialized) return; + + /* Clear lo_stack entries that were initialized */ + for (size_t i = 0; i < DC_MAX_DEPTH; i++) { + if (scratch->lo_init[i]) { + mpz_clear(ctx, &scratch->lo_stack[i]); + } + } + + mpz_clear(ctx, &scratch->q5); + mpz_clear(ctx, &scratch->r5); + mpz_clear(ctx, &scratch->q5_low); + mpz_clear(ctx, &scratch->tmp); + scratch->initialized = FALSE; +} + +/* + * Recursive D&C conversion helper. + * Converts x to decimal string, writing exactly num_digits characters. + * The caller must ensure num_digits >= actual digits in x. + * Leading zeros are added if x has fewer digits than num_digits. + */ +/* Batch divisor for extracting multiple digits at once */ +#ifdef MRB_NO_MPZ64BIT +/* 16-bit limbs: 10^4 fits in 16 bits */ +#define BATCH_DIVISOR 10000UL +#define BATCH_DIGITS 4 +#else +/* 32-bit limbs: 10^9 fits in 32 bits */ +#define BATCH_DIVISOR 1000000000UL +#define BATCH_DIGITS 9 +#endif + +/* + * Divide limb array by BATCH_DIVISOR in place, returning remainder. + * Used for extracting BATCH_DIGITS decimal digits at a time. + * Compilers optimize constant division to multiplication+shift. + */ +static inline mp_limb +mpn_div_batch(mp_limb *p, size_t sz) +{ + mp_dbl_limb rem = 0; + for (size_t i = sz; i > 0; i--) { + mp_dbl_limb n = (rem << DIG_SIZE) | p[i-1]; + mp_dbl_limb q = n / BATCH_DIVISOR; + rem = n - q * BATCH_DIVISOR; + p[i-1] = (mp_limb)q; + } + return (mp_limb)rem; +} + +/* Lookup table for fast 2-digit conversion (Lemire's small table technique) */ +static const char digit_pairs[] = + "00010203040506070809" + "10111213141516171819" + "20212223242526272829" + "30313233343536373839" + "40414243444546474849" + "50515253545556575859" + "60616263646566676869" + "70717273747576777879" + "80818283848586878889" + "90919293949596979899"; + +static void +mpz_get_str_dc_recur(mpz_ctx_t *ctx, char *s, mpz_t *x, size_t num_digits, + mpz_t *pow5, size_t num_powers, size_t depth, + dc_get_str_scratch_t *scratch) +{ + /* Base case: use simple conversion for small numbers */ + if (num_digits <= DC_GET_STR_THRESHOLD || num_powers == 0) { + /* Convert to string in reverse order using batch extraction */ + size_t pos = num_digits; + + /* Use scratch buffer for working copy */ + mpz_t *tmp = &scratch->tmp; + + /* Ensure tmp has enough capacity and copy x */ + mpz_realloc(ctx, tmp, x->sz); + mpz_set(ctx, tmp, x); + + while (pos > 0 && !zero_p(tmp)) { + /* Divide by BATCH_DIVISOR in place, get remainder as BATCH_DIGITS digits */ + mp_limb batch = mpn_div_batch(tmp->p, tmp->sz); + trim(tmp); + + /* Convert remainder to BATCH_DIGITS digits using table lookup */ +#if (BATCH_DIGITS % 2) == 1 + /* Extract last digit separately since BATCH_DIGITS is odd */ + if (pos > 0) { + s[--pos] = '0' + (char)(batch % 10); + batch /= 10; + } +#endif + /* Extract remaining digits as pairs using lookup table */ + for (int d = 0; d < BATCH_DIGITS / 2 && pos >= 2; d++) { + mp_limb pair = batch % 100; + batch /= 100; + s[--pos] = digit_pairs[pair * 2 + 1]; + s[--pos] = digit_pairs[pair * 2]; + } + /* Extract any remaining single digit when pos == 1 */ + if (pos > 0 && batch > 0) { + s[--pos] = '0' + (char)(batch % 10); + } + } + + /* Fill remaining positions with zeros */ + while (pos > 0) { + s[--pos] = '0'; + } + + return; + } + + /* Find appropriate power of 10 to split on */ + /* We want the largest power that gives roughly half the digits */ + size_t split_idx = 0; + size_t split_digits = 1; + for (size_t i = 0; i < num_powers; i++) { + size_t d = (size_t)1 << i; /* digits for this power */ + if (d * 2 <= num_digits) { + split_idx = i; + split_digits = d; + } + } + + /* + * Optimization: Use the factorization 10^k = 2^k * 5^k + * + * Instead of dividing by 10^k directly: + * 1. Divide by 5^k (smaller divisor = faster division) + * 2. Use bit operations to handle the 2^k part + * + * If x = hi * 10^k + lo, and we compute q5 = x / 5^k, r5 = x % 5^k: + * hi = q5 >> k (right shift by k bits) + * lo = (q5 & ((1<q5; + mpz_t *r5 = &scratch->r5; + mpz_t *q5_low = &scratch->q5_low; + + /* Step 1: Divide by 5^k (cheaper than dividing by 10^k) */ + mpz_mdivmod(ctx, q5, r5, x, &pow5[split_idx]); + r5->sn = (r5->sn < 0) ? -r5->sn : r5->sn; + + /* Step 2: Extract q5_low = q5 mod 2^k BEFORE modifying q5 */ + mpz_mod_2exp(ctx, q5_low, q5, (mrb_int)split_digits); + + /* Step 3: hi = q5 >> k - shift q5 in place to reuse as hi */ + /* This avoids allocating a separate hi buffer */ + mpz_t *hi = q5; /* Reuse q5 as hi after shifting */ + { + size_t limb_shift = split_digits / DIG_SIZE; + size_t bit_shift = split_digits % DIG_SIZE; + + if (limb_shift >= q5->sz) { + zero(q5); + } + else { + size_t new_sz = q5->sz - limb_shift; + /* Shift limbs down */ + memmove(q5->p, q5->p + limb_shift, new_sz * sizeof(mp_limb)); + q5->sz = new_sz; + /* Apply bit shift if needed */ + if (bit_shift > 0) { + mpn_rshift(q5->p, q5->p, q5->sz, (unsigned int)bit_shift); + } + trim(q5); + } + } + + /* Step 4: lo = q5_low * 5^k + r5 - use depth-indexed lo buffer */ + if (depth >= DC_MAX_DEPTH) { + /* Fallback: allocate fresh if depth exceeds limit (shouldn't happen) */ + mpz_t lo; + mpz_init(ctx, &lo); + mpz_mul(ctx, &lo, q5_low, &pow5[split_idx]); + mpz_add(ctx, &lo, &lo, r5); + lo.sn = (lo.sn < 0) ? -lo.sn : lo.sn; + + size_t hi_digits = num_digits - split_digits; + mpz_get_str_dc_recur(ctx, s, hi, hi_digits, pow5, split_idx, depth + 1, scratch); + mpz_get_str_dc_recur(ctx, s + hi_digits, &lo, split_digits, pow5, split_idx, depth + 1, scratch); + mpz_clear(ctx, &lo); + return; + } + + /* Initialize lo_stack[depth] on first use at this depth */ + mpz_t *lo = &scratch->lo_stack[depth]; + if (!scratch->lo_init[depth]) { + /* Allocate with appropriate size for this depth level */ + /* At depth d, lo is roughly x->sz / 2^(d+1) limbs, plus some margin */ + size_t est_limbs = (x->sz >> 1) + 2; + mpz_init_heap(ctx, lo, est_limbs); + scratch->lo_init[depth] = TRUE; + } + + mpz_mul(ctx, lo, q5_low, &pow5[split_idx]); + mpz_add(ctx, lo, lo, r5); + lo->sn = (lo->sn < 0) ? -lo->sn : lo->sn; + + /* Recursively convert high part (using q5 which now contains hi) */ + size_t hi_digits = num_digits - split_digits; + mpz_get_str_dc_recur(ctx, s, hi, hi_digits, pow5, split_idx, depth + 1, scratch); + + /* Recursively convert low part (exactly split_digits digits with padding) */ + mpz_get_str_dc_recur(ctx, s + hi_digits, lo, split_digits, pow5, split_idx, depth + 1, scratch); +} + +/* + * D&C decimal string conversion entry point. + * Returns pointer to start of string (after optional sign). + * + * Optimization: Uses 10^k = 2^k * 5^k factorization. + * Dividing by 5^k is ~30% faster than dividing by 10^k because + * 5^k has fewer bits (2.32k vs 3.32k). The 2^k part is handled + * with fast bit shifts. + */ +#define MAX_POWERS 64 + +struct mpz_get_str_dc_data { + mpz_ctx_t *ctx; + char *s; /* output buffer (after sign) */ + mpz_t *x; + size_t num_digits; + mpz_t pow5[MAX_POWERS]; + mpz_t tmp; + dc_get_str_scratch_t scratch; + size_t num_powers; /* cleanup target count */ +}; + +static mrb_value +mpz_get_str_dc_body(mrb_state *mrb, void *userdata) +{ + struct mpz_get_str_dc_data *d = (struct mpz_get_str_dc_data *)userdata; + mpz_ctx_t *ctx = d->ctx; + char *s = d->s; + mpz_t *x = d->x; + size_t num_digits = d->num_digits; + + /* 5^1 */ + mpz_init(ctx, &d->pow5[0]); + mpz_set_int(ctx, &d->pow5[0], 5); + d->num_powers = 1; + + /* Build powers by squaring: 5^(2^k) = (5^(2^(k-1)))^2 */ + while (d->num_powers < MAX_POWERS) { + size_t power_digits = (size_t)1 << d->num_powers; + if (power_digits > num_digits) break; + + mpz_init(ctx, &d->pow5[d->num_powers]); + mpz_sqr(ctx, &d->pow5[d->num_powers], &d->pow5[d->num_powers - 1]); + d->num_powers++; + } + + /* Make a copy of x for conversion (to preserve original) */ + mpz_init_set(ctx, &d->tmp, x); + d->tmp.sn = 1; /* Work with absolute value */ + + /* Initialize scratch buffers for base case optimization */ + dc_scratch_init(ctx, &d->scratch, x->sz); + + /* Do the recursive conversion (starting at depth 0) */ + mpz_get_str_dc_recur(ctx, s, &d->tmp, num_digits, d->pow5, d->num_powers, 0, &d->scratch); + + /* Null-terminate the string */ + s[num_digits] = '\0'; + + return mrb_nil_value(); +} + +static char* +mpz_get_str_dc(mpz_ctx_t *ctx, char *s, mpz_t *x) +{ + /* Handle sign */ + char *result = s; + if (x->sn < 0) { + *s++ = '-'; + } + + /* Calculate number of decimal digits needed */ + /* Use log10(2) ≈ 0.30103, so bits * 0.30103 + 1 gives upper bound */ + size_t bits = digits(x) * DIG_SIZE; + size_t num_digits = (size_t)(bits * 30103UL / 100000UL) + 2; + + struct mpz_get_str_dc_data d; + memset(&d, 0, sizeof(d)); + d.ctx = ctx; + d.s = s; + d.x = x; + d.num_digits = num_digits; + d.num_powers = 0; + + mrb_value exc; + MRB_ENSURE(MPZ_MRB(ctx), exc, mpz_get_str_dc_body, &d) { + /* Cleanup always runs (mpz_clear is safe on zero-initialized mpz_t) */ + for (size_t i = 0; i < d.num_powers; i++) { + mpz_clear(ctx, &d.pow5[i]); + } + mpz_clear(ctx, &d.tmp); + dc_scratch_clear(ctx, &d.scratch); + } + + /* Remove leading zeros (but keep at least one digit) */ + char *p = s; + while (*p == '0' && *(p+1) != '\0') p++; + if (p > s) { + memmove(s, p, strlen(p) + 1); + } + + return result; +} + +/* + * CPython-style base conversion for decimal strings. + * + * Converts from base 2^DIG_SIZE to base 10^k (DECIMAL_BASE_CONV). + * This is faster than repeated division because: + * - Each input limb is processed only once (MSB to LSB) + * - The inner loop multiplies/divides by constants (compiler optimizes) + * - Fewer total operations than dividing entire number repeatedly + * + * Returns number of decimal base digits written to decimal_out. + */ +static size_t +mpz_base_convert_decimal(const mp_limb *limbs, size_t size, mp_limb *decimal_out) +{ + size_t decimal_size = 0; + + /* Process input limbs from MSB to LSB */ + for (size_t i = size; i > 0; i--) { + mp_limb hi = limbs[i - 1]; + + /* Multiply existing decimal digits by 2^DIG_SIZE and add hi */ + for (size_t j = 0; j < decimal_size; j++) { + mp_dbl_limb z = ((mp_dbl_limb)decimal_out[j] << DIG_SIZE) | hi; + hi = (mp_limb)(z / DECIMAL_BASE_CONV); + decimal_out[j] = (mp_limb)(z - (mp_dbl_limb)hi * DECIMAL_BASE_CONV); + } + + /* Handle remaining carries into new decimal digits */ + while (hi) { + decimal_out[decimal_size++] = hi % DECIMAL_BASE_CONV; + hi /= DECIMAL_BASE_CONV; + } + } + + return decimal_size; +} + +/* + * Parse decimal string into decimal-base array. + * Parses in chunks of DECIMAL_DIGITS_CONV digits from right to left. + * Returns number of decimal base digits written to decimal_out. + * Also returns the effective string length (excluding underscores) via *effective_len. + */ +static size_t +mpz_str_to_decimal(const char *s, mrb_int len, mp_limb *decimal_out, mrb_int *effective_len) +{ + /* First pass: count effective digits (excluding underscores) */ + mrb_int eff_len = 0; + for (mrb_int i = 0; i < len; i++) { + if (s[i] != '_') eff_len++; + } + *effective_len = eff_len; + + if (eff_len == 0) return 0; + + /* Parse from right to left in chunks of DECIMAL_DIGITS_CONV digits */ + size_t decimal_size = 0; + mrb_int pos = len - 1; + mrb_int digits_in_chunk = 0; + mp_limb chunk = 0; + mp_limb multiplier = 1; + + while (pos >= 0) { + char c = s[pos--]; + if (c == '_') continue; + + /* Accumulate digit into chunk */ + mp_limb digit = (mp_limb)(c - '0'); + chunk += digit * multiplier; + multiplier *= 10; + digits_in_chunk++; + + if (digits_in_chunk == DECIMAL_DIGITS_CONV) { + decimal_out[decimal_size++] = chunk; + chunk = 0; + multiplier = 1; + digits_in_chunk = 0; + } + } + + /* Handle remaining partial chunk (MSB group) */ + if (digits_in_chunk > 0) { + decimal_out[decimal_size++] = chunk; + } + + return decimal_size; +} + +/* + * Convert decimal-base representation to binary limbs. + * decimal[]: array of values < DECIMAL_BASE_CONV, LSB first + * This is the reverse of mpz_base_convert_decimal. + * Returns number of binary limbs written to limbs_out. + */ +static size_t +mpz_decimal_to_binary(const mp_limb *decimal, size_t decimal_size, mp_limb *limbs_out) +{ + if (decimal_size == 0) return 0; + + size_t limb_size = 0; + + /* Process decimal digits from MSB to LSB */ + for (size_t i = decimal_size; i > 0; i--) { + mp_limb d = decimal[i - 1]; + + /* Multiply existing binary limbs by DECIMAL_BASE_CONV and add d */ + mp_dbl_limb carry = d; + for (size_t j = 0; j < limb_size; j++) { + mp_dbl_limb z = (mp_dbl_limb)limbs_out[j] * DECIMAL_BASE_CONV + carry; + limbs_out[j] = (mp_limb)z; + carry = z >> DIG_SIZE; + } + + /* Handle remaining carry into new limbs */ + while (carry) { + limbs_out[limb_size++] = (mp_limb)carry; + carry >>= DIG_SIZE; + } + } + + return limb_size; +} + +/* + * Convert decimal base representation to string. + * decimal[]: array of values < DECIMAL_BASE_CONV, LSB first + * Returns pointer past last character written. + */ +static char* +mpz_decimal_to_str(const mp_limb *decimal, size_t decimal_size, char *str) +{ + if (decimal_size == 0) { + *str++ = '0'; + return str; + } + + char *s = str; + + /* Output all but MSB group with exactly DECIMAL_DIGITS_CONV digits each */ + /* Use digit_pairs for 2 digits at a time (Lemire's small table technique) */ + for (size_t i = 0; i < decimal_size - 1; i++) { + mp_limb d = decimal[i]; + for (int j = 0; j < DECIMAL_DIGITS_CONV / 2; j++) { + mp_limb pair = d % 100; + d /= 100; + *s++ = digit_pairs[pair * 2 + 1]; + *s++ = digit_pairs[pair * 2]; + } +#if (DECIMAL_DIGITS_CONV & 1) + /* Handle odd digit (9th digit for 32-bit limbs) */ + *s++ = '0' + (char)d; +#endif + } + + /* Output MSB group without leading zeros, 2 digits at a time */ + mp_limb d = decimal[decimal_size - 1]; + do { + if (d >= 10) { + mp_limb pair = d % 100; + d /= 100; + *s++ = digit_pairs[pair * 2 + 1]; + *s++ = digit_pairs[pair * 2]; + } + else { + *s++ = '0' + (char)d; + break; + } + } while (d > 0); + + return s; +} + +static char* +mpz_get_str(mpz_ctx_t *ctx, char *s, mrb_int sz, mrb_int base, mpz_t *x) +{ + mrb_state *mrb = MPZ_MRB(ctx); + + mrb_assert(2 <= base && base <= 36); + if (zero_p(x)) { + *s='0'; + *(s+1)='\0'; + return s; + } + + char *ps = s; + char *se = s+sz; + int xlen = (int)digits(x); + + if ((base & (base - 1)) == 0) { // base is a power of 2 + int shift = 0; + while (((uint64_t)1 << shift) < (uint64_t)base) shift++; + mp_limb mask = (mp_limb)base - 1; + mp_dbl_limb value = 0; + int bits = 0; + + /* Process all limbs */ + for (int i = 0; i < xlen; i++) { + value |= (mp_dbl_limb)x->p[i] << bits; + bits += DIG_SIZE; + while (bits >= shift) { + mp_limb digit = value & mask; + value >>= shift; + bits -= shift; + + if (digit < 10) *s++ = '0' + digit; + else *s++ = 'a' + digit - 10; + } + } + + /* Handle any remaining bits */ + while (bits > 0) { + mp_limb digit = value & mask; + value >>= shift; + bits -= shift; + + if (digit < 10) *s++ = '0' + digit; + else *s++ = 'a' + digit - 10; + } + } + else { + /* Check for overflow in size calculation */ + if ((size_t)xlen > SIZE_MAX / sizeof(mp_limb)) { + mrb_raise(mrb, E_RUNTIME_ERROR, "bigint size too large for string conversion"); + } + + /* Use D&C algorithm for large base-10 numbers */ + size_t est_digits = (size_t)(xlen * DIG_SIZE * 30103UL / 100000UL) + 2; + if (base == 10 && est_digits > DC_GET_STR_THRESHOLD) { + return mpz_get_str_dc(ctx, s, x); + } + + if (base == 10) { + /* Use CPython-style base conversion for decimal (faster) */ + size_t decimal_alloc = (size_t)xlen * 2 + 2; + mp_limb *decimal = (mp_limb*)mrb_malloc(mrb, decimal_alloc * sizeof(mp_limb)); + memset(decimal, 0, decimal_alloc * sizeof(mp_limb)); + + size_t dsz = mpz_base_convert_decimal(x->p, (size_t)xlen, decimal); + s = mpz_decimal_to_str(decimal, dsz, s); + + mrb_free(mrb, decimal); + } + else { + /* Use division-style for other bases */ + mp_limb *t = (mp_limb*)mrb_malloc(mrb, xlen * sizeof(mp_limb)); + + mp_limb *tend = t + xlen; + memcpy(t, x->p, xlen * sizeof(mp_limb)); + mp_limb b2 = base_limit[base-3]; + + for (;;) { + mp_limb *d = tend; + mp_dbl_limb a = 0; + while (--d >= t) { + mp_limb d0 = *d; + a = (a<=(mp_limb)base; b/=(mp_limb)base) { + char a0 = (char)(a % base); + if (a0 < 10) a0 += '0'; + else a0 += 'a' - 10; + if (s == se) break; + *s++ = a0; + a /= base; + } + + // check if number is zero + for (d = t; d < tend; d++) { + if (*d != 0) break; + } + if (d == tend) break; + } + mrb_free(mrb, t); + } + } + + while (pssn < 0) { + *s++ = '-'; + } + + /* reverse string */ + for (char *u = ps,*v=s-1; u < v; u++,v--) { + char temp = *u; + *u = *v; + *v = temp; + } + *s = '\0'; /* null termination */ + return ps; +} + +static int +mpz_get_int(mpz_t *y, mrb_int *v) +{ + if (zero_p(y)) { + *v = 0; + return TRUE; + } + +#ifdef MRB_NO_MPZ64BIT + /* When using 16-bit limbs, we need to handle larger accumulation */ + mrb_uint i = 0; + mp_limb *d = y->p + y->sz; + + while (d-- > y->p) { + /* Check for overflow before shifting */ + if (i > (mrb_uint)(MRB_INT_MAX >> DIG_SIZE)) { + return FALSE; + } + i = (i << DIG_SIZE) | *d; + } + + if (i > (mrb_uint)MRB_INT_MAX) { + return FALSE; + } +#else + /* Original logic for 32-bit limbs */ + mp_dbl_limb i = 0; + mp_limb *d = y->p + y->sz; + + while (d-- > y->p) { + if (HIGH(i) != 0) { + /* will overflow */ + return FALSE; + } + i = (i << DIG_SIZE) | *d; + } + if (i > MRB_INT_MAX) { + /* overflow */ + return FALSE; + } +#endif + + if (y->sn < 0) { + *v = -(mrb_int)i; + } + else { + *v = (mrb_int)i; + } + return TRUE; +} + +/* Maximum bits for bigint operations (128MB of limb data) */ +#define MRB_BIGINT_BIT_LIMIT (128 * 1024 * 1024 * (size_t)8) + +static void +mpz_mul_2exp(mpz_ctx_t *ctx, mpz_t *z, mpz_t *x, mrb_int e) +{ + if (e==0) { + mpz_set(ctx, z, x); + trim(z); + } + else { + short sn = x->sn; + size_t digs = e / DIG_SIZE; + size_t bs = e % DIG_SIZE; + mpz_t y; + + /* Check for result size overflow before allocation */ + size_t result_bits = (size_t)x->sz * DIG_SIZE + (size_t)e; + if ((size_t)e > MRB_BIGINT_BIT_LIMIT || result_bits > MRB_BIGINT_BIT_LIMIT) { + mrb_state *mrb = MPZ_MRB(ctx); + mrb_raise(mrb, E_RANGE_ERROR, "shift width too large"); + } + mpz_init_heap(ctx, &y, x->sz+digs); + for (size_t i=0;isz;i++) + y.p[i+digs] = x->p[i]; + if (bs) { + ulshift(ctx, z, &y, bs); + mpz_clear(ctx, &y); + } + else { + mpz_move(ctx, z, &y); + trim(z); + } + if (uzero_p(z)) + z->sn = 0; + else + z->sn = sn; + } +} + +static void +mpz_div_2exp(mpz_ctx_t *ctx, mpz_t *z, mpz_t *x, mrb_int e) +{ + short sn = x->sn; + if (e == 0) { + if (z != x) { + mpz_clear(ctx, z); + mpz_init_heap(ctx, z, x->sz); + mpz_set(ctx, z, x); + trim(z); + } + /* else: z == x, nothing to do */ + } + else { + size_t digs = e / DIG_SIZE; + size_t bs = e % DIG_SIZE; + + /* If shifting by more limbs than we have, result is zero */ + if (digs >= x->sz) { + zero(z); + return; + } + + size_t new_size = x->sz - digs; + + /* In-place optimization: when z == x, no allocation needed */ + if (z == x) { + /* Shift by whole limbs: memmove in place */ + if (digs > 0) { + memmove(z->p, z->p + digs, new_size * sizeof(mp_limb)); + } + z->sz = new_size; + /* Shift by remaining bits: mpn_rshift supports in-place */ + if (bs) { + mpn_rshift(z->p, z->p, new_size, (unsigned int)bs); + } + trim(z); + if (uzero_p(z)) + z->sn = 0; + else + z->sn = sn; + return; + } + + /* General case: z != x, use temporary */ + mpz_t y; + mpz_init_temp(ctx, &y, new_size); + mpz_realloc(ctx, &y, new_size); + for (size_t i = 0; i < new_size; i++) + y.p[i] = x->p[i + digs]; + if (bs) { + mpz_clear(ctx, z); + mpz_init_heap(ctx, z, new_size); + urshift(ctx, z, &y, bs); + mpz_clear(ctx, &y); + } + else { + mpz_move(ctx, z, &y); + trim(z); + } + if (uzero_p(z)) + z->sn = 0; + else { + z->sn = sn; + } + } +} + +static void +mpz_neg(mpz_ctx_t *ctx, mpz_t *x, mpz_t *y) +{ + /* In-place optimization: just flip the sign */ + if (x == y) { + x->sn = -(y->sn); return; } - sn3 = sn1*sn2; - if (sn3 > 0) - r->sn = sn1; - else if (sn1 < 0 && sn2 > 0) { - r->sn = 1; - mpz_sub(mrb, r, y, r); + mpz_init_heap(ctx, x, y->sz); + mpz_set(ctx, x, y); + trim(x); + x->sn = -(y->sn); +} + +/* Fast modular reduction by power of 2: z = x mod 2^e */ +static void +mpz_mod_2exp(mpz_ctx_t *ctx, mpz_t *z, mpz_t *x, mrb_int e) +{ + if (e <= 0) { + mpz_clear(ctx, z); + mpz_init(ctx, z); + zero(z); + return; + } + + size_t eint = e / DIG_SIZE; + size_t bs = e % DIG_SIZE; + size_t sz = x->sz; + + if (eint >= sz) { + /* x < 2^e, so x mod 2^e = x */ + if (z != x) { + mpz_clear(ctx, z); + mpz_init_heap(ctx, z, x->sz); + mpz_set(ctx, z, x); + trim(z); + } + return; + } + + /* Need to mask off high bits */ + size_t result_sz = eint + (bs > 0 ? 1 : 0); + + /* Handle the case where z == x (in-place operation) */ + if (z == x) { + /* In-place modification */ + z->sz = result_sz; + + /* Mask partial limb if needed */ + if (bs > 0) { + mp_limb mask = (1UL << bs) - 1; + z->p[eint] &= mask; + } } else { - r->sn = 1; - mpz_add(mrb, r, y, r); + /* z != x, need to copy */ + mpz_clear(ctx, z); + mpz_init_heap(ctx, z, result_sz); + z->sn = x->sn; + z->sz = result_sz; + + /* Copy full limbs */ + for (size_t i = 0; i < eint; i++) { + z->p[i] = x->p[i]; + } + + /* Mask partial limb if needed */ + if (bs > 0) { + mp_limb mask = (1UL << bs) - 1; + z->p[eint] = x->p[eint] & mask; + } } + + trim(z); } +#define make_2comp(v,c) do { v=~(v)+(c); c=((v)==0 && (c));} while (0) + static void -mpz_mdivmod(mrb_state *mrb, mpz_t *q, mpz_t *r, mpz_t *x, mpz_t *y) +mpz_and(mpz_ctx_t *ctx, mpz_t *z, mpz_t *x, mpz_t *y) { - short sn1 = x->sn, sn2 = y->sn, qsign; + if (zero_p(x) || zero_p(y)) { + mpz_init(ctx, z); + zero(z); + return; + } + mrb_assert(x->sz > 0 || y->sz > 0); - if (sn1 == 0) { - zero(q); - zero(r); + /* Fast path: both positive - just AND the limbs */ + if (x->sn > 0 && y->sn > 0) { + size_t min_sz = (x->sz < y->sz) ? x->sz : y->sz; + mpz_init_heap(ctx, z, min_sz); + for (size_t i = 0; i < min_sz; i++) { + z->p[i] = x->p[i] & y->p[i]; + } + z->sn = 1; + trim(z); return; } - udiv(mrb, q, r, x, y); - qsign = q->sn = sn1*sn2; - if (uzero(r)) { - /* q != 0, since q=r=0 would mean x=0, which was tested above */ - r->sn = 0; + + /* Slow path: at least one negative operand */ + size_t max_sz = (x->sz > y->sz) ? x->sz : y->sz; + mpz_init_heap(ctx, z, max_sz); + z->sn = (x->sn < 0 && y->sn < 0) ? -1 : 1; + + char c1 = 1, c2 = 1, c3 = 1; + for (size_t i = 0; i < max_sz; i++) { + mp_limb xv = (i < x->sz) ? x->p[i] : 0; + mp_limb yv = (i < y->sz) ? y->p[i] : 0; + + if (x->sn < 0) make_2comp(xv, c1); + if (y->sn < 0) make_2comp(yv, c2); + mp_limb zv = xv & yv; + if (z->sn < 0) make_2comp(zv, c3); + z->p[i] = zv; + } + trim(z); +} + +static void +mpz_or(mpz_ctx_t *ctx, mpz_t *z, mpz_t *x, mpz_t *y) +{ + if (zero_p(x)) { + mpz_init_heap(ctx, z, y->sz); + mpz_set(ctx, z, y); + trim(z); return; } - if (q->sn > 0) - r->sn = sn1; - else if (sn1 < 0 && sn2 > 0) { - r->sn = 1; - mpz_sub(mrb, r, y, r); + if (zero_p(y)) { + mpz_init_heap(ctx, z, x->sz); + mpz_set(ctx, z, x); + trim(z); + return; + } + mrb_assert(x->sz > 0 || y->sz > 0); + + size_t max_sz = (x->sz > y->sz) ? x->sz : y->sz; + + /* Fast path: both positive - just OR the limbs */ + if (x->sn > 0 && y->sn > 0) { + mpz_init_heap(ctx, z, max_sz); + size_t min_sz = (x->sz < y->sz) ? x->sz : y->sz; + for (size_t i = 0; i < min_sz; i++) { + z->p[i] = x->p[i] | y->p[i]; + } + /* Copy remaining limbs from the longer operand */ + if (x->sz > y->sz) { + for (size_t i = min_sz; i < max_sz; i++) z->p[i] = x->p[i]; + } + else { + for (size_t i = min_sz; i < max_sz; i++) z->p[i] = y->p[i]; + } + z->sn = 1; + trim(z); + return; + } + + /* Slow path: at least one negative operand */ + mpz_init_heap(ctx, z, max_sz); + z->sn = (x->sn == y->sn) ? x->sn : -1; + + char c1 = 1, c2 = 1, c3 = 1; + for (size_t i = 0; i < max_sz; i++) { + mp_limb xv = (i < x->sz) ? x->p[i] : 0; + mp_limb yv = (i < y->sz) ? y->p[i] : 0; + + if (x->sn < 0) make_2comp(xv, c1); + if (y->sn < 0) make_2comp(yv, c2); + mp_limb zv = xv | yv; + if (z->sn < 0) make_2comp(zv, c3); + z->p[i] = zv; + } + trim(z); +} + +static void +mpz_xor(mpz_ctx_t *ctx, mpz_t *z, mpz_t *x, mpz_t *y) +{ + if (zero_p(x)) { + mpz_init_heap(ctx, z, y->sz); + mpz_set(ctx, z, y); + trim(z); + return; + } + if (zero_p(y)) { + mpz_init_heap(ctx, z, x->sz); + mpz_set(ctx, z, x); + trim(z); + return; + } + mrb_assert(x->sz > 0 || y->sz > 0); + + size_t max_sz = (x->sz > y->sz) ? x->sz : y->sz; + + /* Fast path: both positive - just XOR the limbs */ + if (x->sn > 0 && y->sn > 0) { + mpz_init_heap(ctx, z, max_sz); + size_t min_sz = (x->sz < y->sz) ? x->sz : y->sz; + for (size_t i = 0; i < min_sz; i++) { + z->p[i] = x->p[i] ^ y->p[i]; + } + /* Copy remaining limbs from the longer operand (XOR with 0) */ + if (x->sz > y->sz) { + for (size_t i = min_sz; i < max_sz; i++) z->p[i] = x->p[i]; + } + else { + for (size_t i = min_sz; i < max_sz; i++) z->p[i] = y->p[i]; + } + z->sn = 1; + trim(z); + return; + } + + /* Slow path: at least one negative operand */ + mpz_init_heap(ctx, z, max_sz); + z->sn = (x->sn == y->sn) ? 1 : -1; + + char c1 = 1, c2 = 1, c3 = 1; + for (size_t i = 0; i < max_sz; i++) { + mp_limb xv = (i < x->sz) ? x->p[i] : 0; + mp_limb yv = (i < y->sz) ? y->p[i] : 0; + + if (x->sn < 0) make_2comp(xv, c1); + if (y->sn < 0) make_2comp(yv, c2); + mp_limb zv = xv ^ yv; + if (z->sn < 0) make_2comp(zv, c3); + z->p[i] = zv; + } + trim(z); +} + +static void +mpz_pow(mpz_ctx_t *ctx, mpz_t *zz, mpz_t *x, mrb_int e) +{ + if (e == 0) { + mpz_init_set_int(ctx, zz, 1L); + return; + } + + mrb_uint mask = 1ULL << (sizeof(mrb_int) * 8 - 1); + while (mask != 0 && !(mask & e)) { + mask >>= 1; + } + + /* Set initial value to x for exponentiation */ + mpz_init_set(ctx, zz, x); + + if (mask == 0) { /* e is 0 or 1 */ + if (e == 0) mpz_set_int(ctx, zz, 1L); + return; + } + + mask >>= 1; + + /* Pre-allocate a single temporary variable */ + mpz_t temp; + mpz_init(ctx, &temp); + + for (; mask != 0; mask >>= 1) { + /* squaring: temp = zz * zz */ + mpz_mul(ctx, &temp, zz, zz); + + if (e & mask) { + /* multiplication: zz = temp * x */ + mpz_mul(ctx, zz, &temp, x); + } + else { + /* move result: zz = temp */ + mpz_move(ctx, zz, &temp); + } + } + mpz_clear(ctx, &temp); +} + +/* Forward declaration for Montgomery modular exponentiation */ +static void mpz_powm_montgomery(mpz_ctx_t *ctx, mpz_t *result, + const mpz_t *base, const mpz_t *exp, const mpz_t *n); + +static void +mpz_powm(mpz_ctx_t *ctx, mpz_t *zz, mpz_t *x, mpz_t *ex, mpz_t *n) +{ + /* Handle special cases */ + if (zero_p(ex) || uzero_p(ex)) { + mpz_set_int(ctx, zz, 1); + return; + } + + if (ex->sn < 0) { + return; + } + + /* Check modulus size before allocating large temporaries. */ + { + size_t mod_bits = (size_t)n->sz * DIG_SIZE; + if (mod_bits > MRB_BIGINT_BIT_LIMIT / 2) { + mrb_state *mrb = MPZ_MRB(ctx); + mrb_raise(mrb, E_RANGE_ERROR, "modulus too large"); + } + } + + /* + * Use Montgomery reduction for odd moduli >= 4 limbs. + * Montgomery is faster because it replaces division with multiplication. + * For smaller moduli, Barrett has less setup overhead. + */ + if (n->sz >= 4 && (n->p[0] & 1) == 1) { + mpz_powm_montgomery(ctx, zz, x, ex, n); + return; + } + + /* + * Fall back to Barrett reduction for even moduli or small moduli. + * Barrett works for any modulus and is faster than division. + */ + size_t pool_state = pool_save(ctx); + mpz_t t, b; + mpz_init_set_int(ctx, &t, 1); + mpz_init_set(ctx, &b, x); + + /* + * Use Barrett reduction for moduli >= 4 limbs. + * For smaller moduli (2-3 limbs), simple division is faster because + * Barrett's 2 multiplications + overhead exceed 1 division cost. + */ + mpz_t mu, temp; + int use_barrett = (n->sz >= 4); + mpz_init_temp(ctx, &temp, n->sz * 2); /* For intermediate calculations */ + if (use_barrett) { + mpz_init_temp(ctx, &mu, n->sz + 1); /* Barrett parameter */ + mpz_barrett_mu(ctx, &mu, n); + } + + size_t len = digits(ex); + for (size_t i = 0; i < len; i++) { + mp_limb e = ex->p[i]; + for (size_t j = 0; j < sizeof(mp_limb) * 8; j++) { + if ((e & 1) == 1) { + mpz_mul(ctx, &temp, &t, &b); + if (use_barrett) { + mpz_barrett_reduce(ctx, &t, &temp, n, &mu); + } + else { + mpz_mod(ctx, &t, &temp, n); + } + } + e >>= 1; + mpz_mul(ctx, &temp, &b, &b); + if (use_barrett) { + mpz_barrett_reduce(ctx, &b, &temp, n, &mu); + } + else { + mpz_mod(ctx, &b, &temp, n); + } + } } - else { - r->sn = 1; - mpz_add(mrb, r, y, r); + + mpz_move(ctx, zz, &t); + mpz_clear(ctx, &t); + mpz_clear(ctx, &b); + mpz_clear(ctx, &temp); + if (use_barrett) { + mpz_clear(ctx, &mu); } - if (uzero(q)) - q->sn = 0; - /* now if r != 0 and q < 0 we need to round q towards -inf */ - if (!uzero(r) && qsign < 0) - mpz_sub_int(mrb, q, q, 1); + pool_restore(ctx, pool_state); } static void -mpz_mod(mrb_state *mrb, mpz_t *r, mpz_t *x, mpz_t *y) +mpz_powm_i(mpz_ctx_t *ctx, mpz_t *zz, mpz_t *x, mrb_int ex, mpz_t *n) { - mpz_t q; - short sn = x->sn; - mpz_init(mrb, &q); - if (x->sn == 0) { - zero(r); + if (ex == 0) { + mpz_set_int(ctx, zz, 1); return; } - udiv(mrb, &q, r, x, y); - r->sn = sn; - if (uzero(r)) - r->sn = 0; - mpz_clear(mrb, &q); -} -static mrb_int -mpz_cmp(mrb_state *mrb, mpz_t *x, mpz_t *y) -{ - int abscmp; - if (x->sn < 0 && y->sn > 0) - return (-1); - if (x->sn > 0 && y->sn < 0) - return 1; - abscmp=ucmp(x, y); - if (x->sn >=0 && y->sn >=0) - return abscmp; - return (-abscmp); // if (x->sn <=0 && y->sn <=0) -} + if (ex < 0) { + return; + } -/* 2<=base<=36 - this overestimates the optimal value, which is OK */ -static size_t -mpz_sizeinbase(mpz_t *x, mrb_int base) -{ - size_t i, j; + /* Check modulus size before allocating large temporaries. + * Both Barrett and Montgomery need 2^(2k) internally, + * which would exceed MRB_BIGINT_BIT_LIMIT for large moduli. */ + { + size_t mod_bits = (size_t)n->sz * DIG_SIZE; + if (mod_bits > MRB_BIGINT_BIT_LIMIT / 2) { + mrb_state *mrb = MPZ_MRB(ctx); + mrb_raise(mrb, E_RANGE_ERROR, "modulus too large"); + } + } - size_t bits = digits(x) * DIG_SIZE; - mrb_assert(2 <= base && base <= 36); + /* + * Use Montgomery reduction for odd moduli (common in cryptography). + * Convert integer exponent to mpz_t and use the main Montgomery implementation. + */ + if (n->sz >= 2 && (n->p[0] & 1) == 1) { + mpz_t exp_mpz; + mpz_init_set_int(ctx, &exp_mpz, ex); + mpz_powm_montgomery(ctx, zz, x, &exp_mpz, n); + mpz_clear(ctx, &exp_mpz); + return; + } - if (x->sz == 0) return 0; - for (j=0,i=1; i<=(size_t)base; i*=2,j++) - ; - return bits/(j-1)+1; + /* + * Fall back to Barrett reduction for even moduli or small moduli. + */ + size_t pool_state = pool_save(ctx); + mpz_t t, b; + mpz_init_set_int(ctx, &t, 1); + mpz_init_set(ctx, &b, x); + + /* + * Use Barrett reduction for moduli >= 4 limbs. + * For smaller moduli (2-3 limbs), simple division is faster. + */ + mpz_t mu, temp; + int use_barrett = (n->sz >= 4); + + mpz_init_temp(ctx, &temp, n->sz * 2); /* For intermediate calculations */ + if (use_barrett) { + mpz_init_temp(ctx, &mu, n->sz + 1); /* Barrett parameter */ + mpz_barrett_mu(ctx, &mu, n); + } + + while (ex > 0) { + if ((ex & 1) == 1) { + mpz_mul(ctx, &temp, &t, &b); + if (use_barrett) { + mpz_barrett_reduce(ctx, &t, &temp, n, &mu); + } + else { + mpz_mod(ctx, &t, &temp, n); + } + } + ex >>= 1; + if (ex > 0) { /* Skip final squaring when ex becomes 0 */ + mpz_mul(ctx, &temp, &b, &b); + if (use_barrett) { + mpz_barrett_reduce(ctx, &b, &temp, n, &mu); + } + else { + mpz_mod(ctx, &b, &temp, n); + } + } + } + + mpz_move(ctx, zz, &t); + mpz_clear(ctx, &t); + mpz_clear(ctx, &b); + mpz_clear(ctx, &temp); + if (use_barrett) { + mpz_clear(ctx, &mu); + } + pool_restore(ctx, pool_state); } +/* Helper functions for pool-based GCD operations */ static int -mpz_init_set_str(mrb_state *mrb, mpz_t *x, const char *s, mrb_int len, mrb_int base) -{ - int retval = 0; - mpz_t t, m, bb; - short sn; - uint8_t k; - mpz_init(mrb, x); - mpz_init_set_int(mrb, &m, 1); - mpz_init(mrb, &t); - zero(x); - if (*s == '-') { - sn = -1; s++; +mpz_abs_copy(mpz_ctx_t *ctx, mpz_t *result, mpz_t *operand) { + if (!operand || operand->sz == 0) { + result->sz = 0; + result->sn = 0; + return 1; } - else - sn = 1; - mpz_init_set_int(mrb, &bb, base); - for (mrb_int i = len-1; i>=0; i--) { - if (s[i]=='_') continue; - if (s[i] >= '0' && s[i] <= '9') - k = (uint8_t)s[i] - (uint8_t)'0'; - else if (s[i] >= 'A' && s[i] <= 'Z') - k = (uint8_t)s[i] - (uint8_t)'A'+10; - else if (s[i] >= 'a' && s[i] <= 'z') - k = (uint8_t)s[i] - (uint8_t)'a'+10; - else { - retval = (-1); - break; - } - if (k >= base) { - retval = (-1); - break; - } - mpz_mul_int(mrb, &t, &m, (mrb_int)k); - mpz_add(mrb, x, x, &t); - mpz_mul(mrb, &m, &m, &bb); + + /* Copy limbs */ + for (size_t i = 0; i < operand->sz && i < result->sz; i++) { + result->p[i] = operand->p[i]; } - x->sn = sn; - mpz_clear(mrb, &m); - mpz_clear(mrb, &bb); - mpz_clear(mrb, &t); - return retval; + result->sz = (operand->sz < result->sz) ? operand->sz : result->sz; + result->sn = (operand->sn < 0) ? -operand->sn : operand->sn; /* Always positive */ + + return 1; } -static char* -mpz_get_str(mrb_state *mrb, char *s, mrb_int sz, mrb_int base, mpz_t *x) +static void +mpz_abs(mpz_ctx_t *ctx, mpz_t *x, mpz_t *y) { - mrb_assert(2 <= base && base <= 36); - if (uzero(x)) { - *s='0'; - *(s+1)='\0'; - return s; + /* In-place optimization: just make sign positive */ + if (x == y) { + if (x->sn < 0) x->sn = -x->sn; + return; } + mpz_init_heap(ctx, x, y->sz); + mpz_abs_copy(ctx, x, y); +} - char *ps = s; - char *se = s+sz; - int xlen = digits(x); - mp_limb *t = (mp_limb*)mrb_malloc(mrb, xlen*sizeof(mp_limb)); - mp_limb *tend = t + xlen; - memcpy(t, x->p, xlen*sizeof(mp_limb)); - mp_limb b2 = base; - const int blim = (sizeof(mp_limb)<4)?(base<=10?4:3):(base<=10?9:5); - for (int i=1; i>= 1; + b >>= 1; + shift++; } - for (;;) { - mp_limb *d = tend; - mp_dbl_limb a = 0; - while (--d >= t) { - mp_limb d0 = *d; - a = (a<>= 1; + } - // convert to character - for (int i=0; i>= 1; } - // check if number is zero - for (d = t; d < tend; d++) { - if (*d != 0) break; + /* Now both a and b are odd. Ensure a >= b */ + if (a < b) { + mp_limb temp = a; + a = b; + b = temp; } - if (d == tend) goto done; - } - done: - while (pssn < 0) { - *s++ = '-'; + /* Replace b with (b - a) */ + b = b - a; + + } while (b != 0); + + /* Restore common factors of 2 */ + return a << shift; +} + +/* Count trailing zero bits in a multi-precision integer */ +static size_t +mpz_trailing_zeros(mpz_t *x) +{ + if (zero_p(x) || x->sz == 0) return 0; + + size_t zeros = 0; + + /* Count complete zero limbs */ + size_t i = 0; + while (i < x->sz && x->p[i] == 0) { + zeros += DIG_SIZE; + i++; } - /* reverse string */ - for (char *u = ps,*v=s-1; u < v; ++u,--v) { - char temp = *u; - *u = *v; - *v = temp; + /* Count trailing zeros in first non-zero limb */ + if (i < x->sz) { + mp_limb limb = x->p[i]; +#if (defined(__GNUC__) || __has_builtin(__builtin_ctzll)) + if (sizeof(mp_limb) == sizeof(unsigned long long)) { + zeros += __builtin_ctzll(limb); + } + else if (sizeof(mp_limb) == sizeof(unsigned long)) { + zeros += __builtin_ctzl(limb); + } + else { + zeros += __builtin_ctz(limb); + } +#else + /* Fallback bit counting */ + while ((limb & 1) == 0) { + limb >>= 1; + zeros++; + } +#endif } - *s = '\0'; /* null termination */ - return ps; + + return zeros; } +/* Check if a number is a power of 2 */ static int -mpz_get_int(mpz_t *y, mrb_int *v) +mpz_power_of_2_p(mpz_t *x) { - if (uzero(y)) { - *v = 0; - return TRUE; - } + if (zero_p(x) || x->sz == 0) return 0; - mp_dbl_limb i = 0; - mp_limb *d = y->p + y->sz; + /* Count non-zero limbs */ + size_t non_zero_limbs = 0; + size_t non_zero_index = 0; - while (d-- > y->p) { - if (HIGH(i) != 0) { - /* will overflow */ - return FALSE; + for (size_t i = 0; i < x->sz; i++) { + if (x->p[i] != 0) { + non_zero_limbs++; + non_zero_index = i; + if (non_zero_limbs > 1) return 0; /* More than one non-zero limb */ } - i = (i << DIG_SIZE) | *d; - } - if (i > MRB_INT_MAX) { - /* overflow */ - return FALSE; } - if (y->sn < 0) { - *v = -(mrb_int)i; - } - else { - *v = (mrb_int)i; - } - return TRUE; + + if (non_zero_limbs == 0) return 0; /* All zero */ + if (non_zero_limbs > 1) return 0; /* Multiple non-zero limbs */ + + /* Check if the single non-zero limb is a power of 2 */ + mp_limb limb = x->p[non_zero_index]; + return (limb != 0) && ((limb & (limb - 1)) == 0); } +/* Binary GCD (Stein's algorithm): factor out common powers of 2, + then iterate on odd operands with subtract + trailing-zero shift. + For heavily unbalanced pairs (one operand has at least two more + limbs than the other) a single Euclidean step via mpz_mod replaces + many Stein subtracts. */ static void -mpz_mul_2exp(mrb_state *mrb, mpz_t *z, mpz_t *x, mrb_int e) +mpz_gcd(mpz_ctx_t *ctx, mpz_t *gg, mpz_t *aa, mpz_t *bb) { - if (e==0) - mpz_set(mrb, z, x); - else { - short sn = x->sn; - size_t digs = (e / DIG_SIZE); - size_t bs = (e % (DIG_SIZE)); - mpz_t y; + size_t pool_state = pool_save(ctx); + mpz_t a, b; + size_t shift; + size_t a_zeros; + size_t b_zeros; + + /* Handle special cases */ + if (zero_p(aa)) { + mpz_abs(ctx, gg, bb); + goto cleanup; + } + if (zero_p(bb)) { + mpz_abs(ctx, gg, aa); + goto cleanup; + } - mpz_init(mrb, &y); - mpz_realloc(mrb, &y, x->sz+digs); - for (size_t i=0;isz;i++) - y.p[i+digs] = x->p[i]; - if (bs) { - ulshift(mrb, z, &y, bs); + /* Fast path for single-limb numbers */ + if (aa->sz <= 1 && bb->sz <= 1) { + mp_limb a_limb = (aa->sz == 0) ? 0 : aa->p[0]; + mp_limb b_limb = (bb->sz == 0) ? 0 : bb->p[0]; + mp_limb result = limb_gcd(a_limb, b_limb); + + mpz_init(ctx, gg); + if (result == 0) { + gg->sn = 0; + gg->sz = 0; } else { - mpz_move(mrb, z, &y); + mpz_realloc(ctx, gg, 1); + gg->p[0] = result; + gg->sn = 1; } - z->sn = sn; - mpz_clear(mrb, &y); + goto cleanup; } -} -static void -mpz_div_2exp(mrb_state *mrb, mpz_t *z, mpz_t *x, mrb_int e) -{ - short sn = x->sn; - if (e==0) - mpz_set(mrb, z, x); - else { - size_t digs = (e / DIG_SIZE); - size_t bs = (e % (DIG_SIZE)); - mpz_t y; + /* Fast path for powers of 2 */ + if (mpz_power_of_2_p(aa)) { + a_zeros = mpz_trailing_zeros(aa); + b_zeros = mpz_trailing_zeros(bb); + size_t min_zeros = (a_zeros < b_zeros) ? a_zeros : b_zeros; - mpz_init(mrb, &y); - mpz_realloc(mrb, &y, x->sz-digs); - for (size_t i=0; i < x->sz-digs; i++) - y.p[i] = x->p[i+digs]; - if (bs) { - urshift(mrb, z, &y, bs); - mpz_clear(mrb, &y); + mpz_init_set_int(ctx, gg, 1); + mpz_mul_2exp(ctx, gg, gg, min_zeros); + goto cleanup; + } + if (mpz_power_of_2_p(bb)) { + a_zeros = mpz_trailing_zeros(aa); + b_zeros = mpz_trailing_zeros(bb); + size_t min_zeros = (a_zeros < b_zeros) ? a_zeros : b_zeros; + + mpz_init_set_int(ctx, gg, 1); + mpz_mul_2exp(ctx, gg, gg, min_zeros); + goto cleanup; + } + + mpz_init(ctx, &a); + mpz_abs(ctx, &a, aa); + mpz_init(ctx, &b); + mpz_abs(ctx, &b, bb); + + shift = 0; + a_zeros = mpz_trailing_zeros(&a); + b_zeros = mpz_trailing_zeros(&b); + shift = (a_zeros < b_zeros) ? a_zeros : b_zeros; + + mpz_div_2exp(ctx, &a, &a, a_zeros); + mpz_div_2exp(ctx, &b, &b, b_zeros); + + /* Stein main loop. Invariant: a and b are positive and odd. + Euclidean fallback when b has >=2 more limbs than a. */ + while (!zero_p(&b)) { + if (mpz_cmp(ctx, &a, &b) > 0) { + mpz_swap(&a, &b); } - else { - mpz_move(mrb, z, &y); + if (b.sz >= a.sz + 2) { + mpz_t temp; + mpz_init_temp(ctx, &temp, a.sz); + mpz_mod(ctx, &temp, &b, &a); + mpz_move(ctx, &b, &temp); + mpz_clear(ctx, &temp); + if (zero_p(&b)) break; + size_t bz = mpz_trailing_zeros(&b); + if (bz > 0) + mpz_div_2exp(ctx, &b, &b, bz); } - if (uzero(z)) - z->sn = 0; else { - z->sn = sn; + mpz_sub(ctx, &b, &b, &a); + if (zero_p(&b)) break; + size_t bz = mpz_trailing_zeros(&b); + mpz_div_2exp(ctx, &b, &b, bz); } } + mpz_mul_2exp(ctx, gg, &a, shift); + mpz_clear(ctx, &a); + mpz_clear(ctx, &b); +cleanup: + pool_restore(ctx, pool_state); } -static void -mpz_neg(mrb_state *mrb, mpz_t *x, mpz_t *y) + +static size_t +mpz_bits(const mpz_t *x) { - if (x!=y) - mpz_set(mrb, x, y); - x->sn = -(y->sn); + if (x->sz == 0 || x->sn == 0) return 0; + + size_t limb_bits = sizeof(mp_limb) * 8; + + // Get the most significant limb + size_t i = x->sz - 1; + mp_limb high = x->p[i]; + + // Number of bits = total full limbs + significant bits in top limb + return i * limb_bits + (limb_bits - lzb(high)); } +/* Compute Barrett parameter mu = floor(2^(2k) / m) where k ~ log2(m) */ static void -mpz_and(mrb_state *mrb, mpz_t *z, mpz_t *x, mpz_t *y) /* not the most efficient way to do this */ +mpz_barrett_mu(mpz_ctx_t *ctx, mpz_t *mu, mpz_t *m) { - size_t sz = imin(x->sz, y->sz); + size_t k = mpz_bits(m); + mpz_t temp; - mpz_realloc(mrb, z, sz); - for (size_t i=0; i < sz; i++) - z->p[i] = x->p[i] & y->p[i]; - if (x->sn < 0 && y->sn < 0) - z->sn = (-1); - else - z->sn = 1; - if (uzero(z)) - z->sn = 0; + mpz_init_set_int(ctx, &temp, 1); + mpz_mul_2exp(ctx, &temp, &temp, 2 * k); /* temp = 2^(2k) */ + mpz_mdiv(ctx, mu, &temp, m); /* mu = floor(2^(2k) / m) */ + mpz_clear(ctx, &temp); } +/* Barrett reduction: r = x mod m using precomputed mu */ static void -mpz_or(mrb_state *mrb, mpz_t *z, mpz_t *x, mpz_t *y) /* not the most efficient way to do this */ +mpz_barrett_reduce(mpz_ctx_t *ctx, mpz_t *r, mpz_t *x, mpz_t *m, mpz_t *mu) { - size_t i; - size_t sz = imax(x->sz, y->sz); + size_t k = mpz_bits(m); - mpz_realloc(mrb, z, sz); - for (i=0; i < sz; i++) - z->p[i] = dg(x,i) | dg(y,i); - if (x->sn < 0 || y->sn < 0) - z->sn = (-1); - else - z->sn = 1; - if (uzero(z)) - z->sn = 0; + /* If x < m, then x mod m = x */ + if (mpz_cmp(ctx, x, m) < 0) { + mpz_set(ctx, r, x); + return; + } + + /* Save pool state for proper cleanup of temporary allocations */ + size_t pool_state = pool_save(ctx); + + mpz_t q1, q2, q3, r1, r2; + /* Conservative size estimates for Barrett reduction temporaries */ + size_t q_size = x->sz + mu->sz + 1; /* For multiplication results */ + size_t r_size = m->sz + 1; /* For modular reduction results */ + + mpz_init_temp(ctx, &q1, x->sz + 1); + mpz_init_temp(ctx, &q2, q_size); + mpz_init_temp(ctx, &q3, q_size); + mpz_init_temp(ctx, &r1, r_size); + mpz_init_temp(ctx, &r2, r_size); + + /* Step 1: q1 = floor(x / 2^(k-1)) */ + if (k > 1) { + mpz_div_2exp(ctx, &q1, x, k - 1); + } + else { + mpz_set(ctx, &q1, x); + } + + /* Step 2: q2 = q1 * mu */ + mpz_mul(ctx, &q2, &q1, mu); + + /* Step 3: q3 = floor(q2 / 2^(k+1)) */ + mpz_div_2exp(ctx, &q3, &q2, k + 1); + + /* Step 4: r1 = x mod 2^(k+1) */ + mpz_mod_2exp(ctx, &r1, x, k + 1); + + /* Step 5: r2 = (q3 * m) mod 2^(k+1) */ + mpz_mul(ctx, &r2, &q3, m); + mpz_mod_2exp(ctx, &r2, &r2, k + 1); + + /* Step 6: r = r1 - r2 */ + if (mpz_cmp(ctx, &r1, &r2) >= 0) { + mpz_sub(ctx, r, &r1, &r2); + } + else { + /* r1 < r2, so add 2^(k+1) to r1 */ + mpz_t power; + mpz_init_set_int(ctx, &power, 1); + mpz_mul_2exp(ctx, &power, &power, k + 1); + mpz_add(ctx, &r1, &r1, &power); + mpz_sub(ctx, r, &r1, &r2); + mpz_clear(ctx, &power); + } + + /* Step 7: Final correction - ensure 0 <= r < m */ + while (mpz_cmp(ctx, r, m) >= 0) { + mpz_sub(ctx, r, r, m); + } + + /* Cleanup temporaries */ + mpz_clear(ctx, &q1); + mpz_clear(ctx, &q2); + mpz_clear(ctx, &q3); + mpz_clear(ctx, &r1); + mpz_clear(ctx, &r2); + + /* Restore pool state to free any temporary pool allocations */ + pool_restore(ctx, pool_state); } static void -mpz_xor(mrb_state *mrb, mpz_t *z, mpz_t *x, mpz_t *y) /* not the most efficient way to do this */ +mpz_sqrt(mpz_ctx_t *ctx, mpz_t *z, mpz_t *x) { - size_t i; + mrb_assert(x->sn >= 0); - size_t sz = imax(x->sz, y->sz); - mpz_realloc(mrb, z, sz); - for (i=0; i < sz; i++) - z->p[i] = dg(x,i) ^ dg(y,i); - if ((x->sn <= 0 && y->sn > 0) || (x->sn > 0 && y->sn <=0)) - z->sn = (-1); - else - z->sn = 1; - if (uzero(z)) + if (x->sz == 0) { + // sqrt(0) = 0 + mpz_init(ctx, z); z->sn = 0; + z->sz = 0; + return; + } + + // Use heap-only implementation for now + size_t xbits = mpz_bits(x); + size_t sbit = (xbits + 1) / 2; + mpz_t s, t; + mpz_init_set_int(ctx, &s, 1); + mpz_mul_2exp(ctx, &s, &s, sbit); + + mpz_init_temp(ctx, &t, x->sz + 1); + + // Iteratively refine s using Newton-Raphson method: + // s = (s + x / s) / 2 + for (;;) { + mpz_mdiv(ctx, &t, x, &s); // t = x / s + mpz_add(ctx, &t, &t, &s); // t = s + x/s + mpz_div_2exp(ctx, &t, &t, 1); // t = (s + x/s) / 2 + + if (mpz_cmp(ctx, &t, &s) >= 0) { + // Converged: t >= s + break; + } + + mpz_set(ctx, &s, &t); + } + + mpz_move(ctx, z, &s); + mpz_clear(ctx, &t); +} + + +/* Barrett reduction for efficient modular arithmetic with repeated operations */ + +/* + * Montgomery Reduction + * + * Montgomery reduction computes x * R^(-1) mod n without division, where + * R = 2^(k*64) for a k-limb modulus. This is faster than Barrett for + * repeated modular operations with the same modulus (e.g., modular exponentiation). + * + * Requirements: + * - n must be ODD (n[0] & 1 == 1) + * - R > n (automatically satisfied since R = 2^(k*64) >= 2^64 > any k-limb number) + * + * Key insight: Instead of computing x mod n directly, we work in "Montgomery form" + * where a' = a * R mod n. Multiplication in Montgomery form: + * a' * b' = (aR)(bR) * R^(-1) mod n = abR mod n = (ab)' + */ + +/* + * Compute rho = -n[0]^(-1) mod 2^64 using Newton's method. + * This is the Montgomery constant needed for reduction. + */ +static mp_limb +montgomery_setup(mp_limb n0) +{ + /* n must be odd for this to work */ + mrb_assert((n0 & 1) == 1); + + /* + * Newton's method for modular inverse: + * x_{i+1} = x_i * (2 - n0 * x_i) mod 2^k + * + * Starting with x = 1 (which satisfies x * n0 ≡ 1 mod 2), + * each iteration doubles the number of correct bits. + */ + mp_limb x = 1; + + /* 6 iterations: 1 -> 2 -> 4 -> 8 -> 16 -> 32 -> 64 bits */ + for (int i = 0; i < 6; i++) { + x = x * (2 - n0 * x); /* Implicit mod 2^64 via overflow */ + } + + /* Return -x mod 2^64 = negation of modular inverse */ + return (mp_limb)0 - x; } +/* + * Montgomery reduction: result = x * R^(-1) mod n + * + * Algorithm (REDC): + * m = (x mod R) * rho mod R + * t = (x + m * n) / R + * if t >= n: t = t - n + * return t + * + * The key insight is that (x + m*n) is always divisible by R, + * so the division is just a right shift (drop low limbs). + */ static void -mpz_pow(mrb_state *mrb, mpz_t *zz, mpz_t *x, mrb_int e) +mpz_montgomery_reduce(mpz_ctx_t *ctx, mpz_t *result, + const mpz_t *x, const mpz_t *n, mp_limb rho) { - mpz_t t; - mrb_uint mask = 1ULL<<(sizeof(mrb_int)*8-1); + size_t k = n->sz; /* Number of limbs in modulus */ + size_t x_len = x->sz; + + /* Allocate workspace: Montgomery reduction writes k limbs at work[i] for i=0..k-1, + * so the maximum index accessed is work[2k-1] (from carry propagation). + * We need at least 2k limbs, plus extra if x_len > k. */ + size_t work_size = (x_len > k) ? (x_len + k + 2) : (2 * k + 2); + size_t pool_state = pool_save(ctx); + + mp_limb *work = NULL; + if (MPZ_HAS_POOL(ctx)) { + work = pool_alloc(MPZ_POOL(ctx), work_size); + } + mrb_bool heap_alloc = (work == NULL); + if (heap_alloc) { + work = (mp_limb*)mrb_malloc(MPZ_MRB(ctx), work_size * sizeof(mp_limb)); + } - if (e==0) { - mpz_set_int(mrb, zz, 1L); - return; + /* Copy x to work buffer, zero-extend if necessary */ + mpn_copyi(work, x->p, x_len); + mpn_zero(work + x_len, work_size - x_len); + + /* + * Main Montgomery reduction loop: + * For i = 0 to k-1: + * m_i = work[i] * rho mod 2^64 + * work += m_i * n * 2^(i*64) + */ + for (size_t i = 0; i < k; i++) { + mp_limb m = work[i] * rho; + + /* Add m * n at position i */ + mp_limb carry = mpn_addmul_1(work + i, n->p, k, m); + + /* Propagate carry */ + for (size_t j = i + k; carry && j < work_size; j++) { + mp_dbl_limb sum = (mp_dbl_limb)work[j] + carry; + work[j] = LOW(sum); + carry = HIGH(sum); + } } - mpz_init(mrb, &t); - mpz_set(mrb, &t, x); - for (;!(mask &e); mask>>=1) - ; - mask>>=1; - for (;mask!=0; mask>>=1) { - mpz_mul(mrb, &t, &t, &t); - if (e & mask) - mpz_mul(mrb, &t, &t, x); + /* Result is work[k..2k-1] (the upper k limbs after dividing by R) */ + mpz_realloc(ctx, result, k + 1); + mpn_copyi(result->p, work + k, k + 1); + result->sz = k + 1; + result->sn = 1; + trim(result); + + /* Final subtraction if result >= n */ + if (ucmp(result, (mpz_t*)n) >= 0) { + mpz_sub(ctx, result, result, (mpz_t*)n); + } + + /* Cleanup */ + if (heap_alloc) { + mrb_free(MPZ_MRB(ctx), work); } - mpz_move(mrb, zz, &t); + pool_restore(ctx, pool_state); } +/* + * Compute R^2 mod n, needed for converting to Montgomery form. + * a' = REDC(a * R^2) = a * R mod n + */ static void -mpz_powm(mrb_state *mrb, mpz_t *zz, mpz_t *x, mrb_int ex, mpz_t *n) +mpz_montgomery_calc_R2(mpz_ctx_t *ctx, mpz_t *R2, const mpz_t *n) { - mpz_t t, b; + size_t k = n->sz; - if (ex == 0) { - mpz_set_int(mrb, zz, 1); - return; - } + /* R = 2^(k*64), so R^2 = 2^(2*k*64) */ + mpz_init_set_int(ctx, R2, 1); + mpz_mul_2exp(ctx, R2, R2, 2 * k * DIG_SIZE); - if (ex < 0) { - return; - } + /* R2 = R^2 mod n */ + mpz_mod(ctx, R2, R2, (mpz_t*)n); +} - mpz_init_set_int(mrb, &t, 1); - mpz_init_set(mrb, &b, x); +/* + * Montgomery modular exponentiation: result = base^exp mod n + * Requires n to be odd. + */ +static void +mpz_powm_montgomery(mpz_ctx_t *ctx, mpz_t *result, + const mpz_t *base, const mpz_t *exp, const mpz_t *n) +{ + size_t pool_state = pool_save(ctx); + + /* Setup Montgomery parameters */ + mp_limb rho = montgomery_setup(n->p[0]); + + /* Compute R^2 mod n for conversion to Montgomery form */ + mpz_t R2; + mpz_montgomery_calc_R2(ctx, &R2, n); + + /* Compute R mod n = REDC(R^2) for initializing accumulator to 1 in Montgomery form */ + mpz_t one_mont; + mpz_init(ctx, &one_mont); + mpz_montgomery_reduce(ctx, &one_mont, &R2, n, rho); + + /* Convert base to Montgomery form: base_mont = base * R mod n = REDC(base * R^2). + * REDC requires its input T to satisfy T < R*N. If `base` is not already + * reduced (e.g. base >= n), `base * R^2` can exceed R*N and REDC produces + * a wrong result. Pre-reduce base modulo n via mpz_mmod (the general + * division path) -- both operands are non-negative here so this is + * semantically equivalent to mpz_mod. */ + mpz_t base_mont, base_reduced, temp; + mpz_init(ctx, &base_mont); + mpz_init(ctx, &base_reduced); + mpz_init_temp(ctx, &temp, n->sz * 4); + + mpz_mmod(ctx, &base_reduced, (mpz_t*)base, (mpz_t*)n); + mpz_mul(ctx, &temp, &base_reduced, &R2); + mpz_montgomery_reduce(ctx, &base_mont, &temp, n, rho); + + /* Initialize accumulator to 1 in Montgomery form */ + mpz_t acc; + mpz_init_set(ctx, &acc, &one_mont); + + /* Binary exponentiation in Montgomery form */ + size_t exp_len = exp->sz; + for (size_t i = 0; i < exp_len; i++) { + mp_limb e = exp->p[i]; + for (size_t j = 0; j < sizeof(mp_limb) * 8; j++) { + if ((e & 1) == 1) { + /* acc = acc * base_mont in Montgomery form */ + mpz_mul(ctx, &temp, &acc, &base_mont); + mpz_montgomery_reduce(ctx, &acc, &temp, n, rho); + } + e >>= 1; - while (ex > 0) { - if ((ex & 1) == 1) { - mpz_mul(mrb, &t, &t, &b); - mpz_mod(mrb, &t, &t, n); + /* base_mont = base_mont^2 in Montgomery form */ + mpz_mul(ctx, &temp, &base_mont, &base_mont); + mpz_montgomery_reduce(ctx, &base_mont, &temp, n, rho); } - ex >>= 1; - mpz_mul(mrb, &b, &b, &b); - mpz_mod(mrb, &b, &b, n); } - mpz_move(mrb, zz, &t); - mpz_clear(mrb, &b); + + /* Convert result back from Montgomery form: result = REDC(acc) */ + mpz_montgomery_reduce(ctx, result, &acc, n, rho); + + /* Cleanup */ + mpz_clear(ctx, &R2); + mpz_clear(ctx, &one_mont); + mpz_clear(ctx, &base_mont); + mpz_clear(ctx, &base_reduced); + mpz_clear(ctx, &temp); + mpz_clear(ctx, &acc); + pool_restore(ctx, pool_state); } /* --- mruby functions --- */ +/* initialize mpz_t from RBigint (not need to clear) */ +static void +bint_as_mpz(struct RBigint *b, mpz_t *x) +{ + x->p = RBIGINT_ARY(b); + x->sz = RBIGINT_SIZE(b); + x->sn = RBIGINT_SIGN(b); +} + +/* Transfer mpz_t data to RBigint structure */ +static void +bint_set(mpz_ctx_t *ctx, struct RBigint *b, mpz_t *x) +{ + if (x->sz <= RBIGINT_EMBED_SIZE_MAX) { + RBIGINT_SET_EMBED_SIZE(b, x->sz); + RBIGINT_SET_EMBED_SIGN(b, x->sn); + if (x->p) { + memcpy(RBIGINT_EMBED_ARY(b), x->p, x->sz*sizeof(mp_limb)); + } + else { + /* Initialize embedded array to zero when x->p is NULL */ + memset(RBIGINT_EMBED_ARY(b), 0, x->sz*sizeof(mp_limb)); + } + mpz_clear(ctx, x); + } + else { + RBIGINT_SET_HEAP(b); + mpz_init(ctx, &b->as.heap); /* Initialize before mpz_move */ + mpz_move(ctx, &b->as.heap, x); + } +} + static struct RBigint* -bint_new(mrb_state *mrb) +bint_new(mpz_ctx_t *ctx, mpz_t *x) { - struct RBigint *b = MRB_OBJ_ALLOC(mrb, MRB_TT_BIGINT, mrb->integer_class); - mpz_init(mrb, &b->mp); + struct RBigint *b = MRB_OBJ_ALLOC(MPZ_MRB(ctx), MRB_TT_BIGINT, MPZ_MRB(ctx)->integer_class); + bint_set(ctx, b, x); return b; } static struct RBigint* -bint_new_int(mrb_state *mrb, mrb_int x) +bint_new_int(mpz_ctx_t *ctx, mrb_int n) { - struct RBigint *b = MRB_OBJ_ALLOC(mrb, MRB_TT_BIGINT, mrb->integer_class); - mpz_init_set_int(mrb, &b->mp, x); - return b; + mpz_t x; + mpz_init_set_int(ctx, &x, n); + return bint_new(ctx, &x); } mrb_value -mrb_bint_new(mrb_state *mrb) +mrb_bint_new_int(mrb_state *mrb, mrb_int x) { - struct RBigint *b = bint_new(mrb); + MPZ_CTX_INIT(mrb, ctx, pool); + struct RBigint *b = bint_new_int(ctx, x); return mrb_obj_value(b); } +#ifdef MRB_INT32 mrb_value -mrb_bint_new_int(mrb_state *mrb, mrb_int x) +mrb_bint_new_int64(mrb_state *mrb, int64_t n) { - struct RBigint *b = bint_new_int(mrb, x); + mpz_t x; + MPZ_CTX_INIT(mrb, ctx, pool); + + mpz_set_int64(ctx, &x, n); + struct RBigint *b = bint_new(ctx, &x); return mrb_obj_value(b); } +#endif mrb_value -mrb_bint_new_str(mrb_state *mrb, const char *x, mrb_int len, mrb_int base) +mrb_bint_new_uint64(mrb_state *mrb, uint64_t x) { - struct RBigint *b = MRB_OBJ_ALLOC(mrb, MRB_TT_BIGINT, mrb->integer_class); - int sn = 1; - if (base < 0) { - base = -base; - sn = -1; - } - mrb_assert(2 <= base && base <= 36); - mpz_init_set_str(mrb, &b->mp, x, len, base); - if (sn < 0) { - b->mp.sn = sn; - } + mpz_t z; + MPZ_CTX_INIT(mrb, ctx, pool); + + mpz_init(ctx, &z); + mpz_set_uint64(ctx, &z, x); + struct RBigint *b = bint_new(ctx, &z); return mrb_obj_value(b); } @@ -988,18 +5389,44 @@ static mrb_value bint_norm(mrb_state *mrb, struct RBigint *b) { mrb_int i; + mpz_t a; - if (mpz_get_int(&b->mp, &i)) { + bint_as_mpz(b, &a); + if (mpz_get_int(&a, &i)) { return mrb_int_value(mrb, i); } return mrb_obj_value(b); } +mrb_value +mrb_bint_new_str(mrb_state *mrb, const char *x, mrb_int len, mrb_int base) +{ + mpz_t z; + int sn = 1; + + if (base < 0) { + base = -base; + sn = -1; + } + mrb_assert(2 <= base && base <= 36); + + MPZ_CTX_INIT(mrb, ctx, pool); + mpz_init_set_str(ctx, &z, x, len, base); + if (sn < 0) { + z.sn = sn; + } + return bint_norm(mrb, bint_new(ctx, &z)); +} + void mrb_gc_free_bint(mrb_state *mrb, struct RBasic *x) { struct RBigint *b = (struct RBigint*)x; - mpz_clear(mrb, &b->mp); + MPZ_CTX_INIT(mrb, ctx, pool); + + if (!RBIGINT_EMBED_P(b)) { + mpz_clear(ctx, &b->as.heap); + } } #ifndef MRB_NO_FLOAT @@ -1009,6 +5436,10 @@ mrb_bint_new_float(mrb_state *mrb, mrb_float x) /* x should not be NaN nor Infinity */ mrb_assert(x == x && x != x * 0.5); + if (FIXABLE_FLOAT(x)) { + return mrb_int_value(mrb, (mrb_int)x); + } + int sn; if (x < 0.0) { x = -x; @@ -1021,44 +5452,44 @@ mrb_bint_new_float(mrb_state *mrb, mrb_float x) return mrb_fixnum_value(0); } - struct RBigint *bint = bint_new(mrb); - mpz_t *r = &bint->mp; - r->sn = sn; + MPZ_CTX_INIT(mrb, ctx, pool); + mpz_t r; + mpz_init(ctx, &r); + r.sn = sn; mrb_float b = (double)DIG_BASE; mrb_float bi = 1.0 / b; size_t rn; - mp_limb *rp; - mp_limb f; for (rn = 1; x >= b; rn++) x *= bi; - mpz_realloc(mrb, r, rn); - rp = r->p; + mpz_realloc(ctx, &r, rn); + mp_limb *rp = r.p; for (size_t i=rn-1;;i--) { - f = LOW((mp_limb)x); + mp_limb f = LOW((mp_limb)x); x -= f; mrb_assert(x < 1.0); rp[i] = f; if (i == 0) break; } - return bint_norm(mrb, bint); + return bint_norm(mrb, bint_new(ctx, &r)); } mrb_float mrb_bint_as_float(mrb_state *mrb, mrb_value self) { - struct RBigint *b = RBIGINT(self); - mpz_t *i = &b->mp; - mp_limb *d = i->p + i->sz; + mpz_t m; + bint_as_mpz(RBIGINT(self), &m); + + mp_limb *d = m.p + m.sz; mrb_float val = 0; - while (d-- > i->p) { + while (d-- > m.p) { val = val * DIG_BASE + *d; } - if (i->sn < 0) { + if (m.sn < 0) { val = -val; } return val; @@ -1075,15 +5506,110 @@ mrb_as_bint(mrb_state *mrb, mrb_value x) mrb_int mrb_bint_as_int(mrb_state *mrb, mrb_value x) { - struct RBigint *b = RBIGINT(x); + mpz_t m; mrb_int i; - if (!mpz_get_int(&b->mp, &i)) { + bint_as_mpz(RBIGINT(x), &m); + if (!mpz_get_int(&m, &i)) { mrb_raise(mrb, E_RANGE_ERROR, "integer out of range"); } return i; } +#ifdef MRB_INT32 +int64_t +mrb_bint_as_int64(mrb_state *mrb, mrb_value x) +{ + mpz_t m; + bint_as_mpz(RBIGINT(x), &m); + + uint64_t u = 0; + size_t len = digits(&m); + + if (len*sizeof(mp_limb) > sizeof(uint64_t)) { + out_of_range: + mrb_raise(mrb, E_RANGE_ERROR, "integer out of range"); + } + for (size_t i=len-1; ; i--) { + u <<= DIG_SIZE; + u |= m.p[i]; + if (i==0) break; + } + if (u > INT64_MAX) goto out_of_range; + if (m.sn < 0) return -(int64_t)u; + return (int64_t)u; +} +#endif + +uint64_t +mrb_bint_as_uint64(mrb_state *mrb, mrb_value x) +{ + mpz_t m; + bint_as_mpz(RBIGINT(x), &m); + + uint64_t u = 0; + size_t len = digits(&m); + + if (m.sn < 0 || len*sizeof(mp_limb) > sizeof(uint64_t)) { + mrb_raise(mrb, E_RANGE_ERROR, "integer out of range"); + } + for (size_t i=len-1; ; i--) { + u <<= DIG_SIZE; + u |= m.p[i]; + if (i==0) break; + } + return u; +} + +static mrb_bool +int_fit_limb_p(mrb_int i) +{ +#if DIG_SIZE == 32 +# ifdef MRB_INT64 + // if mp_limb is int32_t + return (i > INT32_MIN && i <= INT32_MAX); +# else + // if mp_limb is also int32_t, it always fits + return TRUE; +# endif +#else /* if DIG_SIZE == 16 */ + // if mp_limb is int16_t + return (i > INT16_MIN && i <= INT16_MAX); +#endif +} + +/* unnormalize version of mrb_bint_add */ +mrb_value +mrb_bint_add_n(mrb_state *mrb, mrb_value x, mrb_value y) +{ + mpz_t a, b, z; + + bint_as_mpz(RBIGINT(x), &a); + + MPZ_CTX_INIT(mrb, ctx, pool); + + if (mrb_integer_p(y)) { + mrb_int n = mrb_integer(y); + if (int_fit_limb_p(n)) { + mpz_init_set(ctx, &z, &a); + if ((n > 0) ^ (z.sn > 0)) { + mpz_sub_int(ctx, &z, n<0 ? -n : n); + } + else { + mpz_add_int(ctx, &z, n<0 ? -n : n); + } + struct RBigint *v = bint_new(ctx, &z); + return mrb_obj_value(v); + } + } + y = mrb_as_bint(mrb, y); + bint_as_mpz(RBIGINT(y), &b); + mpz_init(ctx, &z); + mpz_add(ctx, &z, &a, &b); + struct RBigint *v = bint_new(ctx, &z); + return mrb_obj_value(v); +} + mrb_value mrb_bint_add(mrb_state *mrb, mrb_value x, mrb_value y) { @@ -1094,12 +5620,38 @@ mrb_bint_add(mrb_state *mrb, mrb_value x, mrb_value y) return mrb_float_value(mrb,v1+v2); } #endif + x = mrb_bint_add_n(mrb, x, y); + return bint_norm(mrb, RBIGINT(x)); +} + +/* unnormalize version of mrb_bint_sub */ +mrb_value +mrb_bint_sub_n(mrb_state *mrb, mrb_value x, mrb_value y) +{ + mpz_t a, b, z; + MPZ_CTX_INIT(mrb, ctx, pool); + + bint_as_mpz(RBIGINT(x), &a); + if (mrb_integer_p(y)) { + mrb_int n = mrb_integer(y); + if (int_fit_limb_p(n)) { + mpz_init_set(ctx, &z, &a); + if ((n > 0) ^ (z.sn > 0)) { + mpz_add_int(ctx, &z, n<0 ? -n : n); + } + else { + mpz_sub_int(ctx, &z, n<0 ? -n : n); + } + struct RBigint *v = bint_new(ctx, &z); + return mrb_obj_value(v); + } + } y = mrb_as_bint(mrb, y); - struct RBigint *b = RBIGINT(x); - struct RBigint *b2 = RBIGINT(y); - struct RBigint *b3 = bint_new(mrb); - mpz_add(mrb, &b3->mp, &b->mp, &b2->mp); - return bint_norm(mrb, b3); + bint_as_mpz(RBIGINT(y), &b); + mpz_init(ctx, &z); + mpz_sub(ctx, &z, &a, &b); + struct RBigint *v = bint_new(ctx, &z); + return mrb_obj_value(v); } mrb_value @@ -1112,17 +5664,54 @@ mrb_bint_sub(mrb_state *mrb, mrb_value x, mrb_value y) return mrb_float_value(mrb,v1-v2); } #endif + x = mrb_bint_sub_n(mrb, x, y); + return bint_norm(mrb, RBIGINT(x)); +} + +struct bint_mul_data { + mpz_ctx_t *ctx; + mpz_t *a; + mpz_t *b; + mpz_t z; /* cleanup target */ +}; + +static mrb_value +bint_mul_body(mrb_state *mrb, void *userdata) +{ + struct bint_mul_data *d = (struct bint_mul_data *)userdata; + mpz_init(d->ctx, &d->z); + mpz_mul(d->ctx, &d->z, d->a, d->b); + return mrb_nil_value(); +} + +static struct RBigint* +bint_mul(mrb_state *mrb, mrb_value x, mrb_value y) +{ + mpz_t a, b; + y = mrb_as_bint(mrb, y); - struct RBigint *b = RBIGINT(x); - struct RBigint *b2 = RBIGINT(y); - struct RBigint *b3 = bint_new(mrb); - mpz_sub(mrb, &b3->mp, &b->mp, &b2->mp); - return bint_norm(mrb, b3); + bint_as_mpz(RBIGINT(x), &a); + bint_as_mpz(RBIGINT(y), &b); + + MPZ_CTX_INIT(mrb, ctx, pool); + struct bint_mul_data d = {ctx, &a, &b, {0,0,0}}; + mrb_value exc; + MRB_ENSURE(mrb, exc, bint_mul_body, &d) { + /* On exception, cleanup z (mpz_clear is safe on zero-initialized mpz_t) */ + if (mrb->exc) { + mpz_clear(ctx, &d.z); + } + } + return bint_new(ctx, &d.z); } mrb_value mrb_bint_mul(mrb_state *mrb, mrb_value x, mrb_value y) { + if (mrb_integer_p(y)) { + if (mrb_integer(y) == 0) return mrb_fixnum_value(0); + if (mrb_integer(y) == 1) return bint_norm(mrb, RBIGINT(x)); + } #ifndef MRB_NO_FLOAT if (mrb_float_p(y)) { mrb_float v1 = mrb_bint_as_float(mrb, x); @@ -1130,17 +5719,23 @@ mrb_bint_mul(mrb_state *mrb, mrb_value x, mrb_value y) return mrb_float_value(mrb,v1*v2); } #endif - y = mrb_as_bint(mrb, y); - struct RBigint *b = RBIGINT(x); - struct RBigint *b2 = RBIGINT(y); - struct RBigint *b3 = bint_new(mrb); - mpz_mul(mrb, &b3->mp, &b->mp, &b2->mp); - return bint_norm(mrb, b3); + return bint_norm(mrb, bint_mul(mrb, x, y)); +} + +mrb_value +mrb_bint_mul_n(mrb_state *mrb, mrb_value x, mrb_value y) +{ + struct RBigint *b = bint_mul(mrb, x, y); + return mrb_obj_value(b); } mrb_value mrb_bint_div(mrb_state *mrb, mrb_value x, mrb_value y) { + if (mrb_integer_p(y)) { + if (mrb_integer(y) == 0) mrb_int_zerodiv(mrb); + if (mrb_integer(y) == 1) return bint_norm(mrb, RBIGINT(x)); + } #ifndef MRB_NO_FLOAT if (mrb_float_p(y)) { mrb_float v1 = mrb_bint_as_float(mrb, x); @@ -1148,74 +5743,64 @@ mrb_bint_div(mrb_state *mrb, mrb_value x, mrb_value y) return mrb_float_value(mrb,v1*v2); } #endif - if (mrb_integer_p(y) && mrb_integer(y) == 0) { - mrb_int_zerodiv(mrb); - } + mpz_t a, b, z; + y = mrb_as_bint(mrb, y); - struct RBigint *b = RBIGINT(x); - struct RBigint *b2 = RBIGINT(y); - struct RBigint *b3 = bint_new(mrb); - if (b2->mp.sn == 0 || uzero(&b2->mp)) { + bint_as_mpz(RBIGINT(y), &b); + if (zero_p(&b) || uzero_p(&b)) { mrb_int_zerodiv(mrb); } - mpz_mdiv(mrb, &b3->mp, &b->mp, &b2->mp); - return bint_norm(mrb, b3); + bint_as_mpz(RBIGINT(x), &a); + + MPZ_CTX_INIT(mrb, ctx, pool); + mpz_init(ctx, &z); + mpz_mdiv(ctx, &z, &a, &b); + return bint_norm(mrb, bint_new(ctx, &z)); } mrb_value mrb_bint_add_ii(mrb_state *mrb, mrb_int x, mrb_int y) { - struct RBigint *b = bint_new(mrb); - mpz_t z1, z2; - - mpz_init_set_int(mrb, &z1, x); - mpz_init_set_int(mrb, &z2, y); - mpz_add(mrb, &b->mp, &z1, &z2); - mpz_clear(mrb, &z1); - mpz_clear(mrb, &z2); - return bint_norm(mrb, b); + mpz_t a, b, z; + MPZ_CTX_INIT(mrb, ctx, pool); + + mpz_init(ctx, &z); + mpz_init_set_int(ctx, &a, x); + mpz_init_set_int(ctx, &b, y); + mpz_add(ctx, &z, &a, &b); + mpz_clear(ctx, &a); + mpz_clear(ctx, &b); + return bint_norm(mrb, bint_new(ctx, &z)); } mrb_value mrb_bint_sub_ii(mrb_state *mrb, mrb_int x, mrb_int y) { - struct RBigint *b = bint_new(mrb); - mpz_t z1, z2; - - mpz_init_set_int(mrb, &z1, x); - mpz_init_set_int(mrb, &z2, y); - mpz_sub(mrb, &b->mp, &z1, &z2); - mpz_clear(mrb, &z1); - mpz_clear(mrb, &z2); - return bint_norm(mrb, b); + mpz_t a, b, z; + MPZ_CTX_INIT(mrb, ctx, pool); + + mpz_init(ctx, &z); + mpz_init_set_int(ctx, &a, x); + mpz_init_set_int(ctx, &b, y); + mpz_sub(ctx, &z, &a, &b); + mpz_clear(ctx, &a); + mpz_clear(ctx, &b); + return bint_norm(mrb, bint_new(ctx, &z)); } mrb_value mrb_bint_mul_ii(mrb_state *mrb, mrb_int x, mrb_int y) { - struct RBigint *b = bint_new(mrb); - mpz_t z1, z2; - - mpz_init_set_int(mrb, &z1, x); - mpz_init_set_int(mrb, &z2, y); - mpz_mul(mrb, &b->mp, &z1, &z2); - mpz_clear(mrb, &z1); - mpz_clear(mrb, &z2); - return bint_norm(mrb, b); -} - -mrb_value -mrb_bint_div_ii(mrb_state *mrb, mrb_int x, mrb_int y) -{ - struct RBigint *b = bint_new(mrb); - mpz_t z1, z2; - - mpz_init_set_int(mrb, &z1, x); - mpz_init_set_int(mrb, &z2, y); - mpz_mdiv(mrb, &b->mp, &z1, &z2); - mpz_clear(mrb, &z1); - mpz_clear(mrb, &z2); - return bint_norm(mrb, b); + mpz_t a, b, z; + MPZ_CTX_INIT(mrb, ctx, pool); + + mpz_init(ctx, &z); + mpz_init_set_int(ctx, &a, x); + mpz_init_set_int(ctx, &b, y); + mpz_mul(ctx, &z, &a, &b); + mpz_clear(ctx, &a); + mpz_clear(ctx, &b); + return bint_norm(mrb, bint_new(ctx, &z)); } mrb_value @@ -1231,15 +5816,19 @@ mrb_bint_mod(mrb_state *mrb, mrb_value x, mrb_value y) if (mrb_integer_p(y) && mrb_integer(y) == 0) { mrb_int_zerodiv(mrb); } + mpz_t a, b, z; + x = mrb_as_bint(mrb, x); y = mrb_as_bint(mrb, y); - struct RBigint *b = RBIGINT(x); - struct RBigint *b2 = RBIGINT(y); - struct RBigint *b3 = bint_new(mrb); - if (b2->mp.sn == 0 || uzero(&b2->mp)) { + bint_as_mpz(RBIGINT(y), &b); + if (zero_p(&b) || uzero_p(&b)) { mrb_int_zerodiv(mrb); } - mpz_mmod(mrb, &b3->mp, &b->mp, &b2->mp); - return bint_norm(mrb, b3); + bint_as_mpz(RBIGINT(x), &a); + + MPZ_CTX_INIT(mrb, ctx, pool); + mpz_init(ctx, &z); + mpz_mmod(ctx, &z, &a, &b); + return bint_norm(mrb, bint_new(ctx, &z)); } mrb_value @@ -1250,15 +5839,19 @@ mrb_bint_rem(mrb_state *mrb, mrb_value x, mrb_value y) if (mrb_integer_p(y) && mrb_integer(y) == 0) { mrb_int_zerodiv(mrb); } + mpz_t a, b, z; + x = mrb_as_bint(mrb, x); y = mrb_as_bint(mrb, y); - struct RBigint *b = RBIGINT(x); - struct RBigint *b2 = RBIGINT(y); - struct RBigint *b3 = bint_new(mrb); - if (b2->mp.sn == 0 || uzero(&b2->mp)) { + bint_as_mpz(RBIGINT(y), &b); + if (zero_p(&b) || uzero_p(&b)) { mrb_int_zerodiv(mrb); } - mpz_mod(mrb, &b3->mp, &b->mp, &b2->mp); - return bint_norm(mrb, b3); + bint_as_mpz(RBIGINT(x), &a); + + MPZ_CTX_INIT(mrb, ctx, pool); + mpz_init(ctx, &z); + mpz_mod(ctx, &z, &a, &b); + return bint_norm(mrb, bint_new(ctx, &z)); } mrb_value @@ -1270,17 +5863,18 @@ mrb_bint_divmod(mrb_state *mrb, mrb_value x, mrb_value y) mrb_int_zerodiv(mrb); } y = mrb_as_bint(mrb, y); - struct RBigint *b = RBIGINT(x); - struct RBigint *b2 = RBIGINT(y); - struct RBigint *b3 = bint_new(mrb); - struct RBigint *b4 = bint_new(mrb); - if (b2->mp.sn == 0 || uzero(&b2->mp)) { + mpz_t a, b, c, d; + bint_as_mpz(RBIGINT(y), &b); + if (zero_p(&b) || uzero_p(&b)) { mrb_int_zerodiv(mrb); } - mpz_mdivmod(mrb, &b3->mp, &b4->mp, &b->mp, &b2->mp); - x = bint_norm(mrb, b3); - y = bint_norm(mrb, b4); - return mrb_assoc_new(mrb, x, y); + bint_as_mpz(RBIGINT(x), &a); + + MPZ_CTX_INIT(mrb, ctx, pool); + mpz_init(ctx, &c); + mpz_init(ctx, &d); + mpz_mdivmod(ctx, &c, &d, &a, &b); + return mrb_assoc_new(mrb, bint_norm(mrb, bint_new(ctx, &c)), bint_norm(mrb, bint_new(ctx, &d))); } mrb_int @@ -1295,84 +5889,157 @@ mrb_bint_cmp(mrb_state *mrb, mrb_value x, mrb_value y) return -1; } #endif - struct RBigint *b = RBIGINT(x); + mpz_t a; + + bint_as_mpz(RBIGINT(x), &a); if (!mrb_bigint_p(y)) { if (!mrb_integer_p(y)) return -2; /* type mismatch */ mrb_int i1, i2 = mrb_integer(y); - if (mpz_get_int(&b->mp, &i1)) { + if (mpz_get_int(&a, &i1)) { if (i1 == i2) return 0; if (i1 > i2) return 1; return -1; } - if (b->mp.sn > 0) return 1; + if (a.sn > 0) return 1; return -1; } - struct RBigint *b2 = RBIGINT(y); - return mpz_cmp(mrb, &b->mp, &b2->mp); + mpz_t b; + bint_as_mpz(RBIGINT(y), &b); + MPZ_CTX_INIT(mrb, ctx, pool); + return mpz_cmp(ctx, &a, &b); } +/* Maximum bits for power result to prevent resource exhaustion */ +/* 1 million bits = ~125KB per bigint, ~300,000 decimal digits */ +#define MRB_BIGINT_POW_MAX_BITS 1000000 + mrb_value mrb_bint_pow(mrb_state *mrb, mrb_value x, mrb_value y) { - struct RBigint *b = RBIGINT(x); + mpz_t a; + + bint_as_mpz(RBIGINT(x), &a); switch (mrb_type(y)) { case MRB_TT_INTEGER: - { - struct RBigint *b3 = bint_new(mrb); - mpz_pow(mrb, &b3->mp, &b->mp, mrb_integer(y)); - return mrb_obj_value(b3); - } + break; case MRB_TT_BIGINT: mrb_raise(mrb, E_TYPE_ERROR, "too big power"); default: - mrb_raisef(mrb, E_TYPE_ERROR, "%v cannot be convert to integer", y); + mrb_raisef(mrb, E_TYPE_ERROR, "%Y cannot be convert to integer", y); } - return mrb_nil_value(); + + mrb_int exp = mrb_integer(y); + if (exp < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "negative exponent"); + } + + /* Check if result would be too large */ + /* result_bits ≈ base_bits * exp */ + size_t base_bits = mpz_bits(&a); + if (base_bits == 0) base_bits = 1; /* handle 0 and 1 */ + if (exp > 0 && (size_t)exp > MRB_BIGINT_POW_MAX_BITS / base_bits) { + mrb_raise(mrb, E_RANGE_ERROR, "exponent too large"); + } + + mpz_t z; + MPZ_CTX_INIT(mrb, ctx, pool); + mpz_pow(ctx, &z, &a, exp); + + struct RBigint *b = bint_new(ctx, &z); + return mrb_obj_value(b); } mrb_value -mrb_bint_powm(mrb_state *mrb, mrb_value x, mrb_int exp, mrb_value mod) +mrb_bint_powm(mrb_state *mrb, mrb_value x, mrb_value exp, mrb_value mod) { - struct RBigint *b = RBIGINT(x); - switch (mrb_type(mod)) { - case MRB_TT_INTEGER: - { - mrb_int m = mrb_integer(mod); - if (m == 0) mrb_int_zerodiv(mrb); - struct RBigint *b2 = bint_new_int(mrb, m); - struct RBigint *b3 = bint_new(mrb); - mpz_powm(mrb, &b3->mp, &b->mp, exp, &b2->mp); - return bint_norm(mrb, b3); + mpz_t a, b, c, z; + mrb_bool neg_mod = FALSE; + MPZ_CTX_INIT(mrb, ctx, pool); + + bint_as_mpz(RBIGINT(x), &a); + if (mrb_integer_p(mod)) { + mrb_int m = mrb_integer(mod); + if (m == 0) mrb_int_zerodiv(mrb); + if (m < 0) { + neg_mod = TRUE; + m = -m; } - case MRB_TT_BIGINT: - { - struct RBigint *b2 = RBIGINT(mod); - struct RBigint *b3 = bint_new(mrb); - if (uzero(&b2->mp)) mrb_int_zerodiv(mrb); - mpz_powm(mrb, &b3->mp, &b->mp, exp, &b2->mp); - return bint_norm(mrb, b3); + mpz_init_set_int(ctx, &c, m); + } + else { + mod = mrb_as_bint(mrb, mod); + bint_as_mpz(RBIGINT(mod), &c); + if (zero_p(&c) || uzero_p(&c)) { + mrb_int_zerodiv(mrb); + } + if (c.sn < 0) { + neg_mod = TRUE; + c.sn = 1; /* use absolute value */ + } + } + + /* Check for zero base case: 0^n = 0 for n > 0 */ + if (zero_p(&a) || uzero_p(&a)) { + mrb_bool exp_positive; + if (mrb_bigint_p(exp)) { + bint_as_mpz(RBIGINT(exp), &b); + exp_positive = (b.sn > 0) && !uzero_p(&b); + } + else { + exp_positive = mrb_integer(exp) > 0; + } + if (exp_positive) { + /* 0^n mod m = 0 for n > 0 */ + if (mrb_integer_p(mod)) mpz_clear(ctx, &c); + return mrb_fixnum_value(0); } - mrb_raise(mrb, E_TYPE_ERROR, "too big power"); - default: - mrb_raisef(mrb, E_TYPE_ERROR, "%v cannot be convert to integer", mod); } + + mpz_init(ctx, &z); + if (mrb_bigint_p(exp)) { + bint_as_mpz(RBIGINT(exp), &b); + if (b.sn < 0) goto raise; + mpz_powm(ctx, &z, &a, &b, &c); + } + else { + mrb_int e = mrb_integer(exp); + if (e < 0) goto raise; + mpz_powm_i(ctx, &z, &a, e, &c); + } + + /* Apply signed modulo adjustment for negative modulus */ + /* Ruby: result + m for non-zero result when m is negative */ + if (neg_mod && !zero_p(&z) && !uzero_p(&z)) { + mpz_sub(ctx, &z, &z, &c); /* z = z - |m| = z + m (since m is negative) */ + } + + if (mrb_integer_p(mod)) mpz_clear(ctx, &c); + return bint_norm(mrb, bint_new(ctx, &z)); + + raise: + if (mrb_integer_p(mod)) mpz_clear(ctx, &c); + mrb_raise(mrb, E_ARGUMENT_ERROR, "int.pow(n,m): n must be positive"); + /* not reached */ return mrb_nil_value(); } mrb_value mrb_bint_to_s(mrb_state *mrb, mrb_value x, mrb_int base) { - struct RBigint *b = RBIGINT(x); + mpz_t a; - if (b->mp.sz == 0) return mrb_str_new_lit(mrb, "0"); - - size_t len = mpz_sizeinbase(&b->mp, (int)base); - if (MRB_INT_MAX-2 < len) { + bint_as_mpz(RBIGINT(x), &a); + if (zero_p(&a) || uzero_p(&a)) { + return mrb_str_new_lit(mrb, "0"); + } + size_t len = mpz_sizeinbase(&a, (int)base); + if (sizeof(size_t) >= sizeof(mrb_int) && MRB_INT_MAX-2 < len) { mrb_raise(mrb, E_ARGUMENT_ERROR, "too long string from Integer"); } mrb_value str = mrb_str_new(mrb, NULL, len+2); - mpz_get_str(mrb, RSTRING_PTR(str), len, base, &b->mp); + MPZ_CTX_INIT(mrb, ctx, pool); + mpz_get_str(ctx, RSTRING_PTR(str), len, base, &a); RSTR_SET_LEN(RSTRING(str), strlen(RSTRING_PTR(str))); return str; } @@ -1380,125 +6047,335 @@ mrb_bint_to_s(mrb_state *mrb, mrb_value x, mrb_int base) mrb_value mrb_bint_and(mrb_state *mrb, mrb_value x, mrb_value y) { - struct RBigint *b1 = RBIGINT(x); - struct RBigint *b3 = bint_new(mrb); - -#ifndef MRB_NO_FLOAT - if (mrb_float_p(y)) { - mpz_t z; - mpz_init_set_int(mrb, &z, (mrb_int)mrb_float(y)); - mpz_and(mrb, &b3->mp, &b1->mp, &z); - mpz_clear(mrb, &z); - return bint_norm(mrb, b3); + mpz_t a, b, c; + + bint_as_mpz(RBIGINT(x), &a); + if (mrb_integer_p(y)) { + mrb_int z = mrb_integer(y); + if (z == 0) return mrb_fixnum_value(0); + if (z > 0 && (mp_dbl_limb)z < DIG_BASE) { + z &= a.p[0]; + return mrb_int_value(mrb, z); + } + if (z == -1) return x; } -#endif + y = mrb_as_bint(mrb, y); - struct RBigint *b2 = RBIGINT(y); - mpz_and(mrb, &b3->mp, &b1->mp, &b2->mp); - return bint_norm(mrb, b3); + bint_as_mpz(RBIGINT(y), &b); + if (zero_p(&a) || zero_p(&b)) return mrb_fixnum_value(0); + + MPZ_CTX_INIT(mrb, ctx, pool); + mpz_init(ctx, &c); + mpz_and(ctx, &c, &a, &b); + return bint_norm(mrb, bint_new(ctx, &c)); } mrb_value mrb_bint_or(mrb_state *mrb, mrb_value x, mrb_value y) { - struct RBigint *b1 = RBIGINT(x); - struct RBigint *b3 = bint_new(mrb); + mpz_t a, b, c; -#ifndef MRB_NO_FLOAT - if (mrb_float_p(y)) { - mpz_t z; - mpz_init_set_int(mrb, &z, (mrb_int)mrb_float(y)); - mpz_or(mrb, &b3->mp, &b1->mp, &z); - mpz_clear(mrb, &z); - return bint_norm(mrb, b3); + bint_as_mpz(RBIGINT(x), &a); + if (mrb_integer_p(y)) { + mrb_int z = mrb_integer(y); + if (z == 0) return x; + if (z == -1) return y; } -#endif + + MPZ_CTX_INIT(mrb, ctx, pool); y = mrb_as_bint(mrb, y); - struct RBigint *b2 = RBIGINT(y); - mpz_or(mrb, &b3->mp, &b1->mp, &b2->mp); - return bint_norm(mrb, b3); + bint_as_mpz(RBIGINT(y), &b); + if (zero_p(&a)) return y; + if (zero_p(&b)) return x; + mpz_init(ctx, &c); + mpz_or(ctx, &c, &b, &a); + return bint_norm(mrb, bint_new(ctx, &c)); } mrb_value mrb_bint_xor(mrb_state *mrb, mrb_value x, mrb_value y) { - struct RBigint *b3 = bint_new(mrb); - struct RBigint *b1 = RBIGINT(x); - -#ifndef MRB_NO_FLOAT - if (mrb_float_p(y)) { - mpz_t z; - mpz_init_set_int(mrb, &z, (mrb_int)mrb_float(y)); - mpz_xor(mrb, &b3->mp, &b1->mp, &z); - mpz_clear(mrb, &z); - return bint_norm(mrb, b3); + mpz_t a, b, c; + MPZ_CTX_INIT(mrb, ctx, pool); + + bint_as_mpz(RBIGINT(x), &a); + if (mrb_integer_p(y) && a.sn > 0) { + mrb_int z = mrb_integer(y); + if (z == 0) return x; + if (0 < z && (mp_dbl_limb)z < DIG_BASE) { + mpz_init_set(ctx, &c, &a); + if (a.sz == 0) { + mpz_realloc(ctx, &c, 1); + c.p[0] = (mp_limb)z; + } + else { + c.p[0] ^= (mp_limb)z; + } + return bint_norm(mrb, bint_new(ctx, &c)); + } } -#endif y = mrb_as_bint(mrb, y); - struct RBigint *b2 = RBIGINT(y); - mpz_xor(mrb, &b3->mp, &b1->mp, &b2->mp); - return bint_norm(mrb, b3); + bint_as_mpz(RBIGINT(y), &b); + if (zero_p(&a)) return y; + if (zero_p(&b)) return x; + mpz_init(ctx, &c); + mpz_xor(ctx, &c, &a, &b); + return bint_norm(mrb, bint_new(ctx, &c)); } mrb_value -mrb_bint_rev(mrb_state *mrb, mrb_value x) +mrb_bint_neg(mrb_state *mrb, mrb_value x) { - struct RBigint *b1 = RBIGINT(x); - struct RBigint *b2 = bint_new(mrb); + mpz_t a, b; + MPZ_CTX_INIT(mrb, ctx, pool); + + bint_as_mpz(RBIGINT(x), &a); + mpz_init(ctx, &b); + mpz_neg(ctx, &b, &a); + struct RBigint *b2 = bint_new(ctx, &b); + /* no normalization */ + return mrb_obj_value(b2); +} - mpz_neg(mrb, &b2->mp, &b1->mp); - mpz_sub_int(mrb, &b2->mp, &b2->mp, 1); - return bint_norm(mrb, b2); +mrb_value +mrb_bint_rev(mrb_state *mrb, mrb_value x) +{ + mpz_t a, b; + MPZ_CTX_INIT(mrb, ctx, pool); + + bint_as_mpz(RBIGINT(x), &a); + mpz_init(ctx, &b); + mpz_neg(ctx, &b, &a); + mpz_sub_int(ctx, &b, 1); + return bint_norm(mrb, bint_new(ctx, &b)); } mrb_value mrb_bint_lshift(mrb_state *mrb, mrb_value x, mrb_int width) { - struct RBigint *b = RBIGINT(x); - struct RBigint *b2 = bint_new(mrb); + mpz_t a, z; + MPZ_CTX_INIT(mrb, ctx, pool); + + bint_as_mpz(RBIGINT(x), &a); + mpz_init(ctx, &z); if (width < 0) { - mpz_div_2exp(mrb, &b2->mp, &b->mp, -width); + mpz_div_2exp(ctx, &z, &a, -width); } else { - mpz_mul_2exp(mrb, &b2->mp, &b->mp, width); + mpz_mul_2exp(ctx, &z, &a, width); } - return bint_norm(mrb, b2); + return bint_norm(mrb, bint_new(ctx, &z)); } mrb_value mrb_bint_rshift(mrb_state *mrb, mrb_value x, mrb_int width) { - struct RBigint *b = RBIGINT(x); - struct RBigint *b2 = bint_new(mrb); + mpz_t a, z; + MPZ_CTX_INIT(mrb, ctx, pool); + + bint_as_mpz(RBIGINT(x), &a); + mpz_init(ctx, &z); if (width < 0) { - mpz_mul_2exp(mrb, &b2->mp, &b->mp, -width); + mpz_mul_2exp(ctx, &z, &a, -width); } else { - mpz_div_2exp(mrb, &b2->mp, &b->mp, width); + mpz_div_2exp(ctx, &z, &a, width); } - return bint_norm(mrb, b2); + return bint_norm(mrb, bint_new(ctx, &z)); } void mrb_bint_copy(mrb_state *mrb, mrb_value x, mrb_value y) { - struct RBigint *b = RBIGINT(x); - struct RBigint *b2 = RBIGINT(y); - mpz_init_set(mrb, &b->mp, &b2->mp); + mpz_t b, temp; + MPZ_CTX_INIT(mrb, ctx, pool); + + bint_as_mpz(RBIGINT(y), &b); + mpz_init_set(ctx, &temp, &b); + bint_set(ctx, RBIGINT(x), &temp); } size_t mrb_bint_memsize(mrb_value x) { - struct RBigint *b = RBIGINT(x); - return b->mp.sz * sizeof(mp_limb); + mpz_t z; + + bint_as_mpz(RBIGINT(x), &z); + return z.sz * sizeof(mp_limb); +} + +mrb_value +mrb_bint_sqrt(mrb_state *mrb, mrb_value x) +{ + mpz_t a; + + bint_as_mpz(RBIGINT(x), &a); + if (a.sn < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "square root of negative number"); + } + MPZ_CTX_INIT(mrb, ctx, pool); + + mpz_t z; + mpz_init(ctx, &z); + mpz_sqrt(ctx, &z, &a); + + return bint_norm(mrb, bint_new(ctx, &z)); +} + +mrb_int +mrb_bint_sign(mrb_state *mrb, mrb_value bint) +{ + return RBIGINT_SIGN(RBIGINT(bint)); +} + +mrb_int +mrb_bint_size(mrb_state *mrb, mrb_value bint) +{ + mpz_t z; + bint_as_mpz(RBIGINT(bint), &z); + return z.sz * sizeof(mp_limb); +} + +mrb_value +mrb_bint_from_bytes(mrb_state *mrb, const uint8_t *bytes, mrb_int len) +{ + mpz_t z; + size_t limb_len = (len + sizeof(mp_limb) - 1) / sizeof(mp_limb); + MPZ_CTX_INIT(mrb, ctx, pool); + + mpz_init_heap(ctx, &z, limb_len); + memcpy(z.p, bytes, len); + z.sn = (len > 0) ? 1 : 0; + z.sz = limb_len; + trim(&z); + return bint_norm(mrb, bint_new(ctx, &z)); } mrb_value mrb_bint_hash(mrb_state *mrb, mrb_value x) { - struct RBigint *b = RBIGINT(x); - uint32_t hash = mrb_byte_hash((uint8_t*)b->mp.p, b->mp.sz); - hash = mrb_byte_hash_step((uint8_t*)&b->mp.sn, sizeof(b->mp.sn), hash); + mpz_t z; + + bint_as_mpz(RBIGINT(x), &z); + uint32_t hash = mrb_byte_hash((uint8_t*)z.p, z.sz*sizeof(mp_limb)); + hash = mrb_byte_hash_step((uint8_t*)&z.sn, sizeof(z.sn), hash); return mrb_int_value(mrb, hash); } + +/* to be used only from mruby-sprintf */ +mrb_value +mrb_bint_2comp(mrb_state *mrb, mrb_value x) +{ + mpz_t a, z; + MPZ_CTX_INIT(mrb, ctx, pool); + + bint_as_mpz(RBIGINT(x), &a); + mpz_init(ctx, &z); + mrb_assert(a.sn < 0); + size_t size = a.sz; + mpz_realloc(ctx, &z, size); + mp_limb *ds = a.p; + mp_limb *dd = z.p; + char carry = 1; + for (size_t i=0; i - generic bug fixes, mpz_sqrt and + * Paul Rouse - generic bugfixes, mpz_sqrt and * mpz_sqrtrem, and modifications to get fgmp to compile on a system * with int and long of different sizes (specifically MS-DOS,286 compiler) * Also see the file "notes" included with the fgmp distribution, for @@ -30,10 +30,6 @@ #include -#if defined(MRB_INT32) && defined(_WIN32) && !defined(MRB_NO_MPZ64BIT) -#define MRB_NO_MPZ64BIT -#endif - #ifdef MRB_NO_MPZ64BIT typedef uint16_t mp_limb; typedef uint32_t mp_dbl_limb; @@ -46,6 +42,8 @@ typedef int64_t mp_dbl_limb_signed; #define MPZ_DIG_SIZE 32 #endif +#define RBIGINT_EMBED_SIZE_MAX ((sizeof(void*) * 3) / sizeof(mp_limb)) + typedef struct _mpz_t { mp_limb *p; short sn; @@ -54,10 +52,66 @@ typedef struct _mpz_t { struct RBigint { MRB_OBJECT_HEADER; - mpz_t mp; + union { + mpz_t heap; + mp_limb ary[RBIGINT_EMBED_SIZE_MAX]; + } as; }; #define RBIGINT(v) ((struct RBigint*)mrb_ptr(v)) +/* + * flags of struct RBigint + * + * 6..: UNUSED + * 4..5: sign flags + * 00: negative (<--> -1) + * 01: zero (<--> 0) + * 10: positive (<--> +1) + * 11: UNUSED + * 0..3: size of embedded array; 15 means used with heap + */ + +#define RBIGINT_EMBED_SIZE_MASK 0x0f +#define RBIGINT_EMBED_SIZE_OVER RBIGINT_EMBED_SIZE_MASK +#define RBIGINT_EMBED_SIZE_SHIFT 0 +#define RBIGINT_EMBED_SIGN_MASK 0x03 +#define RBIGINT_EMBED_SIGN_SHIFT 4 + +#define RBIGINT_ARY(m) (RBIGINT_EMBED_P(m) ? RBIGINT_EMBED_ARY(m) : RBIGINT_HEAP_ARY(m)) +#define RBIGINT_SIGN(m) (RBIGINT_EMBED_P(m) ? RBIGINT_EMBED_SIGN(m) : RBIGINT_HEAP_SIGN(m)) +#define RBIGINT_SIZE(m) (RBIGINT_EMBED_P(m) ? RBIGINT_EMBED_SIZE(m) : RBIGINT_HEAP_SIZE(m)) + +#define RBIGINT_HEAP_ARY(m) ((m)->as.heap.p) +#define RBIGINT_HEAP_SIGN(m) ((m)->as.heap.sn) +#define RBIGINT_HEAP_SIZE(m) ((m)->as.heap.sz) +#define RBIGINT_SET_HEAP(m) do { \ + (m)->flags |= RBIGINT_EMBED_SIZE_OVER << RBIGINT_EMBED_SIZE_SHIFT; \ +} while (0) +#define RBIGINT_SET_HEAP_SIGN(m, s) do { \ + (m)->as.heap.sn = (s); \ +} while (0) +#define RBIGINT_SET_HEAP_SIZE(m, s) do { \ + (m)->as.heap.sz = (s); \ +} while (0) + +#define RBIGINT_EMBED_P(m) ((((m)->flags >> RBIGINT_EMBED_SIZE_SHIFT) & RBIGINT_EMBED_SIZE_MASK) < RBIGINT_EMBED_SIZE_OVER) +#define RBIGINT_EMBED_ARY(m) ((m)->as.ary) +#define RBIGINT_EMBED_SIGN(m) ((short)(((m)->flags >> RBIGINT_EMBED_SIGN_SHIFT) & RBIGINT_EMBED_SIGN_MASK) - 1) +#define RBIGINT_EMBED_SIZE(m) (size_t)(((m)->flags >> RBIGINT_EMBED_SIZE_SHIFT) & RBIGINT_EMBED_SIZE_MASK) +#define RBIGINT_SET_EMBED_ZERO(m) do { \ + (m)->flags &= ~(RBIGINT_EMBED_SIZE_MASK << RBIGINT_EMBED_SIZE_SHIFT); \ +} while (0) +#define RBIGINT_SET_EMBED_SIGN(m, s) do { \ + (m)->flags = ((((s) + 1) & RBIGINT_EMBED_SIGN_MASK) << RBIGINT_EMBED_SIGN_SHIFT) | \ + ((m)->flags & ~(RBIGINT_EMBED_SIGN_MASK << RBIGINT_EMBED_SIGN_SHIFT)); \ +} while (0) +#define RBIGINT_SET_EMBED_SIZE(m, s) do { \ + size_t s_tmp = (s); \ + mrb_assert((s_tmp) <= RBIGINT_EMBED_SIZE_MAX); \ + RBIGINT_SET_EMBED_ZERO(m); \ + (m)->flags |= (s_tmp) << RBIGINT_EMBED_SIZE_SHIFT; \ +} while (0) + mrb_static_assert_object_size(struct RBigint); #endif /* MRUBY_BIGINT_H */ diff --git a/mrbgems/mruby-bigint/mrbgem.rake b/mrbgems/mruby-bigint/mrbgem.rake index a1039bf57c..edd910b6e9 100644 --- a/mrbgems/mruby-bigint/mrbgem.rake +++ b/mrbgems/mruby-bigint/mrbgem.rake @@ -4,6 +4,8 @@ MRuby::Gem::Specification.new('mruby-bigint') do |spec| spec.summary = 'Integer class extension to multiple-precision' spec.build.defines << "MRB_USE_BIGINT" + spec.add_test_dependency('mruby-numeric-ext', :core => 'mruby-numeric-ext') + spec.build.libmruby_core_objs << Dir.glob(File.join(__dir__, "core/**/*.c")).map { |fn| objfile(fn.relative_path_from(__dir__).pathmap("#{spec.build_dir}/%X")) } diff --git a/mrbgems/mruby-bigint/test/bigint.rb b/mrbgems/mruby-bigint/test/bigint.rb index 8ab09548ed..4f8827edd6 100644 --- a/mrbgems/mruby-bigint/test/bigint.rb +++ b/mrbgems/mruby-bigint/test/bigint.rb @@ -1,10 +1,129 @@ assert 'Bigint basic' do n = 1<<65 assert_equal 36893488147419103232, n - assert_equal 36893488147419104229, n+997 - assert_equal 36893488147419102235, n-997 - assert_equal 36782807682976845922304, n*997 - assert_equal 37004501652376231, n.div(997) +end + +assert 'Bigint +' do + n = 1<<65 + assert_equal 36893488147419103232, n + 0 + assert_equal 36893488147419104229, n + 997 + assert_equal 36893488147419102235, n + -997 + assert_equal(-36893488147419102235, -n + 997) + assert_equal(-36893488147419104229, -n + -997) + assert_equal 73786976294838206464, n + n + assert_equal 0, n + -n + assert_equal 0, -n + n + assert_equal(-73786976294838206464, -n + -n) + assert_equal 36893488147419104229, 997 + n + assert_equal 36893488147419102235, -997 + n +end + +assert 'Bigint -' do + n = 1<<65 + assert_equal 36893488147419103232, n - 0 + assert_equal 36893488147419102235, n - 997 + assert_equal 36893488147419104229, n - -997 + assert_equal(-36893488147419104229, -n - 997) + assert_equal(-36893488147419102235, -n - -997) + assert_equal 0, n - n + assert_equal(-36893488147419104229, -997 - n) + assert_equal(-36893488147419102235, 997 - n) + assert_equal(-36893488147419104229, -997 - n) +end + +assert 'Bigint *' do + n = 1<<65 + assert_equal 0, n * 0 + assert_equal 36782807682976845922304, n * 997 + assert_equal(-36782807682976845922304, n * -997) + assert_equal 36782807682976845922304, 997 * n + assert_equal(-36782807682976845922304, -997 * n) + assert_equal 1361129467683753853853498429727072845824, n * n + assert_equal(-1361129467683753853853498429727072845824, -n * n) + assert_equal(-1361129467683753853853498429727072845824, n * -n) + assert_equal 1361129467683753853853498429727072845824, -n * -n + + # Test multiplication commutativity for large numbers with different limb counts + # This test specifically targets the bug where operands with different + # limb counts would produce different results based on order + a = (2**512) - 1 # 16 limbs + b = 26815615859885194199148049996411692254958731641184786755447122887443528060147093953603748596333806855380063716372972101707507765623893139892867298012168194 # 17 limbs + assert_equal(a * b, b * a) +end + +assert 'Bigint /' do + n = 1<<65 + assert_equal 37004501652376231, n / 997 + assert_equal(-37004501652376232, n / -997) + assert_equal(-37004501652376232, -n / 997) + assert_equal 0, 997 / n + assert_equal 2, 73786976294838206464 / n + assert_equal 1, n / n + assert_equal(-1, -n / n) + assert_equal(-1, n / -n) + assert_equal 1, -n / -n +end + +assert 'Bigint mod' do + n = 1<<65 + assert_equal 925, n % 997 + assert_equal(-72, n % -997) + assert_equal 72, -n % 997 + assert_equal(-925, -n % -997) + assert_equal 0, n % n + assert_equal 997, 997 % n + assert_equal 36893488147419102235, -997 % n + assert_equal(-36893488147419102235, 997 % -n) + assert_equal(-997, -997 % -n) + assert_equal 18446744073709551616, (n / 2) % n +end + +assert 'Bigint divmod' do + n = 1<<65 + assert_equal [37004501652376231, 925], n.divmod(997) + assert_equal [-37004501652376232, -72], n.divmod(-997) + assert_equal [-37004501652376232, 72], (-n).divmod(997) + assert_equal [37004501652376231, -925], (-n).divmod(-997) + assert_equal [1, 0], n.divmod(n) + assert_equal [0, 997], 997.divmod(n) + assert_equal [-1, 36893488147419102235], (-997).divmod(n) + assert_equal [-1, -36893488147419102235], 997.divmod(-n) + assert_equal [0, -997], (-997).divmod(-n) + assert_equal [0, 18446744073709551616], (n / 2).divmod(n) +end + +assert 'Bigint &' do + n = 1<<65 + assert_equal 0, n & 0 + assert_equal 0, 0 & n + assert_equal 0, n & 1 + assert_equal 1, (n + 3) & 1 + assert_equal 2, (n + 3) & 2 + assert_equal 3, (n + 3) & 3 + assert_equal n, n & n + assert_equal 36893488147419103232, n & -1 + assert_equal 36893488147419103232, -1 & n +end + +assert 'Bigint |' do + n = 1<<65 + assert_equal 36893488147419103232, n | 0 + assert_equal 36893488147419103232, 0 | n + assert_equal 36893488147419103233, n | 1 + assert_equal 36893488147419103233, 1 | n + assert_equal 36893488147419103235, n | 3 + assert_equal 36893488147419103232, n | n + assert_equal(-1, n | -1) +end + +assert 'Bigint ^' do + n = 1<<65 + assert_equal 36893488147419103232, n ^ 0 + assert_equal 36893488147419103233, n ^ 1 + assert_equal 36893488147419103235, 3 ^ n + assert_equal 0, n ^ n + assert_equal(-36893488147419103233, n ^ -1) + assert_equal(-36893488147419103231, -n ^ 1) end assert 'Bigint to_s' do @@ -13,6 +132,8 @@ assert_equal(-n, "-11978_571669_96989179607278372168909873645893814254642585755536286462800958278984531968".to_i) n = 0x1197857166996989179607278372168909873645893814254642585755536286462800958278984531968 assert_equal n, "1197857166996989179607278372168909873645893814254642585755536286462800958278984531968".to_i(16) + n = 10 ** 20 + assert_equal "100000000000000000000", n.to_s end assert 'Bigint pow' do @@ -20,4 +141,84 @@ assert_equal n, 2 ** 64 assert_equal n, 1 << 64 assert_equal 2, n >> 63 + + n = 1<<65 + assert_equal n, n ** 1 + assert_equal 1, n ** 0 + assert_equal 1361129467683753853853498429727072845824, n ** 2 + # assert_equal 193128586, n.pow(n, 1234567890) + # assert_equal(-1041439304, n.pow(n, -1234567890)) +end + +assert 'Bigint Integer#pow(e, m) - Montgomery path' do + # Regression: mpz_powm_montgomery() failed to pre-reduce base mod n, + # producing wrong results when base >= n. Also trim() must restore + # the canonical sn=0 when sz becomes 0, otherwise an inconsistent + # zero bignum (sn!=0, sz=0) propagates through the squaring loop. + m = (2**40) + 1 + assert_equal 1, (2**160).pow(2, m) + assert_equal 1, (2**320).pow(2, m) + assert_equal 8, ((2**160) + 1).pow(3, m) + m2 = (2**100) + 3 + assert_equal (3**500) % m2, (3**500).pow(1, m2) + assert_equal ((5**300) ** 7) % m2, (5**300).pow(7, m2) +end + +assert 'Bigint Integer#remainder large operand' do + # Regression: mpz_mod's Barrett path didn't enforce its precondition + # x < 2^(2*bits(m)), so it silently truncated high limbs when x was + # much larger than m^2, producing the wrong remainder. Integer#% + # took the udiv path and worked, but Integer#remainder went through + # mpz_mod and was broken. + m = (2**100) + 3 + assert_equal (3**500) % m, (3**500).remainder(m) + assert_equal (5**500) % ((2**150) + 1), (5**500).remainder((2**150) + 1) + assert_equal (2**400) % ((2**130) + 1), (2**400).remainder((2**130) + 1) +end + +assert 'Bigint abs' do + n = 1<<65 + assert_equal 36893488147419103232, n.abs + assert_equal 36893488147419103232, (-n).abs +end + +assert 'Bigint gcd' do + # zero cases + assert_equal 0, 0.gcd(0) + n = 1 << 200 + assert_equal n, n.gcd(0) + assert_equal n, 0.gcd(n) + + # power-of-2 fast path + assert_equal 1 << 100, (1 << 200).gcd(1 << 100) + assert_equal 1 << 100, (1 << 100).gcd(1 << 200) + assert_equal 1 << 40, (10 ** 50).gcd(1 << 40) + + # negative operands: result is the positive GCD + a = 1 << 200 + b = 3 << 200 + assert_equal a, a.gcd(b) + assert_equal a, (-a).gcd(b) + assert_equal a, a.gcd(-b) + assert_equal a, (-a).gcd(-b) + + # balanced multi-limb with known common factor + fib1000 = (1..1000).inject([0, 1]) { |(x, y), _| [y, x + y] }[0] + common = fib1000 + k, m = 1_000_003, 1_000_033 # small coprime primes + assert_equal common, (common * k).gcd(common * m) + assert_equal common, (common * m).gcd(common * k) + + # unbalanced: small coprime vs large + big = common * k + assert_equal 1, big.gcd(m) + assert_equal 1, m.gcd(big) + + # Fibonacci neighbors are always coprime + f100 = (1..100).inject([0, 1]) { |(x, y), _| [y, x + y] }[0] + f101 = (1..101).inject([0, 1]) { |(x, y), _| [y, x + y] }[0] + assert_equal 1, f100.gcd(f101) + + # Euclidean fallback path: operand sizes differ by several limbs + assert_equal 7, (7 * (1 << 4000)).gcd(7 * 13) end diff --git a/mrbgems/mruby-bin-config/README.md b/mrbgems/mruby-bin-config/README.md new file mode 100644 index 0000000000..bd1fadf473 --- /dev/null +++ b/mrbgems/mruby-bin-config/README.md @@ -0,0 +1,46 @@ +# mruby-bin-config + +mruby-config outputs the configuration used to build mruby, useful for compiling C extensions. + +## Usage + +``` +mruby-config [switches] +``` + +### Options + +- `--cc` - print C compiler name +- `--cflags` - print flags passed to C compiler +- `--cxx` - print C++ compiler name +- `--cxxflags` - print flags passed to C++ compiler +- `--as` - print assembler name +- `--asflags` - print flags passed to assembler +- `--objc` - print Objective C compiler name +- `--objcflags` - print flags passed to Objective C compiler +- `--ld` - print linker name +- `--ldflags` - print flags passed to linker +- `--ldflags-before-libs` - print flags passed to linker before linked libraries +- `--libs` - print linked libraries +- `--libmruby-path` - print libmruby path +- `--help` - print help + +## Examples + +```bash +# Get C compiler +mruby-config --cc + +# Get compiler flags for building extensions +mruby-config --cflags + +# Compile a C extension +gcc $(mruby-config --cflags) -c extension.c + +# Link with mruby +gcc extension.o $(mruby-config --ldflags) $(mruby-config --libs) +``` + +## License + +MIT License - see the mruby LICENSE file. diff --git a/mrbgems/mruby-bin-config/mrbgem.rake b/mrbgems/mruby-bin-config/mrbgem.rake index b4c437640a..3ee4b0e4d1 100644 --- a/mrbgems/mruby-bin-config/mrbgem.rake +++ b/mrbgems/mruby-bin-config/mrbgem.rake @@ -11,24 +11,38 @@ MRuby::Gem::Specification.new('mruby-bin-config') do |spec| else mruby_config_dir = "#{build.build_dir}/bin" end - mruby_config = name + (ENV['OS'] == 'Windows_NT' ? '.bat' : '') + + if ENV['OS'] == 'Windows_NT' + suffix = '.bat' + refvar = '%\\1%' + else + suffix = '' + refvar = '${\\1}' + end + + mruby_config = name + suffix mruby_config_path = "#{mruby_config_dir}/#{mruby_config}" - make_cfg = "#{build.build_dir}/lib/libmruby.flags.mak" + make_cfg = "#{build.build_dir}/#{build.libdir_name}/libmruby.flags.mak" tmplt_path = "#{__dir__}/#{mruby_config}" if iscross build.products << mruby_config_path else - build.bins << mruby_config + build.products << build.define_installer(mruby_config_path) end directory mruby_config_dir - file mruby_config_path => [mruby_config_dir, make_cfg, tmplt_path] do |t| + file mruby_config_path => [__FILE__, mruby_config_dir, make_cfg, tmplt_path] do |t| config = Hash[File.readlines(make_cfg).map!(&:chomp).map! {|l| + l.gsub!(/\$\((\w+)\)/, refvar) l.gsub('\\"', '"').split(' = ', 2).map! {|s| s.sub(/^(?=.)/, 'echo ')} }] tmplt = File.read(tmplt_path) + tmplt.sub!(%r((?<=\A#!/bin/sh\n\n)), <<~SETDIR) + MRUBY_PACKAGE_DIR=$(dirname "$(dirname "$(readlink -f "$0")")") + + SETDIR File.write(t.name, tmplt.gsub(/(#{Regexp.union(*config.keys)})\b/, config)) chmod(0755, t.name) end diff --git a/mrbgems/mruby-bin-config/mruby-config b/mrbgems/mruby-bin-config/mruby-config old mode 100644 new mode 100755 index 3adda9e1a6..27365e650a --- a/mrbgems/mruby-bin-config/mruby-config +++ b/mrbgems/mruby-bin-config/mruby-config @@ -4,8 +4,14 @@ print_help() { echo "Usage: mruby-config [switches]" echo " switches:" - echo " --cc print compiler name" - echo " --cflags print flags passed to compiler" + echo " --cc print C compiler name" + echo " --cflags print flags passed to C compiler" + echo " --cxx print C++ compiler name" + echo " --cxxflags print flags passed to C++ compiler" + echo " --as print assembler name" + echo " --asflags print flags passed to assembler" + echo " --objc print Objective C compiler name" + echo " --objcflags print flags passed to Objective C compiler" echo " --ld print linker name" echo " --ldflags print flags passed to linker" echo " --ldflags-before-libs print flags passed to linker before linked libraries" @@ -23,6 +29,12 @@ while [ $# -gt 0 ]; do case $1 in --cc) echo MRUBY_CC;; --cflags) echo MRUBY_CFLAGS;; + --cxx) echo MRUBY_CXX;; + --cxxflags) echo MRUBY_CXXFLAGS;; + --as) echo MRUBY_AS;; + --asflags) echo MRUBY_ASFLAGS;; + --objc) echo MRUBY_OBJC;; + --objcflags) echo MRUBY_OBJCFLAGS;; --ld) echo MRUBY_LD;; --ldflags) echo MRUBY_LDFLAGS;; --ldflags-before-libs) echo MRUBY_LDFLAGS_BEFORE_LIBS;; diff --git a/mrbgems/mruby-bin-config/mruby-config.bat b/mrbgems/mruby-bin-config/mruby-config.bat index 949fea06f0..9308ccdbf4 100644 --- a/mrbgems/mruby-bin-config/mruby-config.bat +++ b/mrbgems/mruby-bin-config/mruby-config.bat @@ -1,10 +1,18 @@ @echo off +set MRUBY_PACKAGE_DIR=%~dp0.. + :top shift if "%0" equ "" goto :eof if "%0" equ "--cc" goto cc if "%0" equ "--cflags" goto cflags +if "%0" equ "--cxx" goto cxx +if "%0" equ "--cxxflags" goto cxxflags +if "%0" equ "--as" goto as +if "%0" equ "--asflags" goto asflags +if "%0" equ "--objc" goto objc +if "%0" equ "--objcflags" goto objcflags if "%0" equ "--ld" goto ld if "%0" equ "--ldflags" goto ldflags if "%0" equ "--ldflags-before-libs" goto ldflagsbeforelibs @@ -22,6 +30,30 @@ goto top echo MRUBY_CFLAGS goto top +:cxx +echo MRUBY_CXX +goto top + +:cxxflags +echo MRUBY_CXXFLAGS +goto top + +:as +echo MRUBY_AS +goto top + +:asflags +echo MRUBY_ASFLAGS +goto top + +:objc +echo MRUBY_OBJC +goto top + +:objcflags +echo MRUBY_OBJCFLAGS +goto top + :ld echo MRUBY_LD goto top @@ -45,8 +77,14 @@ goto top :showhelp echo Usage: mruby-config [switches] echo switches: -echo --cc print compiler name -echo --cflags print flags passed to compiler +echo --cc print C compiler name +echo --cflags print flags passed to C compiler +echo --cxx print C++ compiler name +echo --cxxflags print flags passed to C++ compiler +echo --as print assembler name +echo --asflags print flags passed to assembler +echo --objc print Objective C compiler name +echo --objcflags print flags passed to Objective C compiler echo --ld print linker name echo --ldflags print flags passed to linker echo --ldflags-before-libs print flags passed to linker before linked libraries diff --git a/mrbgems/mruby-bin-debugger/README.md b/mrbgems/mruby-bin-debugger/README.md new file mode 100644 index 0000000000..64c25fabd1 --- /dev/null +++ b/mrbgems/mruby-bin-debugger/README.md @@ -0,0 +1,63 @@ +# mruby-bin-debugger + +mrdb is the mruby debugger for debugging Ruby scripts. + +## Usage + +``` +mrdb [switches] programfile +``` + +### Options + +- `-b` - load and execute RiteBinary (mrb) file +- `-d` - specify source directory +- `--version` - print the version +- `--copyright` - print the copyright + +## Debugger Commands + +| Command | Abbreviation | Description | +| ------------------ | ------------ | --------------------------- | +| `break` | `b` | Set a breakpoint | +| `continue` | `c` | Continue execution | +| `delete` | `d` | Delete breakpoints | +| `disable` | `dis` | Disable breakpoints | +| `enable` | `en` | Enable breakpoints | +| `eval` | `ev` | Evaluate expression | +| `help` | `h` | Show help | +| `info breakpoints` | `i b` | Show breakpoint information | +| `info locals` | `i l` | Show local variables | +| `list` | `l` | List source code | +| `print` | `p` | Print expression value | +| `quit` | `q` | Quit debugger | +| `run` | `r` | Run program | +| `step` | `s` | Step into | +| `next` | `n` | Step over | + +## Examples + +```bash +# Start debugging a script +mrdb script.rb + +# Debug a compiled binary with source directory +mrdb -b -d /path/to/source script.mrb +``` + +### Debugging Session Example + +``` +$ mrdb script.rb +(mrdb) b 10 # Set breakpoint at line 10 +(mrdb) r # Run the program +(mrdb) p variable # Print variable value +(mrdb) n # Step to next line +(mrdb) i l # Show local variables +(mrdb) c # Continue execution +(mrdb) q # Quit +``` + +## License + +MIT License - see the mruby LICENSE file. diff --git a/mrbgems/mruby-bin-debugger/bintest/mrdb.rb b/mrbgems/mruby-bin-debugger/bintest/mrdb.rb index bc5dc4552c..00d327d1da 100644 --- a/mrbgems/mruby-bin-debugger/bintest/mrdb.rb +++ b/mrbgems/mruby-bin-debugger/bintest/mrdb.rb @@ -1,7 +1,7 @@ require 'open3' require 'tempfile' -class BinTest_MrubyBinDebugger +class BinTest_MRubyBinDebugger @debug1=false @debug2=true @debug3=true @@ -67,9 +67,9 @@ def self.test(rubysource, testcase) cmd = "p a=#{str}" # test case - BinTest_MrubyBinDebugger.test(src, [{:cmd=>cmd[0...1023], :unexp=>'command line too long.'}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>cmd[0...1024], :unexp=>'command line too long.'}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>cmd[0...1025], :exp=>'command line too long.'}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>cmd[0...1023], :unexp=>'command line too long.'}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>cmd[0...1024], :unexp=>'command line too long.'}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>cmd[0...1025], :exp=>'command line too long.'}]) end assert('mruby-bin-debugger(mrdb) command: "break"') do @@ -82,10 +82,10 @@ def self.test(rubysource, testcase) tc << {:cmd=>"br", :unexp=>INVCMD} tc << {:cmd=>"brea", :unexp=>INVCMD} tc << {:cmd=>"break", :unexp=>INVCMD} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"bl", :exp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"breaka", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"bl", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"breaka", :exp=>INVCMD}]) end assert('mruby-bin-debugger(mrdb) command: "continue"') do @@ -93,13 +93,13 @@ def self.test(rubysource, testcase) src = "foo = 'foo'\n" # test case - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"c", :unexp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"co", :unexp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"continu", :unexp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"continue", :unexp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"c", :unexp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"co", :unexp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"continu", :unexp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"continue", :unexp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"cn", :exp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"continuee", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"cn", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"continuee", :exp=>INVCMD}]) end assert('mruby-bin-debugger(mrdb) command: "delete"') do @@ -112,10 +112,10 @@ def self.test(rubysource, testcase) tc << {:cmd=>"de 1", :unexp=>INVCMD} tc << {:cmd=>"delet 1", :unexp=>INVCMD} tc << {:cmd=>"delete 1", :unexp=>INVCMD} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"dd 1", :exp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"deletee 1", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"dd 1", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"deletee 1", :exp=>INVCMD}]) end assert('mruby-bin-debugger(mrdb) command: "disable"') do @@ -128,11 +128,11 @@ def self.test(rubysource, testcase) tc << {:cmd=>"disa", :unexp=>INVCMD} tc << {:cmd=>"disabl", :unexp=>INVCMD} tc << {:cmd=>"disable", :unexp=>INVCMD} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"di", :exp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"disb", :exp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"disablee", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"di", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"disb", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"disablee", :exp=>INVCMD}]) end assert('mruby-bin-debugger(mrdb) command: "enable"') do @@ -145,11 +145,11 @@ def self.test(rubysource, testcase) tc << {:cmd=>"ena", :unexp=>INVCMD} tc << {:cmd=>"enabl", :unexp=>INVCMD} tc << {:cmd=>"enable", :unexp=>INVCMD} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"e", :exp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"enb", :exp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"enablee", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"e", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"enb", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"enablee", :exp=>INVCMD}]) end assert('mruby-bin-debugger(mrdb) command: "eval"') do @@ -161,11 +161,11 @@ def self.test(rubysource, testcase) tc << {:cmd=>"ev", :unexp=>INVCMD} tc << {:cmd=>"eva", :unexp=>INVCMD} tc << {:cmd=>"eval", :unexp=>INVCMD} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"e", :exp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"evl", :exp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"evall", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"e", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"evl", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"evall", :exp=>INVCMD}]) end assert('mruby-bin-debugger(mrdb) command: "help"') do @@ -178,10 +178,10 @@ def self.test(rubysource, testcase) tc << {:cmd=>"he", :unexp=>INVCMD} tc << {:cmd=>"hel", :unexp=>INVCMD} tc << {:cmd=>"help", :unexp=>INVCMD} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"hl", :exp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"helpp", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"hl", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"helpp", :exp=>INVCMD}]) end assert('mruby-bin-debugger(mrdb) command: "info breakpoints"') do @@ -195,12 +195,12 @@ def self.test(rubysource, testcase) tc << {:cmd=>"i br", :unexp=>INVCMD} tc << {:cmd=>"inf breakpoint", :unexp=>INVCMD} tc << {:cmd=>"info breakpoints", :unexp=>INVCMD} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"ii b", :exp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"i bb", :exp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"infoo breakpoints", :exp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"info breakpointss", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"ii b", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"i bb", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"infoo breakpoints", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"info breakpointss", :exp=>INVCMD}]) end assert('mruby-bin-debugger(mrdb) command: "list"') do @@ -213,10 +213,10 @@ def self.test(rubysource, testcase) tc << {:cmd=>"li", :unexp=>INVCMD} tc << {:cmd=>"lis", :unexp=>INVCMD} tc << {:cmd=>"list", :unexp=>INVCMD} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"ll", :exp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"listt", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"ll", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"listt", :exp=>INVCMD}]) end assert('mruby-bin-debugger(mrdb) command: "print"') do @@ -229,10 +229,10 @@ def self.test(rubysource, testcase) tc << {:cmd=>"pr", :unexp=>INVCMD} tc << {:cmd=>"prin", :unexp=>INVCMD} tc << {:cmd=>"print", :unexp=>INVCMD} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"pp", :exp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"printt", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"pp", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"printt", :exp=>INVCMD}]) end assert('mruby-bin-debugger(mrdb) command: "quit"') do @@ -240,13 +240,13 @@ def self.test(rubysource, testcase) src = "foo = 'foo'\n" # test case - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"q", :unexp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"qu", :unexp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"qui", :unexp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"quit", :unexp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"q", :unexp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"qu", :unexp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"qui", :unexp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"quit", :unexp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"qq", :exp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"quitt", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"qq", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"quitt", :exp=>INVCMD}]) end assert('mruby-bin-debugger(mrdb) command: "run"') do @@ -254,12 +254,12 @@ def self.test(rubysource, testcase) src = "foo = 'foo'\n" # test case - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"r", :unexp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"ru", :unexp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"run", :unexp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"r", :unexp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"ru", :unexp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"run", :unexp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"rr", :exp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"runn", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"rr", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"runn", :exp=>INVCMD}]) end assert('mruby-bin-debugger(mrdb) command: "step"') do @@ -276,8 +276,8 @@ def self.test(rubysource, testcase) tc << {:cmd=>"st", :unexp=>INVCMD} tc << {:cmd=>"ste", :unexp=>INVCMD} tc << {:cmd=>"step", :unexp=>INVCMD} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"ss", :exp=>INVCMD}]) - BinTest_MrubyBinDebugger.test(src, [{:cmd=>"stepp", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"ss", :exp=>INVCMD}]) + BinTest_MRubyBinDebugger.test(src, [{:cmd=>"stepp", :exp=>INVCMD}]) end diff --git a/mrbgems/mruby-bin-debugger/bintest/print.rb b/mrbgems/mruby-bin-debugger/bintest/print.rb index 63ebded3e4..fc23c93070 100644 --- a/mrbgems/mruby-bin-debugger/bintest/print.rb +++ b/mrbgems/mruby-bin-debugger/bintest/print.rb @@ -2,7 +2,7 @@ require 'tempfile' require 'strscan' -class BinTest_MrubyBinDebugger +class BinTest_MRubyBinDebugger # @debug1=false # @debug2=true def self.test(rubysource, testcase) @@ -64,7 +64,7 @@ def self.test(rubysource, testcase) tc = [] tc << {:cmd=>"p", :exp=>"Parameter not specified."} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end assert('mruby-bin-debugger(print) normal') do @@ -84,7 +84,7 @@ def self.test(rubysource, testcase) tc << {:cmd=>"s"} tc << {:cmd=>"p bar", :exp=>'$4 = "foofoo"'} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end assert('mruby-bin-debugger(print) error') do @@ -96,7 +96,7 @@ def self.test(rubysource, testcase) tc << {:cmd=>"p (1+2", :exp=>'$1 = line 1: syntax error'} tc << {:cmd=>"p bar", :exp=>'$2 = undefined method'} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end # Kernel#instance_eval(string) doesn't work multiple statements. @@ -116,7 +116,7 @@ def self.test(rubysource, testcase) tc << {:cmd=>"s",} tc << {:cmd=>"p x", :exp=>"3"} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end =end @@ -128,7 +128,7 @@ def self.test(rubysource, testcase) tc = [] tc << {:cmd=>"p self", :exp=>'$1 = main'} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end assert('mruby-bin-debugger(print) scope:class') do @@ -144,7 +144,7 @@ class TestClassScope tc << {:cmd=>"s"} tc << {:cmd=>"p self", :exp=>'$1 = TestClassScope'} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end assert('mruby-bin-debugger(print) scope:module') do @@ -160,7 +160,7 @@ class TestModuleScope tc << {:cmd=>"s"} tc << {:cmd=>"p self", :exp=>'$1 = TestModuleScope'} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end assert('mruby-bin-debugger(print) scope:instance method') do @@ -179,7 +179,7 @@ def m tc << {:cmd=>"r"} tc << {:cmd=>"p self", :exp=>'$1 = #"r"} tc << {:cmd=>"p self", :exp=>'$1 = TestClassMethodScope'} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end assert('mruby-bin-debugger(print) scope:block') do @@ -231,7 +231,7 @@ def m tc << {:cmd=>"c"} tc << {:cmd=>"p self", :exp=>'$3 = #"c"} tc << {:cmd=>"p lv", :exp=>'$3 = "top"'} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end assert('mruby-bin-debugger(print) same name:instance variable') do @@ -293,7 +293,7 @@ def m tc << {:cmd=>"c"} tc << {:cmd=>"p @iv", :exp=>'$3 = "top"'} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end # Kernel#instance_eval(string) doesn't work const. @@ -329,7 +329,7 @@ def m 1.times { tc << {:cmd=>"s"} } tc << {:cmd=>"p CONST", :exp=>"top"} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end =end @@ -353,7 +353,7 @@ def m tc << {:cmd=>"p 1e4", :exp=>'$11 = 10000'} tc << {:cmd=>"p -0.1e-2", :exp=>'$12 = -0.001'} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end assert('mruby-bin-debugger(print) Literal:String') do @@ -394,7 +394,7 @@ def m tc << {:cmd=>'p %q!\\C-a\\C-z!', :exp=>'$19 = "\\\\C-a\\\\C-z"'} tc << {:cmd=>'p %q!#{foo+bar}!', :exp=>'$20 = "\\#{foo+bar}"'} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end assert('mruby-bin-debugger(print) Literal:Array') do @@ -416,7 +416,7 @@ def m tc << {:cmd=>'p %w[3.14 A\ &\ B #{foo}]', :exp=>'$4 = ["3.14", "A & B", "\#{foo}"]'} tc << {:cmd=>'p %W[3.14 A\ &\ B #{foo}]', :exp=>'$5 = ["3.14", "A & B", "foo"]'} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end assert('mruby-bin-debugger(print) Literal:Hash') do @@ -433,13 +433,13 @@ def m tc << {:cmd=>"s"} tc << {:cmd=>'p {}', :exp=>'$1 = {}'} - tc << {:cmd=>'p {"one"=>1,"two"=>2}', :exp=>'$2 = {"one"=>1, "two"=>2}'} - tc << {:cmd=>'p {:eins=>"1", :zwei=>"2", }', :exp=>'$3 = {:eins=>"1", :zwei=>"2"}'} - tc << {:cmd=>'p {uno:"one", dos: 2}', :exp=>'$4 = {:uno=>"one", :dos=>2}'} - tc << {:cmd=>'p {"one"=>1, :zwei=>2, tres:3}', :exp=>'$5 = {"one"=>1, :zwei=>2, :tres=>3}'} - tc << {:cmd=>'p {:foo=>"#{foo}",:bar=>"#{bar}"}', :exp=>'$6 = {:foo=>"foo", :bar=>"bar"}'} + tc << {:cmd=>'p {"one"=>1,"two"=>2}', :exp=>'$2 = {"one" => 1, "two" => 2}'} + tc << {:cmd=>'p {eins: "1", zwei: "2",}', :exp=>'$3 = {eins: "1", zwei: "2"}'} + tc << {:cmd=>'p {uno: "one", dos: 2}', :exp=>'$4 = {uno: "one", dos: 2}'} + tc << {:cmd=>'p {"one"=>1, zwei: 2, tres: 3}', :exp=>'$5 = {"one" => 1, zwei: 2, tres: 3}'} + tc << {:cmd=>'p {foo: "#{foo}",bar: "#{bar}"}', :exp=>'$6 = {foo: "foo", bar: "bar"}'} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end assert('mruby-bin-debugger(print) Literal:Range') do @@ -456,7 +456,7 @@ def m tc << {:cmd=>'p "1" .. "9"', :exp=>'$5 = "1".."9"'} tc << {:cmd=>'p "A" ... "Z"', :exp=>'$6 = "A"..."Z"'} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end assert('mruby-bin-debugger(print) Literal:Symbol') do @@ -479,7 +479,7 @@ def m tc << {:cmd=>'p :"#{foo} baz"', :exp=>'$5 = :"foo baz"'} tc << {:cmd=>'p %s!symsym!', :exp=>'$6 = :symsym'} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end assert('mruby-bin-debugger(print) Unary operation') do @@ -495,7 +495,7 @@ def m tc << {:cmd=>'p !nil', :exp=>'$5 = true'} tc << {:cmd=>'p !1', :exp=>'$6 = false'} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end assert('mruby-bin-debugger(print) Binary operation') do @@ -545,7 +545,7 @@ def m tc << {:cmd=>'p false or true', :exp=>'$24 = true'} tc << {:cmd=>'p false and true', :exp=>'$25 = false'} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end assert('mruby-bin-debugger(print) Ternary operation') do @@ -569,7 +569,7 @@ def m tc << {:cmd=>'p false ? "true" : "false"', :exp=>'$4 = "false"'} tc << {:cmd=>'p nil ? "true" : "false"', :exp=>'$5 = "false"'} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end assert('mruby-bin-debugger(print) Substitution:simple') do @@ -593,7 +593,7 @@ def m tc << {:cmd=>'p undefined=-1', :exp=>'$3 = -1'} tc << {:cmd=>'p "#{undefined}"', :exp=>'$4 = undefined method'} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end assert('mruby-bin-debugger(print) Substitution:self') do @@ -631,7 +631,7 @@ def m tc << {:cmd=>'p undefined=-1', :exp=>'$14 = -1'} tc << {:cmd=>'p "#{undefined}"', :exp=>'$15 = undefined method'} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end assert('mruby-bin-debugger(print) Substitution:multiple') do @@ -661,7 +661,7 @@ def m # tc << {:cmd=>'p a,*b=[123, 456, 789]'} # tc << {:cmd=>'p [a,b]', :exp=>'[123, [456, 789]]'} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end assert('mruby-bin-debugger(print) Substitution:self') do @@ -699,5 +699,5 @@ def m tc << {:cmd=>'p undefined=-1', :exp=>'$14 = -1'} tc << {:cmd=>'p "#{undefined}"', :exp=>'$15 = undefined method'} - BinTest_MrubyBinDebugger.test(src, tc) + BinTest_MRubyBinDebugger.test(src, tc) end diff --git a/mrbgems/mruby-bin-debugger/mrbgem.rake b/mrbgems/mruby-bin-debugger/mrbgem.rake index 091851dd43..3372950c53 100644 --- a/mrbgems/mruby-bin-debugger/mrbgem.rake +++ b/mrbgems/mruby-bin-debugger/mrbgem.rake @@ -2,8 +2,9 @@ MRuby::Gem::Specification.new('mruby-bin-debugger') do |spec| spec.license = 'MIT' spec.author = 'mruby developers' spec.summary = 'mruby debugger command' - spec.build.defines |= %w(MRB_USE_DEBUG_HOOK) + spec.build.defines << "MRB_USE_DEBUG_HOOK" spec.add_dependency('mruby-eval', :core => 'mruby-eval') + spec.add_test_dependency('mruby-bin-mrbc', :core => 'mruby-bin-mrbc') spec.bins = %w(mrdb) end diff --git a/mrbgems/mruby-bin-debugger/tools/mrdb/apibreak.c b/mrbgems/mruby-bin-debugger/tools/mrdb/apibreak.c index a69d65dd9c..6eb4973f17 100644 --- a/mrbgems/mruby-bin-debugger/tools/mrdb/apibreak.c +++ b/mrbgems/mruby-bin-debugger/tools/mrdb/apibreak.c @@ -65,21 +65,32 @@ static int32_t get_break_index(mrb_debug_context *dbg, uint32_t bpno) { uint32_t i; - int32_t index; - char hit = FALSE; - for(i = 0 ; i < dbg->bpnum; i++) { - if (dbg->bp[i].bpno == bpno) { - hit = TRUE; - index = i; - break; - } + for (i = 0; i < dbg->bpnum; i++) { + if (dbg->bp[i].bpno == bpno) return i; } + return MRB_DEBUG_BREAK_INVALID_NO; +} - if (hit == FALSE) { - return MRB_DEBUG_BREAK_INVALID_NO; +static int32_t +alloc_breakpoint(mrb_debug_context *dbg, mrb_debug_bptype type) +{ + int32_t index; + + if (dbg->bpnum >= MAX_BREAKPOINT) { + return MRB_DEBUG_BREAK_NUM_OVER; + } + if (dbg->next_bpno > MAX_BREAKPOINTNO) { + return MRB_DEBUG_BREAK_NO_OVER; } + index = dbg->bpnum; + dbg->bp[index].bpno = dbg->next_bpno; + dbg->next_bpno++; + dbg->bp[index].enable = TRUE; + dbg->bp[index].type = type; + dbg->bpnum++; + return index; } @@ -189,21 +200,12 @@ int32_t mrb_debug_set_break_line(mrb_state *mrb, mrb_debug_context *dbg, const char *file, uint16_t lineno) { int32_t index; - char* set_file; uint16_t result; if ((mrb == NULL)||(dbg == NULL)||(file == NULL)) { return MRB_DEBUG_INVALID_ARGUMENT; } - if (dbg->bpnum >= MAX_BREAKPOINT) { - return MRB_DEBUG_BREAK_NUM_OVER; - } - - if (dbg->next_bpno > MAX_BREAKPOINTNO) { - return MRB_DEBUG_BREAK_NO_OVER; - } - /* file and lineno check. */ result = check_file_lineno(mrb, dbg->root_irep, file, lineno); if (result == 0) { @@ -213,17 +215,11 @@ mrb_debug_set_break_line(mrb_state *mrb, mrb_debug_context *dbg, const char *fil return MRB_DEBUG_BREAK_INVALID_LINENO; } - set_file = mrdb_strdup(mrb, file); + index = alloc_breakpoint(dbg, MRB_DEBUG_BPTYPE_LINE); + if (index < 0) return index; - index = dbg->bpnum; - dbg->bp[index].bpno = dbg->next_bpno; - dbg->next_bpno++; - dbg->bp[index].enable = TRUE; - dbg->bp[index].type = MRB_DEBUG_BPTYPE_LINE; + dbg->bp[index].point.linepoint.file = mrdb_strdup(mrb, file); dbg->bp[index].point.linepoint.lineno = lineno; - dbg->bpnum++; - - dbg->bp[index].point.linepoint.file = set_file; return dbg->bp[index].bpno; } @@ -239,34 +235,22 @@ mrb_debug_set_break_method(mrb_state *mrb, mrb_debug_context *dbg, const char *c return MRB_DEBUG_INVALID_ARGUMENT; } - if (dbg->bpnum >= MAX_BREAKPOINT) { - return MRB_DEBUG_BREAK_NUM_OVER; - } - - if (dbg->next_bpno > MAX_BREAKPOINTNO) { - return MRB_DEBUG_BREAK_NO_OVER; - } - - if (class_name != NULL) { - set_class = mrdb_strdup(mrb, class_name); - } - else { - set_class = NULL; - } - + set_class = class_name != NULL ? mrdb_strdup(mrb, class_name) : NULL; set_method = mrdb_strdup(mrb, method_name); if (set_method == NULL) { mrb_free(mrb, set_class); + return MRB_DEBUG_NOBUF; + } + + index = alloc_breakpoint(dbg, MRB_DEBUG_BPTYPE_METHOD); + if (index < 0) { + mrb_free(mrb, set_method); + mrb_free(mrb, set_class); + return index; } - index = dbg->bpnum; - dbg->bp[index].bpno = dbg->next_bpno; - dbg->next_bpno++; - dbg->bp[index].enable = TRUE; - dbg->bp[index].type = MRB_DEBUG_BPTYPE_METHOD; dbg->bp[index].point.methodpoint.method_name = set_method; dbg->bp[index].point.methodpoint.class_name = set_class; - dbg->bpnum++; return dbg->bp[index].bpno; } @@ -341,7 +325,7 @@ mrb_debug_delete_break(mrb_state *mrb, mrb_debug_context *dbg, uint32_t bpno) free_breakpoint(mrb, &dbg->bp[index]); - for(i = index ; i < dbg->bpnum; i++) { + for (i = index; i < dbg->bpnum; i++) { if ((i + 1) == dbg->bpnum) { dbg->bp[i] = (mrb_debug_breakpoint){0}; } @@ -364,7 +348,7 @@ mrb_debug_delete_break_all(mrb_state *mrb, mrb_debug_context *dbg) return MRB_DEBUG_INVALID_ARGUMENT; } - for(i = 0 ; i < dbg->bpnum ; i++) { + for (i = 0; i < dbg->bpnum; i++) { free_breakpoint(mrb, &dbg->bp[i]); } @@ -401,7 +385,7 @@ mrb_debug_enable_break_all(mrb_state *mrb, mrb_debug_context *dbg) return MRB_DEBUG_INVALID_ARGUMENT; } - for(i = 0 ; i < dbg->bpnum; i++) { + for (i = 0; i < dbg->bpnum; i++) { dbg->bp[i].enable = TRUE; } @@ -436,7 +420,7 @@ mrb_debug_disable_break_all(mrb_state *mrb, mrb_debug_context *dbg) return MRB_DEBUG_INVALID_ARGUMENT; } - for(i = 0 ; i < dbg->bpnum; i++) { + for (i = 0; i < dbg->bpnum; i++) { dbg->bp[i].enable = FALSE; } @@ -470,7 +454,7 @@ mrb_debug_check_breakpoint_line(mrb_state *mrb, mrb_debug_context *dbg, const ch } bp = dbg->bp; - for(i=0; ibpnum; i++) { + for (i=0; ibpnum; i++) { switch (bp->type) { case MRB_DEBUG_BPTYPE_LINE: if (bp->enable == TRUE) { @@ -504,7 +488,7 @@ mrb_debug_check_breakpoint_method(mrb_state *mrb, mrb_debug_context *dbg, struct } bp = dbg->bp; - for(i=0; ibpnum; i++) { + for (i=0; ibpnum; i++) { if (bp->type == MRB_DEBUG_BPTYPE_METHOD) { if (bp->enable == TRUE) { bpno = compare_break_method(mrb, bp, class_obj, method_sym, isCfunc); diff --git a/mrbgems/mruby-bin-debugger/tools/mrdb/apibreak.h b/mrbgems/mruby-bin-debugger/tools/mrdb/apibreak.h index 08f1d8080f..cc272cbebc 100644 --- a/mrbgems/mruby-bin-debugger/tools/mrdb/apibreak.h +++ b/mrbgems/mruby-bin-debugger/tools/mrdb/apibreak.h @@ -9,18 +9,18 @@ #include #include "mrdb.h" -int32_t mrb_debug_set_break_line(mrb_state *, mrb_debug_context *, const char *, uint16_t); -int32_t mrb_debug_set_break_method(mrb_state *, mrb_debug_context *, const char *, const char *); -int32_t mrb_debug_get_breaknum(mrb_state *, mrb_debug_context *); -int32_t mrb_debug_get_break_all(mrb_state *, mrb_debug_context *, uint32_t, mrb_debug_breakpoint bp[]); -int32_t mrb_debug_get_break(mrb_state *, mrb_debug_context *, uint32_t, mrb_debug_breakpoint *); -int32_t mrb_debug_delete_break(mrb_state *, mrb_debug_context *, uint32_t); -int32_t mrb_debug_delete_break_all(mrb_state *, mrb_debug_context *); -int32_t mrb_debug_enable_break(mrb_state *, mrb_debug_context *, uint32_t); -int32_t mrb_debug_enable_break_all(mrb_state *, mrb_debug_context *); -int32_t mrb_debug_disable_break(mrb_state *, mrb_debug_context *, uint32_t); -int32_t mrb_debug_disable_break_all(mrb_state *, mrb_debug_context *); -int32_t mrb_debug_check_breakpoint_line(mrb_state *, mrb_debug_context *, const char *, uint16_t); -int32_t mrb_debug_check_breakpoint_method(mrb_state *, mrb_debug_context *, struct RClass *, mrb_sym, mrb_bool*); +int32_t mrb_debug_set_break_line(mrb_state*, mrb_debug_context*, const char*, uint16_t); +int32_t mrb_debug_set_break_method(mrb_state*, mrb_debug_context*, const char*, const char*); +int32_t mrb_debug_get_breaknum(mrb_state*, mrb_debug_context*); +int32_t mrb_debug_get_break_all(mrb_state*, mrb_debug_context*, uint32_t, mrb_debug_breakpoint bp[]); +int32_t mrb_debug_get_break(mrb_state*, mrb_debug_context*, uint32_t, mrb_debug_breakpoint*); +int32_t mrb_debug_delete_break(mrb_state*, mrb_debug_context*, uint32_t); +int32_t mrb_debug_delete_break_all(mrb_state*, mrb_debug_context*); +int32_t mrb_debug_enable_break(mrb_state*, mrb_debug_context*, uint32_t); +int32_t mrb_debug_enable_break_all(mrb_state*, mrb_debug_context*); +int32_t mrb_debug_disable_break(mrb_state*, mrb_debug_context*, uint32_t); +int32_t mrb_debug_disable_break_all(mrb_state*, mrb_debug_context*); +int32_t mrb_debug_check_breakpoint_line(mrb_state*, mrb_debug_context*, const char*, uint16_t); +int32_t mrb_debug_check_breakpoint_method(mrb_state*, mrb_debug_context*, struct RClass*, mrb_sym, mrb_bool*); #endif /* APIBREAK_H_ */ diff --git a/mrbgems/mruby-bin-debugger/tools/mrdb/apilist.c b/mrbgems/mruby-bin-debugger/tools/mrdb/apilist.c index 27db02b48e..0fc087bdf9 100644 --- a/mrbgems/mruby-bin-debugger/tools/mrdb/apilist.c +++ b/mrbgems/mruby-bin-debugger/tools/mrdb/apilist.c @@ -200,7 +200,7 @@ mrb_debug_get_source(mrb_state *mrb, mrdb_state *mrdb, const char *srcpath, cons break; } - mrb_free(mrb, (void *)search_path[1]); + mrb_free(mrb, (void*)search_path[1]); return path; } diff --git a/mrbgems/mruby-bin-debugger/tools/mrdb/apilist.h b/mrbgems/mruby-bin-debugger/tools/mrdb/apilist.h index 6c41078851..542fda1049 100644 --- a/mrbgems/mruby-bin-debugger/tools/mrdb/apilist.h +++ b/mrbgems/mruby-bin-debugger/tools/mrdb/apilist.h @@ -8,7 +8,7 @@ #include #include "mrdb.h" -int32_t mrb_debug_list(mrb_state *, mrb_debug_context *, char *, uint16_t, uint16_t); -char* mrb_debug_get_source(mrb_state *, mrdb_state *, const char *, const char *); +int32_t mrb_debug_list(mrb_state*, mrb_debug_context*, char*, uint16_t, uint16_t); +char* mrb_debug_get_source(mrb_state*, mrdb_state*, const char*, const char *); #endif /* APILIST_H_ */ diff --git a/mrbgems/mruby-bin-debugger/tools/mrdb/apiprint.c b/mrbgems/mruby-bin-debugger/tools/mrdb/apiprint.c index 9093a4f0ee..c78f3b6289 100644 --- a/mrbgems/mruby-bin-debugger/tools/mrdb/apiprint.c +++ b/mrbgems/mruby-bin-debugger/tools/mrdb/apiprint.c @@ -11,30 +11,30 @@ #include #include #include -#include +#include #include "apiprint.h" static void mrdb_check_syntax(mrb_state *mrb, mrb_debug_context *dbg, const char *expr, size_t len) { - mrbc_context *c; + mrb_ccontext *c; - c = mrbc_context_new(mrb); + c = mrb_ccontext_new(mrb); c->no_exec = TRUE; c->capture_errors = TRUE; - mrbc_filename(mrb, c, (const char*)dbg->prvfile); + mrb_ccontext_filename(mrb, c, (const char*)dbg->prvfile); c->lineno = dbg->prvline; /* Load program */ mrb_load_nstring_cxt(mrb, expr, len, c); - mrbc_context_free(mrb, c); + mrb_ccontext_free(mrb, c); } mrb_value mrb_debug_eval(mrb_state *mrb, mrb_debug_context *dbg, const char *expr, size_t len, mrb_bool *exc, int direct_eval) { - void (*tmp)(struct mrb_state *, const struct mrb_irep *, const mrb_code *, mrb_value *); + void (*tmp)(mrb_state*, const struct mrb_irep*, const mrb_code*, mrb_value*); mrb_value ruby_code; mrb_value s; mrb_value v; @@ -68,14 +68,17 @@ mrb_debug_eval(mrb_state *mrb, mrb_debug_context *dbg, const char *expr, size_t recv = dbg->regs[0]; - v = mrb_funcall_id(mrb, recv, MRB_SYM(instance_eval), 1, ruby_code); + v = mrb_funcall_argv(mrb, recv, MRB_SYM(instance_eval), 1, &ruby_code); } - - if (exc) { - *exc = mrb_obj_is_kind_of(mrb, v, mrb->eException_class); + mrb_bool is_exc = mrb_obj_is_kind_of(mrb, v, E_EXCEPTION); + if (is_exc) { + s = mrb_exc_get_output(mrb, mrb_obj_ptr(v)); + } + else { + s = mrb_inspect(mrb, v); } - s = mrb_inspect(mrb, v); + if (exc) *exc = is_exc; /* enable code_fetch_hook */ mrb->code_fetch_hook = tmp; diff --git a/mrbgems/mruby-bin-debugger/tools/mrdb/apistring.h b/mrbgems/mruby-bin-debugger/tools/mrdb/apistring.h index 33737e7fd8..4e77a55ff2 100644 --- a/mrbgems/mruby-bin-debugger/tools/mrdb/apistring.h +++ b/mrbgems/mruby-bin-debugger/tools/mrdb/apistring.h @@ -5,7 +5,7 @@ #ifndef APISTRING_H_ #define APISTRING_H_ -#include "mruby.h" +#include /* both functions return a null pointer on failure */ char *mrdb_strndup(mrb_state *mrb, const char *s, size_t size); diff --git a/mrbgems/mruby-bin-debugger/tools/mrdb/cmdbreak.c b/mrbgems/mruby-bin-debugger/tools/mrdb/cmdbreak.c index bc9937e947..7dff7e2b2d 100644 --- a/mrbgems/mruby-bin-debugger/tools/mrdb/cmdbreak.c +++ b/mrbgems/mruby-bin-debugger/tools/mrdb/cmdbreak.c @@ -26,7 +26,7 @@ #define BREAK_ERR_MSG_INVALIDARG "Internal error." #define BREAK_ERR_MSG_BLANK "Try \'help break\' for more information." #define BREAK_ERR_MSG_RANGEOVER "The line number range is from 1 to 65535." -#define BREAK_ERR_MSG_NUMOVER "Exceeded the setable number of breakpoint." +#define BREAK_ERR_MSG_NUMOVER "Exceeded the settable number of breakpoint." #define BREAK_ERR_MSG_NOOVER "Breakno is over the available number.Please 'quit' and restart mrdb." #define BREAK_ERR_MSG_INVALIDSTR "String \'%s\' is invalid.\n" #define BREAK_ERR_MSG_INVALIDLINENO "Line %d in file \"%s\" is unavailable.\n" @@ -42,8 +42,8 @@ #define LINENO_MAX_DIGIT 6 #define BPNO_LETTER_NUM 9 -typedef int32_t (*all_command_func)(mrb_state *, mrb_debug_context *); -typedef int32_t (*select_command_func)(mrb_state *, mrb_debug_context *, uint32_t); +typedef int32_t (*all_command_func)(mrb_state*, mrb_debug_context*); +typedef int32_t (*select_command_func)(mrb_state*, mrb_debug_context*, uint32_t); static void print_api_common_error(int32_t error) @@ -61,7 +61,7 @@ print_api_common_error(int32_t error) #define STRTOUL(ul,s) { \ int i; \ ul = 0; \ - for(i=0; ISDIGIT(s[i]); i++) ul = 10*ul + (s[i] -'0'); \ + for (i=0; ISDIGIT(s[i]); i++) ul = 10*ul + (s[i] -'0'); \ } static int32_t @@ -106,7 +106,7 @@ exe_set_command_select(mrb_state *mrb, mrdb_state *mrdb, select_command_func fun int32_t bpno = 0; int32_t i; - for(i=1; iwcnt; i++) { + for (i=1; iwcnt; i++) { ps = mrdb->words[i]; bpno = parse_breakpoint_no(ps); if (bpno == 0) { @@ -199,7 +199,7 @@ info_break_all(mrb_state *mrb, mrdb_state *mrdb) return; } puts(BREAK_INFO_MSG_HEADER); - for(i = 0 ; i < bpnum ; i++) { + for (i = 0; i < bpnum; i++) { print_breakpoint(&bp_list[i]); } @@ -216,7 +216,7 @@ info_break_select(mrb_state *mrb, mrdb_state *mrdb) mrb_bool isFirst = TRUE; int32_t i; - for(i=2; iwcnt; i++) { + for (i=2; iwcnt; i++) { ps = mrdb->words[i]; bpno = parse_breakpoint_no(ps); if (bpno == 0) { @@ -397,40 +397,30 @@ dbgcmd_info_break(mrb_state *mrb, mrdb_state *mrdb) return DBGST_PROMPT; } -dbgcmd_state -dbgcmd_delete(mrb_state *mrb, mrdb_state *mrdb) +static dbgcmd_state +dbgcmd_set_breakpoint(mrb_state *mrb, mrdb_state *mrdb, + all_command_func all_func, select_command_func select_func) { - mrb_bool ret = FALSE; - - ret = exe_set_command_all(mrb, mrdb, mrb_debug_delete_break_all); - if (ret != TRUE) { - exe_set_command_select(mrb, mrdb, mrb_debug_delete_break); + if (!exe_set_command_all(mrb, mrdb, all_func)) { + exe_set_command_select(mrb, mrdb, select_func); } - return DBGST_PROMPT; } dbgcmd_state -dbgcmd_enable(mrb_state *mrb, mrdb_state *mrdb) +dbgcmd_delete(mrb_state *mrb, mrdb_state *mrdb) { - mrb_bool ret = FALSE; - - ret = exe_set_command_all(mrb, mrdb, mrb_debug_enable_break_all); - if (ret != TRUE) { - exe_set_command_select(mrb, mrdb, mrb_debug_enable_break); - } + return dbgcmd_set_breakpoint(mrb, mrdb, mrb_debug_delete_break_all, mrb_debug_delete_break); +} - return DBGST_PROMPT; +dbgcmd_state +dbgcmd_enable(mrb_state *mrb, mrdb_state *mrdb) +{ + return dbgcmd_set_breakpoint(mrb, mrdb, mrb_debug_enable_break_all, mrb_debug_enable_break); } dbgcmd_state dbgcmd_disable(mrb_state *mrb, mrdb_state *mrdb) { - mrb_bool ret = FALSE; - - ret = exe_set_command_all(mrb, mrdb, mrb_debug_disable_break_all); - if (ret != TRUE) { - exe_set_command_select(mrb, mrdb, mrb_debug_disable_break); - } - return DBGST_PROMPT; + return dbgcmd_set_breakpoint(mrb, mrdb, mrb_debug_disable_break_all, mrb_debug_disable_break); } diff --git a/mrbgems/mruby-bin-debugger/tools/mrdb/cmdmisc.c b/mrbgems/mruby-bin-debugger/tools/mrdb/cmdmisc.c index 0714f3f213..6beed461da 100644 --- a/mrbgems/mruby-bin-debugger/tools/mrdb/cmdmisc.c +++ b/mrbgems/mruby-bin-debugger/tools/mrdb/cmdmisc.c @@ -167,7 +167,8 @@ parse_uint(char **sp, uint16_t *n) return FALSE; } - for (p = *sp; *p != '\0' && ISDIGIT(*p); p++) ; + for (p = *sp; *p != '\0' && ISDIGIT(*p); p++) + ; if (p != *sp && (i = atoi(*sp)) >= 0) { *n = (uint16_t)i; @@ -269,35 +270,37 @@ replace_ext(mrb_state *mrb, const char *filename, const char *ext) return s; } +/* parse: | : | */ +static void +parse_file_line_spec(mrb_state *mrb, char *arg, listcmd_parser_state *st) +{ + char *p = arg; + + if (parse_lineno(mrb, &p, st)) { + /* matched or , */ + } + else if (parse_filename(mrb, &p, st)) { + if (skip_char(&p, ':') && !parse_lineno(mrb, &p, st)) { + st->parse_error = TRUE; + } + } + else { + st->parse_error = TRUE; + } + if (*p != '\0') { + st->parse_error = TRUE; + } +} + static mrb_bool parse_listcmd_args(mrb_state *mrb, mrdb_state *mrdb, listcmd_parser_state *st) { - char *p; - switch (mrdb->wcnt) { case 2: - p = mrdb->words[1]; - - /* mrdb->words[1] ::= | ':' | */ - if (!parse_lineno(mrb, &p, st)) { - if (parse_filename(mrb, &p, st)) { - if (skip_char(&p, ':')) { - if (!parse_lineno(mrb, &p, st)) { - st->parse_error = TRUE; - } - } - } - else { - st->parse_error = TRUE; - } - } - if (*p != '\0') { - st->parse_error = TRUE; - } + parse_file_line_spec(mrb, mrdb->words[1], st); break; case 1: case 0: - /* do nothing */ break; default: st->parse_error = TRUE; @@ -350,9 +353,9 @@ check_cmd_pattern(const char *pattern, const char *cmd) } p = lbracket + 1; - q = (char *)cmd + (lbracket - pattern); + q = (char*)cmd + (lbracket - pattern); - for ( ; p < rbracket && *q != '\0'; p++, q++) { + for (; p < rbracket && *q != '\0'; p++, q++) { if (*p != *q) { break; } @@ -479,7 +482,8 @@ dbgcmd_quit(mrb_state *mrb, mrdb_state *mrdb) break; } c = buf; - while (buf != '\n' && (buf = getchar()) != EOF) ; + while (buf != '\n' && (buf = getchar()) != EOF) + ; if (c == 'y' || c == 'Y') { mrdb->dbg->xm = DBG_QUIT; @@ -499,9 +503,7 @@ dbgcmd_quit(mrb_state *mrb, mrdb_state *mrdb) } if (mrdb->dbg->xm == DBG_QUIT) { - struct RClass *exc; - exc = mrb_define_class(mrb, "DebuggerExit", mrb->eException_class); - mrb_raise(mrb, exc, "Exit mrdb"); + raise_debugger_exception(mrb, "DebuggerExit", "Exit mrdb"); } return DBGST_PROMPT; } diff --git a/mrbgems/mruby-bin-debugger/tools/mrdb/cmdprint.c b/mrbgems/mruby-bin-debugger/tools/mrdb/cmdprint.c index f78c1e1fc1..1e09f6da18 100644 --- a/mrbgems/mruby-bin-debugger/tools/mrdb/cmdprint.c +++ b/mrbgems/mruby-bin-debugger/tools/mrdb/cmdprint.c @@ -13,39 +13,38 @@ #include #include "apiprint.h" +static uint32_t +next_print_no(mrdb_state *mrdb) +{ + uint32_t no = mrdb->print_no++; + if (mrdb->print_no == 0) mrdb->print_no = 1; + return no; +} + dbgcmd_state dbgcmd_print(mrb_state *mrb, mrdb_state *mrdb) { - mrb_value expr; - mrb_value result; - uint8_t wcnt; - int ai; - if (mrdb->wcnt <= 1) { puts("Parameter not specified."); return DBGST_PROMPT; } - ai = mrb_gc_arena_save(mrb); + int ai = mrb_gc_arena_save(mrb); /* eval expr */ - expr = mrb_str_new_cstr(mrb, NULL); - for (wcnt=1; wcntwcnt; wcnt++) { + mrb_value expr = mrb_str_new_cstr(mrb, NULL); + for (uint8_t wcnt=1; wcntwcnt; wcnt++) { expr = mrb_str_cat_lit(mrb, expr, " "); expr = mrb_str_cat_cstr(mrb, expr, mrdb->words[wcnt]); } - result = mrb_debug_eval(mrb, mrdb->dbg, RSTRING_PTR(expr), RSTRING_LEN(expr), NULL, 0); + mrb_value result = mrb_debug_eval(mrb, mrdb->dbg, RSTRING_PTR(expr), RSTRING_LEN(expr), NULL, 0); /* $print_no = result */ - printf("$%lu = ", (unsigned long)mrdb->print_no++); + printf("$%lu = ", (unsigned long)next_print_no(mrdb)); fwrite(RSTRING_PTR(result), RSTRING_LEN(result), 1, stdout); putc('\n', stdout); - if (mrdb->print_no == 0) { - mrdb->print_no = 1; - } - mrb_gc_arena_restore(mrb, ai); return DBGST_PROMPT; @@ -60,20 +59,11 @@ dbgcmd_eval(mrb_state *mrb, mrdb_state *mrdb) dbgcmd_state dbgcmd_info_local(mrb_state *mrb, mrdb_state *mrdb) { - mrb_value result; - mrb_value s; - int ai; - - ai = mrb_gc_arena_save(mrb); - - result = mrb_debug_eval(mrb, mrdb->dbg, "local_variables", 0, NULL, 1); + int ai = mrb_gc_arena_save(mrb); - s = mrb_str_cat_lit(mrb, result, "\0"); - printf("$%lu = %s\n", (unsigned long)mrdb->print_no++, RSTRING_PTR(s)); - - if (mrdb->print_no == 0) { - mrdb->print_no = 1; - } + mrb_value result = mrb_debug_eval(mrb, mrdb->dbg, "local_variables", 0, NULL, 1); + mrb_value s = mrb_str_cat_lit(mrb, result, "\0"); + printf("$%lu = %s\n", (unsigned long)next_print_no(mrdb), RSTRING_PTR(s)); mrb_gc_arena_restore(mrb, ai); diff --git a/mrbgems/mruby-bin-debugger/tools/mrdb/cmdrun.c b/mrbgems/mruby-bin-debugger/tools/mrdb/cmdrun.c index fe8cf0aa7f..dd914af77b 100644 --- a/mrbgems/mruby-bin-debugger/tools/mrdb/cmdrun.c +++ b/mrbgems/mruby-bin-debugger/tools/mrdb/cmdrun.c @@ -17,10 +17,8 @@ dbgcmd_run(mrb_state *mrb, mrdb_state *mrdb) else { dbg->xm = DBG_QUIT; if (dbg->xphase == DBG_PHASE_RUNNING){ - struct RClass *exc; puts("Start it from the beginning"); - exc = mrb_define_class(mrb, "DebuggerRestart", mrb->eException_class); - mrb_raise(mrb, exc, "Restart mrdb"); + raise_debugger_exception(mrb, "DebuggerRestart", "Restart mrdb"); } } diff --git a/mrbgems/mruby-bin-debugger/tools/mrdb/mrdb.c b/mrbgems/mruby-bin-debugger/tools/mrdb/mrdb.c index 340a57b56c..42fa2fa621 100644 --- a/mrbgems/mruby-bin-debugger/tools/mrdb/mrdb.c +++ b/mrbgems/mruby-bin-debugger/tools/mrdb/mrdb.c @@ -20,7 +20,7 @@ #include "apibreak.h" #include "apilist.h" -void mrdb_state_free(mrb_state *); +void mrdb_state_free(mrb_state*); static mrb_debug_context *_debug_context = NULL; static mrdb_state *_mrdb_state = NULL; @@ -113,7 +113,7 @@ parse_args(mrb_state *mrb, int argc, char **argv, struct _args *args) char *buf; buflen = strlen(item) + 1; - buf = (char *)mrb_malloc(mrb, buflen); + buf = (char*)mrb_malloc(mrb, buflen); memcpy(buf, item, buflen); args->srcpath = buf; } @@ -123,8 +123,7 @@ parse_args(mrb_state *mrb, int argc, char **argv, struct _args *args) srcpathlen = strlen(args->srcpath); itemlen = strlen(item); - args->srcpath = - (char *)mrb_realloc(mrb, args->srcpath, srcpathlen + itemlen + 2); + args->srcpath = (char*)mrb_realloc(mrb, args->srcpath, srcpathlen + itemlen + 2); args->srcpath[srcpathlen] = '\n'; memcpy(args->srcpath + srcpathlen + 1, item, itemlen + 1); } @@ -280,7 +279,7 @@ get_command(mrb_state *mrb, mrdb_state *mrdb) } if (i == MAX_COMMAND_LINE) { - for ( ; (c=getchar()) != EOF && c !='\n'; i++) ; + for (; (c=getchar()) != EOF && c !='\n'; i++) ; } if (i > MAX_COMMAND_LINE) { @@ -297,7 +296,8 @@ pick_out_word(mrb_state *mrb, char **pp) { char *ps; - for (ps=*pp; ISBLANK(*ps); ps++) ; + for (ps=*pp; ISBLANK(*ps); ps++) + ; if (*ps == '\0') { return NULL; } @@ -322,12 +322,48 @@ pick_out_word(mrb_state *mrb, char **pp) return ps; } +/* find first command entry matching word1 (ignoring cmd2) */ +static debug_command* +find_command_by_word1(const char *word1) +{ + debug_command *cmd; + size_t wlen = strlen(word1); + + for (cmd=(debug_command*)debug_command_list; cmd->cmd1; cmd++) { + if (wlen >= cmd->len1 && strncmp(word1, cmd->cmd1, wlen) == 0) { + return cmd; + } + } + return NULL; +} + +/* find command entry matching both word1 and word2 */ +static debug_command* +find_command_by_words(const char *word1, const char *word2) +{ + debug_command *cmd; + size_t wlen; + + for (cmd=(debug_command*)debug_command_list; cmd->cmd1; cmd++) { + wlen = strlen(word1); + if (wlen < cmd->len1 || strncmp(word1, cmd->cmd1, wlen)) { + continue; + } + if (!cmd->cmd2) return cmd; /* word #1 only match */ + if (word2 == NULL) continue; /* word #2 not specified */ + wlen = strlen(word2); + if (wlen >= cmd->len2 && strncmp(word2, cmd->cmd2, wlen) == 0) { + return cmd; /* word #1 and #2 match */ + } + } + return NULL; +} + static debug_command* parse_command(mrb_state *mrb, mrdb_state *mrdb, char *buf) { - debug_command *cmd = NULL; + debug_command *cmd; char *p = buf; - size_t wlen; /* get word #1 */ mrdb->words[0] = pick_out_word(mrb, &p); @@ -336,57 +372,34 @@ parse_command(mrb_state *mrb, mrdb_state *mrdb, char *buf) } mrdb->wcnt = 1; /* set remain parameter */ - for ( ; *p && ISBLANK(*p); p++) ; + for (; *p && ISBLANK(*p); p++) + ; if (*p) { mrdb->words[mrdb->wcnt++] = p; } - /* check word #1 */ - for (cmd=(debug_command*)debug_command_list; cmd->cmd1; cmd++) { - wlen = strlen(mrdb->words[0]); - if (wlen >= cmd->len1 && - strncmp(mrdb->words[0], cmd->cmd1, wlen) == 0) { - break; - } - } - - if (cmd->cmd2) { - if (mrdb->wcnt > 1) { - /* get word #2 */ - mrdb->words[1] = pick_out_word(mrb, &p); - if (mrdb->words[1]) { - /* update remain parameter */ - for ( ; *p && ISBLANK(*p); p++) ; - if (*p) { - mrdb->words[mrdb->wcnt++] = p; - } - } - } - - /* check word #1,#2 */ - for ( ; cmd->cmd1; cmd++) { - wlen = strlen(mrdb->words[0]); - if (wlen < cmd->len1 || - strncmp(mrdb->words[0], cmd->cmd1, wlen)) { - continue; - } - - if (!cmd->cmd2) break; /* word #1 only */ - - if (mrdb->wcnt == 1) continue; /* word #2 not specified */ + cmd = find_command_by_word1(mrdb->words[0]); + if (!cmd) return NULL; - wlen = strlen(mrdb->words[1]); - if (wlen >= cmd->len2 && - strncmp(mrdb->words[1], cmd->cmd2, wlen) == 0) { - break; /* word #1 and #2 */ + /* if matched command has a sub-command, try word #1 + #2 */ + if (cmd->cmd2 && mrdb->wcnt > 1) { + mrdb->words[1] = pick_out_word(mrb, &p); + if (mrdb->words[1]) { + /* update remain parameter */ + for (; *p && ISBLANK(*p); p++) + ; + if (*p) { + mrdb->words[mrdb->wcnt++] = p; } } + cmd = find_command_by_words(mrdb->words[0], mrdb->words[1]); + if (!cmd) return NULL; } /* divide remain parameters */ - if (cmd->cmd1 && cmd->div) { + if (cmd->div) { p = mrdb->words[--mrdb->wcnt]; - for ( ; mrdb->wcntwcnt++) { + for (; mrdb->wcntwcnt++) { mrdb->words[mrdb->wcnt] = pick_out_word(mrb, &p); if (!mrdb->words[mrdb->wcnt]) { break; @@ -394,7 +407,7 @@ parse_command(mrb_state *mrb, mrdb_state *mrdb, char *buf) } } - return cmd->cmd1 ? cmd : NULL; + return cmd; } static void @@ -546,6 +559,22 @@ check_method_breakpoint(mrb_state *mrb, const mrb_irep *irep, const mrb_code *pc return bpno; } +static int32_t +check_breakpoint_hit(mrb_state *mrb, mrb_debug_context *dbg, + const mrb_irep *irep, const mrb_code *pc, + const char *file, int32_t line, mrb_value *regs) +{ + int32_t bpno; + + bpno = check_method_breakpoint(mrb, irep, pc, regs); + if (bpno > 0) return bpno; + if (dbg->prvfile != file || dbg->prvline != line) { + bpno = mrb_debug_check_breakpoint_line(mrb, dbg, file, line); + if (bpno > 0) return bpno; + } + return 0; +} + static void mrb_code_fetch_hook(mrb_state *mrb, const mrb_irep *irep, const mrb_code *pc, mrb_value *regs) { @@ -595,23 +624,16 @@ mrb_code_fetch_hook(mrb_state *mrb, const mrb_irep *irep, const mrb_code *pc, mr break; case DBG_RUN: - bpno = check_method_breakpoint(mrb, irep, pc, regs); + bpno = check_breakpoint_hit(mrb, dbg, irep, pc, file, line, regs); if (bpno > 0) { dbg->stopped_bpno = bpno; dbg->bm = BRK_BREAK; break; } - if (dbg->prvfile != file || dbg->prvline != line) { - bpno = mrb_debug_check_breakpoint_line(mrb, dbg, file, line); - if (bpno > 0) { - dbg->stopped_bpno = bpno; - dbg->bm = BRK_BREAK; - break; - } - } dbg->prvfile = file; dbg->prvline = line; return; + case DBG_INIT: dbg->root_irep = irep; dbg->bm = BRK_INIT; @@ -638,6 +660,7 @@ mrb_code_fetch_hook(mrb_state *mrb, const mrb_irep *irep, const mrb_code *pc, mr static mrdb_exemode mrb_debug_break_hook(mrb_state *mrb, mrb_debug_context *dbg) { + ptrdiff_t regs_off = dbg->regs - mrb->c->ci->stack; debug_command *cmd; dbgcmd_state st = DBGST_CONTINUE; mrdb_state *mrdb = mrdb_state_get(mrb); @@ -649,6 +672,7 @@ mrb_debug_break_hook(mrb_state *mrb, mrb_debug_context *dbg) mrb_assert(cmd); st = cmd->func(mrb, mrdb); + dbg->regs = mrb->c->ci->stack + regs_off; if ((st == DBGST_CONTINUE) || (st == DBGST_RESTART)) break; } @@ -669,8 +693,9 @@ main(int argc, char **argv) l_restart: - if (mrb == NULL) { - fputs("Invalid mrb_state, exiting mruby\n", stderr); + if (MRB_OPEN_FAILURE(mrb)) { + mrb_print_error(mrb); /* handles NULL */ + mrb_close(mrb); /* handles NULL */ return EXIT_FAILURE; } @@ -704,10 +729,10 @@ main(int argc, char **argv) v = mrb_load_irep_file(mrb, args.rfp); } else { /* .rb */ - mrbc_context *cc = mrbc_context_new(mrb); - mrbc_filename(mrb, cc, args.fname); + mrb_ccontext *cc = mrb_ccontext_new(mrb); + mrb_ccontext_filename(mrb, cc, args.fname); v = mrb_load_file_cxt(mrb, args.rfp, cc); - mrbc_context_free(mrb, cc); + mrb_ccontext_free(mrb, cc); } if (mrdb->dbg->xm == DBG_QUIT && !mrb_undef_p(v) && mrb->exc) { const char *classname = mrb_obj_classname(mrb, mrb_obj_value(mrb->exc)); diff --git a/mrbgems/mruby-bin-debugger/tools/mrdb/mrdb.h b/mrbgems/mruby-bin-debugger/tools/mrdb/mrdb.h index 24ccad1261..df7680aff5 100644 --- a/mrbgems/mruby-bin-debugger/tools/mrdb/mrdb.h +++ b/mrbgems/mruby-bin-debugger/tools/mrdb/mrdb.h @@ -7,6 +7,7 @@ #define MRDB_H #include +#include #include "mrdbconf.h" @@ -74,10 +75,6 @@ typedef enum { MRB_DEBUG_BPTYPE_METHOD, } mrb_debug_bptype; -struct mrb_irep; -struct mrbc_context; -struct mrb_debug_context; - typedef struct mrb_debug_linepoint { const char *file; uint16_t lineno; @@ -140,6 +137,13 @@ typedef struct mrdb_state { typedef dbgcmd_state (*debug_command_func)(mrb_state*, mrdb_state*); +static inline mrb_noreturn void +raise_debugger_exception(mrb_state *mrb, const char *name, const char *msg) +{ + struct RClass *exc = mrb_define_class(mrb, name, E_EXCEPTION); + mrb_raise(mrb, exc, msg); +} + /* cmdrun.c */ dbgcmd_state dbgcmd_run(mrb_state*, mrdb_state*); dbgcmd_state dbgcmd_continue(mrb_state*, mrdb_state*); diff --git a/mrbgems/mruby-bin-debugger/tools/mrdb/mrdbconf.h b/mrbgems/mruby-bin-debugger/tools/mrdb/mrdbconf.h index 0cc36c8b7e..a425a3b0f5 100644 --- a/mrbgems/mruby-bin-debugger/tools/mrdb/mrdbconf.h +++ b/mrbgems/mruby-bin-debugger/tools/mrdb/mrdbconf.h @@ -18,7 +18,7 @@ /* maximum size for command buffer */ #define MAX_COMMAND_LINE 1024 -/* maximum number of setable breakpoint */ +/* maximum number of settable breakpoint */ #define MAX_BREAKPOINT 5 #endif diff --git a/mrbgems/mruby-bin-mirb/README.md b/mrbgems/mruby-bin-mirb/README.md new file mode 100644 index 0000000000..4bb0944a2f --- /dev/null +++ b/mrbgems/mruby-bin-mirb/README.md @@ -0,0 +1,94 @@ +# mruby-bin-mirb + +mirb (mruby interactive) is an interactive Ruby shell for mruby. + +## Usage + +``` +mirb [options] +``` + +### Options + +- `-v` - print version and exit +- `-d` - set `$DEBUG` to true +- `-r library` - load the library before executing +- `--verbose` - verbose mode + +## Tab Completion + +mirb supports context-aware tab completion when built with a readline library. + +### Supported Completions + +- **Methods on objects**: Type an expression followed by `.` and press Tab + + ``` + > "hello".up + upcase upcase! upto + ``` + +- **Local variables**: Variables defined in the session + + ``` + > my_var = 123 + > my + my_var + ``` + +- **Global variables**: Press Tab after `$` + + ``` + > $std + $stdout $stderr $stdin + ``` + +- **Constants and classes**: Capital letter followed by Tab + + ``` + > Str + String Struct + ``` + +- **Ruby keywords**: At the start of expressions + + ``` + > cla + class + ``` + +### Readline Library Support + +Tab completion works with: + +- **GNU readline** (default on Linux) +- **libedit** (default on macOS/BSD) +- **linenoise** (lightweight alternative) + +### Configuration + +The readline library can be configured via the `MRUBY_MIRB_READLINE` environment variable: + +```bash +# Auto-detect (default) +rake + +# Force specific library +MRUBY_MIRB_READLINE=readline rake # GNU readline +MRUBY_MIRB_READLINE=libedit rake # libedit +MRUBY_MIRB_READLINE=linenoise rake # linenoise + +# Disable readline (plain input mode) +MRUBY_MIRB_READLINE=none rake +``` + +### Notes + +- Completion evaluates receiver expressions to determine available methods +- Only simple receivers (variable names, constants) are evaluated for safety +- Complex expressions like `obj.method().` are not completed to avoid side effects +- File path completion in `require`/`load` statements is planned for future versions + +## License + +MIT License - see the mruby LICENSE file. diff --git a/mrbgems/mruby-bin-mirb/bintest/mirb.rb b/mrbgems/mruby-bin-mirb/bintest/mirb.rb index 62f863c274..1650444afe 100644 --- a/mrbgems/mruby-bin-mirb/bintest/mirb.rb +++ b/mrbgems/mruby-bin-mirb/bintest/mirb.rb @@ -7,6 +7,12 @@ assert_true o.include?('=> 2') end +assert('mirb multi-line') do + o, s = Open3.capture2(cmd("mirb"), :stdin_data => "def a(b)\n return b\n end\na(1)\n") + assert_true o.include?('=> :a') + assert_true o.include?('=> 1') +end + assert('regression for #1563') do o, s = Open3.capture2(cmd("mirb"), :stdin_data => "a=1;b=2;c=3\nb\nc") assert_true o.include?('=> 3') @@ -48,5 +54,5 @@ def hoge A.call TESTCODE - assert_kind_of Integer, o =~ /\bundefined method 'a' \(NoMethodError\).*=> 5\b.*=> 1\b/m + assert_kind_of Integer, o =~ /\bundefined method 'a' .*\(NoMethodError\).*=> 5\b.*=> 1\b/m end diff --git a/mrbgems/mruby-bin-mirb/mrbgem.rake b/mrbgems/mruby-bin-mirb/mrbgem.rake index c31ac7ae92..6842e6f297 100644 --- a/mrbgems/mruby-bin-mirb/mrbgem.rake +++ b/mrbgems/mruby-bin-mirb/mrbgem.rake @@ -2,48 +2,6 @@ MRuby::Gem::Specification.new('mruby-bin-mirb') do |spec| spec.license = 'MIT' spec.author = 'mruby developers' spec.summary = 'mirb command' - - if spec.build.cc.search_header_path 'readline/readline.h' - spec.cc.defines << "MRB_USE_READLINE" - spec.cc.defines << "MRB_READLINE_HEADER=''" - spec.cc.defines << "MRB_READLINE_HISTORY=''" - if spec.build.cc.search_header_path 'termcap.h' - if MRUBY_BUILD_HOST_IS_CYGWIN || MRUBY_BUILD_HOST_IS_OPENBSD - if spec.build.cc.search_header_path 'termcap.h' - if MRUBY_BUILD_HOST_IS_CYGWIN then - spec.linker.libraries << 'ncurses' - else - spec.linker.libraries << 'termcap' - end - end - end - end - if RUBY_PLATFORM.include?('netbsd') - spec.linker.libraries << 'edit' - else - spec.linker.libraries << 'readline' - if RUBY_PLATFORM.include?('darwin') - # Workaround to build with Homebrew's readline on Mac (#4537) - lib_path = spec.build.cc.header_search_paths.find do |include_path| - lib_path = File.expand_path("#{include_path}/../lib") - break lib_path if File.exist?("#{lib_path}/libreadline.dylib") || - File.exist?("#{lib_path}/libreadline.a") - end - spec.linker.library_paths << lib_path if lib_path - end - if spec.build.cc.search_header_path 'curses.h' - spec.linker.libraries << 'ncurses' - end - end - elsif spec.build.cc.search_header_path 'edit/readline/readline.h' - spec.cc.defines << "MRB_USE_READLINE" - spec.cc.defines << "MRB_READLINE_HEADER=''" - spec.cc.defines << "MRB_READLINE_HISTORY=''" - spec.linker.libraries << "edit" - elsif spec.build.cc.search_header_path 'linenoise.h' - spec.cc.defines << "MRB_USE_LINENOISE" - end - spec.bins = %w(mirb) spec.add_dependency('mruby-compiler', :core => 'mruby-compiler') end diff --git a/mrbgems/mruby-bin-mirb/tools/mirb/mirb.c b/mrbgems/mruby-bin-mirb/tools/mirb/mirb.c index 695c329816..481dc4a353 100644 --- a/mrbgems/mruby-bin-mirb/tools/mirb/mirb.c +++ b/mrbgems/mruby-bin-mirb/tools/mirb/mirb.c @@ -19,119 +19,87 @@ #include #include #include -#include +#include #include #include -#include - #include -#include - -/* obsolete configuration */ -#ifdef ENABLE_READLINE -# define MRB_USE_READLINE -#endif -#ifdef ENABLE_LINENOISE -# define MRB_USE_LINENOISE -#endif -#ifdef DISABLE_MIRB_UNDERSCORE -# define MRB_NO_MIRB_UNDERSCORE -#endif -#ifdef MRB_USE_READLINE -#include MRB_READLINE_HEADER -#include MRB_READLINE_HISTORY -#define MIRB_ADD_HISTORY(line) add_history(line) -#define MIRB_READLINE(ch) readline(ch) -#if !defined(RL_READLINE_VERSION) || RL_READLINE_VERSION < 0x600 -/* libedit & older readline do not have rl_free() */ -#define MIRB_LINE_FREE(line) free(line) -#else -#define MIRB_LINE_FREE(line) rl_free(line) -#endif -#define MIRB_WRITE_HISTORY(path) write_history(path) -#define MIRB_READ_HISTORY(path) read_history(path) -#define MIRB_USING_HISTORY() using_history() -#elif defined(MRB_USE_LINENOISE) -#define MRB_USE_READLINE -#include -#define MIRB_ADD_HISTORY(line) linenoiseHistoryAdd(line) -#define MIRB_READLINE(ch) linenoise(ch) -#define MIRB_LINE_FREE(line) linenoiseFree(line) -#define MIRB_WRITE_HISTORY(path) linenoiseHistorySave(path) -#define MIRB_READ_HISTORY(path) linenoiseHistoryLoad(history_path) -#define MIRB_USING_HISTORY() -#endif - -#if !defined(_WIN32) && defined(_POSIX_C_SOURCE) -#define MIRB_SIGSETJMP(env) sigsetjmp(env, 1) -#define MIRB_SIGLONGJMP(env, val) siglongjmp(env, val) -#define SIGJMP_BUF sigjmp_buf +#ifdef _WIN32 +#include +#define isatty(fd) _isatty(fd) +#define fileno(fd) _fileno(fd) #else -#define MIRB_SIGSETJMP(env) setjmp(env) -#define MIRB_SIGLONGJMP(env, val) longjmp(env, val) -#define SIGJMP_BUF jmp_buf +#include #endif -#ifdef MRB_USE_READLINE +#include "mirb_editor.h" +#include "mirb_completion.h" +#include "mirb_highlight.h" -static const char history_file_name[] = ".mirb_history"; +/* obsolete configuration */ +#ifdef DISABLE_MIRB_UNDERSCORE +# define MRB_NO_MIRB_UNDERSCORE +#endif -static char * -get_history_path(mrb_state *mrb) +static void +p(mrb_state *mrb, mrb_value obj, mirb_highlighter *hl) { - char *path = NULL; - const char *home = getenv("HOME"); - -#ifdef _WIN32 - if (home != NULL) { - home = getenv("USERPROFILE"); + mrb_value val = mrb_funcall_argv(mrb, obj, MRB_SYM(inspect), 0, NULL); + if (mrb->exc) { + val = mrb_exc_get_output(mrb, mrb->exc); } -#endif - - if (home != NULL) { - int len = snprintf(NULL, 0, "%s/%s", home, history_file_name); - if (len >= 0) { - size_t size = len + 1; - path = (char *)mrb_malloc_simple(mrb, size); - if (path != NULL) { - int n = snprintf(path, size, "%s/%s", home, history_file_name); - if (n != len) { - mrb_free(mrb, path); - path = NULL; - } - } - } + if (!mrb_string_p(val)) { + val = mrb_obj_as_string(mrb, obj); } - - return path; + char* msg = mrb_locale_from_utf8(RSTRING_PTR(val), (int)RSTRING_LEN(val)); + mirb_highlight_print_result(hl, msg); + mrb_locale_free(msg); } -#endif - static void -p(mrb_state *mrb, mrb_value obj, int prompt) +p_error(mrb_state *mrb, struct RObject* exc, mrb_ccontext *cxt, mirb_highlighter *hl) { - mrb_value val; - char* msg; + mrb_value val = mrb_exc_get_output(mrb, exc); + if (!mrb_string_p(val)) { + val = mrb_obj_as_string(mrb, val); + } - val = mrb_funcall_id(mrb, obj, MRB_SYM(inspect), 0); - if (prompt) { - if (!mrb->exc) { - fputs(" => ", stdout); - } - else { - val = mrb_funcall_id(mrb, mrb_obj_value(mrb->exc), MRB_SYM(inspect), 0); + /* get first line of backtrace for location info */ + mrb_value bt = mrb_exc_backtrace(mrb, mrb_obj_value(exc)); + if (mrb_array_p(bt) && RARRAY_LEN(bt) > 0) { + mrb_value location = RARRAY_PTR(bt)[0]; + if (mrb_string_p(location)) { + const char *loc_str = RSTRING_PTR(location); + + /* parse location string: "(mirb):LINE" or "(mirb):LINE:in method" */ + const char *colon = strchr(loc_str, ':'); + if (colon && colon[1] >= '0' && colon[1] <= '9') { + /* check if there's a method name - this means error is from previous code */ + const char *in_pos = strstr(colon + 1, ":in "); + if (in_pos) { + /* error inside a previously defined method */ + char* loc_msg = mrb_locale_from_utf8(in_pos, (int)RSTRING_LEN(location) - (int)(in_pos - loc_str)); + printf("(mirb)%s: ", loc_msg); + mrb_locale_free(loc_msg); + } + else { + /* no method name - could be current or previous top-level code */ + int err_line = atoi(colon + 1); + if (err_line >= cxt->lineno) { + /* error in current input - show relative line number */ + int relative_line = err_line - cxt->lineno + 1; + printf("line %d: ", relative_line); + } + /* else: error from top-level previous code, no location shown */ + } + } } } - if (!mrb_string_p(val)) { - val = mrb_obj_as_string(mrb, obj); - } - msg = mrb_locale_from_utf8(RSTRING_PTR(val), (int)RSTRING_LEN(val)); - fwrite(msg, strlen(msg), 1, stdout); + + char* msg = mrb_locale_from_utf8(RSTRING_PTR(val), (int)RSTRING_LEN(val)); + mirb_highlight_print_error(hl, msg); mrb_locale_free(msg); - putc('\n', stdout); } /* Guess if the user might want to enter more @@ -149,7 +117,7 @@ is_code_block_open(struct mrb_parser_state *parser) /* check if parser error are available */ if (0 < parser->nerr) { - const char unexpected_end[] = "syntax error, unexpected $end"; + const char unexpected_end[] = "syntax error, unexpected end of file"; const char *message = parser->error_buffer[0].message; /* a parser error occur, we have to check if */ @@ -343,21 +311,6 @@ parse_args(mrb_state *mrb, int argc, char **argv, struct _args *args) return EXIT_SUCCESS; } -static void -cleanup(mrb_state *mrb, struct _args *args) -{ - if (args->rfp) - fclose(args->rfp); - mrb_free(mrb, args->argv); - if (args->libc) { - while (args->libc--) { - mrb_free(mrb, args->libv[args->libc]); - } - mrb_free(mrb, args->libv); - } - mrb_close(mrb); -} - /* Print a short remark for the user */ static void print_hint(void) @@ -365,22 +318,40 @@ print_hint(void) printf("mirb - Embeddable Interactive Ruby Shell\n\n"); } -#ifndef MRB_USE_READLINE -/* Print the command line prompt of the REPL */ -static void -print_cmdline(int code_block_open) +/* Extract a specific line from source code */ +static const char* +extract_line(const char *str, int target_line, size_t *line_len) { - if (code_block_open) { - printf("* "); + const char *line_start = str; + const char *p = str; + int current_line = 1; + + /* skip to target line */ + while (current_line < target_line && *p) { + if (*p == '\n') { + current_line++; + line_start = p + 1; + } + p++; } - else { - printf("> "); + + /* find line end */ + const char *line_end = line_start; + while (*line_end && *line_end != '\n') { + line_end++; } - fflush(stdout); + + *line_len = line_end - line_start; + return line_start; } -#endif -void mrb_codedump_all(mrb_state*, struct RProc*); +/* Print the command line prompt of the REPL */ +static void +print_cmdline(int code_block_open, int line_num) +{ + printf("%d%c ", line_num, code_block_open ? '*' : '>'); + fflush(stdout); +} static int check_keyword(const char *buf, const char *word) @@ -405,25 +376,62 @@ check_keyword(const char *buf, const char *word) return 1; } - -#ifndef MRB_USE_READLINE volatile sig_atomic_t input_canceled = 0; -void -ctrl_c_handler(int signo) + +/* Data for completion checker callback */ +typedef struct { + mrb_state *mrb; + mrb_ccontext *cxt; +} mirb_check_data; + +/* Check if code is syntactically complete (for multi-line editor) */ +static mrb_bool +mirb_check_code_complete(const char *code, void *user_data) { - input_canceled = 1; + mirb_check_data *data = (mirb_check_data *)user_data; + struct mrb_parser_state *parser; + mrb_bool complete; + + parser = mrb_parser_new(data->mrb); + if (parser == NULL) return TRUE; /* error - accept input */ + + parser->s = code; + parser->send = code + strlen(code); + parser->lineno = data->cxt->lineno; + mrb_parser_parse(parser, data->cxt); + complete = !is_code_block_open(parser); + mrb_parser_free(parser); + + return complete; } -#else -SIGJMP_BUF ctrl_c_buf; -void + +/* Tab completion callback for editor */ +static int +mirb_tab_complete(const char *line, int cursor_pos, + char ***completions_out, int *prefix_len_out, + void *user_data) +{ + (void)user_data; + return mirb_get_completions(line, cursor_pos, completions_out, prefix_len_out); +} + +/* Free tab completions */ +static void +mirb_tab_complete_free(char **completions, int count, void *user_data) +{ + (void)user_data; + mirb_free_completions(completions, count); +} + +static void ctrl_c_handler(int signo) { - MIRB_SIGLONGJMP(ctrl_c_buf, 1); + input_canceled = 1; } -#endif #ifndef MRB_NO_MIRB_UNDERSCORE -void decl_lv_underscore(mrb_state *mrb, mrbc_context *cxt) +static void +decl_lv_underscore(mrb_state *mrb, mrb_ccontext *cxt) { struct RProc *proc; struct mrb_parser_state *parser; @@ -447,37 +455,38 @@ main(int argc, char **argv) { char ruby_code[4096] = { 0 }; char last_code_line[1024] = { 0 }; -#ifndef MRB_USE_READLINE int last_char; size_t char_index; -#else - char *history_path; - char* line; -#endif - mrbc_context *cxt; + mirb_editor editor; + mirb_check_data check_data; + mrb_bool use_editor = FALSE; + + memset(&editor, 0, sizeof(editor)); + mrb_ccontext *cxt = NULL; struct mrb_parser_state *parser; mrb_state *mrb; mrb_value result; struct _args args; mrb_value ARGV; - int n; + int ret = EXIT_SUCCESS; int i; mrb_bool code_block_open = FALSE; + int line_num = 1; int ai; unsigned int stack_keep = 0; /* new interpreter instance */ mrb = mrb_open(); - if (mrb == NULL) { - fputs("Invalid mrb interpreter, exiting mirb\n", stderr); + if (MRB_OPEN_FAILURE(mrb)) { + mrb_print_error(mrb); /* handles NULL */ + mrb_close(mrb); /* handles NULL */ return EXIT_FAILURE; } - n = parse_args(mrb, argc, argv, &args); - if (n == EXIT_FAILURE) { - cleanup(mrb, &args); + ret = parse_args(mrb, argc, argv, &args); + if (ret == EXIT_FAILURE) { usage(argv[0]); - return n; + goto cleanup; } ARGV = mrb_ary_new_capa(mrb, args.argc); @@ -491,37 +500,27 @@ main(int argc, char **argv) mrb_define_global_const(mrb, "ARGV", ARGV); mrb_gv_set(mrb, mrb_intern_lit(mrb, "$DEBUG"), mrb_bool_value(args.debug)); -#ifdef MRB_USE_READLINE - history_path = get_history_path(mrb); - if (history_path == NULL) { - fputs("failed to get history path\n", stderr); - mrb_close(mrb); - return EXIT_FAILURE; + /* Query terminal background color before any output */ + if (isatty(fileno(stdin)) && isatty(fileno(stdout))) { + mirb_highlight_query_terminal(); } - MIRB_USING_HISTORY(); - MIRB_READ_HISTORY(history_path); -#endif - print_hint(); - cxt = mrbc_context_new(mrb); + cxt = mrb_ccontext_new(mrb); /* Load libraries */ for (i = 0; i < args.libc; i++) { - struct REnv *e; FILE *lfp = fopen(args.libv[i], "r"); if (lfp == NULL) { printf("Cannot open library file. (%s)\n", args.libv[i]); - cleanup(mrb, &args); - return EXIT_FAILURE; + ret = EXIT_FAILURE; + goto cleanup; } mrb_load_file_cxt(mrb, lfp, cxt); fclose(lfp); - e = mrb_vm_ci_env(mrb->c->cibase); - mrb_vm_ci_env_set(mrb->c->cibase, NULL); - mrb_env_unshare(mrb, e, FALSE); - mrbc_cleanup_local_variables(mrb, cxt); + mrb_vm_ci_env_clear(mrb, mrb->c->cibase); + mrb_ccontext_cleanup_local_variables(cxt); } #ifndef MRB_NO_MIRB_UNDERSCORE @@ -530,9 +529,27 @@ main(int argc, char **argv) cxt->capture_errors = TRUE; cxt->lineno = 1; - mrbc_filename(mrb, cxt, "(mirb)"); + mrb_ccontext_filename(mrb, cxt, "(mirb)"); if (args.verbose) cxt->dump_result = TRUE; + /* Initialize multi-line editor */ + if (isatty(fileno(stdin)) && mirb_editor_init(&editor)) { + use_editor = TRUE; + check_data.mrb = mrb; + check_data.cxt = cxt; + mirb_editor_set_check_complete(&editor, mirb_check_code_complete, &check_data); + /* Setup tab completion */ + mirb_setup_editor_completion(mrb, cxt); + mirb_editor_set_tab_complete(&editor, mirb_tab_complete, mirb_tab_complete_free, NULL); + /* Enable colored prompts if terminal supports it */ + if (isatty(fileno(stdout))) { + const char *term = getenv("TERM"); + if (term && strcmp(term, "dumb") != 0 && !getenv("NO_COLOR")) { + mirb_editor_set_color(&editor, TRUE); + } + } + } + ai = mrb_gc_arena_save(mrb); while (TRUE) { @@ -544,62 +561,85 @@ main(int argc, char **argv) break; } -#ifndef MRB_USE_READLINE - print_cmdline(code_block_open); + if (use_editor && mirb_editor_supported(&editor)) { + /* Use multi-line editor */ + char *input; + mirb_edit_result res; + + mirb_editor_set_prompt_format(&editor, "%d> ", "%d* ", line_num); + + res = mirb_editor_read(&editor, &input); - signal(SIGINT, ctrl_c_handler); - char_index = 0; - while ((last_char = getchar()) != '\n') { - if (last_char == EOF) break; - if (char_index >= sizeof(last_code_line)-2) { + if (res == MIRB_EDIT_EOF) { + break; + } + if (res == MIRB_EDIT_INTERRUPT) { + puts("^C"); + continue; + } + if (res != MIRB_EDIT_OK || input == NULL) { + continue; + } + + /* The editor returns complete multi-line input */ + if (strlen(input) >= sizeof(ruby_code) - 1) { fputs("input string too long\n", stderr); + free(input); continue; } - last_code_line[char_index++] = last_char; - } - signal(SIGINT, SIG_DFL); - if (input_canceled) { - ruby_code[0] = '\0'; - last_code_line[0] = '\0'; - code_block_open = FALSE; - puts("^C"); - input_canceled = 0; - continue; - } - if (last_char == EOF) { - fputs("\n", stdout); - break; - } + strcpy(ruby_code, input); + free(input); + + /* Count lines for line number update */ + { + const char *p = ruby_code; + while (*p) { + if (*p++ == '\n') line_num++; + } + } - last_code_line[char_index++] = '\n'; - last_code_line[char_index] = '\0'; -#else - if (MIRB_SIGSETJMP(ctrl_c_buf) == 0) { - ; - } - else { - ruby_code[0] = '\0'; - last_code_line[0] = '\0'; + /* Check for quit/exit commands */ + if (check_keyword(ruby_code, "quit") || check_keyword(ruby_code, "exit")) { + break; + } + + /* Skip to evaluation (editor already handles multi-line) */ code_block_open = FALSE; - puts("^C"); + goto evaluate; } - signal(SIGINT, ctrl_c_handler); - line = MIRB_READLINE(code_block_open ? "* " : "> "); - signal(SIGINT, SIG_DFL); + else { + /* Fallback to simple line-by-line input */ + print_cmdline(code_block_open, line_num); + + signal(SIGINT, ctrl_c_handler); + char_index = 0; + while ((last_char = getchar()) != '\n') { + if (last_char == EOF) break; + if (char_index >= sizeof(last_code_line)-2) { + fputs("input string too long\n", stderr); + continue; + } + last_code_line[char_index++] = last_char; + } + signal(SIGINT, SIG_DFL); + if (input_canceled) { + ruby_code[0] = '\0'; + last_code_line[0] = '\0'; + code_block_open = FALSE; + line_num = 1; + puts("^C"); + input_canceled = 0; + continue; + } + if (last_char == EOF) { + fputs("\n", stdout); + break; + } - if (line == NULL) { - printf("\n"); - break; + last_code_line[char_index++] = '\n'; + last_code_line[char_index] = '\0'; } - if (strlen(line) > sizeof(last_code_line)-2) { - fputs("input string too long\n", stderr); - continue; - } - strcpy(last_code_line, line); - strcat(last_code_line, "\n"); - MIRB_ADD_HISTORY(line); - MIRB_LINE_FREE(line); -#endif + line_num++; done: if (code_block_open) { @@ -616,6 +656,7 @@ main(int argc, char **argv) strcpy(ruby_code, last_code_line); } + evaluate: utf8 = mrb_utf8_from_locale(ruby_code, -1); if (!utf8) abort(); @@ -639,14 +680,38 @@ main(int argc, char **argv) if (0 < parser->nwarn) { /* warning */ char* msg = mrb_locale_from_utf8(parser->warn_buffer[0].message, -1); - printf("line %d: %s\n", parser->warn_buffer[0].lineno, msg); + printf("warning: line %d: %s\n", parser->warn_buffer[0].lineno, msg); mrb_locale_free(msg); } if (0 < parser->nerr) { /* syntax error */ + int err_line = parser->error_buffer[0].lineno; + int err_col = parser->error_buffer[0].column; char* msg = mrb_locale_from_utf8(parser->error_buffer[0].message, -1); - printf("line %d: %s\n", parser->error_buffer[0].lineno, msg); + + /* convert absolute line number to relative line within ruby_code */ + int relative_line = err_line - cxt->lineno + 1; + + /* show error with line:column (using relative line number) */ + printf("line %d:%d: %s\n", relative_line, err_col, msg); + + /* show source line and caret if available */ + if (ruby_code[0] != '\0') { + size_t line_len; + const char *line_start = extract_line(ruby_code, relative_line, &line_len); + + if (line_len > 0) { + printf(" %.*s\n", (int)line_len, line_start); + printf(" "); + for (int j = 0; j < err_col; j++) { + printf(" "); + } + printf("^\n"); + } + } + mrb_locale_free(msg); + line_num = 1; } else { /* generate bytecode */ @@ -676,7 +741,7 @@ main(int argc, char **argv) /* did an exception occur? */ if (mrb->exc) { MRB_EXC_CHECK_EXIT(mrb, mrb->exc); - p(mrb, mrb_obj_value(mrb->exc), 0); + p_error(mrb, mrb->exc, cxt, &editor.highlight); mrb->exc = 0; } else { @@ -684,35 +749,40 @@ main(int argc, char **argv) if (!mrb_respond_to(mrb, result, MRB_SYM(inspect))){ result = mrb_any_to_s(mrb, result); } - p(mrb, result, 1); + p(mrb, result, &editor.highlight); #ifndef MRB_NO_MIRB_UNDERSCORE *(mrb->c->ci->stack + 1) = result; #endif } + /* Add to history after evaluation (success or error) */ + if (use_editor) { + mirb_editor_history_add(&editor, ruby_code); + } } ruby_code[0] = '\0'; last_code_line[0] = '\0'; + line_num = 1; mrb_gc_arena_restore(mrb, ai); } mrb_parser_free(parser); cxt->lineno++; } -#ifdef MRB_USE_READLINE - MIRB_WRITE_HISTORY(history_path); - mrb_free(mrb, history_path); -#endif - +cleanup: if (args.rfp) fclose(args.rfp); mrb_free(mrb, args.argv); if (args.libv) { - for (i = 0; i < args.libc; ++i) { + for (i = 0; i < args.libc; i++) { mrb_free(mrb, args.libv[i]); } mrb_free(mrb, args.libv); } - mrbc_context_free(mrb, cxt); + if (cxt) mrb_ccontext_free(mrb, cxt); + if (use_editor) { + mirb_cleanup_completion(); + mirb_editor_cleanup(&editor); + } mrb_close(mrb); - return 0; + return ret; } diff --git a/mrbgems/mruby-bin-mirb/tools/mirb/mirb_buffer.c b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_buffer.c new file mode 100644 index 0000000000..8d3ce5cd86 --- /dev/null +++ b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_buffer.c @@ -0,0 +1,1014 @@ +/* +** mirb_buffer.c - Multi-line buffer for mirb editor +** +** See Copyright Notice in mruby.h +*/ + +#include "mirb_buffer.h" +#include +#include +#include + +#ifdef MRB_UTF8_STRING +/* + * UTF-8 helper functions + * These are only compiled when MRB_UTF8_STRING is defined + */ + +/* Check if byte is a UTF-8 lead byte (not a continuation byte) */ +static mrb_bool +utf8_islead(unsigned char c) +{ + return (c & 0xC0) != 0x80; +} + +/* UTF-8 character length table indexed by (first_byte >> 3) */ +static const char utf8_len_table[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00-0x7F: ASCII */ + 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0xBF: continuation (invalid start) */ + 2, 2, 2, 2, /* 0xC0-0xDF: 2-byte sequences */ + 3, 3, /* 0xE0-0xEF: 3-byte sequences */ + 4, /* 0xF0-0xF7: 4-byte sequences */ + 0 /* 0xF8-0xFF: invalid */ +}; + +/* + * Get byte length of UTF-8 character at position + * Returns 1 for invalid sequences (safe fallback) + */ +static size_t +utf8_char_len(const char *p, const char *end) +{ + size_t len; + if (p >= end) return 0; + + len = (size_t)utf8_len_table[(unsigned char)p[0] >> 3]; + if (len == 0 || len > (size_t)(end - p)) return 1; + + /* Validate continuation bytes */ + switch (len) { + case 4: + if (!utf8_islead((unsigned char)p[3])) break; /* continuation expected */ + return 1; + case 3: + if (!utf8_islead((unsigned char)p[2])) break; + return 1; + case 2: + if (!utf8_islead((unsigned char)p[1])) break; + return 1; + } + return len; +} + +/* + * Find start of previous UTF-8 character + * Returns byte offset from start of string to the previous character + * If at position 0, returns 0 + */ +static size_t +utf8_prev_char_start(const char *str, size_t pos) +{ + size_t i; + if (pos == 0) return 0; + + /* Scan back to find a lead byte (max 4 bytes back) */ + for (i = 1; i <= 4 && i <= pos; i++) { + if (utf8_islead((unsigned char)str[pos - i])) { + return pos - i; + } + } + /* No lead byte found, assume single byte */ + return pos - 1; +} + +/* + * Calculate display width for a UTF-8 character + * Returns 2 for CJK/wide characters, 1 for others + * + * This is a simplified version - proper implementation would use wcwidth() + * We detect East Asian Wide characters by their code point ranges: + * - CJK Unified Ideographs: U+4E00-U+9FFF (3-byte UTF-8: E4-E9) + * - Hiragana/Katakana: U+3040-U+30FF (3-byte UTF-8: E3 81-83) + * - Full-width forms: U+FF00-U+FFEF (3-byte UTF-8: EF BC-BF) + */ +static int +utf8_char_width(const char *p, const char *end) +{ + unsigned char c = (unsigned char)p[0]; + + if (c < 0x80) return 1; /* ASCII */ + if (c < 0xE0) return 1; /* 2-byte (Latin extended, etc.) */ + + /* 3-byte sequences - check for wide characters */ + if (c >= 0xE3 && c <= 0xE9 && (end - p) >= 3) { + /* CJK and Japanese ranges are typically double-width */ + return 2; + } + if (c == 0xEF && (end - p) >= 3) { + unsigned char c2 = (unsigned char)p[1]; + if (c2 >= 0xBC && c2 <= 0xBF) { + /* Full-width ASCII and symbols */ + return 2; + } + } + + /* 4-byte sequences (emoji, etc.) - typically double-width */ + if (c >= 0xF0) return 2; + + return 1; +} + +/* + * Calculate display column from byte position + * Sums up the display width of all characters before the byte position + */ +static size_t +utf8_display_col(const char *str, size_t byte_pos) +{ + size_t col = 0; + const char *p = str; + const char *end = str + byte_pos; + + while (p < end) { + size_t char_len = utf8_char_len(p, str + byte_pos + 4); /* +4 for safety */ + if (char_len == 0) break; + col += (size_t)utf8_char_width(p, end); + p += char_len; + } + return col; +} +#endif /* MRB_UTF8_STRING */ + +/* + * Helper: Initialize a single line + */ +static mrb_bool +line_init(mirb_line *line) +{ + line->data = (char*)malloc(MIRB_BUF_LINE_INIT); + if (line->data == NULL) return FALSE; + line->data[0] = '\0'; + line->len = 0; + line->cap = MIRB_BUF_LINE_INIT; + return TRUE; +} + +/* + * Helper: Free a single line + */ +static void +line_free(mirb_line *line) +{ + free(line->data); + line->data = NULL; + line->len = 0; + line->cap = 0; +} + +/* + * Helper: Ensure line has capacity for additional chars + */ +static mrb_bool +line_ensure_cap(mirb_line *line, size_t additional) +{ + size_t needed = line->len + additional + 1; /* +1 for null */ + if (needed <= line->cap) return TRUE; + + size_t new_cap = line->cap * 2; + while (new_cap < needed) new_cap *= 2; + if (new_cap > MIRB_BUF_LINE_MAX) new_cap = MIRB_BUF_LINE_MAX; + if (new_cap < needed) return FALSE; + + char *new_data = (char*)realloc(line->data, new_cap); + if (new_data == NULL) return FALSE; + + line->data = new_data; + line->cap = new_cap; + return TRUE; +} + +/* + * Helper: Insert character at position in line + */ +static mrb_bool +line_insert_at(mirb_line *line, size_t pos, char c) +{ + if (pos > line->len) return FALSE; + if (!line_ensure_cap(line, 1)) return FALSE; + + memmove(line->data + pos + 1, line->data + pos, line->len - pos + 1); + line->data[pos] = c; + line->len++; + return TRUE; +} + +/* + * Helper: Delete character at position in line + */ +#ifndef MRB_UTF8_STRING +static mrb_bool +line_delete_at(mirb_line *line, size_t pos) +{ + if (pos >= line->len) return FALSE; + + memmove(line->data + pos, line->data + pos + 1, line->len - pos); + line->len--; + return TRUE; +} +#endif + +#ifdef MRB_UTF8_STRING +/* + * Helper: Delete N bytes at position in line (for UTF-8 multibyte chars) + */ +static mrb_bool +line_delete_bytes_at(mirb_line *line, size_t pos, size_t count) +{ + if (pos >= line->len || count == 0) return FALSE; + if (pos + count > line->len) count = line->len - pos; + + memmove(line->data + pos, line->data + pos + count, line->len - pos - count + 1); + line->len -= count; + return TRUE; +} +#endif + +/* + * Helper: Set line content + */ +static mrb_bool +line_set(mirb_line *line, const char *str, size_t len) +{ + if (len + 1 > line->cap) { + size_t new_cap = MIRB_BUF_LINE_INIT; + while (new_cap < len + 1) new_cap *= 2; + if (new_cap > MIRB_BUF_LINE_MAX) return FALSE; + + char *new_data = (char*)realloc(line->data, new_cap); + if (new_data == NULL) return FALSE; + line->data = new_data; + line->cap = new_cap; + } + + memcpy(line->data, str, len); + line->data[len] = '\0'; + line->len = len; + return TRUE; +} + +/* + * Helper: Ensure buffer has capacity for one more line + */ +static mrb_bool +buffer_ensure_line_cap(mirb_buffer *buf) +{ + if (buf->line_count < buf->line_cap) return TRUE; + size_t new_cap = buf->line_cap * 2; + if (new_cap > MIRB_BUF_LINES_MAX) return FALSE; + mirb_line *new_lines = (mirb_line*)realloc(buf->lines, sizeof(mirb_line) * new_cap); + if (new_lines == NULL) return FALSE; + buf->lines = new_lines; + buf->line_cap = new_cap; + return TRUE; +} + +/* + * Helper: Join line at line_idx with the previous line (line_idx-1). + * Appends content of line_idx to line_idx-1, then removes line_idx. + */ +static mrb_bool +buffer_join_line_up(mirb_buffer *buf, size_t line_idx) +{ + mirb_line *prev = &buf->lines[line_idx - 1]; + mirb_line *curr = &buf->lines[line_idx]; + + if (!line_ensure_cap(prev, curr->len)) return FALSE; + memcpy(prev->data + prev->len, curr->data, curr->len + 1); + prev->len += curr->len; + + line_free(curr); + memmove(&buf->lines[line_idx], + &buf->lines[line_idx + 1], + sizeof(mirb_line) * (buf->line_count - line_idx - 1)); + buf->line_count--; + return TRUE; +} + +/* + * Initialize buffer + */ +mrb_bool +mirb_buffer_init(mirb_buffer *buf) +{ + memset(buf, 0, sizeof(*buf)); + + buf->lines = (mirb_line*)malloc(sizeof(mirb_line) * MIRB_BUF_LINES_INIT); + if (buf->lines == NULL) return FALSE; + buf->line_cap = MIRB_BUF_LINES_INIT; + + /* Start with one empty line */ + if (!line_init(&buf->lines[0])) { + free(buf->lines); + return FALSE; + } + buf->line_count = 1; + + buf->kill_buf = (char*)malloc(MIRB_BUF_KILL_SIZE); + if (buf->kill_buf == NULL) { + line_free(&buf->lines[0]); + free(buf->lines); + return FALSE; + } + buf->kill_buf[0] = '\0'; + buf->kill_len = 0; + + return TRUE; +} + +/* + * Free buffer resources + */ +void +mirb_buffer_free(mirb_buffer *buf) +{ + if (buf->lines) { + for (size_t i = 0; i < buf->line_count; i++) { + line_free(&buf->lines[i]); + } + free(buf->lines); + buf->lines = NULL; + } + free(buf->kill_buf); + buf->kill_buf = NULL; +} + +/* + * Clear buffer content + */ +void +mirb_buffer_clear(mirb_buffer *buf) +{ + /* Free all lines except first */ + for (size_t i = 1; i < buf->line_count; i++) { + line_free(&buf->lines[i]); + } + + /* Clear first line */ + buf->lines[0].data[0] = '\0'; + buf->lines[0].len = 0; + buf->line_count = 1; + + buf->cursor_line = 0; + buf->cursor_col = 0; + buf->modified = FALSE; +} + +/* + * Get total character count + */ +size_t +mirb_buffer_total_len(mirb_buffer *buf) +{ + size_t total = 0; + for (size_t i = 0; i < buf->line_count; i++) { + total += buf->lines[i].len; + if (i < buf->line_count - 1) total++; /* newline */ + } + return total; +} + +/* + * Get buffer content up to and including a specific line as string + * Caller must free the returned string + */ +char * +mirb_buffer_to_string_upto_line(mirb_buffer *buf, size_t up_to_line) +{ + size_t total = 0; + size_t lines_to_include = (up_to_line < buf->line_count) ? up_to_line + 1 : buf->line_count; + + for (size_t i = 0; i < lines_to_include; i++) { + total += buf->lines[i].len; + if (i < lines_to_include - 1) total++; /* newline */ + } + + char *str = (char*)malloc(total + 1); + if (str == NULL) return NULL; + + char *p = str; + for (size_t i = 0; i < lines_to_include; i++) { + memcpy(p, buf->lines[i].data, buf->lines[i].len); + p += buf->lines[i].len; + if (i < lines_to_include - 1) *p++ = '\n'; + } + *p = '\0'; + + return str; +} + +/* + * Get buffer as string + */ +char * +mirb_buffer_to_string(mirb_buffer *buf) +{ + return mirb_buffer_to_string_upto_line(buf, buf->line_count - 1); +} + +/* + * Set buffer from string + */ +mrb_bool +mirb_buffer_set_string(mirb_buffer *buf, const char *str) +{ + mirb_buffer_clear(buf); + + if (str == NULL || *str == '\0') return TRUE; + + const char *start = str; + const char *p = str; + size_t line_idx = 0; + + while (*p) { + if (*p == '\n') { + /* Set current line */ + if (line_idx >= buf->line_count) { + /* Need to add new line */ + if (!buffer_ensure_line_cap(buf)) return FALSE; + if (!line_init(&buf->lines[buf->line_count])) return FALSE; + buf->line_count++; + } + if (!line_set(&buf->lines[line_idx], start, p - start)) return FALSE; + + start = p + 1; + line_idx++; + p++; + } + else { + p++; + } + } + + /* Handle last line (may not end with newline) */ + if (start < p || line_idx == 0) { + if (line_idx >= buf->line_count) { + if (!buffer_ensure_line_cap(buf)) return FALSE; + if (!line_init(&buf->lines[buf->line_count])) return FALSE; + buf->line_count++; + } + if (!line_set(&buf->lines[line_idx], start, p - start)) return FALSE; + } + + buf->cursor_line = 0; + buf->cursor_col = 0; + buf->modified = FALSE; + + return TRUE; +} + +/* + * Insert character at cursor + */ +mrb_bool +mirb_buffer_insert_char(mirb_buffer *buf, char c) +{ + mirb_line *line = &buf->lines[buf->cursor_line]; + + if (!line_insert_at(line, buf->cursor_col, c)) return FALSE; + + buf->cursor_col++; + buf->modified = TRUE; + return TRUE; +} + +/* + * Insert string at cursor + */ +mrb_bool +mirb_buffer_insert_string(mirb_buffer *buf, const char *str, size_t len) +{ + for (size_t i = 0; i < len; i++) { + if (str[i] == '\n') { + if (!mirb_buffer_newline(buf)) return FALSE; + } + else { + if (!mirb_buffer_insert_char(buf, str[i])) return FALSE; + } + } + return TRUE; +} + +/* + * Delete character before cursor + */ +mrb_bool +mirb_buffer_delete_back(mirb_buffer *buf) +{ + if (buf->cursor_col > 0) { + /* Delete within line */ + mirb_line *line = &buf->lines[buf->cursor_line]; +#ifdef MRB_UTF8_STRING + /* Find start of previous UTF-8 character and delete entire character */ + size_t prev_pos = utf8_prev_char_start(line->data, buf->cursor_col); + size_t char_len = buf->cursor_col - prev_pos; + if (line_delete_bytes_at(line, prev_pos, char_len)) { + buf->cursor_col = prev_pos; + buf->modified = TRUE; + return TRUE; + } +#else + if (line_delete_at(line, buf->cursor_col - 1)) { + buf->cursor_col--; + buf->modified = TRUE; + return TRUE; + } +#endif + } + else if (buf->cursor_line > 0) { + /* Join with previous line */ + size_t prev_len = buf->lines[buf->cursor_line - 1].len; + if (!buffer_join_line_up(buf, buf->cursor_line)) return FALSE; + buf->cursor_line--; + buf->cursor_col = prev_len; + buf->modified = TRUE; + return TRUE; + } + return FALSE; +} + +/* + * Delete character at cursor + */ +mrb_bool +mirb_buffer_delete_forward(mirb_buffer *buf) +{ + mirb_line *line = &buf->lines[buf->cursor_line]; + + if (buf->cursor_col < line->len) { + /* Delete within line */ +#ifdef MRB_UTF8_STRING + /* Delete entire UTF-8 character at cursor */ + size_t char_len = utf8_char_len(line->data + buf->cursor_col, + line->data + line->len); + if (line_delete_bytes_at(line, buf->cursor_col, char_len)) { + buf->modified = TRUE; + return TRUE; + } +#else + if (line_delete_at(line, buf->cursor_col)) { + buf->modified = TRUE; + return TRUE; + } +#endif + } + else if (buf->cursor_line < buf->line_count - 1) { + /* Join with next line */ + if (!buffer_join_line_up(buf, buf->cursor_line + 1)) return FALSE; + buf->modified = TRUE; + return TRUE; + } + return FALSE; +} + +/* + * Insert newline (split line) + */ +mrb_bool +mirb_buffer_newline(mirb_buffer *buf) +{ + /* Ensure we have room for a new line */ + if (!buffer_ensure_line_cap(buf)) return FALSE; + + mirb_line *curr = &buf->lines[buf->cursor_line]; + size_t split_pos = buf->cursor_col; + + /* Make room for new line */ + memmove(&buf->lines[buf->cursor_line + 2], + &buf->lines[buf->cursor_line + 1], + sizeof(mirb_line) * (buf->line_count - buf->cursor_line - 1)); + + /* Initialize new line with content after cursor */ + mirb_line *new_line = &buf->lines[buf->cursor_line + 1]; + if (!line_init(new_line)) { + /* Restore lines array */ + memmove(&buf->lines[buf->cursor_line + 1], + &buf->lines[buf->cursor_line + 2], + sizeof(mirb_line) * (buf->line_count - buf->cursor_line - 1)); + return FALSE; + } + + if (!line_set(new_line, curr->data + split_pos, curr->len - split_pos)) { + line_free(new_line); + memmove(&buf->lines[buf->cursor_line + 1], + &buf->lines[buf->cursor_line + 2], + sizeof(mirb_line) * (buf->line_count - buf->cursor_line - 1)); + return FALSE; + } + + /* Truncate current line */ + curr->data[split_pos] = '\0'; + curr->len = split_pos; + + buf->line_count++; + buf->cursor_line++; + buf->cursor_col = 0; + buf->modified = TRUE; + + return TRUE; +} + +/* Delete a line at the given index */ +void +mirb_buffer_delete_line(mirb_buffer *buf, size_t line_idx) +{ + if (line_idx >= buf->line_count) return; + if (buf->line_count <= 1) return; /* Keep at least one line */ + + /* Free the line's data */ + line_free(&buf->lines[line_idx]); + + /* Shift remaining lines down */ + if (line_idx < buf->line_count - 1) { + memmove(&buf->lines[line_idx], + &buf->lines[line_idx + 1], + sizeof(mirb_line) * (buf->line_count - line_idx - 1)); + } + + buf->line_count--; + + /* Adjust cursor if needed */ + if (buf->cursor_line >= buf->line_count) { + buf->cursor_line = buf->line_count - 1; + } + if (buf->cursor_col > buf->lines[buf->cursor_line].len) { + buf->cursor_col = buf->lines[buf->cursor_line].len; + } + + buf->modified = TRUE; +} + +/* + * Move cursor left + */ +mrb_bool +mirb_buffer_cursor_left(mirb_buffer *buf) +{ + if (buf->cursor_col > 0) { +#ifdef MRB_UTF8_STRING + /* Move back to start of previous UTF-8 character */ + mirb_line *line = &buf->lines[buf->cursor_line]; + buf->cursor_col = utf8_prev_char_start(line->data, buf->cursor_col); +#else + buf->cursor_col--; +#endif + return TRUE; + } + else if (buf->cursor_line > 0) { + buf->cursor_line--; + buf->cursor_col = buf->lines[buf->cursor_line].len; + return TRUE; + } + return FALSE; +} + +/* + * Move cursor right + */ +mrb_bool +mirb_buffer_cursor_right(mirb_buffer *buf) +{ + mirb_line *line = &buf->lines[buf->cursor_line]; + + if (buf->cursor_col < line->len) { +#ifdef MRB_UTF8_STRING + /* Skip entire UTF-8 character */ + size_t char_len = utf8_char_len(line->data + buf->cursor_col, + line->data + line->len); + buf->cursor_col += char_len; +#else + buf->cursor_col++; +#endif + return TRUE; + } + else if (buf->cursor_line < buf->line_count - 1) { + buf->cursor_line++; + buf->cursor_col = 0; + return TRUE; + } + return FALSE; +} + +/* + * Move cursor up + */ +mrb_bool +mirb_buffer_cursor_up(mirb_buffer *buf) +{ + if (buf->cursor_line > 0) { + buf->cursor_line--; + /* Clamp column to line length */ + if (buf->cursor_col > buf->lines[buf->cursor_line].len) { + buf->cursor_col = buf->lines[buf->cursor_line].len; + } + return TRUE; + } + return FALSE; +} + +/* + * Move cursor down + */ +mrb_bool +mirb_buffer_cursor_down(mirb_buffer *buf) +{ + if (buf->cursor_line < buf->line_count - 1) { + buf->cursor_line++; + /* Clamp column to line length */ + if (buf->cursor_col > buf->lines[buf->cursor_line].len) { + buf->cursor_col = buf->lines[buf->cursor_line].len; + } + return TRUE; + } + return FALSE; +} + +/* + * Move to beginning of line + */ +void +mirb_buffer_cursor_home(mirb_buffer *buf) +{ + buf->cursor_col = 0; +} + +/* + * Move to end of line + */ +void +mirb_buffer_cursor_end(mirb_buffer *buf) +{ + buf->cursor_col = buf->lines[buf->cursor_line].len; +} + +/* + * Move to start of buffer + */ +void +mirb_buffer_cursor_start(mirb_buffer *buf) +{ + buf->cursor_line = 0; + buf->cursor_col = 0; +} + +/* + * Move to end of buffer + */ +void +mirb_buffer_cursor_finish(mirb_buffer *buf) +{ + buf->cursor_line = buf->line_count - 1; + buf->cursor_col = buf->lines[buf->cursor_line].len; +} + +/* + * Move cursor back one word + */ +mrb_bool +mirb_buffer_cursor_word_back(mirb_buffer *buf) +{ + mrb_bool moved = FALSE; + + /* Skip any whitespace/non-word chars going back */ + while (buf->cursor_col > 0 || buf->cursor_line > 0) { + if (buf->cursor_col == 0) { + if (buf->cursor_line == 0) break; + buf->cursor_line--; + buf->cursor_col = buf->lines[buf->cursor_line].len; + moved = TRUE; + continue; + } + + char c = buf->lines[buf->cursor_line].data[buf->cursor_col - 1]; + if (mirb_is_word_char(c)) break; + buf->cursor_col--; + moved = TRUE; + } + + /* Move through word chars */ + while (buf->cursor_col > 0) { + char c = buf->lines[buf->cursor_line].data[buf->cursor_col - 1]; + if (!mirb_is_word_char(c)) break; + buf->cursor_col--; + moved = TRUE; + } + + return moved; +} + +/* + * Move cursor forward one word + */ +mrb_bool +mirb_buffer_cursor_word_forward(mirb_buffer *buf) +{ + mrb_bool moved = FALSE; + mirb_line *line = &buf->lines[buf->cursor_line]; + + /* Move through current word chars */ + while (buf->cursor_col < line->len) { + if (!mirb_is_word_char(line->data[buf->cursor_col])) break; + buf->cursor_col++; + moved = TRUE; + } + + /* Skip whitespace/non-word chars */ + while (buf->cursor_col < line->len || buf->cursor_line < buf->line_count - 1) { + if (buf->cursor_col >= line->len) { + if (buf->cursor_line >= buf->line_count - 1) break; + buf->cursor_line++; + buf->cursor_col = 0; + line = &buf->lines[buf->cursor_line]; + moved = TRUE; + continue; + } + + if (mirb_is_word_char(line->data[buf->cursor_col])) break; + buf->cursor_col++; + moved = TRUE; + } + + return moved; +} + +/* + * Helper: Save text to kill buffer + */ +static void +save_to_kill(mirb_buffer *buf, const char *str, size_t len) +{ + if (len >= MIRB_BUF_KILL_SIZE) len = MIRB_BUF_KILL_SIZE - 1; + memcpy(buf->kill_buf, str, len); + buf->kill_buf[len] = '\0'; + buf->kill_len = len; +} + +/* + * Kill to end of line + */ +void +mirb_buffer_kill_to_end(mirb_buffer *buf) +{ + mirb_line *line = &buf->lines[buf->cursor_line]; + + if (buf->cursor_col < line->len) { + /* Kill text to end of line */ + save_to_kill(buf, line->data + buf->cursor_col, line->len - buf->cursor_col); + line->data[buf->cursor_col] = '\0'; + line->len = buf->cursor_col; + buf->modified = TRUE; + } + else if (line->len == 0 && buf->line_count > 1) { + /* Empty line: delete the entire line */ + save_to_kill(buf, "\n", 1); + mirb_buffer_delete_line(buf, buf->cursor_line); + /* Adjust cursor to end of previous line if we deleted from middle */ + if (buf->cursor_line > 0 && buf->cursor_line >= buf->line_count) { + buf->cursor_line = buf->line_count - 1; + } + buf->cursor_col = 0; + } + else if (buf->cursor_line < buf->line_count - 1) { + /* At end of non-empty line: kill newline (join with next line) */ + save_to_kill(buf, "\n", 1); + mirb_buffer_delete_forward(buf); + } +} + +/* + * Kill to start of line + */ +void +mirb_buffer_kill_to_start(mirb_buffer *buf) +{ + mirb_line *line = &buf->lines[buf->cursor_line]; + + if (buf->cursor_col > 0) { + save_to_kill(buf, line->data, buf->cursor_col); + memmove(line->data, line->data + buf->cursor_col, line->len - buf->cursor_col + 1); + line->len -= buf->cursor_col; + buf->cursor_col = 0; + buf->modified = TRUE; + } +} + +/* + * Kill word backward + */ +void +mirb_buffer_kill_word_back(mirb_buffer *buf) +{ + size_t start_line = buf->cursor_line; + size_t start_col = buf->cursor_col; + + if (!mirb_buffer_cursor_word_back(buf)) return; + + if (buf->cursor_line == start_line) { + /* Same line */ + mirb_line *line = &buf->lines[buf->cursor_line]; + size_t kill_len = start_col - buf->cursor_col; + save_to_kill(buf, line->data + buf->cursor_col, kill_len); + memmove(line->data + buf->cursor_col, + line->data + start_col, + line->len - start_col + 1); + line->len -= kill_len; + buf->modified = TRUE; + } + /* Cross-line kill is more complex; simplified: just delete chars */ +} + +/* + * Kill word forward + */ +void +mirb_buffer_kill_word_forward(mirb_buffer *buf) +{ + size_t start_col = buf->cursor_col; + mirb_line *line = &buf->lines[buf->cursor_line]; + + /* Find end of word */ + size_t end_col = start_col; + + /* Skip word chars */ + while (end_col < line->len && mirb_is_word_char(line->data[end_col])) { + end_col++; + } + + /* Skip non-word chars */ + while (end_col < line->len && !mirb_is_word_char(line->data[end_col])) { + end_col++; + } + + if (end_col > start_col) { + save_to_kill(buf, line->data + start_col, end_col - start_col); + memmove(line->data + start_col, + line->data + end_col, + line->len - end_col + 1); + line->len -= (end_col - start_col); + buf->modified = TRUE; + } +} + +/* + * Yank (paste) from kill buffer + */ +mrb_bool +mirb_buffer_yank(mirb_buffer *buf) +{ + if (buf->kill_len == 0) return FALSE; + return mirb_buffer_insert_string(buf, buf->kill_buf, buf->kill_len); +} + +/* + * Get current line content + */ +const char * +mirb_buffer_current_line(mirb_buffer *buf) +{ + return buf->lines[buf->cursor_line].data; +} + +/* + * Get line at index + */ +const char * +mirb_buffer_line_at(mirb_buffer *buf, size_t index) +{ + if (index >= buf->line_count) return NULL; + return buf->lines[index].data; +} + +/* + * Get line length at index + */ +size_t +mirb_buffer_line_len(mirb_buffer *buf, size_t index) +{ + if (index >= buf->line_count) return 0; + return buf->lines[index].len; +} + +/* + * Get cursor display column (visual column for terminal positioning) + * When MRB_UTF8_STRING is defined, calculates display width considering + * multibyte characters. Otherwise, returns the byte position directly. + */ +size_t +mirb_buffer_cursor_display_col(mirb_buffer *buf) +{ +#ifdef MRB_UTF8_STRING + mirb_line *line = &buf->lines[buf->cursor_line]; + return utf8_display_col(line->data, buf->cursor_col); +#else + return buf->cursor_col; +#endif +} diff --git a/mrbgems/mruby-bin-mirb/tools/mirb/mirb_buffer.h b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_buffer.h new file mode 100644 index 0000000000..dd6d0b122b --- /dev/null +++ b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_buffer.h @@ -0,0 +1,195 @@ +/* +** mirb_buffer.h - Multi-line buffer for mirb editor +** +** See Copyright Notice in mruby.h +*/ + +#ifndef MIRB_BUFFER_H +#define MIRB_BUFFER_H + +#include + +/* + * Default sizes for buffer allocation + */ +#define MIRB_BUF_LINE_INIT 128 /* initial line buffer size */ +#define MIRB_BUF_LINE_MAX 4096 /* maximum line length */ +#define MIRB_BUF_LINES_INIT 8 /* initial number of lines */ +#define MIRB_BUF_LINES_MAX 1024 /* maximum number of lines */ +#define MIRB_BUF_KILL_SIZE 4096 /* kill buffer size */ + +/* + * A single line in the buffer + */ +typedef struct mirb_line { + char *data; /* line content (null-terminated) */ + size_t len; /* current length (excluding null) */ + size_t cap; /* allocated capacity */ +} mirb_line; + +/* + * Multi-line buffer with cursor tracking + */ +typedef struct mirb_buffer { + mirb_line *lines; /* array of lines */ + size_t line_count; /* number of lines */ + size_t line_cap; /* allocated line slots */ + + size_t cursor_line; /* current line (0-indexed) */ + size_t cursor_col; /* current column (0-indexed) */ + + char *kill_buf; /* kill buffer for cut/paste */ + size_t kill_len; /* length of kill buffer content */ + + mrb_bool modified; /* buffer has been modified */ +} mirb_buffer; + +/* + * Initialize buffer + * Returns TRUE on success + */ +mrb_bool mirb_buffer_init(mirb_buffer *buf); + +/* + * Free buffer resources + */ +void mirb_buffer_free(mirb_buffer *buf); + +/* + * Clear buffer content (reset to single empty line) + */ +void mirb_buffer_clear(mirb_buffer *buf); + +/* + * Get total character count across all lines + */ +size_t mirb_buffer_total_len(mirb_buffer *buf); + +/* + * Get buffer content as a single string + * Lines are joined with newlines + * Caller must free the returned string + */ +char *mirb_buffer_to_string(mirb_buffer *buf); + +/* + * Get buffer content up to and including a specific line + * Caller must free the returned string + */ +char *mirb_buffer_to_string_upto_line(mirb_buffer *buf, size_t up_to_line); + +/* + * Set buffer content from string + * String may contain newlines + */ +mrb_bool mirb_buffer_set_string(mirb_buffer *buf, const char *str); + +/* + * Insert a character at cursor position + */ +mrb_bool mirb_buffer_insert_char(mirb_buffer *buf, char c); + +/* + * Insert a string at cursor position + */ +mrb_bool mirb_buffer_insert_string(mirb_buffer *buf, const char *str, size_t len); + +/* + * Delete character before cursor (backspace) + * Returns TRUE if a character was deleted + */ +mrb_bool mirb_buffer_delete_back(mirb_buffer *buf); + +/* + * Delete character at cursor (delete key) + * Returns TRUE if a character was deleted + */ +mrb_bool mirb_buffer_delete_forward(mirb_buffer *buf); + +/* + * Insert newline at cursor position (split current line) + */ +mrb_bool mirb_buffer_newline(mirb_buffer *buf); + +/* + * Delete a line at the given index + */ +void mirb_buffer_delete_line(mirb_buffer *buf, size_t line_idx); + +/* + * Cursor movement functions + * Return TRUE if cursor moved + */ +mrb_bool mirb_buffer_cursor_left(mirb_buffer *buf); +mrb_bool mirb_buffer_cursor_right(mirb_buffer *buf); +mrb_bool mirb_buffer_cursor_up(mirb_buffer *buf); +mrb_bool mirb_buffer_cursor_down(mirb_buffer *buf); + +/* + * Move cursor to beginning/end of current line + */ +void mirb_buffer_cursor_home(mirb_buffer *buf); +void mirb_buffer_cursor_end(mirb_buffer *buf); + +/* + * Move cursor to beginning/end of buffer + */ +void mirb_buffer_cursor_start(mirb_buffer *buf); +void mirb_buffer_cursor_finish(mirb_buffer *buf); + +/* + * Word movement (like Emacs Alt+B, Alt+F) + */ +mrb_bool mirb_buffer_cursor_word_back(mirb_buffer *buf); +mrb_bool mirb_buffer_cursor_word_forward(mirb_buffer *buf); + +/* + * Kill operations (cut to kill buffer) + * Ctrl+K: kill to end of line + * Ctrl+U: kill to beginning of line + * Ctrl+W: kill word backward + * Alt+D: kill word forward + */ +void mirb_buffer_kill_to_end(mirb_buffer *buf); +void mirb_buffer_kill_to_start(mirb_buffer *buf); +void mirb_buffer_kill_word_back(mirb_buffer *buf); +void mirb_buffer_kill_word_forward(mirb_buffer *buf); + +/* + * Yank (paste from kill buffer) + * Ctrl+Y + */ +mrb_bool mirb_buffer_yank(mirb_buffer *buf); + +/* + * Get current line content + */ +const char *mirb_buffer_current_line(mirb_buffer *buf); + +/* + * Get line at index + */ +const char *mirb_buffer_line_at(mirb_buffer *buf, size_t index); + +/* + * Get length of line at index + */ +size_t mirb_buffer_line_len(mirb_buffer *buf, size_t index); + +/* + * Get cursor display column (visual column for terminal positioning) + * Handles UTF-8 display width when MRB_UTF8_STRING is defined + */ +size_t mirb_buffer_cursor_display_col(mirb_buffer *buf); + +/* + * Check if character is a word character (alphanumeric or underscore) + */ +static inline mrb_bool +mirb_is_word_char(char c) +{ + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || c == '_'; +} + +#endif /* MIRB_BUFFER_H */ diff --git a/mrbgems/mruby-bin-mirb/tools/mirb/mirb_completion.c b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_completion.c new file mode 100644 index 0000000000..cc82ddd2c1 --- /dev/null +++ b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_completion.c @@ -0,0 +1,756 @@ +/* +** mirb_completion.c - Tab completion support for mirb +** +** See Copyright Notice in mruby.h +*/ + +#include "mirb_completion.h" +#include "mirb_highlight.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* Windows compatibility */ +#ifdef _MSC_VER +#define strdup _strdup +#endif + +/* strndup is not available on Windows (MSVC and MinGW) */ +#ifdef _WIN32 +static char* +strndup(const char *s, size_t n) +{ + size_t len = strlen(s); + if (len > n) len = n; + char *p = (char*)malloc(len + 1); + if (p) { + memcpy(p, s, len); + p[len] = '\0'; + } + return p; +} +#endif + +#ifdef MRB_USE_READLINE +#ifndef MRB_USE_LINENOISE +#include MRB_READLINE_HEADER +#endif +#endif + +#ifdef MRB_USE_LINENOISE +#include +#endif + +/* ============================================================ + * Core Completion Engine + * ============================================================ */ + +void +mirb_completion_init(mirb_completion_ctx *ctx, mrb_state *mrb, mrb_ccontext *cxt) +{ + memset(ctx, 0, sizeof(*ctx)); + ctx->mrb = mrb; + ctx->cxt = cxt; +} + +void +mirb_completion_free(mirb_completion_ctx *ctx) +{ + int i; + + /* Free match prefix */ + if (ctx->match_prefix) { + free(ctx->match_prefix); + ctx->match_prefix = NULL; + } + + /* Free completions */ + if (ctx->completions) { + for (i = 0; i < ctx->completion_count; i++) { + free(ctx->completions[i]); + } + free(ctx->completions); + ctx->completions = NULL; + } + + ctx->completion_count = 0; + ctx->completion_alloc = 0; + ctx->current_index = 0; +} + +/* ============================================================ + * Context Analysis + * ============================================================ */ + +mirb_completion_type +mirb_detect_completion_type(const char *line, int cursor_pos) +{ + int i; + int in_string = 0; /* 0 = not in string, '"' or '\'' = in that string type */ + + /* First pass: determine if we're inside a string by scanning from start */ + for (i = 0; i < cursor_pos; i++) { + if (in_string) { + if (line[i] == '\\' && i + 1 < cursor_pos) { + i++; /* Skip escaped character */ + } + else if (line[i] == in_string) { + in_string = 0; /* End of string */ + } + } + else { + if (line[i] == '"' || line[i] == '\'') { + in_string = line[i]; /* Start of string */ + } + } + } + + /* If we're inside a string, check for file completion context */ + if (in_string) { + if (mirb_in_file_context(line, cursor_pos)) { + return COMPLETION_FILE; + } + return COMPLETION_KEYWORD; /* No completion inside strings */ + } + + /* Scan backwards from cursor to find context */ + for (i = cursor_pos - 1; i >= 0; i--) { + if (line[i] == '.') { + /* After dot = method completion */ + return COMPLETION_METHOD; + } + if (line[i] == '$') { + /* Global variable */ + return COMPLETION_GLOBAL_VAR; + } + if (line[i] == '"' || line[i] == '\'') { + /* This is a closing quote (we know we're not in a string) */ + /* Continue scanning to find if there's a dot before the string */ + continue; + } + if (ISSPACE(line[i]) || line[i] == '(' || line[i] == ',' || + line[i] == '[' || line[i] == '{' || line[i] == ';') { + /* Start of new expression */ + break; + } + } + + /* Default: complete everything at top level */ + return COMPLETION_KEYWORD; /* Includes keywords, locals, constants */ +} + +mrb_bool +mirb_in_file_context(const char *line, int quote_pos) +{ + int i; + + /* Look backwards for require or load keyword */ + for (i = quote_pos - 1; i >= 0; i--) { + if (ISSPACE(line[i])) continue; + + /* Check for 'require' or 'load' */ + if (i >= 6 && strncmp(&line[i-6], "require", 7) == 0) return TRUE; + if (i >= 3 && strncmp(&line[i-3], "load", 4) == 0) return TRUE; + + break; + } + return FALSE; +} + +/* Extract receiver expression before the dot */ +char * +mirb_extract_receiver(const char *line, int cursor_pos, int *recv_end) +{ + int depth = 0; /* Parentheses/bracket depth */ + int i, start = -1; + char *receiver; + int len; + + /* Find the dot before cursor */ + for (i = cursor_pos - 1; i >= 0; i--) { + if (line[i] == '.' && depth == 0) { + *recv_end = i; + break; + } + /* Track nesting depth for complex expressions */ + if (line[i] == ')' || line[i] == ']' || line[i] == '}') depth++; + if (line[i] == '(' || line[i] == '[' || line[i] == '{') depth--; + } + + if (i < 0) return NULL; /* No dot found */ + + /* Now find start of receiver expression */ + depth = 0; + for (start = i - 1; start >= 0; start--) { + char c = line[start]; + + if (c == ')' || c == ']' || c == '}') depth++; + if (c == '(' || c == '[' || c == '{') depth--; + + if (depth < 0) { + start++; + break; + } + + /* Break on operators/keywords at depth 0 */ + if (depth == 0 && (ISSPACE(c) || c == '=' || c == ',' || c == ';')) { + start++; + break; + } + } + + if (start < 0) start = 0; + + /* Allocate and copy receiver */ + len = i - start; + receiver = (char*)malloc(len + 1); + if (!receiver) return NULL; + + memcpy(receiver, line + start, len); + receiver[len] = '\0'; + + return receiver; +} + +/* ============================================================ + * Receiver Evaluation + * ============================================================ */ + +/* Check if receiver expression is simple (just a name, no method calls) */ +static mrb_bool +is_simple_receiver(const char *expr) +{ + int i; + int in_string = 0; + + /* Empty is not simple */ + if (!expr || expr[0] == '\0') return FALSE; + + /* Check if it's a safe expression to evaluate */ + for (i = 0; expr[i]; i++) { + char c = expr[i]; + + if (in_string) { + /* Inside string - allow anything except check for end */ + if (c == '\\' && expr[i+1]) { + i++; /* Skip escaped character */ + } + else if (c == in_string) { + in_string = 0; /* End of string */ + } + } + else { + /* Outside string */ + if (c == '"' || c == '\'') { + in_string = c; /* Start of string */ + } + else if (c == '(' || c == ')') { + /* Disallow method calls - could have side effects */ + return FALSE; + } + else if (!(ISALNUM(c) || c == '_' || c == ':' || c == '[' || c == ']' || + c == '{' || c == '}' || c == ',' || c == ' ' || c == '\t' || + c == '-' || c == '+' || c == '.' || c == '@')) { + /* Disallow unknown characters */ + return FALSE; + } + } + } + + /* Unclosed string is not valid */ + if (in_string) return FALSE; + + return TRUE; +} + +mrb_value +mirb_eval_receiver(mrb_state *mrb, const char *receiver_expr, mrb_ccontext *cxt) +{ + struct mrb_parser_state *parser; + struct RProc *proc; + mrb_value result; + int ai = mrb_gc_arena_save(mrb); + + /* Parse the receiver expression WITH compiler context to access local variables */ + parser = mrb_parse_string(mrb, receiver_expr, cxt); + if (!parser || parser->nerr > 0) { + if (parser) mrb_parser_free(parser); + return mrb_nil_value(); + } + + /* Generate and execute */ + proc = mrb_generate_code(mrb, parser); + mrb_parser_free(parser); + + if (!proc) { + return mrb_nil_value(); + } + + result = mrb_vm_run(mrb, proc, mrb_top_self(mrb), 0); + + /* Clear exception if any */ + if (mrb->exc) { + mrb->exc = NULL; + result = mrb_nil_value(); + } + + mrb_gc_arena_restore(mrb, ai); + return result; +} + +/* ============================================================ + * Method Completion + * ============================================================ */ + +/* Callback for mrb_mt_foreach */ +struct method_collector { + mirb_completion_ctx *ctx; + int count; +}; + +static int +collect_method_callback(mrb_state *mrb, mrb_sym sym, mrb_method_t method, void *data) +{ + struct method_collector *mc = (struct method_collector*)data; + const char *name = mrb_sym_name(mrb, sym); + + (void)method; /* Unused */ + + /* Skip internal methods (start with __) */ + if (name[0] == '_' && name[1] == '_') { + return 0; /* Continue iteration */ + } + + /* Add if matches prefix */ + mirb_add_completion(mc->ctx, name); + mc->count++; + + return 0; /* Continue */ +} + +void +mirb_complete_methods(mirb_completion_ctx *ctx, mrb_value receiver) +{ + struct RClass *klass = mrb_class(ctx->mrb, receiver); + struct method_collector mc = { ctx, 0 }; + + /* Walk up class hierarchy */ + while (klass) { + mrb_mt_foreach(ctx->mrb, klass, collect_method_callback, &mc); + klass = klass->super; + } +} + +/* ============================================================ + * Keyword and Variable Completion + * ============================================================ */ + +void +mirb_complete_keywords(mirb_completion_ctx *ctx) +{ + int i; + for (i = 0; mirb_keywords[i] != NULL; i++) { + mirb_add_completion(ctx, mirb_keywords[i]); + } +} + +void +mirb_complete_local_vars(mirb_completion_ctx *ctx) +{ + int i; + + /* Local variables from compiler context */ + if (ctx->cxt && ctx->cxt->syms) { + for (i = 0; i < (int)ctx->cxt->slen; i++) { + const char *name = mrb_sym_name(ctx->mrb, ctx->cxt->syms[i]); + if (name && name[0] != '_') { /* Skip underscore-only */ + mirb_add_completion(ctx, name); + } + } + } +} + +void +mirb_complete_global_vars(mirb_completion_ctx *ctx) +{ + mrb_value gvars; + mrb_int len, i; + int ai = mrb_gc_arena_save(ctx->mrb); + + /* Use Ruby to get global variables */ + gvars = mrb_funcall_argv(ctx->mrb, mrb_obj_value(ctx->mrb->kernel_module), + mrb_intern_lit(ctx->mrb, "global_variables"), + 0, NULL); + + if (ctx->mrb->exc) { + ctx->mrb->exc = NULL; + mrb_gc_arena_restore(ctx->mrb, ai); + return; + } + + if (mrb_array_p(gvars)) { + len = RARRAY_LEN(gvars); + + for (i = 0; i < len; i++) { + mrb_value sym = mrb_ary_entry(gvars, i); + mrb_sym s = mrb_symbol(sym); + const char *name = mrb_sym_name(ctx->mrb, s); + if (name) { + mirb_add_completion(ctx, name); + } + } + } + + mrb_gc_arena_restore(ctx->mrb, ai); +} + +void +mirb_complete_constants(mirb_completion_ctx *ctx, struct RClass *scope) +{ + mrb_value consts; + mrb_int len, i; + int ai = mrb_gc_arena_save(ctx->mrb); + + /* Use Ruby to get constants */ + consts = mrb_funcall_argv(ctx->mrb, + mrb_obj_value(scope ? scope : ctx->mrb->object_class), + mrb_intern_lit(ctx->mrb, "constants"), + 0, NULL); + + if (ctx->mrb->exc) { + ctx->mrb->exc = NULL; + mrb_gc_arena_restore(ctx->mrb, ai); + return; + } + + if (mrb_array_p(consts)) { + len = RARRAY_LEN(consts); + + for (i = 0; i < len; i++) { + mrb_value sym = mrb_ary_entry(consts, i); + mrb_sym s = mrb_symbol(sym); + const char *name = mrb_sym_name(ctx->mrb, s); + if (name) { + mirb_add_completion(ctx, name); + } + } + } + + mrb_gc_arena_restore(ctx->mrb, ai); +} + +void +mirb_complete_files(mirb_completion_ctx *ctx, const char *partial_path) +{ + /* File completion implementation would go here */ + /* For now, just a stub */ + (void)ctx; + (void)partial_path; +} + +/* ============================================================ + * Completion Management + * ============================================================ */ + +void +mirb_add_completion(mirb_completion_ctx *ctx, const char *text) +{ + char **new_completions; + int new_alloc; + + /* Check if matches prefix */ + if (ctx->prefix_len > 0) { + if (strncmp(text, ctx->match_prefix, ctx->prefix_len) != 0) { + return; /* Doesn't match */ + } + } + + /* Grow array if needed */ + if (ctx->completion_count >= ctx->completion_alloc) { + new_alloc = ctx->completion_alloc == 0 ? 16 : ctx->completion_alloc * 2; + new_completions = (char**)realloc(ctx->completions, + new_alloc * sizeof(char*)); + if (!new_completions) return; /* Out of memory */ + + ctx->completions = new_completions; + ctx->completion_alloc = new_alloc; + } + + /* Add completion */ + ctx->completions[ctx->completion_count] = strdup(text); + if (ctx->completions[ctx->completion_count]) { + ctx->completion_count++; + } +} + +void +mirb_generate_completions(mirb_completion_ctx *ctx, const char *line, int cursor_pos) +{ + mirb_completion_type type; + int i, recv_end; + char *receiver_expr; + mrb_value receiver; + + /* Store context */ + ctx->line_buf = line; + ctx->cursor_pos = cursor_pos; + + /* Extract prefix to match */ + for (i = cursor_pos - 1; i >= 0; i--) { + char c = line[i]; + if (!ISALNUM(c) && c != '_' && c != '?' && c != '!' && c != '$' && c != '@') { + break; + } + } + i++; /* Move to start of identifier */ + + if (ctx->match_prefix) { + free(ctx->match_prefix); + } + ctx->match_prefix = strndup(line + i, cursor_pos - i); + ctx->prefix_len = cursor_pos - i; + + /* Detect completion type */ + type = mirb_detect_completion_type(line, cursor_pos); + + /* Generate completions based on type */ + switch (type) { + case COMPLETION_METHOD: + receiver_expr = mirb_extract_receiver(line, cursor_pos, &recv_end); + if (receiver_expr) { + /* Only evaluate simple receivers to avoid corrupting VM state. + * Complex expressions like "obj.method()" are skipped for now. + * This prevents local variables from being cleared during tab completion. */ + if (is_simple_receiver(receiver_expr)) { + receiver = mirb_eval_receiver(ctx->mrb, receiver_expr, ctx->cxt); + if (!mrb_nil_p(receiver)) { + mirb_complete_methods(ctx, receiver); + } + } + free(receiver_expr); + } + break; + + case COMPLETION_GLOBAL_VAR: + mirb_complete_global_vars(ctx); + break; + + case COMPLETION_FILE: + mirb_complete_files(ctx, ctx->match_prefix); + break; + + case COMPLETION_LOCAL_VAR: + case COMPLETION_CONSTANT: + case COMPLETION_KEYWORD: + default: + /* Complete everything */ + mirb_complete_keywords(ctx); + mirb_complete_local_vars(ctx); + mirb_complete_constants(ctx, NULL); + break; + } +} + +/* ============================================================ + * Shared Completion Context + * ============================================================ */ + +static mirb_completion_ctx *g_ctx = NULL; + +static mrb_bool +init_completion_ctx(mrb_state *mrb, mrb_ccontext *cxt) +{ + if (g_ctx) return TRUE; + g_ctx = (mirb_completion_ctx*)malloc(sizeof(mirb_completion_ctx)); + if (!g_ctx) return FALSE; + mirb_completion_init(g_ctx, mrb, cxt); + return TRUE; +} + +void +mirb_cleanup_completion(void) +{ + if (g_ctx) { + mirb_completion_free(g_ctx); + free(g_ctx); + g_ctx = NULL; + } +} + +/* ============================================================ + * Readline/Libedit Adapter + * ============================================================ */ + +#ifdef MRB_USE_READLINE +#ifndef MRB_USE_LINENOISE + +static char * +mirb_readline_generator(const char *text, int state) +{ + (void)text; /* text is already in match_prefix */ + + /* state == 0: first call, generate completions */ + if (state == 0) { + mirb_completion_free(g_ctx); + + /* Generate completions based on full line */ + mirb_generate_completions(g_ctx, rl_line_buffer, rl_point); + + g_ctx->current_index = 0; + } + + /* Return next completion or NULL when done */ + if (g_ctx->current_index < g_ctx->completion_count) { + char *completion = g_ctx->completions[g_ctx->current_index]; + g_ctx->current_index++; + + /* readline will free this, so duplicate */ + return strdup(completion); + } + + return NULL; +} + +static char ** +mirb_readline_completion(const char *text, int start, int end) +{ + (void)start; + (void)end; + + /* Prevent default filename completion */ + rl_attempted_completion_over = 1; + + /* Use our generator */ + return rl_completion_matches(text, mirb_readline_generator); +} + +void +mirb_setup_readline_completion(mrb_state *mrb, mrb_ccontext *cxt) +{ + if (!init_completion_ctx(mrb, cxt)) return; + + /* Set completion function */ + rl_attempted_completion_function = mirb_readline_completion; + + /* Configure readline behavior - include . so "obj.method" are separate words */ + rl_basic_word_break_characters = " \t\n\"\\'`@$><=;|&{(."; + rl_completer_word_break_characters = " \t\n\"\\'`@$><=;|&{(."; +} + +#endif +#endif + +/* ============================================================ + * Linenoise Adapter + * ============================================================ */ + +#ifdef MRB_USE_LINENOISE + +static void +mirb_linenoise_completion(const char *buf, linenoiseCompletions *lc) +{ + int cursor_pos = (int)strlen(buf); /* linenoise completes at end */ + int i, prefix_start; + char completion_line[1024]; + + /* Clear previous completions */ + mirb_completion_free(g_ctx); + + /* Generate completions */ + mirb_generate_completions(g_ctx, buf, cursor_pos); + + /* Add each completion to linenoise */ + for (i = 0; i < g_ctx->completion_count; i++) { + /* Need to build full line with completion */ + prefix_start = cursor_pos - g_ctx->prefix_len; + + /* Copy line up to prefix */ + if (prefix_start > 0) { + memcpy(completion_line, buf, prefix_start); + } + + /* Add completion */ + strcpy(completion_line + prefix_start, g_ctx->completions[i]); + + linenoiseAddCompletion(lc, completion_line); + } +} + +void +mirb_setup_linenoise_completion(mrb_state *mrb, mrb_ccontext *cxt) +{ + if (!init_completion_ctx(mrb, cxt)) return; + + /* Set completion callback */ + linenoiseSetCompletionCallback(mirb_linenoise_completion); +} + +#endif + +/* ============================================================ + * Custom Editor Adapter + * ============================================================ */ + +void +mirb_setup_editor_completion(mrb_state *mrb, mrb_ccontext *cxt) +{ + init_completion_ctx(mrb, cxt); +} + +int +mirb_get_completions(const char *line, int cursor_pos, + char ***completions_out, int *prefix_len_out) +{ + int i; + + if (!g_ctx) { + *completions_out = NULL; + *prefix_len_out = 0; + return 0; + } + + /* Clear previous completions */ + mirb_completion_free(g_ctx); + + /* Generate completions */ + mirb_generate_completions(g_ctx, line, cursor_pos); + + /* Return results */ + *prefix_len_out = g_ctx->prefix_len; + + if (g_ctx->completion_count == 0) { + *completions_out = NULL; + return 0; + } + + /* Copy completions (caller will free) */ + *completions_out = (char**)malloc(g_ctx->completion_count * sizeof(char*)); + if (!*completions_out) return 0; + + for (i = 0; i < g_ctx->completion_count; i++) { + (*completions_out)[i] = strdup(g_ctx->completions[i]); + } + + return g_ctx->completion_count; +} + +void +mirb_free_completions(char **completions, int count) +{ + int i; + if (completions) { + for (i = 0; i < count; i++) { + free(completions[i]); + } + free(completions); + } +} diff --git a/mrbgems/mruby-bin-mirb/tools/mirb/mirb_completion.h b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_completion.h new file mode 100644 index 0000000000..1e462b70fb --- /dev/null +++ b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_completion.h @@ -0,0 +1,128 @@ +/* +** mirb_completion.h - Tab completion support for mirb +** +** See Copyright Notice in mruby.h +*/ + +#ifndef MIRB_COMPLETION_H +#define MIRB_COMPLETION_H + +#include +#include + +/** + * @file mirb_completion.h + * + * Tab completion support for mirb. + * + * Architecture: + * - Core engine is library-agnostic + * - Adapters for readline/libedit and linenoise + * - Context detection based on input line analysis + * - Safe evaluation of receiver expressions + * + * Completion Types: + * - COMPLETION_METHOD: After dot operator + * - COMPLETION_KEYWORD: Ruby keywords + * - COMPLETION_LOCAL_VAR: Variables in scope + * - COMPLETION_GLOBAL_VAR: $variables + * - COMPLETION_CONSTANT: Constants and classes + * - COMPLETION_FILE: File paths (optional) + * + * Performance: + * - Completions generated on-demand + * - Results cached per tab press + * - Safe evaluation with exception handling + */ + +/* Completion types */ +typedef enum { + COMPLETION_METHOD, /* Object methods */ + COMPLETION_KEYWORD, /* Ruby keywords */ + COMPLETION_GLOBAL_VAR, /* $global */ + COMPLETION_LOCAL_VAR, /* local_var */ + COMPLETION_CONSTANT, /* CONSTANT or Class */ + COMPLETION_FILE, /* File paths */ +} mirb_completion_type; + +/* Completion context - shared state */ +typedef struct mirb_completion_ctx { + mrb_state *mrb; /* mruby VM state */ + mrb_ccontext *cxt; /* Compiler context for locals */ + const char *line_buf; /* Current input line */ + int cursor_pos; /* Cursor position in line */ + char *match_prefix; /* Text to match against */ + int prefix_len; /* Length of prefix */ + + /* Completion results */ + char **completions; /* Array of completion strings */ + int completion_count; /* Number of completions */ + int completion_alloc; /* Allocated size */ + int current_index; /* For generator pattern (readline) */ +} mirb_completion_ctx; + +/* Core Completion Engine Interface */ + +/* Initialize completion context */ +void mirb_completion_init(mirb_completion_ctx *ctx, mrb_state *mrb, + mrb_ccontext *cxt); + +/* Free completion context */ +void mirb_completion_free(mirb_completion_ctx *ctx); + +/* Analyze line and generate completions */ +void mirb_generate_completions(mirb_completion_ctx *ctx, + const char *line, int cursor_pos); + +/* Get completion type from context */ +mirb_completion_type mirb_detect_completion_type(const char *line, + int cursor_pos); + +/* Individual completion generators */ +void mirb_complete_methods(mirb_completion_ctx *ctx, mrb_value receiver); +void mirb_complete_keywords(mirb_completion_ctx *ctx); +void mirb_complete_local_vars(mirb_completion_ctx *ctx); +void mirb_complete_global_vars(mirb_completion_ctx *ctx); +void mirb_complete_constants(mirb_completion_ctx *ctx, struct RClass *scope); +void mirb_complete_files(mirb_completion_ctx *ctx, const char *partial_path); + +/* Helper functions */ + +/* Add completion if matches prefix */ +void mirb_add_completion(mirb_completion_ctx *ctx, const char *text); + +/* Extract receiver expression from line */ +char *mirb_extract_receiver(const char *line, int cursor_pos, int *recv_end); + +/* Evaluate receiver expression to get object */ +mrb_value mirb_eval_receiver(mrb_state *mrb, const char *receiver_expr, mrb_ccontext *cxt); + +/* Check if in file completion context */ +mrb_bool mirb_in_file_context(const char *line, int quote_pos); + +/* Cleanup completion context (shared by all adapters) */ +void mirb_cleanup_completion(void); + +/* Readline/Libedit adapter setup */ +#ifdef MRB_USE_READLINE +#ifndef MRB_USE_LINENOISE +void mirb_setup_readline_completion(mrb_state *mrb, mrb_ccontext *cxt); +#endif +#endif + +/* Linenoise adapter setup */ +#ifdef MRB_USE_LINENOISE +void mirb_setup_linenoise_completion(mrb_state *mrb, mrb_ccontext *cxt); +#endif + +/* Custom editor adapter */ +void mirb_setup_editor_completion(mrb_state *mrb, mrb_ccontext *cxt); + +/* Get completions for custom editor - returns number of completions */ +int mirb_get_completions(const char *line, int cursor_pos, + char ***completions_out, int *prefix_len_out); + +/* Free completions returned by mirb_get_completions */ +void mirb_free_completions(char **completions, int count); + +#endif /* MIRB_COMPLETION_H */ diff --git a/mrbgems/mruby-bin-mirb/tools/mirb/mirb_editor.c b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_editor.c new file mode 100644 index 0000000000..5302aff915 --- /dev/null +++ b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_editor.c @@ -0,0 +1,1066 @@ +/* +** mirb_editor.c - Multi-line editor for mirb +** +** See Copyright Notice in mruby.h +*/ + +#include "mirb_editor.h" +#include +#include +#include + +/* ANSI color codes */ +#define COLOR_GREEN "\033[32m" + +/* + * Check if line contains only whitespace before given column + */ +static mrb_bool +line_is_blank_before(const char *line, size_t col) +{ + for (size_t i = 0; i < col; i++) { + if (line[i] != ' ' && line[i] != '\t') { + return FALSE; + } + } + return TRUE; +} + +/* + * Get leading whitespace count on current line + */ +static size_t +leading_spaces(const char *line) +{ + size_t count = 0; + while (line[count] == ' ' || line[count] == '\t') { + count++; + } + return count; +} + +/* + * Dedent keyword table: keywords that reduce indentation level. + * allow_eol: keyword can appear alone at end of line + * delims: valid non-NUL characters that can follow the keyword + */ +static const struct { + const char *word; + const char *delims; + mrb_bool allow_eol; +} dedent_table[] = { + {"else", " \t", TRUE}, + {"elsif", " ", FALSE}, + {"end", " \t.)", TRUE}, + {"ensure", " \t", TRUE}, + {"in", " ", FALSE}, + {"rescue", " \t", TRUE}, + {"when", " ", FALSE}, +}; + +static mrb_bool +is_dedent_keyword(const char *content) +{ + size_t i; + + if (content[0] == '}') return TRUE; + for (i = 0; i < sizeof(dedent_table)/sizeof(dedent_table[0]); i++) { + size_t len = strlen(dedent_table[i].word); + if (strncmp(content, dedent_table[i].word, len) == 0) { + char c = content[len]; + if (c == '\0') return dedent_table[i].allow_eol; + return strchr(dedent_table[i].delims, c) != NULL; + } + } + return FALSE; +} + +/* + * Check if a line contains only whitespace (or is empty) + */ +static mrb_bool +is_line_blank(const mirb_line *line) +{ + for (size_t i = 0; i < line->len; i++) { + if (line->data[i] != ' ' && line->data[i] != '\t') { + return FALSE; + } + } + return TRUE; +} + +/* + * Indent keyword table: keywords that affect indentation level. + * delta: +1 for block-opening, -1 for block-closing + * allow_eol: keyword can appear at end of line/string + * delims: valid non-NUL characters that can follow the keyword + */ +static const struct { + const char *word; + const char *delims; + mrb_bool allow_eol; + int delta; +} indent_table[] = { + {"begin", "\n #", TRUE, +1}, + {"case", " ", FALSE, +1}, + {"class", " ", FALSE, +1}, + {"def", " ", FALSE, +1}, + {"do", "\n #|", TRUE, +1}, + {"end", "\n #.)", TRUE, -1}, + {"for", " ", FALSE, +1}, + {"if", " ", FALSE, +1}, + {"module", " ", FALSE, +1}, + {"unless", " ", FALSE, +1}, + {"until", " ", FALSE, +1}, + {"while", " ", FALSE, +1}, +}; + +/* + * Calculate indent level by counting open blocks in code + */ +static int +calc_indent_level(const char *code) +{ + int level = 0; + const char *p = code; + int at_line_start = 1; + + while (*p) { + /* Skip strings */ + if (*p == '"' || *p == '\'') { + char quote = *p++; + while (*p && *p != quote) { + if (*p == '\\' && p[1]) p++; + p++; + } + if (*p) p++; + at_line_start = 0; + continue; + } + /* Skip comments */ + if (*p == '#') { + while (*p && *p != '\n') p++; + continue; + } + /* Track line starts for keyword detection */ + if (*p == '\n') { + at_line_start = 1; + p++; + continue; + } + /* Skip whitespace but don't change at_line_start yet */ + if (*p == ' ' || *p == '\t') { + p++; + continue; + } + /* Check for block keywords at word boundary */ + if (at_line_start || (p > code && !mirb_is_word_char(p[-1]))) { + size_t ki; + for (ki = 0; ki < sizeof(indent_table)/sizeof(indent_table[0]); ki++) { + size_t len = strlen(indent_table[ki].word); + if (strncmp(p, indent_table[ki].word, len) == 0) { + char c = p[len]; + if ((c == '\0' && indent_table[ki].allow_eol) || + (c != '\0' && strchr(indent_table[ki].delims, c))) { + level += indent_table[ki].delta; + if (level < 0) level = 0; + } + break; + } + } + } + /* Check for block opening/closing with braces */ + if (*p == '{') { + level++; + } + else if (*p == '}') { + if (level > 0) level--; + } + at_line_start = 0; + p++; + } + return level; +} + +/* + * Calculate expected indent level for the given line index. + * Uses code up to line_idx-1 to determine nesting depth. + */ +static int +calc_expected_indent(mirb_buffer *buf, size_t line_idx) +{ + int indent = 0; + char *partial; + + if (line_idx == 0) return 0; + partial = mirb_buffer_to_string_upto_line(buf, line_idx - 1); + if (partial) { + indent = calc_indent_level(partial); + free(partial); + } + return indent; +} + +/* + * Adjust current line's leading whitespace to target_spaces. + * Preserves cursor position relative to line content. + */ +static void +adjust_line_indent(mirb_buffer *buf, size_t target_spaces) +{ + size_t current_spaces = leading_spaces(mirb_buffer_current_line(buf)); + size_t saved_col = buf->cursor_col; + + if (target_spaces == current_spaces) return; + + if (target_spaces > current_spaces) { + size_t add = target_spaces - current_spaces; + buf->cursor_col = 0; + for (size_t i = 0; i < add; i++) { + mirb_buffer_insert_char(buf, ' '); + } + buf->cursor_col = saved_col + add; + } + else { + size_t to_remove = current_spaces - target_spaces; + buf->cursor_col = 0; + for (size_t i = 0; i < to_remove; i++) { + mirb_buffer_delete_forward(buf); + } + buf->cursor_col = (saved_col > to_remove) ? (saved_col - to_remove) : 0; + } +} + +/* + * Insert indent spaces at cursor position + */ +static void +insert_indent_spaces(mirb_buffer *buf, int indent_level) +{ + for (int i = 0; i < indent_level * 2; i++) { + mirb_buffer_insert_char(buf, ' '); + } +} + +/* + * Check if we should dedent after typing a character + * Returns TRUE if current line starts with 'end' or '}' after only whitespace + */ +static mrb_bool +should_dedent(mirb_buffer *buf, char last_char) +{ + const char *line = mirb_buffer_current_line(buf); + size_t col = buf->cursor_col; + size_t line_len = buf->lines[buf->cursor_line].len; + + /* Check for '}' - dedent immediately when typed at line start */ + if (last_char == '}') { + if (col == 1 || (col > 1 && line_is_blank_before(line, col - 1))) { + return TRUE; + } + } + + /* Helper macro: check keyword completion */ + #define CHECK_KEYWORD(keyword, len, trigger_char) \ + if (last_char == trigger_char && col >= len) { \ + if (strncmp(line + col - len, keyword, len) == 0) { \ + if (col == len || line_is_blank_before(line, col - len)) { \ + if (col == line_len || \ + line[col] == ' ' || line[col] == '\t' || line[col] == '\0' || \ + line[col] == '\n' || line[col] == '.' || line[col] == ')') { \ + return TRUE; \ + } \ + } \ + } \ + } + + /* Check for 'end' - dedent when 'd' completes "end" */ + CHECK_KEYWORD("end", 3, 'd'); + + /* Check for 'else' - dedent when 'e' completes "else" */ + CHECK_KEYWORD("else", 4, 'e'); + + /* Check for 'elsif' - dedent when 'f' completes "elsif" */ + CHECK_KEYWORD("elsif", 5, 'f'); + + /* Check for 'when' - dedent when 'n' completes "when" */ + CHECK_KEYWORD("when", 4, 'n'); + + /* Check for 'in' - dedent when 'n' completes "in" (pattern matching) */ + CHECK_KEYWORD("in", 2, 'n'); + + /* Check for 'rescue' - dedent when 'e' completes "rescue" */ + CHECK_KEYWORD("rescue", 6, 'e'); + + /* Check for 'ensure' - dedent when 'e' completes "ensure" */ + CHECK_KEYWORD("ensure", 6, 'e'); + + #undef CHECK_KEYWORD + + return FALSE; +} + +/* + * Perform dedentation - adjust indent for dedent keyword + */ +static void +perform_dedent(mirb_buffer *buf) +{ + int indent = calc_expected_indent(buf, buf->cursor_line); + if (indent > 0) indent--; + adjust_line_indent(buf, (size_t)(indent * 2)); +} + + +/* + * Re-indent current line to match expected indent level + * Used before inserting newline to fix any misaligned indentation + */ +static void +reindent_line(mirb_buffer *buf) +{ + mirb_line *line = &buf->lines[buf->cursor_line]; + int indent = calc_expected_indent(buf, buf->cursor_line); + const char *content = line->data + leading_spaces(line->data); + if (is_dedent_keyword(content)) { + if (indent > 0) indent--; + } + adjust_line_indent(buf, (size_t)(indent * 2)); +} + +/* + * Initialize editor + */ +mrb_bool +mirb_editor_init(mirb_editor *ed) +{ + memset(ed, 0, sizeof(*ed)); + + if (!mirb_term_init(&ed->term)) { + /* Terminal init may fail but we can still work in simple mode */ + } + + if (!mirb_buffer_init(&ed->buf)) { + mirb_term_cleanup(&ed->term); + return FALSE; + } + + if (!mirb_history_init(&ed->hist, MIRB_HISTORY_SIZE)) { + mirb_buffer_free(&ed->buf); + mirb_term_cleanup(&ed->term); + return FALSE; + } + + ed->prompt = "> "; + ed->prompt_cont = "* "; + ed->prompt_len = 2; + ed->prompt_cont_len = 2; + ed->prompt_fmt = NULL; + ed->prompt_cont_fmt = NULL; + ed->line_num_base = 1; + ed->use_color = FALSE; + mirb_highlight_init(&ed->highlight, FALSE); + ed->initialized = TRUE; + + return TRUE; +} + +/* + * Cleanup editor + */ +void +mirb_editor_cleanup(mirb_editor *ed) +{ + if (!ed->initialized) return; + + mirb_history_free(&ed->hist); + mirb_buffer_free(&ed->buf); + mirb_term_cleanup(&ed->term); + ed->initialized = FALSE; +} + +/* + * Set prompts (fixed strings) + */ +void +mirb_editor_set_prompts(mirb_editor *ed, const char *prompt, const char *prompt_cont) +{ + ed->prompt = prompt; + ed->prompt_cont = prompt_cont; + ed->prompt_len = strlen(prompt); + ed->prompt_cont_len = strlen(prompt_cont); + ed->prompt_fmt = NULL; + ed->prompt_cont_fmt = NULL; +} + +/* + * Set prompt format strings for line-numbered prompts + */ +void +mirb_editor_set_prompt_format(mirb_editor *ed, const char *prompt_fmt, + const char *prompt_cont_fmt, int line_num) +{ + ed->prompt_fmt = prompt_fmt; + ed->prompt_cont_fmt = prompt_cont_fmt; + ed->line_num_base = line_num; + /* Estimate prompt length (assuming line numbers up to 999) */ + ed->prompt_len = strlen(prompt_fmt) + 2; /* %d -> up to 3 digits, minus 2 for %d */ + ed->prompt_cont_len = strlen(prompt_cont_fmt) + 2; +} + +/* + * Set completion checker + */ +void +mirb_editor_set_check_complete(mirb_editor *ed, mirb_check_complete_fn *fn, void *user_data) +{ + ed->check_complete = fn; + ed->check_complete_data = user_data; +} + +/* + * Set tab completion callbacks + */ +void +mirb_editor_set_tab_complete(mirb_editor *ed, + mirb_tab_complete_fn *complete_fn, + mirb_tab_complete_free_fn *free_fn, + void *user_data) +{ + ed->tab_complete = complete_fn; + ed->tab_complete_free = free_fn; + ed->tab_complete_data = user_data; +} + +/* + * Handle tab completion + * Returns TRUE if completion was performed + */ + +/* + * Handle tab auto-indent + * Adjusts current line's indentation to match the expected level + */ +static void +handle_tab_indent(mirb_editor *ed) +{ + reindent_line(&ed->buf); +} + +static mrb_bool +handle_tab_completion(mirb_editor *ed) +{ + char **completions = NULL; + int count, prefix_len; + const char *current_line; + int cursor_col; + + if (!ed->tab_complete) return FALSE; + + /* Get current line and cursor position */ + current_line = ed->buf.lines[ed->buf.cursor_line].data; + cursor_col = (int)ed->buf.cursor_col; + + /* Get completions */ + count = ed->tab_complete(current_line, cursor_col, &completions, &prefix_len, + ed->tab_complete_data); + + if (count == 0 || !completions) { + return FALSE; + } + + if (count == 1) { + /* Single completion - insert it */ + const char *completion = completions[0]; + int i; + + /* Delete the prefix we're replacing */ + for (i = 0; i < prefix_len; i++) { + mirb_buffer_delete_back(&ed->buf); + } + + /* Insert completion */ + mirb_buffer_insert_string(&ed->buf, completion, strlen(completion)); + } + else { + /* Multiple completions - find common prefix and show options */ + int common_len = (int)strlen(completions[0]); + int i, j; + + /* Find longest common prefix */ + for (i = 1; i < count; i++) { + for (j = 0; j < common_len && completions[i][j]; j++) { + if (completions[0][j] != completions[i][j]) { + common_len = j; + break; + } + } + if (j < common_len) common_len = j; + } + + if (common_len > prefix_len) { + /* Extend with common prefix */ + for (i = 0; i < prefix_len; i++) { + mirb_buffer_delete_back(&ed->buf); + } + mirb_buffer_insert_string(&ed->buf, completions[0], common_len); + } + else { + /* Show all completions */ + printf("\r\n"); + for (i = 0; i < count; i++) { + printf("%s ", completions[i]); + if ((i + 1) % 4 == 0 && i + 1 < count) printf("\r\n"); + } + printf("\r\n"); + /* Force full redraw */ + ed->prev_line_count = 0; + } + } + + /* Free completions */ + if (ed->tab_complete_free) { + ed->tab_complete_free(completions, count, ed->tab_complete_data); + } + + return TRUE; +} + +/* + * Enable/disable color + */ +void +mirb_editor_set_color(mirb_editor *ed, mrb_bool enable) +{ + ed->use_color = enable; + mirb_highlight_init(&ed->highlight, enable); +} + +/* + * Check if multi-line editing is supported + */ +mrb_bool +mirb_editor_supported(mirb_editor *ed) +{ + return ed->term.supported; +} + +/* + * Calculate prompt length for given line + */ +static size_t +calc_prompt_len(mirb_editor *ed, size_t line_idx) +{ + if (ed->prompt_fmt != NULL) { + /* Format string: calculate actual length */ + int line_num = ed->line_num_base + (int)line_idx; + const char *fmt = (line_idx == 0) ? ed->prompt_fmt : ed->prompt_cont_fmt; + return (size_t)snprintf(NULL, 0, fmt, line_num); + } + else { + /* Fixed prompt string */ + return (line_idx == 0) ? ed->prompt_len : ed->prompt_cont_len; + } +} + +/* + * Print prompt for given line + */ +static void +print_prompt(mirb_editor *ed, size_t line_idx) +{ + int line_num = ed->line_num_base + (int)line_idx; + + if (ed->use_color) { + printf("%s", COLOR_GREEN); + } + + if (ed->prompt_fmt != NULL) { + /* Use format string with line number */ + const char *fmt = (line_idx == 0) ? ed->prompt_fmt : ed->prompt_cont_fmt; + printf(fmt, line_num); + } + else { + /* Use fixed prompt string */ + const char *p = (line_idx == 0) ? ed->prompt : ed->prompt_cont; + printf("%s", p); + } + + if (ed->use_color) { + printf("%s", COLOR_RESET); + } +} + +/* + * Refresh display - uses natural terminal scrolling like irb + * + * Strategy: + * - Track which screen row we started on + * - Move cursor back to start, clear everything below, redraw all lines + * - This allows terminal to scroll naturally without corrupting history + */ +static void +refresh_display(mirb_editor *ed) +{ + size_t lines_to_go_up; + + /* Calculate how many lines up we need to go to reach start of input */ + /* We're currently on cursor_line, and prev_line_count tells us total displayed */ + if (ed->prev_line_count > 0) { + /* Go up from current position to first line of input */ + lines_to_go_up = ed->display_cursor_row; + if (lines_to_go_up > 0) { + mirb_term_cursor_up((int)lines_to_go_up); + } + } + + /* Move to column 1 and clear from here to end of screen */ + mirb_term_cursor_col(1); + mirb_term_clear_below(); + + /* Reset highlight state for fresh scan */ + mirb_highlight_reset(&ed->highlight); + + /* Redraw all lines */ + for (size_t i = 0; i < ed->buf.line_count; i++) { + print_prompt(ed, i); + mirb_highlight_print_line(&ed->highlight, mirb_buffer_line_at(&ed->buf, i)); + + if (i < ed->buf.line_count - 1) { + printf("\r\n"); + } + } + + /* Now position cursor correctly */ + /* We're at the end of last line, need to go to cursor position */ + size_t lines_up_from_end = ed->buf.line_count - 1 - ed->buf.cursor_line; + if (lines_up_from_end > 0) { + mirb_term_cursor_up((int)lines_up_from_end); + } + + /* Position column on cursor line (calculate actual prompt length) */ + size_t prompt_len = calc_prompt_len(ed, ed->buf.cursor_line); + size_t display_col = mirb_buffer_cursor_display_col(&ed->buf); + mirb_term_cursor_col((int)(prompt_len + display_col + 1)); + + /* Update tracking */ + ed->prev_line_count = ed->buf.line_count; + ed->display_cursor_row = ed->buf.cursor_line; + + mirb_term_flush(); +} + +/* + * Handle a keypress + * Returns TRUE to continue editing, FALSE to finish + */ +static mrb_bool +handle_key(mirb_editor *ed, int key, mirb_edit_result *result) +{ + switch (key) { + case MIRB_KEY_ENTER: + /* Stop history browsing */ + mirb_history_browse_stop(&ed->hist); + /* Re-indent current line before inserting newline */ + reindent_line(&ed->buf); + { + /* + * Smart Enter behavior: + * - Only evaluate when cursor is at end of last line and code is complete + * - If cursor is not at end of last line, always insert/split (no evaluation) + * - If next line is blank last line and cursor at end, move to it + */ + mrb_bool at_last_line = (ed->buf.cursor_line == ed->buf.line_count - 1); + mirb_line *current_line = &ed->buf.lines[ed->buf.cursor_line]; + mrb_bool at_end_of_line = (ed->buf.cursor_col == current_line->len); + mrb_bool can_evaluate = at_last_line && at_end_of_line; + + /* Check for smart navigation to existing blank last line */ + if (!at_last_line && at_end_of_line) { + size_t next_line_idx = ed->buf.cursor_line + 1; + mrb_bool next_is_last = (next_line_idx == ed->buf.line_count - 1); + + if (next_is_last) { + mirb_line *next_line = &ed->buf.lines[next_line_idx]; + if (is_line_blank(next_line)) { + /* Move to existing blank last line with proper indentation */ + int indent = calc_expected_indent(&ed->buf, ed->buf.line_count); + mirb_buffer_cursor_down(&ed->buf); + /* Clear existing whitespace and set correct indent */ + mirb_line *line = &ed->buf.lines[ed->buf.cursor_line]; + line->len = 0; + line->data[0] = '\0'; + ed->buf.cursor_col = 0; + insert_indent_spaces(&ed->buf, indent); + return TRUE; + } + } + } + + /* If cursor in middle of line and next is blank last, remove it before split */ + if (!at_last_line && !at_end_of_line) { + size_t next_line_idx = ed->buf.cursor_line + 1; + if (next_line_idx == ed->buf.line_count - 1) { + mirb_line *next_line = &ed->buf.lines[next_line_idx]; + if (is_line_blank(next_line)) { + mirb_buffer_delete_line(&ed->buf, next_line_idx); + } + } + } + + /* Check if input is complete - only when cursor at end of last line */ + if (can_evaluate && ed->check_complete) { + char *code = mirb_buffer_to_string(&ed->buf); + if (code) { + mrb_bool complete = ed->check_complete(code, ed->check_complete_data); + if (complete) { + free(code); + *result = MIRB_EDIT_OK; + return FALSE; + } + /* Code not complete - add new line with indentation */ + int indent = calc_indent_level(code); + free(code); + mirb_buffer_newline(&ed->buf); + insert_indent_spaces(&ed->buf, indent); + return TRUE; + } + } + + /* Not at end of last line - just insert/split with appropriate indent */ + { + int indent = calc_expected_indent(&ed->buf, ed->buf.cursor_line + 1); + mirb_buffer_newline(&ed->buf); + + /* Check if new line starts with dedenting keyword */ + mirb_line *new_line = &ed->buf.lines[ed->buf.cursor_line]; + if (is_dedent_keyword(new_line->data)) { + if (indent > 0) indent--; + } + + insert_indent_spaces(&ed->buf, indent); + return TRUE; + } + } + + case MIRB_KEY_CTRL_C: + *result = MIRB_EDIT_INTERRUPT; + return FALSE; + + case MIRB_KEY_CTRL_D: + if (mirb_buffer_total_len(&ed->buf) == 0) { + *result = MIRB_EDIT_EOF; + return FALSE; + } + /* Delete forward if not empty */ + mirb_buffer_delete_forward(&ed->buf); + return TRUE; + + case MIRB_KEY_BACKSPACE: + mirb_buffer_delete_back(&ed->buf); + return TRUE; + + case MIRB_KEY_DELETE: + mirb_buffer_delete_forward(&ed->buf); + return TRUE; + + case MIRB_KEY_LEFT: + case MIRB_KEY_CTRL_B: + mirb_buffer_cursor_left(&ed->buf); + return TRUE; + + case MIRB_KEY_RIGHT: + case MIRB_KEY_CTRL_F: + mirb_buffer_cursor_right(&ed->buf); + return TRUE; + + case MIRB_KEY_UP: + case MIRB_KEY_CTRL_P: + /* If on first line, navigate history; otherwise move cursor up */ + if (ed->buf.cursor_line == 0) { + /* Start history browsing if not already */ + if (!ed->hist.browsing) { + char *current = mirb_buffer_to_string(&ed->buf); + mirb_history_browse_start(&ed->hist, current); + free(current); + } + const char *prev = mirb_history_prev(&ed->hist); + if (prev) { + mirb_buffer_set_string(&ed->buf, prev); + mirb_buffer_cursor_finish(&ed->buf); + } + } + else { + mirb_buffer_cursor_up(&ed->buf); + } + return TRUE; + + case MIRB_KEY_DOWN: + case MIRB_KEY_CTRL_N: + /* If on last line, navigate history; otherwise move cursor down */ + if (ed->buf.cursor_line == ed->buf.line_count - 1) { + if (ed->hist.browsing) { + const char *next = mirb_history_next(&ed->hist); + if (next) { + mirb_buffer_set_string(&ed->buf, next); + mirb_buffer_cursor_finish(&ed->buf); + } + } + } + else { + mirb_buffer_cursor_down(&ed->buf); + } + return TRUE; + + case MIRB_KEY_HOME: + case MIRB_KEY_CTRL_A: + mirb_buffer_cursor_home(&ed->buf); + return TRUE; + + case MIRB_KEY_END: + case MIRB_KEY_CTRL_E: + mirb_buffer_cursor_end(&ed->buf); + return TRUE; + + case MIRB_KEY_CTRL_K: + mirb_buffer_kill_to_end(&ed->buf); + return TRUE; + + case MIRB_KEY_CTRL_U: + mirb_buffer_kill_to_start(&ed->buf); + return TRUE; + + case MIRB_KEY_CTRL_W: + mirb_buffer_kill_word_back(&ed->buf); + return TRUE; + + case MIRB_KEY_CTRL_Y: + mirb_buffer_yank(&ed->buf); + return TRUE; + + case MIRB_KEY_ALT_B: + mirb_buffer_cursor_word_back(&ed->buf); + return TRUE; + + case MIRB_KEY_ALT_F: + mirb_buffer_cursor_word_forward(&ed->buf); + return TRUE; + + case MIRB_KEY_ALT_D: + mirb_buffer_kill_word_forward(&ed->buf); + return TRUE; + + case MIRB_KEY_CTRL_L: + /* Clear screen and refresh */ + mirb_term_clear_screen(); + ed->prev_line_count = 0; + return TRUE; + + case MIRB_KEY_TAB: + /* Auto-indent if at start/end of line or preceded by whitespace */ + { + mirb_line *line = &ed->buf.lines[ed->buf.cursor_line]; + mrb_bool do_indent = FALSE; + + if (ed->buf.cursor_col == 0) { + do_indent = TRUE; + } + else if (ed->buf.cursor_col == line->len) { + /* At end of line */ + do_indent = TRUE; + } + else { + char prev_char = line->data[ed->buf.cursor_col - 1]; + if (prev_char == ' ' || prev_char == '\t') { + do_indent = TRUE; + } + } + + if (do_indent) { + handle_tab_indent(ed); + } + else { + handle_tab_completion(ed); + } + } + return TRUE; + + default: + /* Insert printable characters */ + if (key >= 32 && key < 127) { + /* Stop history browsing when user types */ + mirb_history_browse_stop(&ed->hist); + mirb_buffer_insert_char(&ed->buf, (char)key); + /* Check for auto-dedent after typing 'end' or '}' */ + if (should_dedent(&ed->buf, (char)key)) { + perform_dedent(&ed->buf); + } + } +#ifdef MRB_UTF8_STRING + /* Handle UTF-8 multibyte characters (bytes >= 0x80) */ + else if (key >= 128 && key <= 255) { + mirb_history_browse_stop(&ed->hist); + mirb_buffer_insert_char(&ed->buf, (char)key); + } +#endif + return TRUE; + } +} + +/* + * Read input with multi-line editing + */ +mirb_edit_result +mirb_editor_read(mirb_editor *ed, char **out_str) +{ + mirb_edit_result result; + int key; + + *out_str = NULL; + + /* Fall back to simple mode if raw mode not supported */ + if (!ed->term.supported) { + return mirb_editor_read_simple(ed, out_str); + } + + /* Clear buffer for new input */ + mirb_buffer_clear(&ed->buf); + ed->prev_line_count = 0; + ed->display_cursor_row = 0; + + /* Enable raw mode */ + if (!mirb_term_raw_enable(&ed->term)) { + return mirb_editor_read_simple(ed, out_str); + } + + /* Initial display */ + print_prompt(ed, 0); + mirb_term_flush(); + ed->prev_line_count = 1; + ed->display_cursor_row = 0; + + /* Main editing loop */ + result = MIRB_EDIT_ERROR; + while (1) { + key = mirb_term_read_key(&ed->term); + if (key == MIRB_KEY_NONE) { + result = MIRB_EDIT_ERROR; + break; + } + + if (!handle_key(ed, key, &result)) { + break; + } + + refresh_display(ed); + } + + /* Disable raw mode */ + mirb_term_raw_disable(&ed->term); + + /* Move to end and print newline */ + if (ed->buf.cursor_line < ed->buf.line_count - 1) { + mirb_term_cursor_down((int)(ed->buf.line_count - 1 - ed->buf.cursor_line)); + } + printf("\n"); + + /* Return result string */ + if (result == MIRB_EDIT_OK) { + *out_str = mirb_buffer_to_string(&ed->buf); + if (*out_str == NULL) { + result = MIRB_EDIT_ERROR; + } + } + + return result; +} + +/* + * Simple single-line input (fallback) + */ +mirb_edit_result +mirb_editor_read_simple(mirb_editor *ed, char **out_str) +{ + char line[4096]; + size_t total_len = 0; + size_t total_cap = 4096; + char *total = (char*)malloc(total_cap); + mrb_bool first_line = TRUE; + + *out_str = NULL; + + if (total == NULL) return MIRB_EDIT_ERROR; + total[0] = '\0'; + + while (1) { + /* Print prompt */ + print_prompt(ed, first_line ? 0 : 1); + fflush(stdout); + + /* Read line */ + if (fgets(line, sizeof(line), stdin) == NULL) { + if (total_len == 0) { + free(total); + return MIRB_EDIT_EOF; + } + break; + } + + /* Remove trailing newline */ + size_t len = strlen(line); + if (len > 0 && line[len - 1] == '\n') { + line[--len] = '\0'; + } + + /* Append to total */ + if (!first_line) { + /* Add newline separator */ + if (total_len + 1 >= total_cap) { + total_cap *= 2; + char *new_total = (char*)realloc(total, total_cap); + if (new_total == NULL) { + free(total); + return MIRB_EDIT_ERROR; + } + total = new_total; + } + total[total_len++] = '\n'; + } + + if (total_len + len >= total_cap) { + total_cap *= 2; + char *new_total = (char*)realloc(total, total_cap); + if (new_total == NULL) { + free(total); + return MIRB_EDIT_ERROR; + } + total = new_total; + } + memcpy(total + total_len, line, len + 1); + total_len += len; + + first_line = FALSE; + + /* Check if complete */ + if (ed->check_complete) { + if (ed->check_complete(total, ed->check_complete_data)) { + break; + } + } + else { + break; /* No checker, single line mode */ + } + } + + *out_str = total; + return MIRB_EDIT_OK; +} + +/* + * Add entry to history + */ +void +mirb_editor_history_add(mirb_editor *ed, const char *entry) +{ + mirb_history_add(&ed->hist, entry); +} diff --git a/mrbgems/mruby-bin-mirb/tools/mirb/mirb_editor.h b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_editor.h new file mode 100644 index 0000000000..fe50da3252 --- /dev/null +++ b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_editor.h @@ -0,0 +1,153 @@ +/* +** mirb_editor.h - Multi-line editor for mirb +** +** See Copyright Notice in mruby.h +*/ + +#ifndef MIRB_EDITOR_H +#define MIRB_EDITOR_H + +#include +#include "mirb_term.h" +#include "mirb_buffer.h" +#include "mirb_history.h" +#include "mirb_highlight.h" + +/* + * Editor result codes + */ +typedef enum mirb_edit_result { + MIRB_EDIT_OK = 0, /* Input ready (Enter pressed) */ + MIRB_EDIT_CONTINUE, /* Need more input (multi-line) */ + MIRB_EDIT_EOF, /* End of file (Ctrl+D on empty) */ + MIRB_EDIT_INTERRUPT, /* Interrupted (Ctrl+C) */ + MIRB_EDIT_ERROR /* Error occurred */ +} mirb_edit_result; + +/* + * Callback to check if input is complete + * Returns TRUE if the code is syntactically complete + */ +typedef mrb_bool mirb_check_complete_fn(const char *code, void *user_data); + +/* + * Callback for tab completion + * Returns number of completions, sets completions_out and prefix_len_out + * Caller must free completions using mirb_tab_complete_free_fn + */ +typedef int mirb_tab_complete_fn(const char *line, int cursor_pos, + char ***completions_out, int *prefix_len_out, + void *user_data); + +/* + * Callback to free tab completions + */ +typedef void mirb_tab_complete_free_fn(char **completions, int count, void *user_data); + +/* + * Editor state + */ +typedef struct mirb_editor { + mirb_term term; /* terminal state */ + mirb_buffer buf; /* editing buffer */ + mirb_history hist; /* command history */ + + const char *prompt; /* primary prompt (e.g., "> ") */ + const char *prompt_cont; /* continuation prompt (e.g., "* ") */ + size_t prompt_len; /* length of primary prompt */ + size_t prompt_cont_len; /* length of continuation prompt */ + + const char *prompt_fmt; /* prompt format string (e.g., "%d> ") */ + const char *prompt_cont_fmt;/* continuation format (e.g., "%d* ") */ + int line_num_base; /* starting line number for prompts */ + + mirb_check_complete_fn *check_complete; /* completion checker */ + void *check_complete_data; /* user data for checker */ + + mirb_tab_complete_fn *tab_complete; /* tab completion callback */ + mirb_tab_complete_free_fn *tab_complete_free; /* free completions callback */ + void *tab_complete_data; /* user data for tab completion */ + + size_t display_cursor_row; /* cursor row in buffer (for refresh tracking) */ + size_t prev_line_count; /* line count from last refresh */ + + mrb_bool initialized; /* editor is initialized */ + mrb_bool use_color; /* use colored output */ + + mirb_highlighter highlight; /* syntax highlighting state */ +} mirb_editor; + +/* + * Initialize editor + * Returns TRUE on success + */ +mrb_bool mirb_editor_init(mirb_editor *ed); + +/* + * Cleanup editor + */ +void mirb_editor_cleanup(mirb_editor *ed); + +/* + * Set prompts (fixed strings) + */ +void mirb_editor_set_prompts(mirb_editor *ed, + const char *prompt, + const char *prompt_cont); + +/* + * Set prompt format strings for line-numbered prompts + * Format strings should contain %d for line number (e.g., "%d> ", "%d* ") + * line_num is the starting line number + */ +void mirb_editor_set_prompt_format(mirb_editor *ed, + const char *prompt_fmt, + const char *prompt_cont_fmt, + int line_num); + +/* + * Set completion checker callback + */ +void mirb_editor_set_check_complete(mirb_editor *ed, + mirb_check_complete_fn *fn, + void *user_data); + +/* + * Set tab completion callbacks + */ +void mirb_editor_set_tab_complete(mirb_editor *ed, + mirb_tab_complete_fn *complete_fn, + mirb_tab_complete_free_fn *free_fn, + void *user_data); + +/* + * Enable or disable colored output + */ +void mirb_editor_set_color(mirb_editor *ed, mrb_bool enable); + +/* + * Check if multi-line editing is supported + */ +mrb_bool mirb_editor_supported(mirb_editor *ed); + +/* + * Read input with multi-line editing + * + * Returns result code (OK, EOF, INTERRUPT, ERROR) + * On success (OK), caller must free the returned string + */ +mirb_edit_result mirb_editor_read(mirb_editor *ed, char **out_str); + +/* + * Simple single-line input (fallback when raw mode not supported) + * Used internally but can be called directly + */ +mirb_edit_result mirb_editor_read_simple(mirb_editor *ed, char **out_str); + +/* + * Add entry to history + * Called after successful command execution + */ +void mirb_editor_history_add(mirb_editor *ed, const char *entry); + +#endif /* MIRB_EDITOR_H */ diff --git a/mrbgems/mruby-bin-mirb/tools/mirb/mirb_highlight.c b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_highlight.c new file mode 100644 index 0000000000..44b4bb8d21 --- /dev/null +++ b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_highlight.c @@ -0,0 +1,508 @@ +/* +** mirb_highlight.c - Syntax highlighting for mirb +** +** See Copyright Notice in mruby.h +*/ + +#include "mirb_highlight.h" +#include "mirb_buffer.h" +#include "mirb_term.h" +#include +#include +#include + +/* ANSI color codes - using standard 16-color palette for compatibility */ + +/* Dark theme colors (bright/light colors on dark background) */ +#define DARK_KEYWORD "\033[1;35m" /* bold magenta */ +#define DARK_STRING "\033[32m" /* green */ +#define DARK_COMMENT "\033[90m" /* bright black (gray) */ +#define DARK_NUMBER "\033[36m" /* cyan */ +#define DARK_SYMBOL "\033[33m" /* yellow */ +#define DARK_CONSTANT "\033[1;33m" /* bold yellow */ +#define DARK_IVAR "\033[34m" /* blue */ +#define DARK_GVAR "\033[1;34m" /* bold blue */ +#define DARK_REGEXP "\033[31m" /* red */ +#define DARK_RESULT "\033[36m" /* cyan (same as number) */ +#define DARK_ERROR "\033[1;31m" /* bold red */ +#define DARK_ARROW "\033[90m" /* gray */ + +/* Light theme colors (dark colors on light background) */ +#define LIGHT_KEYWORD "\033[35m" /* magenta */ +#define LIGHT_STRING "\033[32m" /* green */ +#define LIGHT_COMMENT "\033[37m" /* white (light gray) */ +#define LIGHT_NUMBER "\033[36m" /* cyan */ +#define LIGHT_SYMBOL "\033[33m" /* yellow */ +#define LIGHT_CONSTANT "\033[33m" /* yellow */ +#define LIGHT_IVAR "\033[34m" /* blue */ +#define LIGHT_GVAR "\033[34m" /* blue */ +#define LIGHT_REGEXP "\033[31m" /* red */ +#define LIGHT_RESULT "\033[36m" /* cyan */ +#define LIGHT_ERROR "\033[31m" /* red */ +#define LIGHT_ARROW "\033[90m" /* gray */ + +/* Keyword list - sorted alphabetically for bsearch, NULL-terminated */ +const char *mirb_keywords[] = { + "BEGIN", "END", "__ENCODING__", "__FILE__", "__LINE__", + "alias", "and", "begin", "break", "case", "class", "def", + "defined?", "do", "else", "elsif", "end", "ensure", "false", + "for", "if", "in", "module", "next", "nil", "not", "or", + "redo", "rescue", "retry", "return", "self", "super", "then", + "true", "undef", "unless", "until", "when", "while", "yield", + NULL +}; +const size_t mirb_num_keywords = sizeof(mirb_keywords) / sizeof(mirb_keywords[0]) - 1; + +static int +keyword_cmp(const void *a, const void *b) +{ + return strcmp((const char *)a, *(const char **)b); +} + +static mrb_bool +is_keyword(const char *word, size_t len) +{ + char buf[32]; + + if (len >= sizeof(buf)) return FALSE; + memcpy(buf, word, len); + buf[len] = '\0'; + return bsearch(buf, mirb_keywords, mirb_num_keywords, sizeof(mirb_keywords[0]), keyword_cmp) != NULL; +} + +static mrb_bool +is_word_start(char c) +{ + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; +} + +static mrb_bool +is_upper(char c) +{ + return c >= 'A' && c <= 'Z'; +} + +/* + * Get color code for token type based on theme + */ +static const char * +get_color(mirb_highlighter *hl, mirb_token_type type) +{ + if (!hl->enabled) return ""; + + if (hl->theme == MIRB_THEME_DARK) { + switch (type) { + case MIRB_TOK_KEYWORD: return DARK_KEYWORD; + case MIRB_TOK_STRING: return DARK_STRING; + case MIRB_TOK_COMMENT: return DARK_COMMENT; + case MIRB_TOK_NUMBER: return DARK_NUMBER; + case MIRB_TOK_SYMBOL: return DARK_SYMBOL; + case MIRB_TOK_CONSTANT: return DARK_CONSTANT; + case MIRB_TOK_IVAR: return DARK_IVAR; + case MIRB_TOK_GVAR: return DARK_GVAR; + case MIRB_TOK_REGEXP: return DARK_REGEXP; + default: return ""; + } + } + else { + switch (type) { + case MIRB_TOK_KEYWORD: return LIGHT_KEYWORD; + case MIRB_TOK_STRING: return LIGHT_STRING; + case MIRB_TOK_COMMENT: return LIGHT_COMMENT; + case MIRB_TOK_NUMBER: return LIGHT_NUMBER; + case MIRB_TOK_SYMBOL: return LIGHT_SYMBOL; + case MIRB_TOK_CONSTANT: return LIGHT_CONSTANT; + case MIRB_TOK_IVAR: return LIGHT_IVAR; + case MIRB_TOK_GVAR: return LIGHT_GVAR; + case MIRB_TOK_REGEXP: return LIGHT_REGEXP; + default: return ""; + } + } +} + +static const char * +get_reset(mirb_highlighter *hl) +{ + return hl->enabled ? COLOR_RESET : ""; +} + +/* + * Print n characters with specified color + */ +static void +print_colored(mirb_highlighter *hl, const char *start, size_t len, mirb_token_type type) +{ + const char *color = get_color(hl, type); + const char *reset = get_reset(hl); + + if (*color) printf("%s", color); + fwrite(start, 1, len, stdout); + if (*color) printf("%s", reset); +} + +/* Cached terminal background color from pre-query */ +static mirb_bg_color cached_bg_color = MIRB_BG_UNKNOWN; +static mrb_bool bg_color_queried = FALSE; + +/* + * Pre-query terminal background color + * Must be called before any output to avoid response appearing on screen + */ +void +mirb_highlight_query_terminal(void) +{ + if (!bg_color_queried) { + cached_bg_color = mirb_term_query_bg_color(500); /* 500ms timeout */ + bg_color_queried = TRUE; + } +} + +/* + * Detect theme from terminal background color + * + * Priority: + * 1. MIRB_THEME environment variable (explicit override) + * 2. Cached OSC 11 result (from mirb_highlight_query_terminal) + * 3. COLORFGBG environment variable (rxvt, some xterm) + * 4. Default to dark theme + */ +mirb_theme +mirb_highlight_detect_theme(void) +{ + const char *env; + + /* 1. Check explicit MIRB_THEME first (user override) */ + env = getenv("MIRB_THEME"); + if (env) { + if (strcmp(env, "light") == 0) return MIRB_THEME_LIGHT; + if (strcmp(env, "dark") == 0) return MIRB_THEME_DARK; + } + + /* 2. Use cached OSC 11 result (must call mirb_highlight_query_terminal first) */ + if (cached_bg_color == MIRB_BG_LIGHT) return MIRB_THEME_LIGHT; + if (cached_bg_color == MIRB_BG_DARK) return MIRB_THEME_DARK; + + /* 3. Check COLORFGBG (format: "fg;bg" where bg > 6 usually means light) */ + env = getenv("COLORFGBG"); + if (env) { + const char *semi = strchr(env, ';'); + if (semi) { + int bg_color = atoi(semi + 1); + /* Background colors 7, 15, or high values typically mean light theme */ + if (bg_color == 7 || bg_color == 15 || (bg_color >= 230 && bg_color <= 255)) { + return MIRB_THEME_LIGHT; + } + /* Low values (0-6, 8) typically mean dark theme */ + if (bg_color <= 8) { + return MIRB_THEME_DARK; + } + } + } + + /* 4. Default to dark theme (more common in terminals) */ + return MIRB_THEME_DARK; +} + +void +mirb_highlight_init(mirb_highlighter *hl, mrb_bool enabled) +{ + memset(hl, 0, sizeof(*hl)); + hl->enabled = enabled; + if (enabled) { + hl->theme = mirb_highlight_detect_theme(); + } +} + +void +mirb_highlight_set_theme(mirb_highlighter *hl, mirb_theme theme) +{ + hl->theme = theme; +} + +void +mirb_highlight_reset(mirb_highlighter *hl) +{ + hl->in_string = FALSE; + hl->string_quote = 0; + hl->in_heredoc = FALSE; + hl->in_regexp = FALSE; +} + +/* + * Print a line with syntax highlighting + */ +void +mirb_highlight_print_line(mirb_highlighter *hl, const char *line) +{ + const char *p = line; + const char *token_start; + + if (!hl->enabled) { + printf("%s", line); + return; + } + + /* Handle continuation of multi-line string */ + if (hl->in_string) { + token_start = p; + while (*p) { + if (*p == '\\' && p[1]) { + p += 2; + continue; + } + if (*p == hl->string_quote) { + p++; + print_colored(hl, token_start, (size_t)(p - token_start), MIRB_TOK_STRING); + hl->in_string = FALSE; + break; + } + p++; + } + if (hl->in_string) { + /* String continues to next line */ + print_colored(hl, token_start, (size_t)(p - token_start), MIRB_TOK_STRING); + return; + } + } + + while (*p) { + /* Comment - rest of line */ + if (*p == '#') { + print_colored(hl, p, strlen(p), MIRB_TOK_COMMENT); + return; + } + + /* Strings */ + if (*p == '"' || *p == '\'') { + char quote = *p; + token_start = p++; + while (*p) { + if (*p == '\\' && p[1]) { + p += 2; + continue; + } + if (*p == quote) { + p++; + break; + } + p++; + } + if (p[-1] == quote) { + print_colored(hl, token_start, (size_t)(p - token_start), MIRB_TOK_STRING); + } + else { + /* Unterminated string - continues to next line */ + print_colored(hl, token_start, (size_t)(p - token_start), MIRB_TOK_STRING); + hl->in_string = TRUE; + hl->string_quote = quote; + return; + } + continue; + } + + /* Percent strings: %q{...}, %Q{...}, %w{...}, etc. */ + if (*p == '%' && p[1] && strchr("qQwWiIxsr", p[1])) { + char open = p[2]; + char close = 0; + int depth = 1; + + token_start = p; + if (open == '(' || open == '{' || open == '[' || open == '<') { + close = (open == '(') ? ')' : (open == '{') ? '}' : (open == '[') ? ']' : '>'; + p += 3; + while (*p && depth > 0) { + if (*p == '\\' && p[1]) { + p += 2; + continue; + } + if (*p == open) depth++; + else if (*p == close) depth--; + p++; + } + print_colored(hl, token_start, (size_t)(p - token_start), MIRB_TOK_STRING); + continue; + } + else if (open) { + /* Non-paired delimiter like %q!...! */ + p += 3; + while (*p && *p != open) { + if (*p == '\\' && p[1]) { + p += 2; + continue; + } + p++; + } + if (*p == open) p++; + print_colored(hl, token_start, (size_t)(p - token_start), MIRB_TOK_STRING); + continue; + } + /* Not a percent string, fall through */ + } + + /* Symbols: :symbol or :"string" */ + if (*p == ':' && p[1] && (is_word_start(p[1]) || p[1] == '"' || p[1] == '\'')) { + token_start = p++; + if (*p == '"' || *p == '\'') { + /* Quoted symbol */ + char quote = *p++; + while (*p && *p != quote) { + if (*p == '\\' && p[1]) { + p += 2; + continue; + } + p++; + } + if (*p == quote) p++; + } + else { + /* Regular symbol */ + while (*p && (mirb_is_word_char(*p) || *p == '?' || *p == '!')) p++; + } + print_colored(hl, token_start, (size_t)(p - token_start), MIRB_TOK_SYMBOL); + continue; + } + + /* Instance variables: @var */ + if (*p == '@') { + token_start = p++; + if (*p == '@') p++; /* @@class_var */ + while (*p && mirb_is_word_char(*p)) p++; + print_colored(hl, token_start, (size_t)(p - token_start), MIRB_TOK_IVAR); + continue; + } + + /* Global variables: $var */ + if (*p == '$') { + token_start = p++; + /* Special globals like $!, $?, $1, etc. */ + if (*p && !mirb_is_word_char(*p) && *p != ' ') { + p++; + } + else { + while (*p && mirb_is_word_char(*p)) p++; + } + print_colored(hl, token_start, (size_t)(p - token_start), MIRB_TOK_GVAR); + continue; + } + + /* Numbers */ + if ((*p >= '0' && *p <= '9') || + (*p == '-' && p[1] >= '0' && p[1] <= '9' && (p == line || !mirb_is_word_char(p[-1])))) { + token_start = p; + if (*p == '-') p++; + if (*p == '0' && (p[1] == 'x' || p[1] == 'X')) { + /* Hex */ + p += 2; + while ((*p >= '0' && *p <= '9') || (*p >= 'a' && *p <= 'f') || + (*p >= 'A' && *p <= 'F') || *p == '_') p++; + } + else if (*p == '0' && (p[1] == 'b' || p[1] == 'B')) { + /* Binary */ + p += 2; + while (*p == '0' || *p == '1' || *p == '_') p++; + } + else if (*p == '0' && (p[1] == 'o' || p[1] == 'O')) { + /* Octal */ + p += 2; + while ((*p >= '0' && *p <= '7') || *p == '_') p++; + } + else { + /* Decimal or float */ + while ((*p >= '0' && *p <= '9') || *p == '_') p++; + if (*p == '.' && p[1] >= '0' && p[1] <= '9') { + p++; + while ((*p >= '0' && *p <= '9') || *p == '_') p++; + } + if (*p == 'e' || *p == 'E') { + p++; + if (*p == '+' || *p == '-') p++; + while ((*p >= '0' && *p <= '9') || *p == '_') p++; + } + } + /* Suffix like 'i' for complex or 'r' for rational */ + if (*p == 'i' || *p == 'r') p++; + print_colored(hl, token_start, (size_t)(p - token_start), MIRB_TOK_NUMBER); + continue; + } + + /* Identifiers and keywords */ + if (is_word_start(*p)) { + token_start = p; + mrb_bool is_const = is_upper(*p); + /* Check if preceded by dot (method call like obj.class) */ + mrb_bool after_dot = (token_start > line && token_start[-1] == '.'); + while (*p && (mirb_is_word_char(*p) || *p == '?' || *p == '!')) p++; + + size_t len = (size_t)(p - token_start); + + /* Check for hash key symbol syntax: identifier followed by ': ' */ + if (*p == ':' && (p[1] == ' ' || p[1] == '\0' || p[1] == ',' || p[1] == '}')) { + p++; /* include the colon */ + print_colored(hl, token_start, (size_t)(p - token_start), MIRB_TOK_SYMBOL); + } + else if (is_const) { + print_colored(hl, token_start, len, MIRB_TOK_CONSTANT); + } + else if (!after_dot && is_keyword(token_start, len)) { + print_colored(hl, token_start, len, MIRB_TOK_KEYWORD); + } + else { + fwrite(token_start, 1, len, stdout); + } + continue; + } + + /* Regular expression (simple heuristic: after =~, !~ or at line start after if/unless/when) */ + /* This is tricky - for now just output as-is */ + + /* Default: just output character */ + putchar(*p++); + } +} + +/* + * Print result value with highlighting + */ +void +mirb_highlight_print_result(mirb_highlighter *hl, const char *result) +{ + if (!hl->enabled) { + fputs(" => ", stdout); + fputs(result, stdout); + putchar('\n'); + return; + } + + /* Print arrow in gray */ + if (hl->theme == MIRB_THEME_DARK) { + fputs(DARK_ARROW " => " COLOR_RESET, stdout); + } + else { + fputs(LIGHT_ARROW " => " COLOR_RESET, stdout); + } + /* Syntax highlight the result value */ + mirb_highlight_print_line(hl, result); + putchar('\n'); +} + +/* + * Print error message with highlighting + */ +void +mirb_highlight_print_error(mirb_highlighter *hl, const char *error) +{ + if (!hl->enabled) { + fputs(error, stdout); + putchar('\n'); + return; + } + + if (hl->theme == MIRB_THEME_DARK) { + fputs(DARK_ERROR, stdout); + } + else { + fputs(LIGHT_ERROR, stdout); + } + fputs(error, stdout); + fputs(COLOR_RESET "\n", stdout); +} diff --git a/mrbgems/mruby-bin-mirb/tools/mirb/mirb_highlight.h b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_highlight.h new file mode 100644 index 0000000000..06d9e9a111 --- /dev/null +++ b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_highlight.h @@ -0,0 +1,104 @@ +/* +** mirb_highlight.h - Syntax highlighting for mirb +** +** See Copyright Notice in mruby.h +*/ + +#ifndef MIRB_HIGHLIGHT_H +#define MIRB_HIGHLIGHT_H + +#include +#include + +/* + * Token types for syntax highlighting + */ +typedef enum mirb_token_type { + MIRB_TOK_DEFAULT, /* default text */ + MIRB_TOK_KEYWORD, /* if, else, def, class, end, etc. */ + MIRB_TOK_STRING, /* "..." or '...' */ + MIRB_TOK_COMMENT, /* # to end of line */ + MIRB_TOK_NUMBER, /* integers, floats */ + MIRB_TOK_SYMBOL, /* :symbol */ + MIRB_TOK_CONSTANT, /* Uppercase identifiers */ + MIRB_TOK_IVAR, /* @instance_var */ + MIRB_TOK_GVAR, /* $global_var */ + MIRB_TOK_REGEXP, /* /regexp/ */ + MIRB_TOK_MAX +} mirb_token_type; + +/* + * Color theme + */ +typedef enum mirb_theme { + MIRB_THEME_DARK, /* light text on dark background (default) */ + MIRB_THEME_LIGHT /* dark text on light background */ +} mirb_theme; + +/* + * Highlighter state + */ +typedef struct mirb_highlighter { + mirb_theme theme; + mrb_bool enabled; + /* Multi-line state tracking */ + mrb_bool in_string; + char string_quote; /* '"' or '\'' */ + mrb_bool in_heredoc; + mrb_bool in_regexp; +} mirb_highlighter; + +/* + * Ruby keyword list (sorted alphabetically, NULL-terminated) + */ +extern const char *mirb_keywords[]; +extern const size_t mirb_num_keywords; + +/* + * Initialize highlighter with auto-detected or specified theme + */ +void mirb_highlight_init(mirb_highlighter *hl, mrb_bool enabled); + +/* + * Set theme explicitly + */ +void mirb_highlight_set_theme(mirb_highlighter *hl, mirb_theme theme); + +/* + * Detect theme from environment variables and terminal query + * Returns MIRB_THEME_DARK if cannot detect + */ +mirb_theme mirb_highlight_detect_theme(void); + +/* + * Pre-query terminal background color (call before any output) + * This caches the result for later use by mirb_highlight_detect_theme() + */ +void mirb_highlight_query_terminal(void); + +/* + * Print a line with syntax highlighting + * Handles multi-line strings/comments by tracking state + */ +void mirb_highlight_print_line(mirb_highlighter *hl, const char *line); + +/* + * Reset multi-line state (call when starting new input) + */ +void mirb_highlight_reset(mirb_highlighter *hl); + +/* + * Print result value with highlighting + * Prints " => " prefix and the result string with appropriate colors + */ +void mirb_highlight_print_result(mirb_highlighter *hl, const char *result); + +/* + * Print error message with highlighting + */ +void mirb_highlight_print_error(mirb_highlighter *hl, const char *error); + +/* Common ANSI reset code shared across modules */ +#define COLOR_RESET "\033[0m" + +#endif /* MIRB_HIGHLIGHT_H */ diff --git a/mrbgems/mruby-bin-mirb/tools/mirb/mirb_history.c b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_history.c new file mode 100644 index 0000000000..6d20e2920b --- /dev/null +++ b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_history.c @@ -0,0 +1,185 @@ +/* +** mirb_history.c - Command history for mirb editor +** +** See Copyright Notice in mruby.h +*/ + +#include "mirb_history.h" +#include +#include +#include + +/* Windows compatibility */ +#ifdef _MSC_VER +#define strdup _strdup +#endif + +/* + * Initialize history + */ +mrb_bool +mirb_history_init(mirb_history *hist, size_t capacity) +{ + memset(hist, 0, sizeof(*hist)); + + if (capacity == 0) capacity = MIRB_HISTORY_SIZE; + + hist->entries = (char**)calloc(capacity, sizeof(char*)); + if (hist->entries == NULL) return FALSE; + + hist->capacity = capacity; + hist->count = 0; + hist->start = 0; + hist->pos = 0; + hist->saved_input = NULL; + hist->browsing = FALSE; + + return TRUE; +} + +/* + * Free history resources + */ +void +mirb_history_free(mirb_history *hist) +{ + if (hist->entries) { + for (size_t i = 0; i < hist->capacity; i++) { + free(hist->entries[i]); + } + free(hist->entries); + hist->entries = NULL; + } + free(hist->saved_input); + hist->saved_input = NULL; + hist->count = 0; + hist->capacity = 0; +} + +/* + * Get actual index in circular buffer + */ +static size_t +actual_index(mirb_history *hist, size_t logical_idx) +{ + return (hist->start + logical_idx) % hist->capacity; +} + +/* + * Add entry to history + */ +void +mirb_history_add(mirb_history *hist, const char *entry) +{ + if (entry == NULL || entry[0] == '\0') return; + + /* Don't add if same as last entry */ + if (hist->count > 0) { + size_t last_idx = actual_index(hist, hist->count - 1); + if (strcmp(hist->entries[last_idx], entry) == 0) { + return; + } + } + + char *copy = strdup(entry); + if (copy == NULL) return; + + if (hist->count < hist->capacity) { + /* Still have room */ + size_t idx = actual_index(hist, hist->count); + hist->entries[idx] = copy; + hist->count++; + } + else { + /* Buffer is full, overwrite oldest */ + size_t idx = hist->start; + free(hist->entries[idx]); + hist->entries[idx] = copy; + hist->start = (hist->start + 1) % hist->capacity; + } + + /* Reset browsing state */ + hist->browsing = FALSE; + hist->pos = hist->count; +} + +/* + * Start browsing history + */ +void +mirb_history_browse_start(mirb_history *hist, const char *current_input) +{ + if (hist->browsing) return; + + free(hist->saved_input); + hist->saved_input = current_input ? strdup(current_input) : NULL; + hist->browsing = TRUE; + hist->pos = hist->count; /* Start past the end (at current input) */ +} + +/* + * Stop browsing history + */ +void +mirb_history_browse_stop(mirb_history *hist) +{ + free(hist->saved_input); + hist->saved_input = NULL; + hist->browsing = FALSE; + hist->pos = hist->count; +} + +/* + * Get previous entry (older) + */ +const char * +mirb_history_prev(mirb_history *hist) +{ + if (hist->count == 0) return NULL; + + if (!hist->browsing) { + /* Should call browse_start first, but handle gracefully */ + hist->browsing = TRUE; + hist->pos = hist->count; + } + + if (hist->pos == 0) { + /* Already at oldest entry */ + return NULL; + } + + hist->pos--; + return hist->entries[actual_index(hist, hist->pos)]; +} + +/* + * Get next entry (newer) + */ +const char * +mirb_history_next(mirb_history *hist) +{ + if (!hist->browsing) return NULL; + + if (hist->pos >= hist->count) { + /* Already at current input */ + return NULL; + } + + hist->pos++; + + if (hist->pos >= hist->count) { + /* Moved past newest entry, return saved input */ + return hist->saved_input ? hist->saved_input : ""; + } + + return hist->entries[actual_index(hist, hist->pos)]; +} + +/* + * Get current entry count + */ +size_t +mirb_history_count(mirb_history *hist) +{ + return hist->count; +} diff --git a/mrbgems/mruby-bin-mirb/tools/mirb/mirb_history.h b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_history.h new file mode 100644 index 0000000000..6ac074c196 --- /dev/null +++ b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_history.h @@ -0,0 +1,76 @@ +/* +** mirb_history.h - Command history for mirb editor +** +** See Copyright Notice in mruby.h +*/ + +#ifndef MIRB_HISTORY_H +#define MIRB_HISTORY_H + +#include +#include + +/* Default history size */ +#define MIRB_HISTORY_SIZE 100 + +/* + * History entry + */ +typedef struct mirb_history { + char **entries; /* array of history entries */ + size_t capacity; /* max number of entries */ + size_t count; /* current number of entries */ + size_t start; /* index of oldest entry (circular buffer) */ + size_t pos; /* current browsing position */ + char *saved_input; /* saved current input when browsing */ + mrb_bool browsing; /* TRUE if currently browsing history */ +} mirb_history; + +/* + * Initialize history + * Returns TRUE on success + */ +mrb_bool mirb_history_init(mirb_history *hist, size_t capacity); + +/* + * Free history resources + */ +void mirb_history_free(mirb_history *hist); + +/* + * Add entry to history + * Empty strings are not added + * Duplicate of last entry is not added + */ +void mirb_history_add(mirb_history *hist, const char *entry); + +/* + * Start browsing history + * Saves the current input for later restoration + */ +void mirb_history_browse_start(mirb_history *hist, const char *current_input); + +/* + * Stop browsing history + */ +void mirb_history_browse_stop(mirb_history *hist); + +/* + * Get previous entry (older) + * Returns NULL if at oldest entry or history is empty + */ +const char *mirb_history_prev(mirb_history *hist); + +/* + * Get next entry (newer) + * Returns saved input if moving past newest entry + * Returns NULL if not browsing + */ +const char *mirb_history_next(mirb_history *hist); + +/* + * Get current entry count + */ +size_t mirb_history_count(mirb_history *hist); + +#endif /* MIRB_HISTORY_H */ diff --git a/mrbgems/mruby-bin-mirb/tools/mirb/mirb_term.c b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_term.c new file mode 100644 index 0000000000..68759e2622 --- /dev/null +++ b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_term.c @@ -0,0 +1,491 @@ +/* +** mirb_term.c - Terminal control for mirb multi-line editor +** +** See Copyright Notice in mruby.h +*/ + +#include "mirb_term.h" +#include +#include +#include + +#if !defined(_WIN32) && !defined(_WIN64) +#include +#include +#include +#include +#include + +/* + * Initialize terminal state + */ +mrb_bool +mirb_term_init(mirb_term *term) +{ + memset(term, 0, sizeof(*term)); + + /* Check if stdin/stdout are terminals */ + if (!isatty(STDIN_FILENO) || !isatty(STDOUT_FILENO)) { + term->supported = FALSE; + return FALSE; + } + + term->supported = TRUE; + term->orig_termios = malloc(sizeof(struct termios)); + if (term->orig_termios == NULL) { + term->supported = FALSE; + return FALSE; + } + + mirb_term_get_size(term); + return TRUE; +} + +/* + * Cleanup terminal state + */ +void +mirb_term_cleanup(mirb_term *term) +{ + if (term->raw_mode) { + mirb_term_raw_disable(term); + } + free(term->orig_termios); + term->orig_termios = NULL; +} + +/* + * Enable raw mode + */ +mrb_bool +mirb_term_raw_enable(mirb_term *term) +{ + struct termios raw; + + if (!term->supported) return FALSE; + if (term->raw_mode) return TRUE; + + /* Save original settings */ + if (tcgetattr(STDIN_FILENO, (struct termios*)term->orig_termios) == -1) { + return FALSE; + } + + raw = *(struct termios*)term->orig_termios; + + /* + * Input flags: disable break signal, CR to NL conversion, + * parity checking, strip high bit, and software flow control + */ + raw.c_iflag &= ~(BRKINT | ICRNL | INPCK | ISTRIP | IXON); + + /* Output flags: disable post-processing */ + raw.c_oflag &= ~(OPOST); + + /* Control flags: set 8-bit characters */ + raw.c_cflag |= (CS8); + + /* + * Local flags: disable echo, canonical mode, + * extended input processing, and signal generation + */ + raw.c_lflag &= ~(ECHO | ICANON | IEXTEN | ISIG); + + /* Control characters: return immediately with any available input */ + raw.c_cc[VMIN] = 1; + raw.c_cc[VTIME] = 0; + + if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &raw) == -1) { + return FALSE; + } + + term->raw_mode = TRUE; + return TRUE; +} + +/* + * Disable raw mode + */ +void +mirb_term_raw_disable(mirb_term *term) +{ + if (term->raw_mode && term->orig_termios) { + tcsetattr(STDIN_FILENO, TCSAFLUSH, (struct termios*)term->orig_termios); + term->raw_mode = FALSE; + } +} + +/* + * Read a single key, handling escape sequences + */ +int +mirb_term_read_key(mirb_term *term) +{ + unsigned char c; + ssize_t nread; + + (void)term; /* unused in POSIX implementation */ + + /* Read first character */ + while ((nread = read(STDIN_FILENO, &c, 1)) != 1) { + if (nread == -1 && errno != EAGAIN && errno != EINTR) { + return MIRB_KEY_NONE; + } + } + + /* Handle escape sequences */ + if (c == 27) { + unsigned char seq[3]; + fd_set fds; + struct timeval tv; + + /* Use select to check if more characters are available */ + FD_ZERO(&fds); + FD_SET(STDIN_FILENO, &fds); + tv.tv_sec = 0; + tv.tv_usec = 50000; /* 50ms timeout */ + + if (select(STDIN_FILENO + 1, &fds, NULL, NULL, &tv) <= 0) { + return MIRB_KEY_ESC; /* Just ESC key */ + } + + if (read(STDIN_FILENO, &seq[0], 1) != 1) return MIRB_KEY_ESC; + + /* Alt+key combinations (ESC followed by letter) */ + if (seq[0] >= 'a' && seq[0] <= 'z') { + switch (seq[0]) { + case 'b': return MIRB_KEY_ALT_B; + case 'f': return MIRB_KEY_ALT_F; + case 'd': return MIRB_KEY_ALT_D; + default: return MIRB_KEY_ESC; + } + } + + /* CSI sequences: ESC [ ... */ + if (seq[0] == '[') { + if (read(STDIN_FILENO, &seq[1], 1) != 1) return MIRB_KEY_ESC; + + /* Numeric sequences: ESC [ N ~ */ + if (seq[1] >= '0' && seq[1] <= '9') { + if (read(STDIN_FILENO, &seq[2], 1) != 1) return MIRB_KEY_ESC; + if (seq[2] == '~') { + switch (seq[1]) { + case '1': return MIRB_KEY_HOME; + case '3': return MIRB_KEY_DELETE; + case '4': return MIRB_KEY_END; + case '7': return MIRB_KEY_HOME; + case '8': return MIRB_KEY_END; + } + } + return MIRB_KEY_ESC; + } + + /* Letter sequences: ESC [ A/B/C/D/H/F */ + switch (seq[1]) { + case 'A': return MIRB_KEY_UP; + case 'B': return MIRB_KEY_DOWN; + case 'C': return MIRB_KEY_RIGHT; + case 'D': return MIRB_KEY_LEFT; + case 'H': return MIRB_KEY_HOME; + case 'F': return MIRB_KEY_END; + } + return MIRB_KEY_ESC; + } + + /* SS3 sequences: ESC O ... */ + if (seq[0] == 'O') { + if (read(STDIN_FILENO, &seq[1], 1) != 1) return MIRB_KEY_ESC; + switch (seq[1]) { + case 'A': return MIRB_KEY_UP; + case 'B': return MIRB_KEY_DOWN; + case 'C': return MIRB_KEY_RIGHT; + case 'D': return MIRB_KEY_LEFT; + case 'H': return MIRB_KEY_HOME; + case 'F': return MIRB_KEY_END; + } + return MIRB_KEY_ESC; + } + + return MIRB_KEY_ESC; + } + + /* Handle Ctrl+H as backspace (some terminals send this) */ + if (c == 8) return MIRB_KEY_BACKSPACE; + + return (int)c; +} + +/* + * Get terminal size + */ +void +mirb_term_get_size(mirb_term *term) +{ + struct winsize ws; + + if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &ws) == 0 && ws.ws_col > 0) { + term->cols = ws.ws_col; + term->rows = ws.ws_row; + } + else { + /* Default fallback */ + term->cols = 80; + term->rows = 24; + } +} + +#else /* Windows */ + +/* + * Windows implementation (minimal stub) + * Full Windows console support would require significant additional code + */ + +mrb_bool +mirb_term_init(mirb_term *term) +{ + memset(term, 0, sizeof(*term)); + term->supported = FALSE; /* Not implemented for Windows yet */ + term->cols = 80; + term->rows = 24; + return FALSE; +} + +void +mirb_term_cleanup(mirb_term *term) +{ + (void)term; +} + +mrb_bool +mirb_term_raw_enable(mirb_term *term) +{ + (void)term; + return FALSE; +} + +void +mirb_term_raw_disable(mirb_term *term) +{ + (void)term; +} + +int +mirb_term_read_key(mirb_term *term) +{ + (void)term; + return MIRB_KEY_NONE; +} + +void +mirb_term_get_size(mirb_term *term) +{ + term->cols = 80; + term->rows = 24; +} + +#endif /* _WIN32 */ + +/* + * ANSI escape sequence functions (platform-independent) + */ + +void +mirb_term_cursor_up(int n) +{ + if (n > 0) printf("\033[%dA", n); +} + +void +mirb_term_cursor_down(int n) +{ + if (n > 0) printf("\033[%dB", n); +} + +void +mirb_term_cursor_right(int n) +{ + if (n > 0) printf("\033[%dC", n); +} + +void +mirb_term_cursor_left(int n) +{ + if (n > 0) printf("\033[%dD", n); +} + +void +mirb_term_cursor_col(int col) +{ + printf("\033[%dG", col); +} + +void +mirb_term_clear_line(void) +{ + printf("\033[2K"); +} + +void +mirb_term_clear_to_end(void) +{ + printf("\033[K"); +} + +void +mirb_term_clear_screen(void) +{ + printf("\033[2J\033[H"); +} + +void +mirb_term_flush(void) +{ + fflush(stdout); +} + +void +mirb_term_clear_below(void) +{ + printf("\033[J"); +} + +#if !defined(_WIN32) && !defined(_WIN64) +/* + * Query terminal background color using OSC 11 escape sequence + * + * Protocol: + * Send: ESC ] 11 ; ? ESC \ (or BEL instead of ESC \) + * Recv: ESC ] 11 ; rgb:RRRR/GGGG/BBBB ESC \ + * + * The response uses 16-bit color values (0000-FFFF per component). + * Some terminals use 8-bit (00-FF) format instead. + */ +mirb_bg_color +mirb_term_query_bg_color(int timeout_ms) +{ + struct termios old_term, new_term; + char buf[64]; + size_t total = 0; + ssize_t n; + int r, g, b; + mirb_bg_color result = MIRB_BG_UNKNOWN; + fd_set fds; + struct timeval tv; + const char *p; + + /* Must be a terminal */ + if (!isatty(STDIN_FILENO) || !isatty(STDOUT_FILENO)) { + return MIRB_BG_UNKNOWN; + } + + /* Save original terminal settings */ + if (tcgetattr(STDIN_FILENO, &old_term) < 0) { + return MIRB_BG_UNKNOWN; + } + + /* Flush any pending input first */ + tcflush(STDIN_FILENO, TCIFLUSH); + + /* Disable echo and canonical mode for raw read */ + new_term = old_term; + new_term.c_lflag &= ~(ICANON | ECHO | ECHOE | ECHOK | ECHONL); + new_term.c_iflag &= ~(IXON | IXOFF | ICRNL); + new_term.c_cc[VMIN] = 0; + new_term.c_cc[VTIME] = 0; + + /* TCSAFLUSH: flush I/O and apply settings */ + if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &new_term) < 0) { + return MIRB_BG_UNKNOWN; + } + + /* Wait for terminal settings to take effect */ + tcdrain(STDOUT_FILENO); + + /* Send OSC 11 query: ESC ] 11 ; ? ESC \ */ + if (write(STDOUT_FILENO, "\033]11;?\033\\", 8) != 8) { + goto restore; + } + /* Ensure query is sent to terminal */ + tcdrain(STDOUT_FILENO); + /* Give terminal time to process and respond */ + usleep(50000); /* 50ms */ + + /* Read response with timeout - loop to get complete response */ + while (total < sizeof(buf) - 1) { + int sel; + FD_ZERO(&fds); + FD_SET(STDIN_FILENO, &fds); + tv.tv_sec = timeout_ms / 1000; + tv.tv_usec = (timeout_ms % 1000) * 1000; + + sel = select(STDIN_FILENO + 1, &fds, NULL, NULL, &tv); + if (sel < 0) { + if (errno == EINTR) continue; /* Retry on signal interrupt */ + break; /* Other error */ + } + if (sel == 0) break; /* Timeout */ + + n = read(STDIN_FILENO, buf + total, sizeof(buf) - 1 - total); + if (n <= 0) { + if (n < 0 && errno == EINTR) continue; /* Retry on signal interrupt */ + break; + } + total += (size_t)n; + + /* Check for terminator: ESC \ (0x1b 0x5c) or BEL (0x07) */ + if (total >= 2 && buf[total-2] == '\033' && buf[total-1] == '\\') break; + if (total >= 1 && buf[total-1] == '\007') break; + + /* Reduce timeout for subsequent reads */ + timeout_ms = 10; + } + + if (total == 0) { + goto restore; + } + buf[total] = '\0'; + + /* Parse response: look for "rgb:" followed by hex values */ + p = strstr(buf, "rgb:"); + if (p) { + p += 4; + /* Try 16-bit format: rgb:RRRR/GGGG/BBBB */ + if (sscanf(p, "%4x/%4x/%4x", &r, &g, &b) == 3) { + /* Normalize to 8-bit range */ + r >>= 8; g >>= 8; b >>= 8; + } + /* Try 8-bit format: rgb:RR/GG/BB */ + else if (sscanf(p, "%2x/%2x/%2x", &r, &g, &b) == 3) { + /* Already 8-bit */ + } + else { + goto restore; + } + + /* Calculate relative luminance (ITU-R BT.709 simplified) */ + /* Y = 0.2126*R + 0.7152*G + 0.0722*B */ + /* Scale: 0-255 input, threshold at ~127.5 */ + int luminance = (2126 * r + 7152 * g + 722 * b) / 10000; + result = (luminance < 128) ? MIRB_BG_DARK : MIRB_BG_LIGHT; + } + +restore: + /* Flush any remaining input before restoring terminal */ + tcflush(STDIN_FILENO, TCIFLUSH); + tcsetattr(STDIN_FILENO, TCSAFLUSH, &old_term); + return result; +} + +#else /* Windows */ + +mirb_bg_color +mirb_term_query_bg_color(int timeout_ms) +{ + (void)timeout_ms; + /* Windows Terminal supports OSC 11, but implementation requires + * different I/O handling. For now, return unknown and rely on + * fallback detection methods. */ + return MIRB_BG_UNKNOWN; +} + +#endif /* _WIN32 */ diff --git a/mrbgems/mruby-bin-mirb/tools/mirb/mirb_term.h b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_term.h new file mode 100644 index 0000000000..8c9c5d1f26 --- /dev/null +++ b/mrbgems/mruby-bin-mirb/tools/mirb/mirb_term.h @@ -0,0 +1,137 @@ +/* +** mirb_term.h - Terminal control for mirb multi-line editor +** +** See Copyright Notice in mruby.h +*/ + +#ifndef MIRB_TERM_H +#define MIRB_TERM_H + +#include + +/* + * Key codes for mirb editor + * Values > 255 are used for special keys to avoid collision with ASCII + */ +enum mirb_key { + MIRB_KEY_NONE = 0, + + /* Control characters (ASCII values) */ + MIRB_KEY_CTRL_A = 1, + MIRB_KEY_CTRL_B = 2, + MIRB_KEY_CTRL_C = 3, + MIRB_KEY_CTRL_D = 4, + MIRB_KEY_CTRL_E = 5, + MIRB_KEY_CTRL_F = 6, + MIRB_KEY_TAB = 9, + MIRB_KEY_CTRL_K = 11, + MIRB_KEY_CTRL_L = 12, + MIRB_KEY_ENTER = 13, + MIRB_KEY_CTRL_N = 14, + MIRB_KEY_CTRL_P = 16, + MIRB_KEY_CTRL_U = 21, + MIRB_KEY_CTRL_W = 23, + MIRB_KEY_CTRL_Y = 25, + MIRB_KEY_ESC = 27, + MIRB_KEY_BACKSPACE = 127, + + /* Special keys (escape sequences mapped to values > 255) */ + MIRB_KEY_UP = 256, + MIRB_KEY_DOWN = 257, + MIRB_KEY_RIGHT = 258, + MIRB_KEY_LEFT = 259, + MIRB_KEY_HOME = 260, + MIRB_KEY_END = 261, + MIRB_KEY_DELETE = 262, + + /* Alt/Meta key combinations */ + MIRB_KEY_ALT_B = 300, + MIRB_KEY_ALT_F = 301, + MIRB_KEY_ALT_D = 302 +}; + +/* + * Terminal state structure + */ +typedef struct mirb_term { + mrb_bool raw_mode; /* TRUE if terminal is in raw mode */ + mrb_bool supported; /* TRUE if raw mode is supported */ + int cols; /* terminal width in columns */ + int rows; /* terminal height in rows */ +#if !defined(_WIN32) && !defined(_WIN64) + void *orig_termios; /* original terminal settings (struct termios*) */ +#endif +} mirb_term; + +/* + * Initialize terminal state + * Returns TRUE if terminal operations are supported + */ +mrb_bool mirb_term_init(mirb_term *term); + +/* + * Cleanup terminal state and restore original settings + */ +void mirb_term_cleanup(mirb_term *term); + +/* + * Enable raw mode for character-by-character input + * Returns TRUE on success + */ +mrb_bool mirb_term_raw_enable(mirb_term *term); + +/* + * Disable raw mode and restore normal terminal operation + */ +void mirb_term_raw_disable(mirb_term *term); + +/* + * Read a single key (handles escape sequences) + * Returns key code from mirb_key enum or ASCII value + */ +int mirb_term_read_key(mirb_term *term); + +/* + * Cursor movement functions (ANSI escape sequences) + */ +void mirb_term_cursor_up(int n); +void mirb_term_cursor_down(int n); +void mirb_term_cursor_right(int n); +void mirb_term_cursor_left(int n); +void mirb_term_cursor_col(int col); /* move to column (1-based) */ + +/* + * Line and screen control + */ +void mirb_term_clear_line(void); /* clear entire current line */ +void mirb_term_clear_to_end(void); /* clear from cursor to end of line */ +void mirb_term_clear_screen(void); /* clear entire screen */ +void mirb_term_clear_below(void); /* clear from cursor to end of screen */ + +/* + * Update terminal size information + */ +void mirb_term_get_size(mirb_term *term); + +/* + * Flush output buffer + */ +void mirb_term_flush(void); + +/* + * Background color detection result + */ +typedef enum mirb_bg_color { + MIRB_BG_UNKNOWN, /* could not detect */ + MIRB_BG_DARK, /* dark background (luminance < 0.5) */ + MIRB_BG_LIGHT /* light background (luminance >= 0.5) */ +} mirb_bg_color; + +/* + * Query terminal background color using OSC 11 + * Returns MIRB_BG_UNKNOWN if terminal doesn't respond within timeout + * timeout_ms: timeout in milliseconds (recommended: 100) + */ +mirb_bg_color mirb_term_query_bg_color(int timeout_ms); + +#endif /* MIRB_TERM_H */ diff --git a/mrbgems/mruby-bin-mrb/README.md b/mrbgems/mruby-bin-mrb/README.md new file mode 100644 index 0000000000..7169b91833 --- /dev/null +++ b/mrbgems/mruby-bin-mrb/README.md @@ -0,0 +1,86 @@ +# mruby-bin-mrb + +`mrb` is a lightweight mruby runtime that executes only precompiled +RiteBinary (.mrb) files. Unlike the full `mruby` command, it does not +depend on `mruby-compiler`, resulting in a significantly smaller binary. + +This gem is intended for embedded deployments where Ruby scripts are +precompiled on a development machine and only the runtime is needed on +the target device. + +## Size comparison + +By excluding `mruby-compiler` (and gems that depend on it such as +`mruby-eval`, `mruby-binding`, and `mruby-bin-mirb`), the text segment +can be reduced by approximately 300KB or more, depending on the build +configuration. + +## Usage + +``` +mrb [switches] programfile.mrb [arguments] +``` + +### Options + +- `-d` - set debugging flags (set `$DEBUG` to true) +- `-r library` - load a library (.mrb) before executing your script +- `-v` - print version number, then run in verbose mode +- `--verbose` - run in verbose mode +- `--version` - print the version +- `--copyright` - print the copyright + +## Workflow + +```bash +# On the development machine (with full mruby + mrbc): +mrbc -o program.mrb program.rb + +# On the target device (with mrb only): +mrb program.mrb + +# Load a precompiled library before the main program: +mrb -r lib.mrb program.mrb + +# Pass arguments to the script: +mrb program.mrb arg1 arg2 +``` + +## Build configuration example + +To build a minimal mruby with only the `mrb` runtime: + +```ruby +# build_config/runtime.rb +MRuby::Build.new do |conf| + conf.toolchain + + # Use a gembox that does not pull in the compiler. + # For example, the default gembox does not require it. + conf.gembox 'default' + + # The runtime-only executor (no compiler dependency) + conf.gem :core => 'mruby-bin-mrb' + + # Do NOT include these (they require mruby-compiler): + # conf.gem :core => 'mruby-bin-mruby' + # conf.gem :core => 'mruby-bin-mirb' + # conf.gem :core => 'mruby-eval' + # conf.gem :core => 'mruby-binding' +end +``` + +## Differences from `mruby` command + +| Feature | `mruby` | `mrb` | +| ----------------------- | ------- | -------------------------- | +| Execute .rb files | yes | no | +| Execute .mrb files | yes | yes | +| `-e` inline code | yes | no | +| `-c` syntax check | yes | no | +| `-b` force binary mode | yes | not needed (always binary) | +| Requires mruby-compiler | yes | **no** | + +## License + +MIT License - see the mruby LICENSE file. diff --git a/mrbgems/mruby-bin-mrb/bintest/mrb.rb b/mrbgems/mruby-bin-mrb/bintest/mrb.rb new file mode 100644 index 0000000000..afd4d99877 --- /dev/null +++ b/mrbgems/mruby-bin-mrb/bintest/mrb.rb @@ -0,0 +1,73 @@ +require 'tempfile' +require 'open3' + +def assert_mrb(exp_out, exp_err, exp_success, args) + out, err, stat = Open3.capture3(*(cmd_list("mrb") + args)) + assert "assert_mrb" do + assert_operator(exp_out, :===, out, "standard output") + assert_operator(exp_err, :===, err, "standard error") + assert_equal(exp_success, stat.success?, "exit success?") + end +end + +assert('mrb can execute .mrb files') do + script = Tempfile.new(['test', '.rb']) + bin = Tempfile.new(['test', '.mrb']) + File.write(script.path, 'print "hello from mrb"') + system("#{cmd('mrbc')} -o #{bin.path} #{script.path}") + o = `#{cmd('mrb')} #{bin.path}`.strip + assert_equal 'hello from mrb', o +end + +assert('mrb $0 value') do + script = Tempfile.new(['test', '.rb']) + bin = Tempfile.new(['test', '.mrb']) + File.write(script.path, 'print $0') + system("#{cmd('mrbc')} -o #{bin.path} #{script.path}") + o = `#{cmd('mrb')} #{bin.path}`.strip + assert_equal bin.path, o +end + +assert('mrb ARGV value') do + script = Tempfile.new(['test', '.rb']) + bin = Tempfile.new(['test', '.mrb']) + File.write(script.path, 'p ARGV') + system("#{cmd('mrbc')} -o #{bin.path} #{script.path}") + o = `#{cmd('mrb')} #{bin.path} foo bar`.strip + assert_equal '["foo", "bar"]', o +end + +assert('mrb with no arguments prints error') do + assert_mrb("", /no program file given/, false, []) +end + +assert('mrb --version') do + assert_mrb(/\Amruby \d+\.\d+/, "", true, %w[--version]) +end + +assert('mrb -r option loads library') do + lib = Tempfile.new(['lib', '.rb']) + main = Tempfile.new(['main', '.rb']) + lib_mrb = Tempfile.new(['lib', '.mrb']) + main_mrb = Tempfile.new(['main', '.mrb']) + + File.write(lib.path, '$lib_loaded = true') + File.write(main.path, 'print $lib_loaded') + system("#{cmd('mrbc')} -o #{lib_mrb.path} #{lib.path}") + system("#{cmd('mrbc')} -o #{main_mrb.path} #{main.path}") + o = `#{cmd('mrb')} -r #{lib_mrb.path} #{main_mrb.path}`.strip + assert_equal 'true', o +end + +assert('mrb -d sets $DEBUG') do + script = Tempfile.new(['test', '.rb']) + bin = Tempfile.new(['test', '.mrb']) + File.write(script.path, 'print $DEBUG') + system("#{cmd('mrbc')} -o #{bin.path} #{script.path}") + o = `#{cmd('mrb')} -d #{bin.path}`.strip + assert_equal 'true', o +end + +assert('mrb nonexistent file') do + assert_mrb("", /Cannot open/, false, %w[nonexistent.mrb]) +end diff --git a/mrbgems/mruby-bin-mrb/mrbgem.rake b/mrbgems/mruby-bin-mrb/mrbgem.rake new file mode 100644 index 0000000000..20d6a85139 --- /dev/null +++ b/mrbgems/mruby-bin-mrb/mrbgem.rake @@ -0,0 +1,19 @@ +MRuby::Gem::Specification.new('mruby-bin-mrb') do |spec| + spec.license = 'MIT' + spec.author = 'mruby developers' + spec.summary = 'mruby runtime command (compiler-free)' + spec.bins = %w(mrb) + + # NOTE: Unlike mruby-bin-mruby, this gem does NOT depend on + # mruby-compiler. This makes it suitable for builds where the + # compiler is excluded to reduce binary size. + # + # To use this gem in your build_config.rb: + # + # MRuby::Build.new do |conf| + # conf.toolchain + # conf.gem :core => 'mruby-bin-mrb' + # # Do NOT include mruby-bin-mruby or mruby-compiler + # # unless other gems require them. + # end +end diff --git a/mrbgems/mruby-bin-mrb/tools/mrb/mrb.c b/mrbgems/mruby-bin-mrb/tools/mrb/mrb.c new file mode 100644 index 0000000000..670b66b703 --- /dev/null +++ b/mrbgems/mruby-bin-mrb/tools/mrb/mrb.c @@ -0,0 +1,311 @@ +/* +** mrb - mruby runtime executor (compiler-free) +** +** This is a lightweight alternative to the `mruby` command that only +** executes precompiled RiteBinary (.mrb) files. It does not depend on +** mruby-compiler, making it suitable for embedded deployments where +** binary size matters. +** +** Typical workflow: +** +** # On the development machine (with full mruby + compiler): +** mrbc -o program.mrb program.rb +** +** # On the target device (with mrb only, no compiler): +** mrb program.mrb +** +** By excluding mruby-compiler (and gems that depend on it such as +** mruby-eval, mruby-binding, mruby-bin-mirb), the resulting binary +** can be significantly smaller (~300KB+ savings on the text segment). +*/ + +#include + +#ifdef MRB_NO_STDIO +# error mruby-bin-mrb conflicts 'MRB_NO_STDIO' in your build configuration +#endif + +#include +#include +#include +#include +#include +#include + +#if defined(_WIN32) +# include +# include +#endif + +struct mrb_args { + FILE *rfp; + char *cmdline; + mrb_bool verbose : 1; + mrb_bool version : 1; + mrb_bool debug : 1; + int argc; + char **argv; + int libc; + char **libv; +}; + +static void +usage(const char *name) +{ + static const char *const usage_msg[] = { + "switches:", + "-d set debugging flags (set $DEBUG to true)", + "-r library load the library (.mrb) before executing your script", + "-v print version number, then run in verbose mode", + "--verbose run in verbose mode", + "--version print the version", + "--copyright print the copyright", + NULL + }; + const char *const *p = usage_msg; + + printf("Usage: %s [switches] programfile.mrb [arguments]\n", name); + while (*p) + printf(" %s\n", *p++); +} + +struct options { + int argc; + char **argv; + char *program; + char *opt; + char short_opt[2]; +}; + +static void +options_init(struct options *opts, int argc, char **argv) +{ + opts->argc = argc; + opts->argv = argv; + opts->program = *argv; + *opts->short_opt = 0; +} + +static const char * +options_opt(struct options *opts) +{ + /* concatenated short options (e.g. `-dv`) */ + if (*opts->short_opt && *++opts->opt) { + opts->short_opt[0] = *opts->opt; + opts->short_opt[1] = 0; + return opts->short_opt; + } + + while (++opts->argv, --opts->argc) { + opts->opt = *opts->argv; + + /* not start with `-` or just `-` */ + if (opts->opt[0] != '-' || !opts->opt[1]) return NULL; + + if (opts->opt[1] == '-') { + /* `--` */ + if (!opts->opt[2]) { + opts->argv++, opts->argc--; + return NULL; + } + /* long option */ + opts->opt += 2; + *opts->short_opt = 0; + return opts->opt; + } + else { + /* short option */ + opts->opt++; + opts->short_opt[0] = *opts->opt; + opts->short_opt[1] = 0; + return opts->short_opt; + } + } + return NULL; +} + +static const char * +options_arg(struct options *opts) +{ + if (*opts->short_opt && opts->opt[1]) { + /* concatenated short option and argument (e.g. `-rlibrary`) */ + *opts->short_opt = 0; + return opts->opt + 1; + } + --opts->argc, ++opts->argv; + return opts->argc ? *opts->argv : NULL; +} + +static char * +dup_arg_item(mrb_state *mrb, const char *item) +{ + size_t buflen = strlen(item) + 1; + char *buf = (char*)mrb_malloc(mrb, buflen); + memcpy(buf, item, buflen); + return buf; +} + +static int +parse_args(mrb_state *mrb, int argc, char **argv, struct mrb_args *args) +{ + static const struct mrb_args args_zero = { 0 }; + struct options opts[1]; + const char *opt, *item; + + *args = args_zero; + options_init(opts, argc, argv); + while ((opt = options_opt(opts))) { + if (strcmp(opt, "d") == 0) { + args->debug = TRUE; + } + else if (strcmp(opt, "h") == 0) { + usage(opts->program); + exit(EXIT_SUCCESS); + } + else if (strcmp(opt, "r") == 0) { + if ((item = options_arg(opts))) { + if (args->libc == 0) { + args->libv = (char**)mrb_malloc(mrb, sizeof(char*)); + } + else { + args->libv = (char**)mrb_realloc(mrb, args->libv, sizeof(char*) * (args->libc + 1)); + } + args->libv[args->libc++] = dup_arg_item(mrb, item); + } + else { + fprintf(stderr, "%s: No library specified for -r\n", opts->program); + return EXIT_FAILURE; + } + } + else if (strcmp(opt, "v") == 0) { + if (!args->verbose) { + mrb_show_version(mrb); + args->version = TRUE; + } + args->verbose = TRUE; + } + else if (strcmp(opt, "version") == 0) { + mrb_show_version(mrb); + exit(EXIT_SUCCESS); + } + else if (strcmp(opt, "verbose") == 0) { + args->verbose = TRUE; + } + else if (strcmp(opt, "copyright") == 0) { + mrb_show_copyright(mrb); + exit(EXIT_SUCCESS); + } + else { + fprintf(stderr, "%s: invalid option %s%s (-h will show valid options)\n", + opts->program, opt[1] ? "--" : "-", opt); + return EXIT_FAILURE; + } + } + + argc = opts->argc; argv = opts->argv; + if (*argv == NULL) { + if (args->version) exit(EXIT_SUCCESS); + fprintf(stderr, "%s: no program file given (only .mrb files are supported)\n", + opts->program); + return EXIT_FAILURE; + } + args->rfp = strcmp(argv[0], "-") == 0 ? + stdin : fopen(argv[0], "rb"); + if (args->rfp == NULL) { + fprintf(stderr, "%s: Cannot open program file: %s\n", opts->program, argv[0]); + return EXIT_FAILURE; + } + args->cmdline = argv[0]; + argc--; argv++; + +#if defined(_WIN32) + if (args->rfp == stdin) { + _setmode(_fileno(stdin), O_BINARY); + } +#endif + args->argv = (char **)mrb_realloc(mrb, args->argv, sizeof(char*) * (argc + 1)); + memcpy(args->argv, argv, (argc+1) * sizeof(char*)); + args->argc = argc; + + return EXIT_SUCCESS; +} + +static void +cleanup(mrb_state *mrb, struct mrb_args *args) +{ + if (args->rfp && args->rfp != stdin) + fclose(args->rfp); + mrb_free(mrb, args->argv); + if (args->libc) { + while (args->libc--) { + mrb_free(mrb, args->libv[args->libc]); + } + mrb_free(mrb, args->libv); + } + mrb_close(mrb); +} + +int +main(int argc, char **argv) +{ + mrb_state *mrb = mrb_open(); + int n = -1; + struct mrb_args args; + mrb_value ARGV; + mrb_value v; + + if (MRB_OPEN_FAILURE(mrb)) { + mrb_print_error(mrb); + mrb_close(mrb); + return EXIT_FAILURE; + } + + n = parse_args(mrb, argc, argv, &args); + if (n == EXIT_FAILURE || args.rfp == NULL) { + cleanup(mrb, &args); + return n; + } + + int ai = mrb_gc_arena_save(mrb); + ARGV = mrb_ary_new_capa(mrb, args.argc); + for (int i = 0; i < args.argc; i++) { + char* utf8 = mrb_utf8_from_locale(args.argv[i], -1); + if (utf8) { + mrb_ary_push(mrb, ARGV, mrb_str_new_cstr(mrb, utf8)); + mrb_utf8_free(utf8); + } + } + mrb_define_global_const(mrb, "ARGV", ARGV); + mrb_gv_set(mrb, mrb_intern_lit(mrb, "$DEBUG"), mrb_bool_value(args.debug)); + + /* Set $0 */ + const char *cmdline = args.cmdline ? args.cmdline : "-"; + mrb_gv_set(mrb, mrb_intern_lit(mrb, "$0"), mrb_str_new_cstr(mrb, cmdline)); + + /* Load libraries (.mrb only) */ + for (int i = 0; i < args.libc; i++) { + FILE *lfp = fopen(args.libv[i], "rb"); + if (lfp == NULL) { + fprintf(stderr, "%s: Cannot open library file: %s\n", cmdline, args.libv[i]); + cleanup(mrb, &args); + return EXIT_FAILURE; + } + mrb_load_irep_file(mrb, lfp); + fclose(lfp); + } + + /* Load and execute program (.mrb only) */ + v = mrb_load_irep_file(mrb, args.rfp); + + mrb_gc_arena_restore(mrb, ai); + if (mrb->exc) { + MRB_EXC_CHECK_EXIT(mrb, mrb->exc); + if (!mrb_undef_p(v)) { + mrb_print_error(mrb); + } + n = EXIT_FAILURE; + } + + cleanup(mrb, &args); + return n; +} diff --git a/mrbgems/mruby-bin-mrbc/README.md b/mrbgems/mruby-bin-mrbc/README.md new file mode 100644 index 0000000000..45e9623a46 --- /dev/null +++ b/mrbgems/mruby-bin-mrbc/README.md @@ -0,0 +1,58 @@ +# mruby-bin-mrbc + +mrbc is the mruby compiler that compiles Ruby source files into bytecode. + +## Usage + +``` +mrbc [switches] programfile... +``` + +### Options + +- `-c` - check syntax only +- `-o` - place the output into ``; required for multi-files +- `-v` - print version number, then turn on verbose mode +- `-g` - produce debugging information +- `-B` - binary `` output in C language format +- `-S` - dump C struct (requires `-B`) +- `-s` - define `` as static variable +- `--remove-lv` - remove local variables +- `--no-ext-ops` - prohibit using OP_EXTs +- `--no-optimize` - disable peephole optimization +- `--verbose` - run at verbose mode +- `--version` - print the version +- `--copyright` - print the copyright + +## Examples + +```bash +# Compile a Ruby script to bytecode +mrbc script.rb +# Creates script.mrb + +# Specify output file +mrbc -o output.mrb script.rb + +# Compile multiple files into one +mrbc -o combined.mrb file1.rb file2.rb + +# Generate C source with symbol name +mrbc -Bscript_bytecode script.rb +# Creates script.c with const uint8_t script_bytecode[] + +# Check syntax only +mrbc -c script.rb + +# Compile with debug information +mrbc -g script.rb +``` + +## Output Formats + +- `.mrb` - RiteBinary format (default), executable by `mruby -b` +- `.c` - C source file (with `-B` option), for embedding in C programs + +## License + +MIT License - see the mruby LICENSE file. diff --git a/mrbgems/mruby-bin-mrbc/bintest/mrbc.rb b/mrbgems/mruby-bin-mrbc/bintest/mrbc.rb index 90bbd123fd..4e3aea1685 100644 --- a/mrbgems/mruby-bin-mrbc/bintest/mrbc.rb +++ b/mrbgems/mruby-bin-mrbc/bintest/mrbc.rb @@ -28,3 +28,25 @@ assert_equal "#{a.path}:3:0: embedded document meets end of file", result.chomp assert_equal 1, $?.exitstatus end + +assert('debug info preserves line/filename across multiple inputs. #1316') do + # Verifying mrbc's debug info requires running the compiled output through + # the mruby binary; skip when bin-mruby isn't built. Kernel#puts only + # exists when mruby-io is loaded, so the script uses print. + skip "mruby command not built" unless File.exist?(cmd_bin("mruby")) + + a = Tempfile.new(['a', '.rb']) + b = Tempfile.new(['b', '.rb']) + out = Tempfile.new(['out', '.mrb']) + a.write("# line 1\n# line 2\nprint \"from a\"\n# line 4\nundefined_in_a\n") + a.flush + b.write("# b line 1\nprint \"from b\"\n") + b.flush + `#{cmd('mrbc')} -g -o #{out.path} #{a.path} #{b.path}` + assert_equal 0, $?.exitstatus + result = `#{cmd('mruby')} -b #{out.path} 2>&1` + # Error should point at a.rb line 5 (the `undefined_in_a` line), + # not b.rb or a different line within a.rb. + assert_include result, "#{a.path}:5:" + assert_not_include result, b.path +end diff --git a/mrbgems/mruby-bin-mrbc/tools/mrbc/mrbc.c b/mrbgems/mruby-bin-mrbc/tools/mrbc/mrbc.c index ef806bdf02..9050d138b8 100644 --- a/mrbgems/mruby-bin-mrbc/tools/mrbc/mrbc.c +++ b/mrbgems/mruby-bin-mrbc/tools/mrbc/mrbc.c @@ -24,10 +24,9 @@ struct mrbc_args { mrb_bool dump_struct : 1; mrb_bool check_syntax : 1; mrb_bool verbose : 1; - mrb_bool remove_lv : 1; mrb_bool no_ext_ops : 1; mrb_bool no_optimize : 1; - uint8_t flags : 2; + uint8_t flags : 3; }; static void @@ -165,7 +164,7 @@ parse_args(mrb_state *mrb, int argc, char **argv, struct mrbc_args *args) exit(EXIT_SUCCESS); } else if (strcmp(argv[i] + 2, "remove-lv") == 0) { - args->remove_lv = TRUE; + args->flags |= MRB_DUMP_NO_LVAR; break; } else if (strcmp(argv[i] + 2, "no-ext-ops") == 0) { @@ -198,16 +197,15 @@ cleanup(mrb_state *mrb, struct mrbc_args *args) static int partial_hook(struct mrb_parser_state *p) { - mrbc_context *c = p->cxt; - struct mrbc_args *args = (struct mrbc_args *)c->partial_data; - const char *fn; + mrb_ccontext *c = p->cxt; + struct mrbc_args *args = (struct mrbc_args*)c->partial_data; if (p->f) fclose(p->f); if (args->idx >= args->argc) { p->f = NULL; return -1; } - fn = args->argv[args->idx++]; + const char *fn = args->argv[args->idx++]; p->f = fopen(fn, "rb"); if (p->f == NULL) { fprintf(stderr, "%s: cannot open program file. (%s)\n", args->prog, fn); @@ -220,13 +218,11 @@ partial_hook(struct mrb_parser_state *p) static mrb_value load_file(mrb_state *mrb, struct mrbc_args *args) { - mrbc_context *c; - mrb_value result; char *input = args->argv[args->idx]; FILE *infile; mrb_bool need_close = FALSE; - c = mrbc_context_new(mrb); + mrb_ccontext *c = mrb_ccontext_new(mrb); if (args->verbose) c->dump_result = TRUE; c->no_exec = TRUE; @@ -242,16 +238,16 @@ load_file(mrb_state *mrb, struct mrbc_args *args) return mrb_nil_value(); } } - mrbc_filename(mrb, c, input); + mrb_ccontext_filename(mrb, c, input); args->idx++; if (args->idx < args->argc) { need_close = FALSE; - mrbc_partial_hook(mrb, c, partial_hook, (void*)args); + mrb_ccontext_partial_hook(c, partial_hook, (void*)args); } - result = mrb_load_file_cxt(mrb, infile, c); + mrb_value result = mrb_load_file_cxt(mrb, infile, c); if (need_close) fclose(infile); - mrbc_context_free(mrb, c); + mrb_ccontext_free(mrb, c); if (mrb_undef_p(result)) { return mrb_nil_value(); } @@ -259,14 +255,11 @@ load_file(mrb_state *mrb, struct mrbc_args *args) } static int -dump_file(mrb_state *mrb, FILE *wfp, const char *outfile, struct RProc *proc, struct mrbc_args *args) +dump_file(mrb_state *mrb, FILE *wfp, const char *outfile, const struct RProc *proc, struct mrbc_args *args) { int n = MRB_DUMP_OK; const mrb_irep *irep = proc->body.irep; - if (args->remove_lv) { - mrb_irep_remove_lv(mrb, (mrb_irep*)irep); - } if (args->initname) { if (args->dump_struct) { n = mrb_dump_irep_cstruct(mrb, irep, args->flags, wfp, args->initname); @@ -290,18 +283,16 @@ dump_file(mrb_state *mrb, FILE *wfp, const char *outfile, struct RProc *proc, st int main(int argc, char **argv) { - mrb_state *mrb = mrb_open_core(NULL, NULL); - int n, result; + mrb_state *mrb = mrb_open_core(); struct mrbc_args args; FILE *wfp; - mrb_value load; if (mrb == NULL) { fputs("Invalid mrb_state, exiting mrbc\n", stderr); return EXIT_FAILURE; } - n = parse_args(mrb, argc, argv, &args); + int n = parse_args(mrb, argc, argv, &args); if (n < 0) { cleanup(mrb, &args); usage(argv[0]); @@ -322,7 +313,7 @@ main(int argc, char **argv) } args.idx = n; - load = load_file(mrb, &args); + mrb_value load = load_file(mrb, &args); if (mrb_nil_p(load)) { cleanup(mrb, &args); return EXIT_FAILURE; @@ -346,10 +337,10 @@ main(int argc, char **argv) } } else { - fprintf(stderr, "Output file is required\n"); + fputs("Output file is required\n", stderr); return EXIT_FAILURE; } - result = dump_file(mrb, wfp, args.outfile, mrb_proc_ptr(load), &args); + int result = dump_file(mrb, wfp, args.outfile, mrb_proc_ptr(load), &args); fclose(wfp); cleanup(mrb, &args); if (result != MRB_DUMP_OK) { @@ -368,9 +359,4 @@ void mrb_init_mrbgems(mrb_state *mrb) { } - -void -mrb_final_mrbgems(mrb_state *mrb) -{ -} #endif diff --git a/mrbgems/mruby-bin-mrbc/tools/mrbc/stub.c b/mrbgems/mruby-bin-mrbc/tools/mrbc/stub.c index 13ca03932e..a0c982230e 100644 --- a/mrbgems/mruby-bin-mrbc/tools/mrbc/stub.c +++ b/mrbgems/mruby-bin-mrbc/tools/mrbc/stub.c @@ -68,7 +68,37 @@ mrb_value mrb_rational_to_f(mrb_state *mrb, mrb_value x) { return mrb_nil_value(); } +mrb_value +mrb_as_rational(mrb_state *mrb, mrb_value x) +{ + return mrb_nil_value(); +} void mrb_rational_copy(mrb_state *mrb, mrb_value x, mrb_value y) { } +int mrb_rational_mark(mrb_state *mrb, struct RBasic *x) +{ + return 2; +} +#endif + +#ifdef MRB_USE_SET +size_t mrb_gc_mark_set(mrb_state *mrb, struct RBasic *obj) +{ + /* stub for mrbc */ + return 0; +} + +void mrb_gc_free_set(mrb_state *mrb, struct RBasic *obj) +{ + /* stub for mrbc */ +} +#endif + +#ifdef MRB_USE_TASK_SCHEDULER +void mrb_task_mark_all(mrb_state *mrb) +{ + /* stub for mrbc */ + (void)mrb; +} #endif diff --git a/mrbgems/mruby-bin-mruby/README.md b/mrbgems/mruby-bin-mruby/README.md new file mode 100644 index 0000000000..5452927992 --- /dev/null +++ b/mrbgems/mruby-bin-mruby/README.md @@ -0,0 +1,47 @@ +# mruby-bin-mruby + +mruby is the main interpreter for executing Ruby scripts with mruby. + +## Usage + +``` +mruby [switches] [programfile] [arguments] +``` + +### Options + +- `-b` - load and execute RiteBinary (mrb) file +- `-c` - check syntax only +- `-d` - set debugging flags (set `$DEBUG` to true) +- `-e 'command'` - one line of script +- `-r library` - load the library before executing your script +- `-v` - print version number, then run in verbose mode +- `--verbose` - run in verbose mode +- `--version` - print the version +- `--copyright` - print the copyright + +## Examples + +```bash +# Execute a Ruby script +mruby script.rb + +# Execute inline code +mruby -e 'puts "Hello, mruby!"' + +# Check syntax without executing +mruby -c script.rb + +# Execute a compiled binary +mruby -b script.mrb + +# Load a library before running script +mruby -r mruby-io script.rb + +# Pass arguments to script +mruby script.rb arg1 arg2 +``` + +## License + +MIT License - see the mruby LICENSE file. diff --git a/mrbgems/mruby-bin-mruby/bintest/mruby.rb b/mrbgems/mruby-bin-mruby/bintest/mruby.rb index a626a13cd7..1310eb0bd0 100644 --- a/mrbgems/mruby-bin-mruby/bintest/mruby.rb +++ b/mrbgems/mruby-bin-mruby/bintest/mruby.rb @@ -166,8 +166,8 @@ def hoge system "#{cmd('mrbc')} -g -o #{amrb.path} #{arb.path}" File.write brb.path, 'p a' system "#{cmd('mrbc')} -g -o #{bmrb.path} #{brb.path}" - assert_mruby("", /:1: undefined method 'a' \(NoMethodError\)\n\z/, false, ["-r", arb.path, brb.path]) - assert_mruby("", /:1: undefined method 'a' \(NoMethodError\)\n\z/, false, ["-b", "-r", amrb.path, bmrb.path]) + assert_mruby("", /:1: undefined method 'a' .*\(NoMethodError\)\n\z/, false, ["-r", arb.path, brb.path]) + assert_mruby("", /:1: undefined method 'a' .*\(NoMethodError\)\n\z/, false, ["-b", "-r", amrb.path, bmrb.path]) File.write crb.path, 'a, b, c = 1, 2, 3; A = -> { b = -2; [a, b, c] }' system "#{cmd('mrbc')} -g -o #{cmrb.path} #{crb.path}" @@ -176,3 +176,21 @@ def hoge assert_mruby("[1, -2, 3]\n5\n6\n", "", true, ["-r", crb.path, drb.path]) assert_mruby("[1, -2, 3]\n5\n6\n", "", true, ["-b", "-r", cmrb.path, dmrb.path]) end + +assert('String#split still works when mruby-regexp is loaded') do + # Only meaningful when mruby-regexp is built in; skip otherwise. + _, _, stat = Open3.capture3(*(cmd_list("mruby") + ["-e", "Regexp"])) + skip "mruby-regexp not loaded" unless stat.success? + + # The regexp-aware override in mruby-regexp/mrblib/string_regexp.rb used to + # replace the C-defined String#split, leaving its `return super if ...` + # fast paths with no method to delegate to (NoMethodError). Now the + # override delegates via `__split`, an alias of the original C method + # installed in mrb_mruby_regexp_gem_init before mrblib runs. + assert_mruby(%Q(["a", "b", "c"]\n), "", true, + ["-e", 'p "a,b,c".split(",")']) + assert_mruby(%Q(["abc", "abc", "abc"]\n), "", true, + ["-e", 'p "abc abc abc".split']) + assert_mruby(%Q(["hello", "world"]\n), "", true, + ["-e", 'p "hello world".split(/\s+/)']) +end diff --git a/mrbgems/mruby-bin-mruby/mrbgem.rake b/mrbgems/mruby-bin-mruby/mrbgem.rake index 36bf2fa61f..ba7fad1fa7 100644 --- a/mrbgems/mruby-bin-mruby/mrbgem.rake +++ b/mrbgems/mruby-bin-mruby/mrbgem.rake @@ -4,9 +4,4 @@ MRuby::Gem::Specification.new('mruby-bin-mruby') do |spec| spec.summary = 'mruby command' spec.bins = %w(mruby) spec.add_dependency('mruby-compiler', :core => 'mruby-compiler') - spec.add_test_dependency('mruby-print', :core => 'mruby-print') - - if build.cxx_exception_enabled? - build.compile_as_cxx("#{spec.dir}/tools/mruby/mruby.c") - end end diff --git a/mrbgems/mruby-bin-mruby/tools/mruby/mruby.c b/mrbgems/mruby-bin-mruby/tools/mruby/mruby.c index 13003d693b..d5c159f3ea 100644 --- a/mrbgems/mruby-bin-mruby/tools/mruby/mruby.c +++ b/mrbgems/mruby-bin-mruby/tools/mruby/mruby.c @@ -12,9 +12,8 @@ #include #include #include -#include -#if defined(_WIN32) || defined(_WIN64) +#if defined(_WIN32) # include /* for setmode */ # include #endif @@ -98,13 +97,13 @@ options_opt(struct options *opts) while (++opts->argv, --opts->argc) { opts->opt = *opts->argv; - /* empty || not start with `-` || `-` */ - if (!opts->opt[0] || opts->opt[0] != '-' || !opts->opt[1]) return NULL; + /* not start with `-` || `-` */ + if (opts->opt[0] != '-' || !opts->opt[1]) return NULL; if (opts->opt[1] == '-') { /* `--` */ if (!opts->opt[2]) { - ++opts->argv, --opts->argc; + opts->argv++, opts->argc--; return NULL; } /* long option */ @@ -114,7 +113,7 @@ options_opt(struct options *opts) } else { /* short option */ - ++opts->opt; + opts->opt++; goto short_opt; } } @@ -172,8 +171,7 @@ parse_args(mrb_state *mrb, int argc, char **argv, struct _args *args) cmdlinelen = strlen(args->cmdline); itemlen = strlen(item); - args->cmdline = - (char *)mrb_realloc(mrb, args->cmdline, cmdlinelen + itemlen + 2); + args->cmdline = (char*)mrb_realloc(mrb, args->cmdline, cmdlinelen + itemlen + 2); args->cmdline[cmdlinelen] = '\n'; memcpy(args->cmdline + cmdlinelen + 1, item, itemlen + 1); } @@ -245,7 +243,7 @@ parse_args(mrb_state *mrb, int argc, char **argv, struct _args *args) argc--; argv++; } } -#if defined(_WIN32) || defined(_WIN64) +#if defined(_WIN32) if (args->rfp == stdin) { _setmode(_fileno(stdin), O_BINARY); } @@ -279,14 +277,13 @@ main(int argc, char **argv) { mrb_state *mrb = mrb_open(); int n = -1; - int i; struct _args args; mrb_value ARGV; - mrbc_context *c; mrb_value v; - if (mrb == NULL) { - fprintf(stderr, "%s: Invalid mrb_state, exiting mruby\n", *argv); + if (MRB_OPEN_FAILURE(mrb)) { + mrb_print_error(mrb); /* handles NULL */ + mrb_close(mrb); /* handles NULL */ return EXIT_FAILURE; } @@ -298,7 +295,7 @@ main(int argc, char **argv) else { int ai = mrb_gc_arena_save(mrb); ARGV = mrb_ary_new_capa(mrb, args.argc); - for (i = 0; i < args.argc; i++) { + for (int i = 0; i < args.argc; i++) { char* utf8 = mrb_utf8_from_locale(args.argv[i], -1); if (utf8) { mrb_ary_push(mrb, ARGV, mrb_str_new_cstr(mrb, utf8)); @@ -308,7 +305,7 @@ main(int argc, char **argv) mrb_define_global_const(mrb, "ARGV", ARGV); mrb_gv_set(mrb, mrb_intern_lit(mrb, "$DEBUG"), mrb_bool_value(args.debug)); - c = mrbc_context_new(mrb); + mrb_ccontext *c = mrb_ccontext_new(mrb); if (args.verbose) c->dump_result = TRUE; if (args.check_syntax) @@ -325,31 +322,29 @@ main(int argc, char **argv) mrb_gv_set(mrb, mrb_intern_lit(mrb, "$0"), mrb_str_new_cstr(mrb, cmdline)); /* Load libraries */ - for (i = 0; i < args.libc; i++) { - struct REnv *e; + for (int i = 0; i < args.libc; i++) { FILE *lfp = fopen(args.libv[i], "rb"); if (lfp == NULL) { fprintf(stderr, "%s: Cannot open library file: %s\n", *argv, args.libv[i]); - mrbc_context_free(mrb, c); + mrb_ccontext_free(mrb, c); cleanup(mrb, &args); return EXIT_FAILURE; } - mrbc_filename(mrb, c, args.libv[i]); + mrb_ccontext_filename(mrb, c, args.libv[i]); if (mrb_extension_p(args.libv[i])) { - v = mrb_load_irep_file_cxt(mrb, lfp, c); + mrb_load_irep_file_cxt(mrb, lfp, c); } else { - v = mrb_load_detect_file_cxt(mrb, lfp, c); + mrb_load_detect_file_cxt(mrb, lfp, c); } fclose(lfp); - e = mrb_vm_ci_env(mrb->c->cibase); - mrb_vm_ci_env_set(mrb->c->cibase, NULL); - mrb_env_unshare(mrb, e, FALSE); - mrbc_cleanup_local_variables(mrb, c); + mrb_vm_ci_env_clear(mrb, mrb->c->cibase); + mrb_ccontext_cleanup_local_variables(c); } - /* set program file name */ - mrbc_filename(mrb, c, cmdline); + /* set program filename */ + mrb_ccontext_filename(mrb, c, cmdline); + c->no_return_value = TRUE; /* main program doesn't need return value */ /* Load program */ if (args.mrbfile || mrb_extension_p(cmdline)) { @@ -366,7 +361,7 @@ main(int argc, char **argv) } mrb_gc_arena_restore(mrb, ai); - mrbc_context_free(mrb, c); + mrb_ccontext_free(mrb, c); if (mrb->exc) { MRB_EXC_CHECK_EXIT(mrb, mrb->exc); if (!mrb_undef_p(v)) { diff --git a/mrbgems/mruby-bin-strip/README.md b/mrbgems/mruby-bin-strip/README.md new file mode 100644 index 0000000000..6e9fa01e9b --- /dev/null +++ b/mrbgems/mruby-bin-strip/README.md @@ -0,0 +1,38 @@ +# mruby-bin-strip + +mruby-strip removes debug information from compiled mruby bytecode files to reduce file size. + +## Usage + +``` +mruby-strip [switches] irepfiles +``` + +### Options + +- `-l, --lvar` - remove LVAR section too (local variable names) + +## Examples + +```bash +# Strip debug info from a compiled file +mruby-strip script.mrb + +# Strip debug info and local variable names +mruby-strip -l script.mrb +mruby-strip --lvar script.mrb + +# Strip multiple files +mruby-strip file1.mrb file2.mrb file3.mrb +``` + +## Notes + +- The input files are modified in-place +- Stripping removes debug symbols used by the debugger (mrdb) +- The `-l` option additionally removes local variable names, which are needed for some reflection features +- Stripped files will still execute correctly but cannot be debugged effectively + +## License + +MIT License - see the mruby LICENSE file. diff --git a/mrbgems/mruby-bin-strip/bintest/mruby-strip.rb b/mrbgems/mruby-bin-strip/bintest/mruby_strip.rb similarity index 100% rename from mrbgems/mruby-bin-strip/bintest/mruby-strip.rb rename to mrbgems/mruby-bin-strip/bintest/mruby_strip.rb diff --git a/mrbgems/mruby-bin-strip/mrbgem.rake b/mrbgems/mruby-bin-strip/mrbgem.rake index 2abd25eea6..b98fbafcf7 100644 --- a/mrbgems/mruby-bin-strip/mrbgem.rake +++ b/mrbgems/mruby-bin-strip/mrbgem.rake @@ -2,5 +2,6 @@ MRuby::Gem::Specification.new('mruby-bin-strip') do |spec| spec.license = 'MIT' spec.author = 'mruby developers' spec.summary = 'irep dump debug section remover command' + spec.add_dependency 'mruby-compiler', :core => 'mruby-compiler' spec.bins = %w(mruby-strip) end diff --git a/mrbgems/mruby-bin-strip/tools/mruby-strip/mruby-strip.c b/mrbgems/mruby-bin-strip/tools/mruby-strip/mruby_strip.c similarity index 86% rename from mrbgems/mruby-bin-strip/tools/mruby-strip/mruby-strip.c rename to mrbgems/mruby-bin-strip/tools/mruby-strip/mruby_strip.c index ff39dd3578..172ba313f6 100644 --- a/mrbgems/mruby-bin-strip/tools/mruby-strip/mruby-strip.c +++ b/mrbgems/mruby-bin-strip/tools/mruby-strip/mruby_strip.c @@ -14,7 +14,7 @@ struct strip_args { int argc_start; int argc; char **argv; - mrb_bool lvar; + uint8_t flags; }; static void @@ -33,18 +33,18 @@ parse_args(int argc, char **argv, struct strip_args *args) args->argc_start = 0; args->argc = argc; args->argv = argv; - args->lvar = FALSE; + args->flags = 0; - for (i = 1; i < argc; ++i) { + for (i = 1; i < argc; i++) { const size_t len = strlen(argv[i]); if (len >= 2 && argv[i][0] == '-') { switch (argv[i][1]) { case 'l': - args->lvar = TRUE; + args->flags = MRB_DUMP_NO_LVAR; break; case '-': if (strncmp((*argv) + 2, "lvar", len) == 0) { - args->lvar = TRUE; + args->flags = MRB_DUMP_NO_LVAR; break; } default: @@ -65,7 +65,7 @@ strip(mrb_state *mrb, struct strip_args *args) { int i; - for (i = args->argc_start; i < args->argc; ++i) { + for (i = args->argc_start; i < args->argc; i++) { char *filename; FILE *rfile; mrb_irep *irep; @@ -86,11 +86,6 @@ strip(mrb_state *mrb, struct strip_args *args) return EXIT_FAILURE; } - /* clear lv if --lvar is enabled */ - if (args->lvar) { - mrb_irep_remove_lv(mrb, irep); - } - wfile = fopen(filename, "wb"); if (wfile == NULL) { fprintf(stderr, "can't open file for writing %s\n", filename); @@ -99,7 +94,7 @@ strip(mrb_state *mrb, struct strip_args *args) } /* debug flag must always be false */ - dump_result = mrb_dump_irep_binary(mrb, irep, FALSE, wfile); + dump_result = mrb_dump_irep_binary(mrb, irep, args->flags, wfile); fclose(wfile); mrb_irep_decref(mrb, irep); @@ -131,7 +126,7 @@ main(int argc, char **argv) print_usage(argv[0]); return EXIT_FAILURE; } - mrb = mrb_open_core(mrb_default_allocf, NULL); + mrb = mrb_open_core(); if (mrb == NULL) { fputs("Invalid mrb_state, exiting mruby-strip\n", stderr); return EXIT_FAILURE; diff --git a/mrbgems/mruby-binding-core/mrbgem.rake b/mrbgems/mruby-binding-core/mrbgem.rake deleted file mode 100644 index c0ba48207f..0000000000 --- a/mrbgems/mruby-binding-core/mrbgem.rake +++ /dev/null @@ -1,7 +0,0 @@ -MRuby::Gem::Specification.new('mruby-binding-core') do |spec| - spec.license = 'MIT' - spec.author = 'mruby developers' - spec.summary = 'Binding class (core features only)' - - spec.add_test_dependency('mruby-proc-ext', :core => 'mruby-proc-ext') -end diff --git a/mrbgems/mruby-binding-core/src/binding-core.c b/mrbgems/mruby-binding-core/src/binding-core.c deleted file mode 100644 index 47ec34f80b..0000000000 --- a/mrbgems/mruby-binding-core/src/binding-core.c +++ /dev/null @@ -1,309 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -void mrb_proc_merge_lvar(mrb_state *mrb, mrb_irep *irep, struct REnv *env, int num, const mrb_sym *lv, const mrb_value *stack); -mrb_value mrb_proc_local_variables(mrb_state *mrb, const struct RProc *proc); -const struct RProc *mrb_proc_get_caller(mrb_state *mrb, struct REnv **env); - -static mrb_int -binding_extract_pc(mrb_state *mrb, mrb_value binding) -{ - mrb_value obj = mrb_iv_get(mrb, binding, MRB_SYM(pc)); - if (mrb_nil_p(obj)) { - return -1; - } - else { - mrb_check_type(mrb, obj, MRB_TT_INTEGER); - return mrb_int(mrb, obj); - } -} - -const struct RProc * -mrb_binding_extract_proc(mrb_state *mrb, mrb_value binding) -{ - mrb_value obj = mrb_iv_get(mrb, binding, MRB_SYM(proc)); - mrb_check_type(mrb, obj, MRB_TT_PROC); - return mrb_proc_ptr(obj); -} - -struct REnv * -mrb_binding_extract_env(mrb_state *mrb, mrb_value binding) -{ - mrb_value obj = mrb_iv_get(mrb, binding, MRB_SYM(env)); - if (mrb_nil_p(obj)) { - return NULL; - } - else { - mrb_check_type(mrb, obj, MRB_TT_ENV); - return (struct REnv *)mrb_obj_ptr(obj); - } -} - -static void -binding_local_variable_name_check(mrb_state *mrb, mrb_sym id) -{ - if (id == 0) { - badname: - mrb_raisef(mrb, E_NAME_ERROR, "wrong local variable name %!n for binding", id); - } - - mrb_int len; - const char *name = mrb_sym_name_len(mrb, id, &len); - if (len == 0) { - goto badname; - } - - if (ISASCII(*name) && !(*name == '_' || ISLOWER(*name))) { - goto badname; - } - len--; - name++; - - for (; len > 0; len--, name++) { - if (ISASCII(*name) && !(*name == '_' || ISALNUM(*name))) { - goto badname; - } - } -} - -static mrb_value * -binding_local_variable_search(mrb_state *mrb, const struct RProc *proc, struct REnv *env, mrb_sym varname) -{ - binding_local_variable_name_check(mrb, varname); - - while (proc) { - if (MRB_PROC_CFUNC_P(proc)) break; - - const mrb_irep *irep = proc->body.irep; - const mrb_sym *lv; - if (irep && (lv = irep->lv)) { - for (int i = 0; i + 1 < irep->nlocals; i++, lv++) { - if (varname == *lv) { - return (env && MRB_ENV_LEN(env) > i) ? &env->stack[i + 1] : NULL; - } - } - } - - if (MRB_PROC_SCOPE_P(proc)) break; - env = MRB_PROC_ENV(proc); - proc = proc->upper; - } - - return NULL; -} - -/* - * call-seq: - * local_variable_defined?(symbol) -> bool - */ -static mrb_value -binding_local_variable_defined_p(mrb_state *mrb, mrb_value self) -{ - mrb_sym varname; - mrb_get_args(mrb, "n", &varname); - - const struct RProc *proc = mrb_binding_extract_proc(mrb, self); - struct REnv *env = mrb_binding_extract_env(mrb, self); - mrb_value *e = binding_local_variable_search(mrb, proc, env, varname); - if (e) { - return mrb_true_value(); - } - else { - return mrb_false_value(); - } -} - -/* - * call-seq: - * local_variable_get(symbol) -> object - */ -static mrb_value -binding_local_variable_get(mrb_state *mrb, mrb_value self) -{ - mrb_sym varname; - mrb_get_args(mrb, "n", &varname); - - const struct RProc *proc = mrb_binding_extract_proc(mrb, self); - struct REnv *env = mrb_binding_extract_env(mrb, self); - mrb_value *e = binding_local_variable_search(mrb, proc, env, varname); - if (!e) { - mrb_raisef(mrb, E_NAME_ERROR, "local variable %!n is not defined", varname); - } - - return *e; -} - -static mrb_value -binding_local_variable_set(mrb_state *mrb, mrb_value self) -{ - mrb_sym varname; - mrb_value obj; - mrb_get_args(mrb, "no", &varname, &obj); - - const struct RProc *proc = mrb_binding_extract_proc(mrb, self); - struct REnv *env = mrb_binding_extract_env(mrb, self); - mrb_value *e = binding_local_variable_search(mrb, proc, env, varname); - if (e) { - *e = obj; - if (!mrb_immediate_p(obj)) { - mrb_field_write_barrier(mrb, (struct RBasic*)env, (struct RBasic*)mrb_obj_ptr(obj)); - } - } - else { - mrb_proc_merge_lvar(mrb, (mrb_irep*)proc->body.irep, env, 1, &varname, &obj); - } - - return obj; -} - -static mrb_value -binding_local_variables(mrb_state *mrb, mrb_value self) -{ - const struct RProc *proc = mrb_proc_ptr(mrb_iv_get(mrb, self, MRB_SYM(proc))); - return mrb_proc_local_variables(mrb, proc); -} - -static mrb_value -binding_receiver(mrb_state *mrb, mrb_value self) -{ - return mrb_iv_get(mrb, self, MRB_SYM(recv)); -} - -/* - * call-seq: - * source_location -> [String, Integer] - */ -static mrb_value -binding_source_location(mrb_state *mrb, mrb_value self) -{ - if (mrb_iv_defined(mrb, self, MRB_SYM(source_location))) { - return mrb_iv_get(mrb, self, MRB_SYM(source_location)); - } - - mrb_value srcloc; - const struct RProc *proc = mrb_binding_extract_proc(mrb, self); - if (!proc || MRB_PROC_CFUNC_P(proc) || - !proc->upper || MRB_PROC_CFUNC_P(proc->upper)) { - srcloc = mrb_nil_value(); - } - else { - const mrb_irep *irep = proc->upper->body.irep; - mrb_int pc = binding_extract_pc(mrb, self); - if (pc < 0) { - srcloc = mrb_nil_value(); - } - else { - const char *fname = mrb_debug_get_filename(mrb, irep, (uint32_t)pc); - mrb_int fline = mrb_debug_get_line(mrb, irep, (uint32_t)pc); - - if (fname && fline >= 0) { - srcloc = mrb_assoc_new(mrb, mrb_str_new_cstr(mrb, fname), mrb_fixnum_value(fline)); - } - else { - srcloc = mrb_nil_value(); - } - } - } - - if (!mrb_frozen_p(mrb_obj_ptr(self))) { - mrb_iv_set(mrb, self, MRB_SYM(source_location), srcloc); - } - return srcloc; -} - -mrb_value -mrb_binding_alloc(mrb_state *mrb) -{ - struct RObject *obj = MRB_OBJ_ALLOC(mrb, MRB_TT_OBJECT, mrb_class_get_id(mrb, MRB_SYM(Binding))); - return mrb_obj_value(obj); -} - -struct RProc* -mrb_binding_wrap_lvspace(mrb_state *mrb, const struct RProc *proc, struct REnv **envp) -{ - /* - * local variable space: It is a space to hold the top-level variable of - * binding.eval and binding.local_variable_set. - */ - - static const mrb_code iseq_dummy[] = { OP_RETURN, 0 }; - - struct RProc *lvspace = MRB_OBJ_ALLOC(mrb, MRB_TT_PROC, mrb->proc_class); - mrb_irep *irep = mrb_add_irep(mrb); - irep->flags = MRB_ISEQ_NO_FREE; - irep->iseq = iseq_dummy; - irep->ilen = sizeof(iseq_dummy) / sizeof(iseq_dummy[0]); - irep->lv = (mrb_sym*)mrb_calloc(mrb, 1, sizeof(mrb_sym)); /* initial allocation for dummy */ - irep->nlocals = 1; - irep->nregs = 1; - lvspace->body.irep = irep; - lvspace->upper = proc; - if (*envp) { - lvspace->e.env = *envp; - lvspace->flags |= MRB_PROC_ENVSET; - } - - *envp = MRB_OBJ_ALLOC(mrb, MRB_TT_ENV, NULL); - (*envp)->stack = (mrb_value*)mrb_calloc(mrb, 1, sizeof(mrb_value)); - (*envp)->stack[0] = lvspace->e.env ? lvspace->e.env->stack[0] : mrb_nil_value(); - (*envp)->cxt = lvspace->e.env ? lvspace->e.env->cxt : mrb->c; - (*envp)->mid = 0; - (*envp)->flags = MRB_ENV_CLOSED; - MRB_ENV_SET_LEN(*envp, 1); - - return lvspace; -} - -static mrb_value -mrb_f_binding(mrb_state *mrb, mrb_value self) -{ - mrb_value binding; - struct RProc *proc; - struct REnv *env; - - binding = mrb_binding_alloc(mrb); - proc = (struct RProc*)mrb_proc_get_caller(mrb, &env); - if (!env || MRB_PROC_CFUNC_P(proc)) { - proc = NULL; - env = NULL; - } - - if (proc && !MRB_PROC_CFUNC_P(proc)) { - const mrb_irep *irep = proc->body.irep; - mrb_iv_set(mrb, binding, MRB_SYM(pc), mrb_fixnum_value(mrb->c->ci[-1].pc - irep->iseq - 1 /* step back */)); - } - proc = mrb_binding_wrap_lvspace(mrb, proc, &env); - mrb_iv_set(mrb, binding, MRB_SYM(proc), mrb_obj_value(proc)); - mrb_iv_set(mrb, binding, MRB_SYM(recv), self); - mrb_iv_set(mrb, binding, MRB_SYM(env), mrb_obj_value(env)); - return binding; -} - -void -mrb_mruby_binding_core_gem_init(mrb_state *mrb) -{ - struct RClass *binding = mrb_define_class(mrb, "Binding", mrb->object_class); - mrb_undef_class_method(mrb, binding, "new"); - mrb_undef_class_method(mrb, binding, "allocate"); - - mrb_define_method(mrb, mrb->kernel_module, "binding", mrb_f_binding, MRB_ARGS_NONE()); - - mrb_define_method(mrb, binding, "local_variable_defined?", binding_local_variable_defined_p, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, binding, "local_variable_get", binding_local_variable_get, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, binding, "local_variable_set", binding_local_variable_set, MRB_ARGS_REQ(2)); - mrb_define_method(mrb, binding, "local_variables", binding_local_variables, MRB_ARGS_NONE()); - mrb_define_method(mrb, binding, "receiver", binding_receiver, MRB_ARGS_NONE()); - mrb_define_method(mrb, binding, "source_location", binding_source_location, MRB_ARGS_NONE()); - mrb_define_method(mrb, binding, "inspect", mrb_any_to_s, MRB_ARGS_NONE()); -} - -void -mrb_mruby_binding_core_gem_final(mrb_state *mrb) -{ -} diff --git a/mrbgems/mruby-binding-core/test/binding-core.rb b/mrbgems/mruby-binding-core/test/binding-core.rb deleted file mode 100644 index 066e79b181..0000000000 --- a/mrbgems/mruby-binding-core/test/binding-core.rb +++ /dev/null @@ -1,40 +0,0 @@ -assert("Kernel.#binding") do - assert_kind_of Binding, binding -end - -assert("Binding#local_variables") do - block = Proc.new do |a| - b = 1 - binding - end - assert_equal [:a, :b, :block], block.call(0).local_variables.sort -end - -assert("Binding#local_variable_set") do - bind = binding - 1.times { - assert_equal(9, bind.local_variable_set(:x, 9)) - assert_raise(NameError) { x } - assert_equal([:bind, :x], bind.local_variables.sort) - } -end - -assert("Binding#local_variable_get") do - bind = binding - x = 1 - 1.times { - y = 2 - assert_equal(1, bind.local_variable_get(:x)) - x = 10 - assert_equal(10, bind.local_variable_get(:x)) - assert_raise(NameError) { bind.local_variable_get(:y) } - assert_equal([:bind, :x], bind.local_variables.sort) - } -end - -assert("Binding#source_location") do - skip unless -> {}.source_location - - bind, source_location = binding, [__FILE__, __LINE__] - assert_equal source_location, bind.source_location -end diff --git a/mrbgems/mruby-binding/README.md b/mrbgems/mruby-binding/README.md new file mode 100644 index 0000000000..ae0d69ddf3 --- /dev/null +++ b/mrbgems/mruby-binding/README.md @@ -0,0 +1,256 @@ +# mruby-binding + +The `mruby-binding` mrbgem provides the `Binding` class for mruby. This class allows you to encapsulate the execution context (variables, methods, and `self`) at a particular point in your code, making it available for later use. It is similar in purpose to the `Binding` class in standard Ruby. + +## Obtaining a Binding Object + +You can obtain a `Binding` object using the `Kernel#binding` method: + +### `Kernel#binding` -> Binding + +Returns a `Binding` object that encapsulates the execution context (including local variables, `self`, and any active block) at the point of the call. + +Example: + +```ruby +def get_binding(param) + local_var = 42 + binding # This will capture param, local_var, and self +end + +b = get_binding("hello") +# b now holds the context from inside get_binding +``` + +**Note:** `Kernel#binding` cannot be called from a C function or a Proc defined in C. Attempting to do so will raise a `RuntimeError`. + +## `Binding` Class Methods + +A `Binding` object has the following methods: + +### `local_variables` -> Array of Symbol + +Returns an array of symbols representing the names of the local variables defined in the binding's context. + +Example: + +```ruby +def my_method + a = 10 + b = 20 + binding.local_variables # => [:a, :b] +end +``` + +### `local_variable_get(symbol)` -> Object + +Retrieves the value of the local variable named by `symbol`. Raises a `NameError` if the variable is not defined in the binding's context. + +Example: + +```ruby +def my_method + a = 10 + b = binding + b.local_variable_get(:a) # => 10 +end +``` + +### `local_variable_set(symbol, value)` -> Object + +Sets the local variable named by `symbol` to `value`. If the variable is not already defined, it will be defined in the binding's scope. Returns the `value` that was set. + +Example: + +```ruby +def my_method + a = 10 + b = binding + b.local_variable_set(:a, 20) # a is now 20 + b.local_variable_set(:c, 30) # c is now defined as 30 in this scope + a # => 20 + c # => 30 +end +``` + +### `local_variable_defined?(symbol)` -> Boolean + +Returns `true` if the local variable named by `symbol` is defined in the binding's context, `false` otherwise. + +Example: + +```ruby +def my_method + a = 10 + b = binding + b.local_variable_defined?(:a) # => true + b.local_variable_defined?(:c) # => false +end +``` + +### `receiver` -> Object + +Returns the receiver object (`self`) of the binding. + +Example: + +```ruby +class MyClass + def get_binding + @x = "instance var" + binding + end +end + +obj = MyClass.new +b = obj.get_binding +b.receiver # => obj (the instance of MyClass) +b.receiver.instance_variable_get(:@x) # => "instance var" (Not directly using eval) +``` + +### `source_location` -> [String, Integer] | nil + +Returns a two-element array containing the filename and line number where the binding was created. Returns `nil` if the source location cannot be determined (e.g., for bindings created from C). + +Example: + +```ruby +# In a file named 'test.rb' +b = binding # Assuming this is line 2 +b.source_location # => ["test.rb", 2] (approximately) +``` + +### `dup` / `clone` -> Binding + +Creates a shallow copy of the binding. Modifications to local variables in one binding object can affect the other if the variables themselves are mutable objects, but setting a variable in one binding will not create it in the other after duplication. The internal state concerning local variable storage is also duplicated. + +(The C code refers to `binding_initialize_copy`, which is what `dup` and `clone` would use.) + +```ruby +def my_method + x = 1 + original_binding = binding + original_binding.local_variable_set(:y, 2) + + copied_binding = original_binding.dup + + original_binding.local_variable_set(:x, 10) + original_binding.local_variable_set(:y, 20) + original_binding.local_variable_set(:z, 30) # New variable in original + + puts copied_binding.local_variable_get(:x) # => 1 (Original value before duplication for variables existing at duplication time) + # Correction: The tests show that changes to existing variables are reflected. + # Let's re-verify test behavior for `dup`. + + # Re-checking test `Binding#dup`: + # x = 5 + # bind1 = binding + # bind1.local_variable_set(:y, 10) + # bind2 = bind1.dup + # assert_equal 5, bind2.local_variable_get(:x) + # assert_equal 10, bind2.local_variable_get(:y) + # x = 50 # x is changed in the original scope AFTER duplication + # assert_equal 50, bind1.local_variable_get(:x) + # assert_equal 50, bind2.local_variable_get(:x) # bind2 sees the change to x! + # bind1.local_variable_set(:y, 20) # y is changed in bind1 AFTER duplication + # assert_equal 20, bind1.local_variable_get(:y) + # assert_equal 20, bind2.local_variable_get(:y) # bind2 sees the change to y! + # bind1.local_variable_set(:z, 30) # z is added to bind1 + # assert_raise(NameError) { bind2.local_variable_get(:z) } # bind2 does not see new z + # bind2.local_variable_set(:z, 40) # z is added to bind2 + # assert_equal 30, bind1.local_variable_get(:z) + # assert_equal 40, bind2.local_variable_get(:z) + + # Corrected explanation for dup/clone: + # Creates a copy of the binding. Both the original and copied bindings share the same + # underlying environment for local variables that existed at the time of duplication. + # This means: + # - If a variable that existed when `dup` was called is modified (either in the original + # scope or via `local_variable_set` on either binding), the change is visible in both bindings. + # - If a *new* local variable is added to one binding using `local_variable_set` *after* + # duplication, it is not visible in the other binding. +end +``` + +**Note on `eval`:** While the `Binding` object is often used with `eval` in standard Ruby to execute code within the binding's context, `mruby-binding` itself does not provide an `eval` method directly on the `Binding` object. You would typically use `Binding` with mruby's core `eval` method if you need to evaluate a string of code within a captured context. The `mruby-binding` gem provides the necessary infrastructure (like `mrb_binding_extract_proc` and `mrb_binding_extract_env` in C) that can be utilized by an `eval` implementation. + +```ruby +# Conceptual example (actual eval might vary based on mruby core) +def my_method + a = 10 + b = binding + # eval("puts a", b) # => would print 10 + # eval("a = 20", b) # a in my_method's scope would become 20 +end +``` + +## Usage Example + +Here's a more complete example demonstrating some of the `Binding` object's capabilities: + +```ruby +class Greeter + def initialize(name) + @name = name + end + + def get_binding_for_greeting(additional_message) + greeting_type = "Hello" + # Capture the binding here + binding + end +end + +# Create an instance and get a binding +greeter_instance = Greeter.new("World") +captured_binding = greeter_instance.get_binding_for_greeting("Have a nice day!") + +# Access the receiver (self) +puts "Receiver: #{captured_binding.receiver}" +# => Receiver: # (actual object id will vary) +puts "Receiver's name: #{captured_binding.receiver.instance_variable_get(:@name)}" +# => Receiver's name: World + +# List local variables in the binding +puts "Local variables: #{captured_binding.local_variables.inspect}" +# => Local variables: [:additional_message, :greeting_type] (order may vary) + +# Get local variable values +puts "Greeting type: #{captured_binding.local_variable_get(:greeting_type)}" +# => Greeting type: Hello +puts "Additional message: #{captured_binding.local_variable_get(:additional_message)}" +# => Additional message: Have a nice day! + +# Set a local variable within the binding's context +captured_binding.local_variable_set(:greeting_type, "Hi") +puts "New greeting type: #{captured_binding.local_variable_get(:greeting_type)}" +# => New greeting type: Hi + +# Check if a variable is defined +puts "Is 'greeting_type' defined? #{captured_binding.local_variable_defined?(:greeting_type)}" +# => Is 'greeting_type' defined? true +puts "Is 'non_existent_var' defined? #{captured_binding.local_variable_defined?(:non_existent_var)}" +# => Is 'non_existent_var' defined? false + +# Source location (will vary based on where the code is run) +location = captured_binding.source_location +if location + puts "Binding created at: #{location[0]}:#{location[1]}" +else + puts "Source location not available for this binding." +end +``` + +This example illustrates how a `Binding` object captures the state of local variables and `self` from the scope where it was created, and how these can be inspected and manipulated. + +## Limitations and mruby-specific Considerations + +- **Nesting Depth for Local Variables:** mruby has an internal limit on how deeply nested Procs (blocks) can be while still allowing the `Binding` object to access and manage their local variables. This limit is defined by the `BINDING_UPPER_MAX` constant (defaulting to 20, with a minimum of 10 and a maximum of 100, configurable at compile time via `MRB_BINDING_UPPER_MAX`). If you exceed this nesting depth, attempting to create or manipulate a binding that needs to access variables across too many Proc scopes might result in a `RuntimeError` ("too many upper procs for local variables"). + +- **`eval` Method:** As noted earlier, this gem provides the `Binding` object itself, not a `Binding#eval` method. You would use `Kernel.eval(string, binding)` if you need to evaluate code within the context of a binding, relying on mruby's core `eval` capabilities. + +- **C Function Callers:** `Kernel#binding` cannot create a `Binding` object if the direct caller is a C function. It must be called from Ruby code. + +``` + +``` diff --git a/mrbgems/mruby-binding/mrbgem.rake b/mrbgems/mruby-binding/mrbgem.rake index 4ad5638ea9..fe48f3add0 100644 --- a/mrbgems/mruby-binding/mrbgem.rake +++ b/mrbgems/mruby-binding/mrbgem.rake @@ -1,11 +1,7 @@ MRuby::Gem::Specification.new('mruby-binding') do |spec| spec.license = 'MIT' spec.author = 'mruby developers' - spec.summary = 'Binding class' + spec.summary = 'Binding class (core features only)' - spec.add_dependency('mruby-binding-core', :core => 'mruby-binding-core') - spec.add_dependency('mruby-eval', :core => 'mruby-eval') - spec.add_test_dependency('mruby-metaprog', :core => 'mruby-metaprog') - spec.add_test_dependency('mruby-method', :core => 'mruby-method') spec.add_test_dependency('mruby-proc-ext', :core => 'mruby-proc-ext') end diff --git a/mrbgems/mruby-binding/src/binding.c b/mrbgems/mruby-binding/src/binding.c index de3147f68e..a5c9d6c01d 100644 --- a/mrbgems/mruby-binding/src/binding.c +++ b/mrbgems/mruby-binding/src/binding.c @@ -1,166 +1,514 @@ #include #include #include -#include -#include +#include #include -#include +#include +#include +#include #include -#include -void mrb_proc_merge_lvar(mrb_state *mrb, mrb_irep *irep, struct REnv *env, int num, const mrb_sym *lv, const mrb_value *stack); -const struct RProc *mrb_binding_extract_proc(mrb_state *mrb, mrb_value binding); -struct REnv *mrb_binding_extract_env(mrb_state *mrb, mrb_value binding); -typedef mrb_bool mrb_parser_foreach_top_variable_func(mrb_state *mrb, mrb_sym sym, void *user); -void mrb_parser_foreach_top_variable(mrb_state *mrb, struct mrb_parser_state *p, mrb_parser_foreach_top_variable_func *func, void *user); +#define BINDING_UPPER_DEFAULT 20 +#define BINDING_UPPER_MINIMUM 10 +#define BINDING_UPPER_MAXIMUM 100 -static void -binding_eval_error_check(mrb_state *mrb, struct mrb_parser_state *p, const char *file) +#ifndef MRB_BINDING_UPPER_MAX +# define BINDING_UPPER_MAX BINDING_UPPER_DEFAULT +#else +# if (MRB_BINDING_UPPER_MAX) > BINDING_UPPER_MAXIMUM +# define BINDING_UPPER_MAX BINDING_UPPER_MAXIMUM +# elif (MRB_BINDING_UPPER_MAX) < BINDING_UPPER_MINIMUM +# define BINDING_UPPER_MAX BINDING_UPPER_MINIMUM +# else +# define BINDING_UPPER_MAX MRB_BINDING_UPPER_MAX +# endif +#endif + +static mrb_int +binding_extract_pc(mrb_state *mrb, mrb_value binding) { - if (!p) { - mrb_raise(mrb, E_RUNTIME_ERROR, "Failed to create parser state (out of memory)"); + mrb_value obj = mrb_iv_get(mrb, binding, MRB_SYM(pc)); + if (mrb_nil_p(obj)) { + return -1; + } + else { + mrb_check_type(mrb, obj, MRB_TT_INTEGER); + return mrb_int(mrb, obj); } +} - if (0 < p->nerr) { - mrb_value str; +const struct RProc * +mrb_binding_extract_proc(mrb_state *mrb, mrb_value binding) +{ + mrb_value obj = mrb_iv_get(mrb, binding, MRB_SYM(proc)); + mrb_check_type(mrb, obj, MRB_TT_PROC); + return mrb_proc_ptr(obj); +} - if (file) { - str = mrb_format(mrb, "file %s line %d: %s", - file, - p->error_buffer[0].lineno, - p->error_buffer[0].message); - } - else { - str = mrb_format(mrb, "line %d: %s", - p->error_buffer[0].lineno, - p->error_buffer[0].message); - } - mrb_exc_raise(mrb, mrb_exc_new_str(mrb, E_SYNTAX_ERROR, str)); +struct REnv * +mrb_binding_extract_env(mrb_state *mrb, mrb_value binding) +{ + mrb_value obj = mrb_iv_get(mrb, binding, MRB_SYM(env)); + if (mrb_nil_p(obj)) { + return NULL; + } + else { + mrb_check_type(mrb, obj, MRB_TT_ENV); + return (struct REnv*)mrb_obj_ptr(obj); } } -#define LV_BUFFERS 8 +static mrb_irep * +binding_irep_new_lvspace(mrb_state *mrb) +{ + static const mrb_code iseq_dummy[] = { OP_RETURN, 0 }; -struct expand_lvspace { - mrb_irep *irep; - struct REnv *env; - int numvar; - mrb_sym syms[LV_BUFFERS]; -}; - -static mrb_bool -expand_lvspace(mrb_state *mrb, mrb_sym sym, void *user) -{ - struct expand_lvspace *p = (struct expand_lvspace*)user; - mrb_int symlen; - const char *symname = mrb_sym_name_len(mrb, sym, &symlen); - - if (symname && symlen > 0) { - if (symname[0] != '&' && symname[0] != '*') { - p->syms[p->numvar++] = sym; - if (p->numvar >= LV_BUFFERS) { - mrb_proc_merge_lvar(mrb, p->irep, p->env, p->numvar, p->syms, NULL); - p->numvar = 0; - } + mrb_irep *irep = mrb_add_irep(mrb); + irep->flags = MRB_ISEQ_NO_FREE; + irep->iseq = iseq_dummy; + irep->ilen = sizeof(iseq_dummy) / sizeof(iseq_dummy[0]); + irep->lv = NULL; + irep->nlocals = 1; + irep->nregs = 1; + return irep; +} + +static struct RProc * +binding_proc_new_lvspace(mrb_state *mrb, const struct RProc *upper, struct REnv *env) +{ + struct RProc *lvspace = MRB_OBJ_ALLOC(mrb, MRB_TT_PROC, mrb->proc_class); + lvspace->body.irep = binding_irep_new_lvspace(mrb); + lvspace->upper = upper; + if (env && env->tt == MRB_TT_ENV) { + lvspace->e.env = env; + lvspace->flags |= MRB_PROC_ENVSET; + } + return lvspace; +} + +static struct REnv * +binding_env_new_lvspace(mrb_state *mrb, const struct REnv *e) +{ + struct REnv *env = MRB_OBJ_ALLOC(mrb, MRB_TT_ENV, NULL); + mrb_value *stacks = (mrb_value*)mrb_calloc(mrb, 1, sizeof(mrb_value)); + env->mid = 0; + env->stack = stacks; + if (e && e->stack && MRB_ENV_LEN(e) > 0) { + env->stack[0] = e->stack[0]; + } + else { + env->stack[0] = mrb_nil_value(); + } + MRB_ENV_SET_LEN(env, 1); + return env; +} + +static void +binding_check_proc_upper_count(mrb_state *mrb, const struct RProc *proc) +{ + for (size_t count = 0; proc && !MRB_PROC_CFUNC_P(proc); proc = proc->upper) { + count++; + if (count > BINDING_UPPER_MAX) { + mrb_raise(mrb, E_RUNTIME_ERROR, + "too many upper procs for local variables (mruby limitation; maximum is " MRB_STRINGIZE(BINDING_UPPER_MAX) ")"); } + if (MRB_PROC_SCOPE_P(proc)) break; } +} +mrb_bool +mrb_binding_p(mrb_state *mrb, mrb_value obj) +{ + if (!mrb_obj_is_kind_of(mrb, obj, mrb_class_get_id(mrb, MRB_SYM(Binding)))) return FALSE; + if (mrb_type(obj) != MRB_TT_OBJECT) return FALSE; + if (!mrb_obj_iv_defined(mrb, mrb_obj_ptr(obj), MRB_SYM(proc))) return FALSE; + if (!mrb_obj_iv_defined(mrb, mrb_obj_ptr(obj), MRB_SYM(recv))) return FALSE; + if (!mrb_obj_iv_defined(mrb, mrb_obj_ptr(obj), MRB_SYM(env))) return FALSE; return TRUE; } -struct binding_eval_prepare_body { - mrb_value binding; - const char *file; - const char *expr; - mrb_int exprlen; - mrbc_context *mrbc; - struct mrb_parser_state *pstate; -}; +static void +binding_type_ensure(mrb_state *mrb, mrb_value obj) +{ + if (mrb_binding_p(mrb, obj)) return; + mrb_raise(mrb, E_TYPE_ERROR, "not a binding"); +} + +static struct RProc* +binding_wrap_lvspace(mrb_state *mrb, const struct RProc *proc, struct REnv **envp) +{ + /* + * local variable space: It is a space to hold the top-level variable of + * binding.eval and binding.local_variable_set. + */ + + struct RProc *lvspace = binding_proc_new_lvspace(mrb, proc, *envp); + *envp = binding_env_new_lvspace(mrb, *envp); + return lvspace; +} static mrb_value -binding_eval_prepare_body(mrb_state *mrb, void *opaque) +binding_initialize_copy(mrb_state *mrb, mrb_value binding) { - struct binding_eval_prepare_body *p = (struct binding_eval_prepare_body*)opaque; + mrb_value src = mrb_get_arg1(mrb); + binding_type_ensure(mrb, src); + const struct RProc *src_proc = mrb_binding_extract_proc(mrb, src); + struct REnv *src_env = mrb_binding_extract_env(mrb, src); - const struct RProc *proc = mrb_binding_extract_proc(mrb, p->binding); - mrb_assert(!MRB_PROC_CFUNC_P(proc)); + mrb_check_frozen(mrb, mrb_obj_ptr(binding)); - p->mrbc = mrbc_context_new(mrb); - mrbc_filename(mrb, p->mrbc, p->file ? p->file : "(eval)"); - p->mrbc->upper = proc; - p->mrbc->capture_errors = TRUE; - p->pstate = mrb_parse_nstring(mrb, p->expr, p->exprlen, p->mrbc); - binding_eval_error_check(mrb, p->pstate, p->file); + struct RProc *lvspace; + struct REnv *env; + if (MRB_ENV_LEN(src_env) < 2) { + /* when local variables of src are self only */ + env = src_proc->e.env; + lvspace = binding_wrap_lvspace(mrb, src_proc->upper, &env); + } + else { + binding_check_proc_upper_count(mrb, src_proc); + + env = src_env; + lvspace = binding_wrap_lvspace(mrb, src_proc, &env); - struct expand_lvspace args = { - (mrb_irep*)proc->body.irep, - mrb_binding_extract_env(mrb, p->binding), - 0, - { 0 } - }; - mrb_parser_foreach_top_variable(mrb, p->pstate, expand_lvspace, &args); - if (args.numvar > 0) { - mrb_proc_merge_lvar(mrb, args.irep, args.env, args.numvar, args.syms, NULL); + // The reason for using the mrb_obj_iv_set_force() function is to allow local + // variables to be modified even if src is frozen. This behavior is CRuby imitation. + src_proc = binding_wrap_lvspace(mrb, src_proc, &src_env); + struct RObject *o = mrb_obj_ptr(src); + mrb_obj_iv_set_force(mrb, o, MRB_SYM(proc), mrb_obj_value((struct RProc*)src_proc)); + mrb_obj_iv_set_force(mrb, o, MRB_SYM(env), mrb_obj_value(src_env)); } + mrb_iv_set(mrb, binding, MRB_SYM(proc), mrb_obj_value(lvspace)); + mrb_iv_set(mrb, binding, MRB_SYM(env), mrb_obj_value(env)); - return mrb_nil_value(); + return binding; +} + +static mrb_noreturn void +badname_error(mrb_state *mrb, mrb_sym id) +{ + mrb_raisef(mrb, E_NAME_ERROR, "wrong local variable name %!n for binding", id); } static void -binding_eval_prepare(mrb_state *mrb, mrb_value binding) +binding_local_variable_name_check(mrb_state *mrb, mrb_sym id) { - struct binding_eval_prepare_body d = { binding, NULL, NULL, 0, NULL, NULL }; - mrb_int argc; - mrb_value *argv; - mrb_get_args(mrb, "s|z*!", &d.expr, &d.exprlen, &d.file, &argv, &argc); + if (id == 0) { + badname_error(mrb, id); + } - /* `eval` should take (string[, file, line]) */ - if (argc > 3) mrb_argnum_error(mrb, argc, 1, 3); - mrb_bool error; - mrb_value ret = mrb_protect_error(mrb, binding_eval_prepare_body, &d, &error); - if (d.pstate) mrb_parser_free(d.pstate); - if (d.mrbc) mrbc_context_free(mrb, d.mrbc); - if (error) mrb_exc_raise(mrb, ret); + mrb_int len; + const char *name = mrb_sym_name_len(mrb, id, &len); + if (len == 0) { + badname_error(mrb, id); + } + + if (ISASCII(*name) && !(*name == '_' || ISLOWER(*name))) { + badname_error(mrb, id); + } + len--; + name++; + + for (; len > 0; len--, name++) { + if (ISASCII(*name) && !(*name == '_' || ISALNUM(*name))) { + badname_error(mrb, id); + } + } } +static mrb_value * +binding_local_variable_search(mrb_state *mrb, const struct RProc *proc, struct REnv *env, mrb_sym varname) +{ + binding_local_variable_name_check(mrb, varname); + + while (proc) { + if (MRB_PROC_CFUNC_P(proc)) break; + + const mrb_irep *irep = proc->body.irep; + const mrb_sym *lv; + if (irep && (lv = irep->lv)) { + for (int i = 0; i + 1 < irep->nlocals; i++, lv++) { + if (varname == *lv) { + return (env && MRB_ENV_LEN(env) > i) ? &env->stack[i + 1] : NULL; + } + } + } + + if (MRB_PROC_SCOPE_P(proc)) break; + env = MRB_PROC_ENV(proc); + proc = proc->upper; + } + + return NULL; +} + +/* + * call-seq: + * local_variable_defined?(symbol) -> bool + * + * Returns true if a local variable with the given name is defined + * in the binding's context, false otherwise. + * + * def foo + * a = 1 + * b = binding + * b.local_variable_defined?(:a) #=> true + * b.local_variable_defined?(:c) #=> false + * end + * + * x = 10 + * bind = binding + * bind.local_variable_defined?(:x) #=> true + * bind.local_variable_defined?(:y) #=> false + * bind.local_variable_set(:y, 20) + * bind.local_variable_defined?(:y) #=> true + */ static mrb_value -mrb_binding_eval(mrb_state *mrb, mrb_value binding) +binding_local_variable_defined_p(mrb_state *mrb, mrb_value self) { - binding_eval_prepare(mrb, binding); + mrb_sym varname; + mrb_get_args(mrb, "n", &varname); - struct RClass *c = mrb->kernel_module; - mrb_method_t m = mrb_method_search_vm(mrb, &c, MRB_SYM(eval)); - mrb_callinfo *ci = mrb->c->ci; - int argc = ci->n; - mrb_value *argv = ci->stack + 1; - struct RProc *proc; + const struct RProc *proc = mrb_binding_extract_proc(mrb, self); + struct REnv *env = mrb_binding_extract_env(mrb, self); + mrb_value *e = binding_local_variable_search(mrb, proc, env, varname); + if (e) { + return mrb_true_value(); + } + else { + return mrb_false_value(); + } +} + +/* + * call-seq: + * local_variable_get(symbol) -> object + * + * Returns the value of the local variable with the given name + * in the binding's context. Raises NameError if the variable + * is not defined. + * + * def foo + * a = 42 + * b = "hello" + * bind = binding + * bind.local_variable_get(:a) #=> 42 + * bind.local_variable_get(:b) #=> "hello" + * bind.local_variable_get(:c) #=> NameError + * end + * + * x = [1, 2, 3] + * bind = binding + * bind.local_variable_get(:x) #=> [1, 2, 3] + * x = "modified" + * bind.local_variable_get(:x) #=> "modified" + */ +static mrb_value +binding_local_variable_get(mrb_state *mrb, mrb_value self) +{ + mrb_sym varname; + mrb_get_args(mrb, "n", &varname); - if (argc < 15) { - argv[0] = mrb_ary_new_from_values(mrb, argc, argv); - argv[1] = argv[argc]; /* copy block */ - ci->n = 15; + const struct RProc *proc = mrb_binding_extract_proc(mrb, self); + struct REnv *env = mrb_binding_extract_env(mrb, self); + mrb_value *e = binding_local_variable_search(mrb, proc, env, varname); + if (!e) { + mrb_raisef(mrb, E_NAME_ERROR, "local variable %!n is not defined", varname); } - if (MRB_METHOD_UNDEF_P(m)) { - mrb_method_missing(mrb, MRB_SYM(eval), binding, argv[0]); + + return *e; +} + +/* + * call-seq: + * binding.local_variable_set(symbol, obj) -> obj + * + * Set local variable named symbol as obj in binding's context. + * If the variable is not defined in the binding, it will be created. + * + * def foo + * a = 1 + * binding.local_variable_set(:a, 2) + * binding.local_variable_set(:b, 3) + * [a, b] #=> [2, 3] + * end + */ +static mrb_value +binding_local_variable_set(mrb_state *mrb, mrb_value self) +{ + mrb_sym varname; + mrb_value obj; + mrb_get_args(mrb, "no", &varname, &obj); + + const struct RProc *proc = mrb_binding_extract_proc(mrb, self); + struct REnv *env = mrb_binding_extract_env(mrb, self); + mrb_value *e = binding_local_variable_search(mrb, proc, env, varname); + if (e) { + *e = obj; + if (!mrb_immediate_p(obj)) { + mrb_field_write_barrier(mrb, (struct RBasic*)env, (struct RBasic*)mrb_obj_ptr(obj)); + } } + else { + mrb_proc_merge_lvar(mrb, (mrb_irep*)proc->body.irep, env, 1, &varname, &obj); + } + + return obj; +} + +/* + * call-seq: + * binding.local_variables -> array + * + * Returns an array of symbols representing the names of the local variables + * in the binding. + * + * def foo + * a = 1 + * b = 2 + * binding.local_variables #=> [:a, :b] + * end + */ +static mrb_value +binding_local_variables(mrb_state *mrb, mrb_value self) +{ + const struct RProc *proc = mrb_proc_ptr(mrb_iv_get(mrb, self, MRB_SYM(proc))); + return mrb_proc_local_variables(mrb, proc); +} - mrb_ary_splice(mrb, argv[0], 1, 0, binding); /* insert binding as 2nd argument */ - if (MRB_METHOD_FUNC_P(m)) { - proc = mrb_proc_new_cfunc(mrb, MRB_METHOD_FUNC(m)); - MRB_PROC_SET_TARGET_CLASS(proc, c); +/* + * call-seq: + * binding.receiver -> object + * + * Returns the bound receiver of the binding object. + * + * class Demo + * def get_binding + * binding + * end + * end + * Demo.new.get_binding.receiver #=> # + */ +static mrb_value +binding_receiver(mrb_state *mrb, mrb_value self) +{ + return mrb_iv_get(mrb, self, MRB_SYM(recv)); +} + +/* + * call-seq: + * source_location -> [String, Integer] + */ +static mrb_value +binding_source_location(mrb_state *mrb, mrb_value self) +{ + if (mrb_iv_defined(mrb, self, MRB_SYM(source_location))) { + return mrb_iv_get(mrb, self, MRB_SYM(source_location)); + } + + mrb_value srcloc; + const struct RProc *proc = mrb_binding_extract_proc(mrb, self); + if (!proc || MRB_PROC_CFUNC_P(proc) || + !proc->upper || MRB_PROC_CFUNC_P(proc->upper)) { + srcloc = mrb_nil_value(); } else { - proc = MRB_METHOD_PROC(m); + const mrb_irep *irep = proc->upper->body.irep; + mrb_int pc = binding_extract_pc(mrb, self); + if (pc < 0) { + srcloc = mrb_nil_value(); + } + else { + const char *fname; + int32_t line; + + if (!mrb_debug_get_position(mrb, irep, (uint32_t)pc, &line, &fname)) { + srcloc = mrb_nil_value(); + } + else { + srcloc = mrb_assoc_new(mrb, mrb_str_new_cstr(mrb, fname), mrb_fixnum_value(line)); + } + } + } + + if (!mrb_frozen_p(mrb_obj_ptr(self))) { + mrb_iv_set(mrb, self, MRB_SYM(source_location), srcloc); + } + return srcloc; +} + +mrb_value +mrb_binding_new(mrb_state *mrb, const struct RProc *proc, mrb_value recv, struct REnv *env) +{ + struct RObject *binding = MRB_OBJ_ALLOC(mrb, MRB_TT_OBJECT, mrb_class_get_id(mrb, MRB_SYM(Binding))); + + if (proc && !MRB_PROC_CFUNC_P(proc)) { + const mrb_irep *irep = proc->body.irep; + mrb_obj_iv_set(mrb, binding, MRB_SYM(pc), mrb_fixnum_value(mrb->c->ci[-1].pc - irep->iseq - 1 /* step back */)); + } + proc = binding_wrap_lvspace(mrb, proc, &env); + + mrb_obj_iv_set(mrb, binding, MRB_SYM(proc), mrb_obj_value((void*)proc)); + mrb_obj_iv_set(mrb, binding, MRB_SYM(recv), recv); + mrb_obj_iv_set(mrb, binding, MRB_SYM(env), mrb_obj_value(env)); + + return mrb_obj_value(binding); +} + +/* + * call-seq: + * binding -> binding + * + * Returns a Binding object, describing the variable and method bindings + * at the point of call. This object can be used when calling eval to + * execute the evaluated command in this environment. + * + * def get_binding(param) + * binding + * end + * b = get_binding("hello") + * b.eval("param") #=> "hello" + */ +static mrb_noreturn void +caller_error(mrb_state *mrb) +{ + mrb_raise(mrb, E_RUNTIME_ERROR, "Cannot create Binding object for non-Ruby caller"); +} + +static mrb_value +mrb_f_binding(mrb_state *mrb, mrb_value self) +{ + struct RProc *proc; + struct REnv *env; + + if (mrb->c->ci->cci != 0) { + caller_error(mrb); } - ci->u.target_class = c; - return mrb_exec_irep(mrb, binding, proc); + proc = (struct RProc*)mrb_proc_get_caller(mrb, &env); + if (!env || MRB_PROC_CFUNC_P(proc)) { + caller_error(mrb); + } + return mrb_binding_new(mrb, proc, self, env); } void mrb_mruby_binding_gem_init(mrb_state *mrb) { - struct RClass *binding = mrb_class_get_id(mrb, MRB_SYM(Binding)); - mrb_define_method(mrb, binding, "eval", mrb_binding_eval, MRB_ARGS_ANY()); + struct RClass *binding = mrb_define_class_id(mrb, MRB_SYM(Binding), mrb->object_class); + MRB_SET_INSTANCE_TT(binding, MRB_TT_OBJECT); + MRB_UNDEF_ALLOCATOR(binding); + mrb_undef_class_method_id(mrb, binding, MRB_SYM(new)); + mrb_undef_class_method_id(mrb, binding, MRB_SYM(allocate)); + + mrb_define_private_method_id(mrb, mrb->kernel_module, MRB_SYM(binding), mrb_f_binding, MRB_ARGS_NONE()); + + mrb_define_private_method_id(mrb, binding, MRB_SYM(initialize_copy), binding_initialize_copy, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, binding, MRB_SYM_Q(local_variable_defined), binding_local_variable_defined_p, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, binding, MRB_SYM(local_variable_get), binding_local_variable_get, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, binding, MRB_SYM(local_variable_set), binding_local_variable_set, MRB_ARGS_REQ(2)); + mrb_define_method_id(mrb, binding, MRB_SYM(local_variables), binding_local_variables, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, binding, MRB_SYM(receiver), binding_receiver, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, binding, MRB_SYM(source_location), binding_source_location, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, binding, MRB_SYM(inspect), mrb_any_to_s, MRB_ARGS_NONE()); } void diff --git a/mrbgems/mruby-binding/test/binding.c b/mrbgems/mruby-binding/test/binding.c index efc982a758..e7dc5b08c0 100644 --- a/mrbgems/mruby-binding/test/binding.c +++ b/mrbgems/mruby-binding/test/binding.c @@ -1,5 +1,4 @@ #include -#include static mrb_value binding_in_c(mrb_state *mrb, mrb_value self) diff --git a/mrbgems/mruby-binding/test/binding.rb b/mrbgems/mruby-binding/test/binding.rb index bfae84c596..1f76b29fae 100644 --- a/mrbgems/mruby-binding/test/binding.rb +++ b/mrbgems/mruby-binding/test/binding.rb @@ -1,9 +1,5 @@ -assert("Binding#eval") do - b = nil - 1.times { x, y, z = 1, 2, 3; [x,y,z]; b = binding } - assert_equal([1, 2, 3], b.eval("[x, y, z]")) - here = self - assert_equal(here, b.eval("self")) +assert("Kernel.#binding") do + assert_kind_of Binding, binding end assert("Binding#local_variables") do @@ -11,18 +7,15 @@ b = 1 binding end - bind = block.call(0) - assert_equal [:a, :b, :bind, :block], bind.local_variables.sort - bind.eval("x = 2") - assert_equal [:a, :b, :bind, :block, :x], bind.local_variables.sort + assert_equal [:a, :b, :block], block.call(0).local_variables.sort end assert("Binding#local_variable_set") do bind = binding 1.times { assert_equal(9, bind.local_variable_set(:x, 9)) - assert_equal(9, bind.eval("x")) - assert_equal([:bind, :x], bind.eval("local_variables.sort")) + assert_raise(NameError) { x } + assert_equal([:bind, :x], bind.local_variables.sort) } end @@ -35,36 +28,37 @@ x = 10 assert_equal(10, bind.local_variable_get(:x)) assert_raise(NameError) { bind.local_variable_get(:y) } - bind.eval("z = 3") - assert_equal(3, bind.local_variable_get(:z)) - bind.eval("y = 5") - assert_equal(5, bind.local_variable_get(:y)) - assert_equal(2, y) + assert_equal([:bind, :x], bind.local_variables.sort) } end -assert "Kernel#binding and .eval from C" do - bind = binding_in_c - assert_equal 5, bind.eval("2 + 3") - assert_nothing_raised { bind.eval("self") } -end +assert("Binding#source_location") do + skip unless -> {}.source_location -assert "Binding#eval with Binding.new via UnboundMethod" do - assert_raise(NoMethodError) { Class.instance_method(:new).bind_call(Binding) } + bind, source_location = binding, [__FILE__, __LINE__] + assert_equal source_location, bind.source_location end -assert "Binding#eval with Binding.new via Method" do - # The following test is OK if SIGSEGV does not occur - cx = Class.new(Binding) - cx.define_singleton_method(:allocate, &Object.method(:allocate)) - Class.instance_method(:new).bind_call(cx).eval("") - - assert_true true +assert("Binding#dup") do + x = 5 + bind1 = binding + bind1.local_variable_set(:y, 10) + bind2 = bind1.dup + assert_equal 5, bind2.local_variable_get(:x) + assert_equal 10, bind2.local_variable_get(:y) + x = 50 + assert_equal 50, bind1.local_variable_get(:x) + assert_equal 50, bind2.local_variable_get(:x) + bind1.local_variable_set(:y, 20) + assert_equal 20, bind1.local_variable_get(:y) + assert_equal 20, bind2.local_variable_get(:y) + bind1.local_variable_set(:z, 30) + assert_raise(NameError) { bind2.local_variable_get(:z) } + bind2.local_variable_set(:z, 40) + assert_equal 30, bind1.local_variable_get(:z) + assert_equal 40, bind2.local_variable_get(:z) end -assert "access local variables into procs" do - bx = binding - block = bx.eval("a = 1; proc { a }") - bx.eval("a = 2") - assert_equal 2, block.call +assert "Kernel#binding and .eval from C" do + assert_raise(RuntimeError) { binding_in_c } end diff --git a/mrbgems/mruby-catch/README.md b/mrbgems/mruby-catch/README.md new file mode 100644 index 0000000000..4a7d365ef8 --- /dev/null +++ b/mrbgems/mruby-catch/README.md @@ -0,0 +1,94 @@ +# mruby-catch + +This mrbgem provides `catch` and `throw` functionality similar to what is available in standard Ruby. It allows for non-local exits from blocks of code. + +## `catch` + +The `catch` method is used to establish a block that can be exited prematurely using `throw`. + +```ruby +catch(tag) do |current_tag| + # ... code ... + if some_condition + throw(tag, return_value) + end + # ... more code ... +end +``` + +- **With a tag:** When `catch` is called with a `tag` (any Ruby object), it executes the block. If `throw` is called with the same `tag` from within this block (or any method called from within it), the `catch` block immediately exits and returns the value provided to `throw`. +- **Block completion:** If the block executes to completion without `throw` being called with a matching tag, the `catch` block returns the result of the last expression evaluated in the block. +- **No tag:** If `catch` is called without a tag, a new unique `Object` is created and used as the tag. This tag is passed as an argument to the block. + +```ruby +catch do |generated_tag| + # generated_tag is a new Object + throw(generated_tag, "hello") +end # => "hello" +``` + +## `throw` + +The `throw` method is used to initiate a non-local exit to a corresponding `catch` block. + +```ruby +throw(tag) +throw(tag, value) +``` + +- **With a tag and value:** `throw(tag, value)` jumps to the innermost active `catch` block that is waiting for `tag`. The `catch` block then returns `value`. +- **With only a tag:** `throw(tag)` is equivalent to `throw(tag, nil)`. +- **Uncaught throw:** If `throw` is called with a `tag` for which there is no matching `catch` block in the current call stack, an `UncaughtThrowError` is raised. + +## `UncaughtThrowError` + +This is a custom error class that inherits from `ArgumentError`. It is raised when `throw` is called for a tag that is not currently being caught. +It has two attributes: + +- `tag`: The tag that was thrown. +- `value`: The value that was thrown with the tag. + +## Example + +```ruby +def check_value(val) + puts "Checking: #{val}" + if val < 0 + throw(:negative_value, val) + elsif val == 0 + throw(:zero_value) # value will be nil + end + puts "#{val} is positive" + val * 2 +end + +result = catch(:negative_value) do + puts catch(:zero_value) do + puts check_value(10) + puts check_value(-5) # This will throw to :negative_value + puts check_value(0) # This would throw to :zero_value, but it's not reached + end + puts "This line is skipped if :zero_value is thrown." +end + +puts "Result: #{result}" + +# Output: +# Checking: 10 +# 10 is positive +# 20 +# Checking: -5 +# Result: -5 + +puts "--- Next example --- " + +result2 = catch do |tag_a| + catch do |tag_b| + puts "In tag_b block" + throw(tag_a, "Exited from A via B") + puts "This is not printed" + end + puts "This is not printed either" +end +puts result2 # => Exited from A via B +``` diff --git a/mrbgems/mruby-catch/mrblib/catch.rb b/mrbgems/mruby-catch/mrblib/catch.rb index 9a60a67a37..bfcfbc20c9 100644 --- a/mrbgems/mruby-catch/mrblib/catch.rb +++ b/mrbgems/mruby-catch/mrblib/catch.rb @@ -1,5 +1,26 @@ +# +# Exception raised when a throw is executed without a corresponding catch. +# This error contains the tag and value that were thrown. +# class UncaughtThrowError < ArgumentError - attr_reader :tag, :value + # The tag that was thrown + attr_reader :tag + # The value that was thrown with the tag + attr_reader :value + + # + # call-seq: + # UncaughtThrowError.new(tag, value) -> exception + # + # Creates a new UncaughtThrowError with the given tag and value. + # The tag is the symbol or object that was thrown, and value is + # the associated value. + # + # error = UncaughtThrowError.new(:done, "finished") + # error.tag #=> :done + # error.value #=> "finished" + # error.message #=> "uncaught throw :done" + # def initialize(tag, value) @tag = tag @value = value diff --git a/mrbgems/mruby-catch/src/catch.c b/mrbgems/mruby-catch/src/catch.c index 048a44738e..4acbd822c0 100644 --- a/mrbgems/mruby-catch/src/catch.c +++ b/mrbgems/mruby-catch/src/catch.c @@ -4,120 +4,144 @@ #include #include #include -#include - - -MRB_PRESYM_DEFINE_VAR_AND_INITER(catch_syms_3, 1, MRB_SYM(call)) -static const mrb_code catch_iseq_3[18] = { - OP_ENTER, 0x00, 0x00, 0x00, - OP_GETUPVAR, 0x02, 0x02, 0x01, - OP_GETUPVAR, 0x03, 0x01, 0x01, - OP_SEND, 0x02, 0x00, 0x01, - OP_RETURN, 0x02,}; -static const mrb_irep catch_irep_3 = { - 2,5,0, - MRB_IREP_STATIC,catch_iseq_3, - NULL,catch_syms_3,NULL, - NULL, - NULL, - 18,0,1,0,0 -}; -static const mrb_irep *catch_reps_2[1] = { - &catch_irep_3, -}; -static const mrb_code catch_iseq_2[13] = { - OP_ENTER, 0x00, 0x00, 0x00, - OP_LAMBDA, 0x02, 0x00, - OP_SEND, 0x02, 0x00, 0x00, - OP_RETURN, 0x02,}; -static const mrb_irep catch_irep_2 = { - 2,4,0, - MRB_IREP_STATIC,catch_iseq_2, - NULL,catch_syms_3,catch_reps_2, - NULL, - NULL, - 13,0,1,1,0 -}; -static const mrb_irep *catch_reps_1[1] = { - &catch_irep_2, + +/* Pre-defined symbols used by catch implementation */ +MRB_PRESYM_DEFINE_VAR_AND_INITER(catch_syms, 3, MRB_SYM(Object), MRB_SYM(new), MRB_SYM(call)) + +/* + * Bytecode implementation of catch method: + * def catch(r1 = Object.new, &r2) + * r2.call(r1) + * end + * + * This creates a default tag (Object.new) if none provided, then calls + * the block with the tag as argument. + */ +static const mrb_code catch_iseq[] = { + OP_ENTER, 0x00, 0x20, 0x01, // 000 ENTER 0:1:0:0:0:0:1 (0x2001) + OP_JMP, 0x00, 0x06, // 004 JMP 013 + + // copy for block parameter "tag" when method argument are given + OP_MOVE, 0x03, 0x01, // 007 MOVE R3 R1 + OP_JMP, 0x00, 0x0a, // 010 JMP 023 + + // create a tag for default parameter + OP_GETCONST, 0x03, 0x00, // 013 GETCONST R3 Object + OP_SEND, 0x03, 0x01, 0x00, // 016 SEND R3 :new n=0 + OP_MOVE, 0x01, 0x03, // 020 MOVE R1 R3 + + // to save on the stack, block variables are used as is + OP_SEND, 0x02, 0x02, 0x01, // 023 SEND R2 :call n=1 + OP_RETURN, 0x02, // 027 RETURN R2 }; -MRB_PRESYM_DEFINE_VAR_AND_INITER(catch_syms_1, 3, MRB_SYM(Object), MRB_SYM(new), MRB_SYM(call)) -static const mrb_code catch_iseq_1[29] = { - OP_ENTER, 0x00, 0x20, 0x01, - OP_JMP, 0x00, 0x03, - OP_JMP, 0x00, 0x0a, - OP_GETCONST, 0x03, 0x00, - OP_SEND, 0x03, 0x01, 0x00, - OP_MOVE, 0x01, 0x03, - OP_LAMBDA, 0x03, 0x00, - OP_SEND, 0x03, 0x02, 0x00, - OP_RETURN, 0x03,}; + +/* Instruction representation for catch method bytecode */ static const mrb_irep catch_irep = { 3,5,0, - MRB_IREP_STATIC,catch_iseq_1, - NULL,catch_syms_1,catch_reps_1, + MRB_IREP_STATIC,catch_iseq, + NULL,catch_syms,NULL, NULL, NULL, - 29,0,3,1,0 + sizeof(catch_iseq),0,3,0,0 }; +/* Procedure object for catch method - used to identify catch blocks in call stack */ +mrb_alignas(8) static const struct RProc catch_proc = { - NULL, NULL, MRB_TT_PROC, MRB_GC_RED, MRB_FL_OBJ_IS_FROZEN | MRB_PROC_SCOPE | MRB_PROC_STRICT, + NULL, MRB_TT_PROC, MRB_GC_RED, MRB_OBJ_IS_FROZEN, MRB_PROC_SCOPE | MRB_PROC_STRICT, { &catch_irep }, NULL, { NULL } }; -static const mrb_callinfo * +/* Helper function to find a matching catch block in the call stack */ +static size_t find_catcher(mrb_state *mrb, mrb_value tag) { - const mrb_callinfo *ci = mrb->c->ci; - size_t n = ci - mrb->c->cibase; - ci--; + const mrb_callinfo *ci = mrb->c->ci - 1; // skip oneself throw + ptrdiff_t n = ci - mrb->c->cibase; + for (; n > 0; n--, ci--) { const mrb_value *arg1 = ci->stack + 1; if (ci->proc == &catch_proc && mrb_obj_eq(mrb, *arg1, tag)) { - return ci; + return (uintptr_t)n; } } - return NULL; + return 0; } +/* + * call-seq: + * throw(tag) -> obj + * throw(tag, obj) -> obj + * + * Transfers control to the end of the active catch block waiting for tag. + * Raises UncaughtThrowError if there is no catch block for the tag. The + * optional second parameter supplies a return value for the catch block, + * which otherwise defaults to nil. + * + * def routine(n) + * puts n + * throw :done if n <= 0 + * routine(n-1) + * end + * + * catch(:done) { routine(3) } + * 3 + * 2 + * 1 + * 0 + * + * catch(:done) { throw :done, "hello" } #=> "hello" + */ static mrb_value -mrb_f_throw(mrb_state *mrb, mrb_value self) +throw_m(mrb_state *mrb, mrb_value self) { mrb_value tag, obj; if (mrb_get_args(mrb, "o|o", &tag, &obj) == 1) { obj = mrb_nil_value(); } - const mrb_callinfo *ci = find_catcher(mrb, tag); - if (ci) { - struct RBreak *b = MRB_OBJ_ALLOC(mrb, MRB_TT_BREAK, NULL); - mrb_break_value_set(b, obj); - mrb_break_proc_set(b, ci[2].proc); /* Back to the closure in `catch` method */ - mrb_exc_raise(mrb, mrb_obj_value(b)); - } - else { + uintptr_t ci_index = find_catcher(mrb, tag); + if (ci_index == 0) { mrb_value argv[2] = {tag, obj}; mrb_exc_raise(mrb, mrb_obj_new(mrb, mrb_exc_get_id(mrb, MRB_ERROR_SYM(UncaughtThrowError)), 2, argv)); } + struct RBreak *b = MRB_OBJ_ALLOC(mrb, MRB_TT_BREAK, NULL); + mrb_break_value_set(b, obj); + b->ci_break_index = ci_index; /* Back to the caller directly */ + mrb_exc_raise(mrb, mrb_obj_value(b)); /* not reached */ return mrb_nil_value(); } +/* + * Initializes the mruby-catch gem by defining catch and throw methods. + * + * - catch: defined using the pre-compiled bytecode procedure for efficiency, + * marked as private method in Kernel module + * - throw: defined as a regular C method that searches for matching catch blocks, + * also marked as private method in Kernel module + * + * Both methods are added to the Kernel module, making them available globally + * as private methods that can be called without a receiver. + */ void mrb_mruby_catch_gem_init(mrb_state *mrb) { mrb_method_t m; - MRB_PRESYM_INIT_SYMBOLS(mrb, catch_syms_3); - MRB_PRESYM_INIT_SYMBOLS(mrb, catch_syms_1); + MRB_PRESYM_INIT_SYMBOLS(mrb, catch_syms); MRB_METHOD_FROM_PROC(m, &catch_proc); + m.flags |= MRB_METHOD_PRIVATE_FL; mrb_define_method_raw(mrb, mrb->kernel_module, MRB_SYM(catch), m); - mrb_define_method(mrb, mrb->kernel_module, "throw", mrb_f_throw, MRB_ARGS_ARG(1,1)); + mrb_define_private_method_id(mrb, mrb->kernel_module, MRB_SYM(throw), throw_m, MRB_ARGS_ARG(1,1)); } +/* + * Finalizes the mruby-catch gem. Currently no cleanup is required + * as the catch/throw implementation uses static data structures. + */ void mrb_mruby_catch_gem_final(mrb_state *mrb) { diff --git a/mrbgems/mruby-catch/test/catch.rb b/mrbgems/mruby-catch/test/catch.rb index 38a4eb9073..418f1bdb62 100644 --- a/mrbgems/mruby-catch/test/catch.rb +++ b/mrbgems/mruby-catch/test/catch.rb @@ -1,6 +1,6 @@ assert "return throw value" do val = ["val"] - result = catch :foo do + result = catch(:foo) do loop do loop do begin @@ -22,7 +22,7 @@ end assert "no throw value" do - result = catch :foo do + result = catch(:foo) do throw :foo 1 end diff --git a/mrbgems/mruby-class-ext/README.md b/mrbgems/mruby-class-ext/README.md new file mode 100644 index 0000000000..38b784409f --- /dev/null +++ b/mrbgems/mruby-class-ext/README.md @@ -0,0 +1,81 @@ +# mruby-class-ext + +This mrbgem extends the `Module` and `Class` classes in mruby, providing additional methods for reflection and class manipulation. + +## Module Methods + +### `mod < other` + +Returns `true` if `mod` is a subclass/submodule of `other`. Returns `false` if `mod` is the same as `other`. Returns `nil` if there's no relationship between the two. (Think of the relationship in terms of the class definition: "class A < B" implies "A < B".) + +### `mod <= other` + +Returns `true` if `mod` is a subclass/submodule of `other` or is the same as `other`. Returns `nil` if there's no relationship between the two. (Think of the relationship in terms of the class definition: "class A < B" implies "A <= B".) + +### `mod > other` + +Returns `true` if `mod` is an ancestor of `other`. Returns `false` if `mod` is the same as `other`. Returns `nil` if there's no relationship between the two. (Think of the relationship in terms of the class definition: "class A < B" implies "B > A".) + +### `mod >= other` + +Returns `true` if `mod` is an ancestor of `other`, or the two modules are the same. Returns `nil` if there's no relationship between the two. (Think of the relationship in terms of the class definition: "class A < B" implies "B >= A".) + +### `module <=> other_module` + +Comparison---Returns -1, 0, +1 or `nil` depending on whether `module` includes `other_module`, they are the same, or if `module` is included by `other_module`. + +Returns `nil` if `module` has no relationship with `other_module`, if `other_module` is not a module, or if the two values are incomparable. + +### `name` + +Returns the name of the module. + +### `singleton_class?` + +Returns `true` if the module is a singleton class. + +### `module_exec(arg...) {|var...| block } -> obj` + +### `class_exec(arg...) {|var...| block } -> obj` + +Evaluates the given block in the context of the class/module. The method defined in the block will belong to the receiver. Any arguments passed to the method will be passed to the block. This can be used if the block needs to access instance variables. + +```ruby +class Thing +end +Thing.class_exec{ + def hello() "Hello there!" end +} +puts Thing.new.hello() +``` + +## Class Methods + +### `subclasses -> array` + +Returns an array of classes where the receiver is the direct superclass of the class, excluding singleton classes. The order of the returned array is not defined. + +```ruby +class A; end +class B < A; end +class C < B; end +class D < A; end + +A.subclasses #=> [D, B] +B.subclasses #=> [C] +C.subclasses #=> [] +``` + +### `attached_object -> object` + +Returns the object for which the receiver is the singleton class. Raises a `TypeError` if the class is not a singleton class. + +```ruby +class Foo; end + +Foo.singleton_class.attached_object #=> Foo +Foo.attached_object #=> TypeError: not a singleton class +Foo.new.singleton_class.attached_object #=> # +TrueClass.attached_object #=> TypeError: not a singleton class +NilClass.attached_object #=> TypeError: not a singleton class +``` diff --git a/mrbgems/mruby-class-ext/mrblib/module.rb b/mrbgems/mruby-class-ext/mrblib/module.rb deleted file mode 100644 index 3015851873..0000000000 --- a/mrbgems/mruby-class-ext/mrblib/module.rb +++ /dev/null @@ -1,89 +0,0 @@ -class Module - - ## - # call-seq: - # mod < other -> true, false, or nil - # - # Returns true if `mod` is a subclass of `other`. Returns - # nil if there's no relationship between the two. - # (Think of the relationship in terms of the class definition: - # "class A < B" implies "A < B".) - # - def <(other) - if self.equal?(other) - false - else - self <= other - end - end - - ## - # call-seq: - # mod <= other -> true, false, or nil - # - # Returns true if `mod` is a subclass of `other` or - # is the same as `other`. Returns - # nil if there's no relationship between the two. - # (Think of the relationship in terms of the class definition: - # "class A < B" implies "A < B".) - def <=(other) - raise TypeError, 'compared with non class/module' unless other.is_a?(Module) - if self.ancestors.include?(other) - return true - elsif other.ancestors.include?(self) - return false - end - end - - ## - # call-seq: - # mod > other -> true, false, or nil - # - # Returns true if `mod` is an ancestor of `other`. Returns - # nil if there's no relationship between the two. - # (Think of the relationship in terms of the class definition: - # "class A < B" implies "B > A".) - # - def >(other) - if self.equal?(other) - false - else - self >= other - end - end - - ## - # call-seq: - # mod >= other -> true, false, or nil - # - # Returns true if `mod` is an ancestor of `other`, or the - # two modules are the same. Returns - # nil if there's no relationship between the two. - # (Think of the relationship in terms of the class definition: - # "class A < B" implies "B > A".) - # - def >=(other) - raise TypeError, 'compared with non class/module' unless other.is_a?(Module) - return other < self - end - - ## - # call-seq: - # module <=> other_module -> -1, 0, +1, or nil - # - # Comparison---Returns -1, 0, +1 or nil depending on whether `module` - # includes `other_module`, they are the same, or if `module` is included by - # `other_module`. - # - # Returns `nil` if `module` has no relationship with `other_module`, if - # `other_module` is not a module, or if the two values are incomparable. - # - def <=>(other) - return 0 if self.equal?(other) - return nil unless other.is_a?(Module) - cmp = self < other - return -1 if cmp - return 1 unless cmp.nil? - return nil - end -end diff --git a/mrbgems/mruby-class-ext/src/class.c b/mrbgems/mruby-class-ext/src/class.c index 373af83128..13c9653a99 100644 --- a/mrbgems/mruby-class-ext/src/class.c +++ b/mrbgems/mruby-class-ext/src/class.c @@ -1,21 +1,48 @@ -#include "mruby.h" -#include "mruby/class.h" -#include "mruby/string.h" -#include "mruby/array.h" -#include "mruby/proc.h" +#include +#include +#include +#include +#include +#include +#include +/* + * Get the name of a module/class. + * + * Returns the fully qualified name of the module/class as a frozen string. + * If the module/class is anonymous, returns nil. + * + * Args: + * mrb: The mruby state + * self: The module/class object + * + * Returns: + * String: The name of the module/class (frozen) + * nil: If the module/class is anonymous + */ static mrb_value -mrb_mod_name(mrb_state *mrb, mrb_value self) +mod_name(mrb_state *mrb, mrb_value self) { mrb_value name = mrb_class_path(mrb, mrb_class_ptr(self)); if (mrb_string_p(name)) { - MRB_SET_FROZEN_FLAG(mrb_basic_ptr(name)); + mrb_basic_ptr(name)->frozen = 1; } return name; } +/* + * Check if a module/class is a singleton class. + * + * Args: + * mrb: The mruby state + * self: The module/class object to check + * + * Returns: + * true: if the object is a singleton class + * false: if the object is not a singleton class + */ static mrb_value -mrb_mod_singleton_class_p(mrb_state *mrb, mrb_value self) +mod_singleton_class_p(mrb_state *mrb, mrb_value self) { return mrb_bool_value(mrb_sclass_p(self)); } @@ -40,28 +67,31 @@ mrb_mod_singleton_class_p(mrb_state *mrb, mrb_value self) */ static mrb_value -mrb_mod_module_exec(mrb_state *mrb, mrb_value self) +mod_module_exec(mrb_state *mrb, mrb_value self) { - const mrb_value *argv; - mrb_int argc; - mrb_value blk; - struct RClass *c; - - mrb_get_args(mrb, "*&!", &argv, &argc, &blk); - - c = mrb_class_ptr(self); - if (mrb->c->ci->cci > 0) { - return mrb_yield_with_class(mrb, blk, argc, argv, self, c); - } - mrb_vm_ci_target_class_set(mrb->c->ci, c); - return mrb_yield_cont(mrb, blk, self, argc, argv); + return mrb_object_exec(mrb, self, mrb_class_ptr(self)); } +/* Helper structure for subclass enumeration */ struct subclass_args { - struct RClass *c; - mrb_value ary; + struct RClass *c; /* The parent class to find subclasses of */ + mrb_value ary; /* Array to collect subclasses into */ }; +/* + * Callback function for mrb_objspace_each_objects to find direct subclasses. + * + * This function is called for each object in the object space. It checks if + * the object is a class whose direct superclass matches the target class. + * + * Args: + * mrb: The mruby state + * obj: The current object being examined + * data: Pointer to subclass_args structure + * + * Returns: + * MRB_EACH_OBJ_OK: Continue iteration + */ static int add_subclasses(mrb_state *mrb, struct RBasic *obj, void *data) { @@ -93,13 +123,10 @@ add_subclasses(mrb_state *mrb, struct RBasic *obj, void *data) * C.subclasses #=> [] */ static mrb_value -mrb_class_subclasses(mrb_state *mrb, mrb_value self) +class_subclasses(mrb_state *mrb, mrb_value self) { - struct RClass *c; - mrb_value ary; - - c = mrb_class_ptr(self); - ary = mrb_ary_new(mrb); + struct RClass *c = mrb_class_ptr(self); + mrb_value ary = mrb_ary_new(mrb); if (c->flags & MRB_FL_CLASS_IS_INHERITED) { struct subclass_args arg = {c, ary}; @@ -108,18 +135,239 @@ mrb_class_subclasses(mrb_state *mrb, mrb_value self) return ary; } +/* + * call-seq: + * attached_object -> object + * + * Returns the object for which the receiver is the singleton class. + * Raises an TypeError if the class is not a singleton class. + * + * class Foo; end + * + * Foo.singleton_class.attached_object #=> Foo + * Foo.attached_object #=> TypeError: not a singleton class + * Foo.new.singleton_class.attached_object #=> # + * TrueClass.attached_object #=> TypeError: not a singleton class + * NilClass.attached_object #=> TypeError: not a singleton class + */ +static mrb_value +class_attached_object(mrb_state *mrb, mrb_value self) +{ + struct RClass *c = mrb_class_ptr(self); + if (c->tt != MRB_TT_SCLASS) { + mrb_raise(mrb, E_TYPE_ERROR, "not a singleton class"); + } + return mrb_obj_iv_get(mrb, (struct RObject*)c, MRB_SYM(__attached__)); +} + +/* + * Check if a class/module is an ancestor of another. + * + * This function traverses the inheritance chain of `klass` upwards to determine + * if `super` appears anywhere in the hierarchy. It handles both regular classes/modules + * and included classes (ICLASS) which represent modules included in the inheritance chain. + * + * Args: + * klass: The class/module to check (potential descendant) + * super: The class/module to search for (potential ancestor) + * + * Returns: + * true: if `super` is found in `klass`'s inheritance chain + * false: if `super` is not an ancestor of `klass` + */ +static mrb_bool +is_ancestor(struct RClass *klass, struct RClass *super) +{ + struct RClass *c = klass; + while (c) { + if (c->tt == MRB_TT_ICLASS) { + if (c->c == super) return TRUE; + } + else { + if (c == super) return TRUE; + } + c = c->super; + } + return FALSE; +} + +/* + * Compare hierarchy relationship between two modules/classes. + * + * This function determines the ancestor relationship between `self` and `other`. + * It checks if one is an ancestor of the other by traversing the inheritance chain. + * + * Args: + * mrb: The mruby state + * self: The first module/class to compare + * other: The second module/class to compare + * + * Returns: + * true: if `self` is an ancestor of `other` (self > other) + * false: if `other` is an ancestor of `self` (self < other) + * nil: if there's no inheritance relationship between them + * + * Raises: + * TypeError: if `other` is not a class, module, or included class + */ +static mrb_value +mod_compare_hierarchy(mrb_state *mrb, mrb_value self, mrb_value other) +{ + if ((!mrb_class_p(self) && !mrb_module_p(self) && !mrb_iclass_p(self)) || + (!mrb_class_p(other) && !mrb_module_p(other) && !mrb_iclass_p(other))) { + mrb_raise(mrb, E_TYPE_ERROR, "compared with non class/module"); + } + + struct RClass *self_c = mrb_class_ptr(self); + struct RClass *other_c = mrb_class_ptr(other); + + if (is_ancestor(self_c, other_c)) { + return mrb_true_value(); + } + if (is_ancestor(other_c, self_c)) { + return mrb_false_value(); + } + return mrb_nil_value(); +} + +/* + * call-seq: + * mod <= other -> true, false, or nil + * + * Returns true if mod is a subclass of other or is the same as other. + * Returns nil if there's no relationship between the two. + */ +static mrb_value +mrb_mod_le(mrb_state *mrb, mrb_value self) +{ + mrb_value other; + mrb_get_args(mrb, "o", &other); + return mod_compare_hierarchy(mrb, self, other); +} + +/* + * call-seq: + * mod < other -> true, false, or nil + * + * Returns true if mod is a subclass of other. Returns false if mod + * is the same as other. Returns nil if there's no relationship between the two. + */ +static mrb_value +mrb_mod_lt(mrb_state *mrb, mrb_value self) +{ + mrb_value other; + mrb_get_args(mrb, "o", &other); + if (mrb_obj_equal(mrb, self, other)) { + return mrb_false_value(); + } + return mod_compare_hierarchy(mrb, self, other); +} + +/* + * call-seq: + * mod >= other -> true, false, or nil + * + * Returns true if mod is an ancestor of other, or the two modules are the same. + * Returns nil if there's no relationship between the two. + */ +static mrb_value +mrb_mod_ge(mrb_state *mrb, mrb_value self) +{ + mrb_value other; + mrb_get_args(mrb, "o", &other); + return mod_compare_hierarchy(mrb, other, self); +} + +/* + * call-seq: + * mod > other -> true, false, or nil + * + * Returns true if mod is an ancestor of other. Returns false if mod + * is the same as other. Returns nil if there's no relationship between the two. + */ +static mrb_value +mrb_mod_gt(mrb_state *mrb, mrb_value self) +{ + mrb_value other; + mrb_get_args(mrb, "o", &other); + if (mrb_obj_equal(mrb, self, other)) { + return mrb_false_value(); + } + return mod_compare_hierarchy(mrb, other, self); +} + +/* + * call-seq: + * module <=> other_module -> -1, 0, +1, or nil + * + * Comparison - Returns -1, 0, +1 or nil depending on whether module + * includes other_module, they are the same, or if module is included by + * other_module. + * + * Returns nil if module has no relationship with other_module, if + * other_module is not a module, or if the two values are incomparable. + */ +static mrb_value +mrb_mod_cmp(mrb_state *mrb, mrb_value self) +{ + mrb_value other; + mrb_get_args(mrb, "o", &other); + + if (mrb_obj_equal(mrb, self, other)) { + return mrb_fixnum_value(0); + } + if (!mrb_class_p(other) && !mrb_module_p(other) && !mrb_iclass_p(other)) { + return mrb_nil_value(); + } + + mrb_value cmp = mod_compare_hierarchy(mrb, self, other); + + if (mrb_true_p(cmp)) { + return mrb_fixnum_value(-1); + } + else if (mrb_false_p(cmp)) { + return mrb_fixnum_value(1); + } + else { + return mrb_nil_value(); + } +} + +/* ---------------------------*/ +static const mrb_mt_entry mod_ext_rom_entries[] = { + MRB_MT_ENTRY(mrb_mod_lt, MRB_OPSYM(lt), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(mrb_mod_le, MRB_OPSYM(le), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(mrb_mod_cmp, MRB_OPSYM(cmp), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(mrb_mod_gt, MRB_OPSYM(gt), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(mrb_mod_ge, MRB_OPSYM(ge), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(mod_module_exec, MRB_SYM(class_exec), MRB_ARGS_ANY()|MRB_ARGS_BLOCK()), + MRB_MT_ENTRY(mod_module_exec, MRB_SYM(module_exec), MRB_ARGS_ANY()|MRB_ARGS_BLOCK()), + MRB_MT_ENTRY(mod_name, MRB_SYM(name), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mod_singleton_class_p, MRB_SYM_Q(singleton_class), MRB_ARGS_NONE()), +}; + +static const mrb_mt_entry cls_ext_rom_entries[] = { + MRB_MT_ENTRY(class_attached_object, MRB_SYM(attached_object), MRB_ARGS_NONE()), + MRB_MT_ENTRY(class_subclasses, MRB_SYM(subclasses), MRB_ARGS_NONE()), +}; + +/* + * Initialize the mruby-class-ext gem. + * + * This function registers all the extension methods to the Module and Class classes. + * It's called automatically when the gem is loaded. + * + * Args: + * mrb: The mruby state + */ void mrb_mruby_class_ext_gem_init(mrb_state *mrb) { struct RClass *mod = mrb->module_class; + struct RClass *cls = mrb->class_class; - mrb_define_method(mrb, mod, "name", mrb_mod_name, MRB_ARGS_NONE()); - mrb_define_method(mrb, mod, "singleton_class?", mrb_mod_singleton_class_p, MRB_ARGS_NONE()); - mrb_define_method(mrb, mod, "module_exec", mrb_mod_module_exec, MRB_ARGS_ANY()|MRB_ARGS_BLOCK()); - mrb_define_method(mrb, mod, "class_exec", mrb_mod_module_exec, MRB_ARGS_ANY()|MRB_ARGS_BLOCK()); - - struct RClass *cls = mrb->module_class; - mrb_define_method(mrb, cls, "subclasses", mrb_class_subclasses, MRB_ARGS_NONE()); + MRB_MT_INIT_ROM(mrb, mod, mod_ext_rom_entries); + MRB_MT_INIT_ROM(mrb, cls, cls_ext_rom_entries); } void diff --git a/mrbgems/mruby-class-ext/test/class.rb b/mrbgems/mruby-class-ext/test/class.rb index 2043b39cb5..65b16bb286 100644 --- a/mrbgems/mruby-class-ext/test/class.rb +++ b/mrbgems/mruby-class-ext/test/class.rb @@ -11,3 +11,36 @@ assert_equal([c], b.subclasses) assert_equal([], c.subclasses) end + +assert 'Class#attached_object' do + foo = Class.new + foo_s = class < 'mruby-complex' end diff --git a/mrbgems/mruby-cmath/src/cmath.c b/mrbgems/mruby-cmath/src/cmath.c index de59adf96c..e89ec285a6 100644 --- a/mrbgems/mruby-cmath/src/cmath.c +++ b/mrbgems/mruby-cmath/src/cmath.c @@ -33,7 +33,7 @@ cmath_get_complex(mrb_state *mrb, mrb_value c, mrb_float *r, mrb_float *i) *i = 0; return FALSE; } - else if (mrb_obj_is_kind_of(mrb, c, mrb_class_get(mrb, "Complex"))) { + else if (mrb_type(c) == MRB_TT_COMPLEX) { mrb_complex_get(mrb, c, r, i); return TRUE; } @@ -76,13 +76,13 @@ CXDIVc(mrb_complex a, mrb_complex b) if ((abi = cimag(b)) < 0) abi = - abi; if (abr <= abi) { - ratio = creal(b) / cimag(b) ; + ratio = creal(b) / cimag(b); den = cimag(a) * (1 + ratio*ratio); cr = (creal(a)*ratio + cimag(a)) / den; ci = (cimag(a)*ratio - creal(a)) / den; } else { - ratio = cimag(b) / creal(b) ; + ratio = cimag(b) / creal(b); den = creal(a) * (1 + ratio*ratio); cr = (creal(a) + cimag(a)*ratio) / den; ci = (cimag(a) - creal(a)*ratio) / den; @@ -92,7 +92,9 @@ CXDIVc(mrb_complex a, mrb_complex b) #else -#if defined(__cplusplus) && (defined(__APPLE__) || (defined(__clang__) && (defined(__FreeBSD__) || defined(__OpenBSD__)))) +#if defined(__cplusplus) && \ + (defined(__APPLE__) || defined(__EMSCRIPTEN__) || \ + (defined(__clang__) && (defined(__FreeBSD__) || defined(__OpenBSD__)))) #ifdef MRB_USE_FLOAT32 typedef std::complex mrb_complex; @@ -139,10 +141,32 @@ cmath_ ## name(mrb_state *mrb, mrb_value self)\ return mrb_float_value(mrb, F(name)(real));\ } -/* exp(z): return the exponential of z */ +/* + * call-seq: + * CMath.exp(z) -> numeric + * + * Returns the exponential of `z`. + * If `z` is a complex number, returns a complex result. + * If `z` is real and positive, returns a float. + * + * CMath.exp(1) #=> 2.718281828459045 + * CMath.exp(1+1i) #=> (1.4686939399158851+2.2873552871788423i) + */ DEF_CMATH_METHOD(exp) -/* log(z): return the natural logarithm of z, with branch cut along the negative real axis */ +/* + * call-seq: + * CMath.log(z) -> numeric + * CMath.log(z, base) -> numeric + * + * Returns the natural logarithm of `z`. + * If a second argument `base` is given, returns the logarithm of `z` to the given base. + * Has a branch cut along the negative real axis. + * + * CMath.log(1) #=> 0.0 + * CMath.log(-1) #=> (0.0+3.141592653589793i) + * CMath.log(8, 2) #=> 3.0 + */ static mrb_value cmath_log(mrb_state *mrb, mrb_value self) { mrb_value z; @@ -166,7 +190,16 @@ cmath_log(mrb_state *mrb, mrb_value self) { return mrb_float_value(mrb, F(log)(real)/F(log)(base)); } -/* log10(z): return the base-10 logarithm of z, with branch cut along the negative real axis */ +/* + * call-seq: + * CMath.log10(z) -> numeric + * + * Returns the base-10 logarithm of `z`. + * Has a branch cut along the negative real axis. + * + * CMath.log10(100) #=> 2.0 + * CMath.log10(-1) #=> (0.0+1.3643763538418412i) + */ static mrb_value cmath_log10(mrb_state *mrb, mrb_value self) { mrb_value z = mrb_get_arg1(mrb); @@ -179,7 +212,16 @@ cmath_log10(mrb_state *mrb, mrb_value self) { return mrb_float_value(mrb, F(log10)(real)); } -/* log2(z): return the base-2 logarithm of z, with branch cut along the negative real axis */ +/* + * call-seq: + * CMath.log2(z) -> numeric + * + * Returns the base-2 logarithm of `z`. + * Has a branch cut along the negative real axis. + * + * CMath.log2(8) #=> 3.0 + * CMath.log2(-1) #=> (0.0+4.532360141827194i) + */ static mrb_value cmath_log2(mrb_state *mrb, mrb_value self) { mrb_value z = mrb_get_arg1(mrb); @@ -192,7 +234,16 @@ cmath_log2(mrb_state *mrb, mrb_value self) { return mrb_float_value(mrb, F(log2)(real)); } -/* sqrt(z): return square root of z */ +/* + * call-seq: + * CMath.sqrt(z) -> numeric + * + * Returns the square root of `z`. + * Has a branch cut along the negative real axis. + * + * CMath.sqrt(4) #=> 2.0 + * CMath.sqrt(-1) #=> (0.0+1.0i) + */ static mrb_value cmath_sqrt(mrb_state *mrb, mrb_value self) { mrb_value z = mrb_get_arg1(mrb); @@ -205,29 +256,135 @@ cmath_sqrt(mrb_state *mrb, mrb_value self) { return mrb_float_value(mrb, F(sqrt)(real)); } -/* sin(z): sine function */ +/* + * call-seq: + * CMath.sin(z) -> numeric + * + * Returns the sine of `z`. + * + * CMath.sin(0) #=> 0.0 + * CMath.sin(1i) #=> (0.0+1.1752011936438014i) + */ DEF_CMATH_METHOD(sin) -/* cos(z): cosine function */ + +/* + * call-seq: + * CMath.cos(z) -> numeric + * + * Returns the cosine of `z`. + * + * CMath.cos(0) #=> 1.0 + * CMath.cos(1i) #=> (1.5430806348152437+0.0i) + */ DEF_CMATH_METHOD(cos) -/* tan(z): tangent function */ + +/* + * call-seq: + * CMath.tan(z) -> numeric + * + * Returns the tangent of `z`. + * + * CMath.tan(0) #=> 0.0 + * CMath.tan(1i) #=> (0.0+0.7615941559557649i) + */ DEF_CMATH_METHOD(tan) -/* asin(z): arc sine function */ +/* + * call-seq: + * CMath.asin(z) -> numeric + * + * Returns the arc sine of `z`. + * + * CMath.asin(0) #=> 0.0 + * CMath.asin(2) #=> (1.5707963267948966-1.3169578969248166i) + */ DEF_CMATH_METHOD(asin) -/* acos(z): arc cosine function */ + +/* + * call-seq: + * CMath.acos(z) -> numeric + * + * Returns the arc cosine of `z`. + * + * CMath.acos(1) #=> 0.0 + * CMath.acos(2) #=> (0.0+1.3169578969248166i) + */ DEF_CMATH_METHOD(acos) -/* atan(z): arg tangent function */ + +/* + * call-seq: + * CMath.atan(z) -> numeric + * + * Returns the arc tangent of `z`. + * + * CMath.atan(0) #=> 0.0 + * CMath.atan(1i) #=> (0.0+Infinity*i) + */ DEF_CMATH_METHOD(atan) -/* sinh(z): hyperbolic sine function */ +/* + * call-seq: + * CMath.sinh(z) -> numeric + * + * Returns the hyperbolic sine of `z`. + * + * CMath.sinh(0) #=> 0.0 + * CMath.sinh(1i) #=> (0.0+0.8414709848078965i) + */ DEF_CMATH_METHOD(sinh) -/* cosh(z): hyperbolic cosine function */ + +/* + * call-seq: + * CMath.cosh(z) -> numeric + * + * Returns the hyperbolic cosine of `z`. + * + * CMath.cosh(0) #=> 1.0 + * CMath.cosh(1i) #=> (0.5403023058681398+0.0i) + */ DEF_CMATH_METHOD(cosh) -/* tanh(z): hyperbolic tangent function */ + +/* + * call-seq: + * CMath.tanh(z) -> numeric + * + * Returns the hyperbolic tangent of `z`. + * + * CMath.tanh(0) #=> 0.0 + * CMath.tanh(1i) #=> (0.0+1.557407724654902i) + */ DEF_CMATH_METHOD(tanh) -/* asinh(z): inverse hyperbolic sine function */ +/* + * call-seq: + * CMath.asinh(z) -> numeric + * + * Returns the inverse hyperbolic sine of `z`. + * + * CMath.asinh(0) #=> 0.0 + * CMath.asinh(1i) #=> (0.0+1.5707963267948966i) + */ DEF_CMATH_METHOD(asinh) -/* acosh(z): inverse hyperbolic cosine function */ + +/* + * call-seq: + * CMath.acosh(z) -> numeric + * + * Returns the inverse hyperbolic cosine of `z`. + * Has a branch cut at values less than 1. + * + * CMath.acosh(1) #=> 0.0 + * CMath.acosh(0) #=> (0.0+1.5707963267948966i) + */ DEF_CMATH_METHOD(acosh) -/* atanh(z): inverse hyperbolic tangent function */ + +/* + * call-seq: + * CMath.atanh(z) -> numeric + * + * Returns the inverse hyperbolic tangent of `z`. + * Has branch cuts at values less than -1 and greater than 1. + * + * CMath.atanh(0) #=> 0.0 + * CMath.atanh(2) #=> (0.5493061443340549+1.5707963267948966i) + */ DEF_CMATH_METHOD(atanh) /* ------------------------------------------------------------------------*/ @@ -235,32 +392,31 @@ DEF_CMATH_METHOD(atanh) void mrb_mruby_cmath_gem_init(mrb_state* mrb) { - struct RClass *cmath; - cmath = mrb_define_module(mrb, "CMath"); + struct RClass *cmath = mrb_define_module_id(mrb, MRB_SYM(CMath)); mrb_include_module(mrb, cmath, mrb_module_get(mrb, "Math")); - mrb_define_module_function(mrb, cmath, "sin", cmath_sin, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, cmath, "cos", cmath_cos, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, cmath, "tan", cmath_tan, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, cmath, MRB_SYM(sin), cmath_sin, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, cmath, MRB_SYM(cos), cmath_cos, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, cmath, MRB_SYM(tan), cmath_tan, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, cmath, "asin", cmath_asin, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, cmath, "acos", cmath_acos, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, cmath, "atan", cmath_atan, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, cmath, MRB_SYM(asin), cmath_asin, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, cmath, MRB_SYM(acos), cmath_acos, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, cmath, MRB_SYM(atan), cmath_atan, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, cmath, "sinh", cmath_sinh, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, cmath, "cosh", cmath_cosh, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, cmath, "tanh", cmath_tanh, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, cmath, MRB_SYM(sinh), cmath_sinh, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, cmath, MRB_SYM(cosh), cmath_cosh, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, cmath, MRB_SYM(tanh), cmath_tanh, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, cmath, "asinh", cmath_asinh, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, cmath, "acosh", cmath_acosh, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, cmath, "atanh", cmath_atanh, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, cmath, MRB_SYM(asinh), cmath_asinh, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, cmath, MRB_SYM(acosh), cmath_acosh, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, cmath, MRB_SYM(atanh), cmath_atanh, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, cmath, "exp", cmath_exp, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, cmath, "log", cmath_log, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); - mrb_define_module_function(mrb, cmath, "log2", cmath_log2, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, cmath, "log10", cmath_log10, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, cmath, "sqrt", cmath_sqrt, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, cmath, MRB_SYM(exp), cmath_exp, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, cmath, MRB_SYM(log), cmath_log, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); + mrb_define_module_function_id(mrb, cmath, MRB_SYM(log2), cmath_log2, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, cmath, MRB_SYM(log10), cmath_log10, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, cmath, MRB_SYM(sqrt), cmath_sqrt, MRB_ARGS_REQ(1)); } void diff --git a/mrbgems/mruby-cmath/test/cmath.rb b/mrbgems/mruby-cmath/test/cmath.rb index ddd0e5106c..b48939ca42 100644 --- a/mrbgems/mruby-cmath/test/cmath.rb +++ b/mrbgems/mruby-cmath/test/cmath.rb @@ -27,7 +27,7 @@ def assert_complex(c1, c2) assert('CMath trigonometric_functions') do assert_complex(Math.sinh(2).i, CMath.sin(2i)) - assert_complex(Math.cosh(2)+0i, CMath.cos(2i)) + assert_complex(Math.cosh(2)+0i, CMath.cos(2i)) assert_complex(Math.tanh(2).i, CMath.tan(2i)) assert_complex(Math.sin(2).i, CMath.sinh(2i)) assert_complex(Math.cos(2)+0i, CMath.cosh(2i)) diff --git a/mrbgems/mruby-compar-ext/README.md b/mrbgems/mruby-compar-ext/README.md new file mode 100644 index 0000000000..6cc86582e8 --- /dev/null +++ b/mrbgems/mruby-compar-ext/README.md @@ -0,0 +1,65 @@ +# mruby-compar-ext + +Comparable module extension. + +This mrbgem adds the `clamp` method to the `Comparable` module. + +## `Comparable#clamp` + +The `clamp` method restricts a value to a given range. + +**Syntax:** + +```ruby +obj.clamp(min, max) -> obj +obj.clamp(range) -> obj +``` + +**Description:** + +- In the `(min, max)` form, it returns: + - `min` if `obj <=> min` is less than zero. + - `max` if `obj <=> max` is greater than zero. + - `obj` otherwise. +- In the `(range)` form, it returns: + - `range.begin` if `obj <=> range.begin` is less than zero. + - `range.end` if `obj <=> range.end` is greater than zero (and `range.end` is not `nil` and the range is inclusive). + - `obj` otherwise. +- If `range.begin` is `nil`, it is considered smaller than `obj`. +- If `range.end` is `nil`, it is considered greater than `obj`. + +**Examples:** + +```ruby +# Using min and max arguments +12.clamp(0, 100) #=> 12 +523.clamp(0, 100) #=> 100 +-3.123.clamp(0, 100) #=> 0 + +'d'.clamp('a', 'f') #=> 'd' +'z'.clamp('a', 'f') #=> 'f' + +# Using a Range argument +12.clamp(0..100) #=> 12 +523.clamp(0..100) #=> 100 +-3.123.clamp(0..100) #=> 0 + +'d'.clamp('a'..'f') #=> 'd' +'z'.clamp('a'..'f') #=> 'f' + +# Using ranges with nil begin or end +-20.clamp(0..) #=> 0 +523.clamp(..100) #=> 100 +``` + +**Error Handling:** + +- Raises an `ArgumentError` if an exclusive range (e.g., `0...100`) is provided as the `range` argument when `range.end` is not `nil`. +- Raises an `ArgumentError` if `min` and `max` arguments cannot be compared (e.g., a `String` and an `Integer`). +- Raises an `ArgumentError` if the `min` argument is greater than the `max` argument. + +```ruby +100.clamp(0...100) # ArgumentError: cannot clamp with an exclusive range +10.clamp("a", "z") # ArgumentError: comparison of String with String failed (depending on mruby version, may also be other errors if types are incompatible) +100.clamp(10, 0) # ArgumentError: min argument must be smaller than max argument +``` diff --git a/mrbgems/mruby-compar-ext/mrblib/compar.rb b/mrbgems/mruby-compar-ext/mrblib/compar.rb index 3b1fdb7f83..4a82cd448b 100644 --- a/mrbgems/mruby-compar-ext/mrblib/compar.rb +++ b/mrbgems/mruby-compar-ext/mrblib/compar.rb @@ -4,9 +4,9 @@ module Comparable # obj.clamp(min, max) -> obj # obj.clamp(range) -> obj # - # In (min, max) form, returns _min_ if _obj_ - # <=> _min_ is less than zero, _max_ if _obj_ - # <=> _max_ is greater than zero, and _obj_ + # In `(min, max)` form, returns _min_ if _obj_ + # `<=>` _min_ is less than zero, _max_ if _obj_ + # `<=>` _max_ is greater than zero, and _obj_ # otherwise. # # 12.clamp(0, 100) #=> 12 @@ -16,9 +16,9 @@ module Comparable # 'd'.clamp('a', 'f') #=> 'd' # 'z'.clamp('a', 'f') #=> 'f' # - # In (range) form, returns _range.begin_ if _obj_ - # <=> _range.begin_ is less than zero, _range.end_ - # if _obj_ <=> _range.end_ is greater than zero, and + # In `(range)` form, returns _range.begin_ if _obj_ + # `<=>` _range.begin_ is less than zero, _range.end_ + # if _obj_ `<=>` _range.end_ is greater than zero, and # _obj_ otherwise. # # 12.clamp(0..100) #=> 12 @@ -28,49 +28,48 @@ module Comparable # 'd'.clamp('a'..'f') #=> 'd' # 'z'.clamp('a'..'f') #=> 'f' # - # If _range.begin_ is +nil+, it is considered smaller than _obj_, - # and if _range.end_ is +nil+, it is considered greater than + # If _range.begin_ is `nil`, it is considered smaller than _obj_, + # and if _range.end_ is `nil`, it is considered greater than # _obj_. # # -20.clamp(0..) #=> 0 # 523.clamp(..100) #=> 100 # - # When _range.end_ is excluded and not +nil+, an exception is + # When _range.end_ is excluded and not `nil`, an exception is # raised. # # 100.clamp(0...100) # ArgumentError # def clamp(min, max=nil) + if max.nil? if min.kind_of?(Range) - max = min.begin - if max.nil? - max = self - elsif min.exclude_end? + max = min.end + if !max.nil? && min.exclude_end? raise ArgumentError, "cannot clamp with an exclusive range" end - min = min.end - if min.nil? - min = self - end - else - raise TypeError, "wrong argument type #{min.class}" + min = min.begin end end - if (min <=> max) > 0 - raise ArgumentError, "min argument must be smaller than max argument" + + if !min.nil? && !max.nil? + cmp = min <=> max + if cmp.nil? + raise ArgumentError, "comparison of #{min.class} with #{max.class} failed" + elsif cmp > 0 + raise ArgumentError, "min argument must be smaller than max argument" + end end - c = self <=> min - if c == 0 - return self - elsif c < 0 - return min + + unless min.nil? + cmp = self <=> min + return self if cmp == 0 + return min if cmp < 0 end - c = self <=> max - if c > 0 - return max - else - return self + unless max.nil? + cmp = self <=> max + return max if cmp > 0 end + return self end end diff --git a/mrbgems/mruby-compar-ext/test/compar.rb b/mrbgems/mruby-compar-ext/test/compar.rb new file mode 100644 index 0000000000..40d90f193d --- /dev/null +++ b/mrbgems/mruby-compar-ext/test/compar.rb @@ -0,0 +1,21 @@ +assert("Comparable#clamp") do + assert_equal(12, 12.clamp(0, 100)) + assert_equal(100, 532.clamp(0, 100)) + assert_equal(0, -3.123.clamp(0, 100)) + assert_equal('d', 'd'.clamp('a', 'f')) + assert_equal('f', 'z'.clamp('a', 'f')) + + assert_equal(12, 12.clamp(0..100)) + assert_equal(100, 523.clamp(0..100)) + assert_equal(0, -3.123.clamp(0..100)) + + assert_equal('d', 'd'.clamp('a'..'f')) + assert_equal('f', 'z'.clamp('a'..'f')) + + assert_equal(0, -20.clamp(0..)) + assert_equal(100, 523.clamp(..100)) + + assert_raise(ArgumentError) { + 100.clamp(0...100) + } +end diff --git a/mrbgems/mruby-compiler/README.md b/mrbgems/mruby-compiler/README.md new file mode 100644 index 0000000000..0c99cacb66 --- /dev/null +++ b/mrbgems/mruby-compiler/README.md @@ -0,0 +1,28 @@ +# mruby-compiler + +This mrbgem provides the mruby compiler, which is responsible for parsing Ruby +code and generating mruby bytecode. + +## Functionality + +The `mruby-compiler` gem includes the following components: + +- **Parser:** Translates Ruby source code into an abstract syntax tree (AST). +- **Code Generator:** Traverses the AST to produce executable mruby bytecode. +- **`mrbc` executable:** A command-line tool for compiling `.rb` files into + `.mrb` bytecode files. + +## Usage + +The `mrbc` (mruby-bin-mrbc) executable will generate compiled binary from Ruby +programs via this gem. + +Example of using `mrbc`: + +```sh +# Compile a Ruby script to bytecode +bin/mrbc my_script.rb + +# Run the compiled script +bin/mruby my_script.mrb +``` diff --git a/mrbgems/mruby-compiler/core/codegen.c b/mrbgems/mruby-compiler/core/codegen.c index ff85f3a3e9..f041a2c997 100644 --- a/mrbgems/mruby-compiler/core/codegen.c +++ b/mrbgems/mruby-compiler/core/codegen.c @@ -4,6 +4,41 @@ ** See Copyright Notice in mruby.h */ +/* + * ## Code Generator + * + * This file implements the mruby code generator, a crucial component of the mruby + * compilation pipeline. Its primary responsibility is to translate the Abstract + * Syntax Tree (AST), produced by the parser, into mruby bytecode (Instruction + * Sequence - iseq). + * + * ### Key Operational Aspects: + * + * - **AST Traversal:** The generator walks through the AST nodes, processing each + * node type and emitting corresponding bytecode instructions. + * - **Scope Management:** It manages lexical scopes, keeping track of local + * variables, upvalues (variables from enclosing scopes), and register + * allocation within each scope. This is vital for correct variable access + * and lifetime. + * - **Opcode Generation:** For different AST node types (e.g., literals, + * arithmetic operations, control flow statements, method calls, variable + * assignments), specific opcodes are generated. This involves selecting the + * appropriate instruction and its operands. + * - **Loop Handling:** It provides mechanisms to correctly generate bytecode for + * various loop constructs (e.g., `while`, `for`, `until`), including managing + * `break`, `next`, and `redo` statements by patching jump addresses. + * - **Instruction Sequence (iseq):** The output of this process is an `mrb_irep` + * structure, which contains the generated instruction sequence (iseq), literal + * pools, symbol tables, and other metadata required for execution by the + * mruby virtual machine. + * - **Error Handling:** Includes mechanisms for reporting errors encountered + * during code generation, such as syntax errors not caught by the parser or + * semantic errors. + * + * This code generator is essential for transforming human-readable mruby code + * into a format that the mruby VM can execute efficiently. + */ + #include #include #include @@ -11,7 +46,6 @@ #include #include #include -#include #include "node.h" #include #include @@ -20,71 +54,87 @@ #include #include +/* Wrappers for mruby's memory management functions. */ +#define mrbc_malloc(s) mrb_basic_alloc_func(NULL,(s)) /* Allocates memory. */ +#define mrbc_realloc(p,s) mrb_basic_alloc_func((p),(s)) /* Reallocates memory. */ +#define mrbc_free(p) mrb_basic_alloc_func((p),0) /* Frees memory. */ + #ifndef MRB_CODEGEN_LEVEL_MAX +/* Maximum recursion depth for the codegen function to prevent stack overflows. */ #define MRB_CODEGEN_LEVEL_MAX 256 #endif +/* Maximum number of arguments for some opcodes like OP_SUPER or OP_ARGARY. */ #define MAXARG_S (1<<16) +/* Macro to detect (0 . 0) separators in literal arrays */ +#define IS_LITERAL_DELIM(node) \ + ((node) && (node)->car && \ + (node)->car->car == NULL && \ + (node)->car->cdr == NULL) + typedef mrb_ast_node node; typedef struct mrb_parser_state parser_state; +/* Represents the different kinds of loops or blocks encountered during code generation. */ enum looptype { - LOOP_NORMAL, - LOOP_BLOCK, - LOOP_FOR, - LOOP_BEGIN, - LOOP_RESCUE, + LOOP_NORMAL, /* A standard loop construct like `while` or `until`. */ + LOOP_BLOCK, /* A block or lambda. */ + LOOP_FOR, /* A `for` loop. */ + LOOP_BEGIN, /* A `begin...end` block (often with `rescue` or `ensure`). */ + LOOP_RESCUE, /* The `rescue` part of a `begin...rescue...end` block. */ }; +/* Information about a loop currently being compiled, used for `break`, `next`, `redo`, etc. */ struct loopinfo { - enum looptype type; - uint32_t pc0; /* `next` destination */ - uint32_t pc1; /* `redo` destination */ - uint32_t pc2; /* `break` destination */ - int reg; /* destination register */ - struct loopinfo *prev; + enum looptype type; /* Type of the loop, using `enum looptype`. */ + uint32_t pc0; /* Jump destination for `next`, or start of loop for `retry` in `rescue`. */ + uint32_t pc1; /* Jump destination for `redo`. */ + uint32_t pc2; /* Jump destination for `break`. */ + int reg; /* Register to store the loop's return value (e.g., from `break val`), or -1 if no value. */ + struct loopinfo *prev; /* Pointer to the previous `loopinfo` in a linked list (for nested loops). */ }; +/* Represents the state of the code generator for a particular lexical scope. */ typedef struct scope { - mrb_state *mrb; - mrb_pool *mpool; + mrb_state *mrb; /* Pointer to the mruby state. */ + mempool *mpool; /* Pointer to the memory pool for this scope's allocations. */ - struct scope *prev; + struct scope *prev; /* Pointer to the previous (enclosing) scope. */ - node *lv; + node *lv; /* AST node representing the list of local variables in this scope. */ - uint16_t sp; - uint32_t pc; - uint32_t lastpc; - uint32_t lastlabel; - uint16_t ainfo:15; - mrb_bool mscope:1; + uint16_t sp; /* Current stack pointer (register index) within this scope. */ + uint32_t pc; /* Current program counter (instruction index) for the ISEQ being generated. */ + uint32_t lastpc; /* Program counter of the previously emitted instruction (used for peephole optimization). */ + uint32_t lastlabel; /* Program counter of the last label emitted (inhibits some peephole optimizations). */ + uint16_t ainfo:15; /* Argument information bitfield (counts for req, opt, rest, post, key, kdict, block). */ + mrb_bool mscope:1; /* Boolean flag: true if this is a method/module/class scope (not a block). */ - struct loopinfo *loop; - mrb_sym filename_sym; - uint16_t lineno; + struct loopinfo *loop; /* Pointer to the current innermost `loopinfo` structure for this scope. */ + mrb_sym filename_sym; /* `mrb_sym` representing the current filename. */ + uint16_t lineno; /* Current line number being processed. */ - mrb_code *iseq; - uint16_t *lines; - uint32_t icapa; + mrb_code *iseq; /* Pointer to the dynamically growing array of `mrb_code` (instructions). */ + uint16_t *lines; /* Array to store line numbers corresponding to each instruction (for debugging). */ + uint32_t icapa; /* Current capacity of the `iseq` and `lines` arrays. */ - mrb_irep *irep; - mrb_pool_value *pool; - mrb_sym *syms; - mrb_irep **reps; - struct mrb_irep_catch_handler *catch_table; - uint32_t pcapa, scapa, rcapa; + mrb_irep *irep; /* Pointer to the `mrb_irep` (instruction sequence representation) being built. */ + mrb_irep_pool *pool; /* Pointer to the literal pool for the `irep`. */ + mrb_sym *syms; /* Pointer to the symbol list for the `irep`. */ + mrb_irep **reps; /* Pointer to the array of child `irep`s (for nested blocks/methods). */ + struct mrb_irep_catch_handler *catch_table; /* Pointer to the table of catch handlers for this scope. */ + uint32_t pcapa, scapa, rcapa; /* Current capacities of the `pool`, `syms`, and `reps` arrays respectively. */ - uint16_t nlocals; - uint16_t nregs; - int ai; + uint16_t nlocals; /* Number of local variables in this scope. */ + uint16_t nregs; /* Number of registers used in this scope (maximum value of `sp`). */ + int ai; /* Arena index for mruby's garbage collector. */ - int debug_start_pos; - uint16_t filename_index; - parser_state* parser; + int debug_start_pos; /* Starting ISEQ position for the current debug file information. */ + uint16_t filename_index; /* Index of the current filename in the parser's filename table. */ + parser_state* parser; /* Pointer to the `mrb_parser_state`. */ - int rlev; /* recursion levels */ + int rlev; /* Recursion level counter for `codegen` calls, to prevent stack overflow. */ } codegen_scope; static codegen_scope* scope_new(mrb_state *mrb, codegen_scope *prev, node *lv); @@ -102,12 +152,30 @@ static void loop_pop(codegen_scope *s, int val); static int catch_handler_new(codegen_scope *s); static void catch_handler_set(codegen_scope *s, int ent, enum mrb_catch_type type, uint32_t begin, uint32_t end, uint32_t target); -static void gen_assignment(codegen_scope *s, node *tree, node *rhs, int sp, int val); static void gen_massignment(codegen_scope *s, node *tree, int sp, int val); +static void codegen_masgn(codegen_scope *s, node *varnode, node *rhs, int sp, int val); +static void gen_assignment(codegen_scope *s, node *tree, node *rhs, int sp, int val); +static void codegen_call_assign(codegen_scope *s, node *varnode, node *rhs, int sp, int val); static void codegen(codegen_scope *s, node *tree, int val); static void raise_error(codegen_scope *s, const char *msg); +/* Forward declarations for helper functions */ +static struct mrb_ast_var_header* get_var_header(node *n); + +/* NULL-safe node type accessor macro */ +#define node_type(n) ((n) ? NODE_TYPE(n) : (enum node_type)0) + +/* + * Reports a compilation error encountered during code generation. + * + * This function formats an error message, typically including the filename + * and line number where the error occurred. It then triggers a longjmp + * to unwind the compilation process, effectively halting further code generation. + * + * @param s The current code generation scope. + * @param message The error message string. + */ static void codegen_error(codegen_scope *s, const char *message) { @@ -124,50 +192,64 @@ codegen_error(codegen_scope *s, const char *message) while (s->prev) { codegen_scope *tmp = s->prev; if (s->irep) { - mrb_free(s->mrb, s->iseq); + mrbc_free(s->iseq); for (int i=0; iirep->plen; i++) { - mrb_pool_value *pv = &s->pool[i]; - if ((pv->tt & 0x3) == IREP_TT_STR || pv->tt == IREP_TT_BIGINT) { - mrb_free(s->mrb, (void*)pv->u.str); + mrb_irep_pool *p = &s->pool[i]; + if ((p->tt & 0x3) == IREP_TT_STR || p->tt == IREP_TT_BIGINT) { + mrbc_free((void*)p->u.str); } } - mrb_free(s->mrb, s->pool); - mrb_free(s->mrb, s->syms); - mrb_free(s->mrb, s->catch_table); + mrbc_free(s->pool); + mrbc_free(s->syms); + mrbc_free(s->catch_table); if (s->reps) { /* copied from mrb_irep_free() in state.c */ for (int i=0; iirep->rlen; i++) { if (s->reps[i]) mrb_irep_decref(s->mrb, (mrb_irep*)s->reps[i]); } - mrb_free(s->mrb, s->reps); + mrbc_free(s->reps); } - mrb_free(s->mrb, s->lines); + mrbc_free(s->lines); } - mrb_pool_close(s->mpool); + mempool_close(s->mpool); s = tmp; } MRB_THROW(s->mrb->jmp); } +/* + * Allocates memory from the memory pool associated with the current codegen_scope. + * + * This function is used for allocations that are expected to have the same + * lifetime as the current scope. The memory allocated via this function will be + * freed automatically when the scope is finished and its memory pool is closed. + * It calls `codegen_error` if allocation fails. + * + * @param s The current code generation scope. + * @param len The number of bytes to allocate. + * @return A pointer to the allocated memory. + */ static void* codegen_palloc(codegen_scope *s, size_t len) { - void *p = mrb_pool_alloc(s->mpool, len); + void *p = mempool_alloc(s->mpool, len); if (!p) codegen_error(s, "pool memory allocation"); return p; } -static void* -codegen_realloc(codegen_scope *s, void *p, size_t len) -{ - p = mrb_realloc_simple(s->mrb, p, len); - - if (!p && len > 0) codegen_error(s, "mrb_realloc"); - return p; -} - +/* + * Checks if instruction operands `a` or `b` exceed 8 bits (0xff). + * + * If the parser option `no_ext_ops` is set (disallowing OP_EXT1/2/3), + * and either operand is larger than 0xff, this function calls `codegen_error` + * to report that an extended opcode would be required. + * + * @param s The current code generation scope. + * @param a The first operand. + * @param b The second operand. + */ static void check_no_ext_ops(codegen_scope *s, uint16_t a, uint16_t b) { @@ -176,12 +258,35 @@ check_no_ext_ops(codegen_scope *s, uint16_t a, uint16_t b) } } +/* + * Creates a new label by returning the current program counter (pc) + * and updating `s->lastlabel` to this value. + * + * Marking a PC as a label (`s->lastlabel = s->pc`) can inhibit certain + * peephole optimizations that might otherwise modify instructions at this label. + * + * @param s The current code generation scope. + * @return The current program counter, which is now marked as a label. + */ static int new_label(codegen_scope *s) { return s->lastlabel = s->pc; } +/* + * Emits a single byte (`i`) into the instruction sequence (`s->iseq`) + * at the specified program counter (`pc`). + * + * This function handles dynamic resizing of the `iseq` buffer and the + * associated `lines` array (if line number tracking is enabled). + * It also records the current line number (`s->lineno`) for the emitted + * instruction in `s->lines[pc]`. + * + * @param s The current code generation scope. + * @param pc The program counter where the byte should be emitted. + * @param i The byte to emit. + */ static void emit_B(codegen_scope *s, uint32_t pc, uint8_t i) { @@ -195,9 +300,9 @@ emit_B(codegen_scope *s, uint32_t pc, uint8_t i) else { s->icapa *= 2; } - s->iseq = (mrb_code*)codegen_realloc(s, s->iseq, sizeof(mrb_code)*s->icapa); + s->iseq = (mrb_code*)mrbc_realloc(s->iseq, sizeof(mrb_code)*s->icapa); if (s->lines) { - s->lines = (uint16_t*)codegen_realloc(s, s->lines, sizeof(uint16_t)*s->icapa); + s->lines = (uint16_t*)mrbc_realloc(s->lines, sizeof(uint16_t)*s->icapa); } } if (s->lines) { @@ -209,6 +314,15 @@ emit_B(codegen_scope *s, uint32_t pc, uint8_t i) s->iseq[pc] = i; } +/* + * Emits a 2-byte short integer (`i`) into the instruction sequence at `pc`. + * The short is emitted in big-endian format (most significant byte first). + * This is achieved by calling `emit_B` twice. + * + * @param s The current code generation scope. + * @param pc The program counter where the short should be emitted. + * @param i The 2-byte short to emit. + */ static void emit_S(codegen_scope *s, int pc, uint16_t i) { @@ -219,6 +333,13 @@ emit_S(codegen_scope *s, int pc, uint16_t i) emit_B(s, pc+1, lo); } +/* + * Generates (emits) a single byte (`i`) at the current program counter (`s->pc`) + * and then increments `s->pc` by 1. + * + * @param s The current code generation scope. + * @param i The byte to emit. + */ static void gen_B(codegen_scope *s, uint8_t i) { @@ -226,6 +347,13 @@ gen_B(codegen_scope *s, uint8_t i) s->pc++; } +/* + * Generates (emits) a 2-byte short integer (`i`) at the current program + * counter (`s->pc`) and then increments `s->pc` by 2. + * + * @param s The current code generation scope. + * @param i The 2-byte short to emit. + */ static void gen_S(codegen_scope *s, uint16_t i) { @@ -233,6 +361,13 @@ gen_S(codegen_scope *s, uint16_t i) s->pc += 2; } +/* + * Generates an opcode `i` that takes no operands. + * Updates `s->lastpc` to the current `s->pc` before emitting. + * + * @param s The current code generation scope. + * @param i The opcode to generate. + */ static void genop_0(codegen_scope *s, mrb_code i) { @@ -240,6 +375,16 @@ genop_0(codegen_scope *s, mrb_code i) gen_B(s, i); } +/* + * Generates an opcode `i` with a single 16-bit operand `a`. + * If `a` is larger than 0xFF (255), it prepends `OP_EXT1` and emits `a` as a short. + * Otherwise, it emits `a` as a single byte. + * Updates `s->lastpc`. + * + * @param s The current code generation scope. + * @param i The opcode to generate. + * @param a The 16-bit operand. + */ static void genop_1(codegen_scope *s, mrb_code i, uint16_t a) { @@ -256,6 +401,17 @@ genop_1(codegen_scope *s, mrb_code i, uint16_t a) } } +/* + * Generates an opcode `i` with two 16-bit operands `a` and `b`. + * It handles operand extensions (`OP_EXT1`, `OP_EXT2`, `OP_EXT3`) + * based on whether `a` and/or `b` are larger than 0xFF. + * Updates `s->lastpc`. + * + * @param s The current code generation scope. + * @param i The opcode to generate. + * @param a The first 16-bit operand. + * @param b The second 16-bit operand. + */ static void genop_2(codegen_scope *s, mrb_code i, uint16_t a, uint16_t b) { @@ -286,6 +442,18 @@ genop_2(codegen_scope *s, mrb_code i, uint16_t a, uint16_t b) } } +/* + * Generates an opcode `i` with three operands `a`, `b`, and `c`. + * It uses `genop_2` to emit `i`, `a`, and `b` (handling extensions for `a` and `b`), + * and then emits `c` as a single byte using `gen_B`. `c` is assumed to fit in a byte. + * Updates `s->lastpc` (via `genop_2`). + * + * @param s The current code generation scope. + * @param i The opcode to generate. + * @param a The first 16-bit operand. + * @param b The second 16-bit operand. + * @param c The third 16-bit operand (emitted as a byte). + */ static void genop_3(codegen_scope *s, mrb_code i, uint16_t a, uint16_t b, uint16_t c) { @@ -293,6 +461,17 @@ genop_3(codegen_scope *s, mrb_code i, uint16_t a, uint16_t b, uint16_t c) gen_B(s, (uint8_t)c); } +/* + * Generates an opcode `i` with a 16-bit operand `a` and a 16-bit operand `b`. + * Operand `a` is emitted using `genop_1` (which handles `OP_EXT1` if needed). + * Operand `b` is emitted as a 2-byte short using `gen_S`. + * Updates `s->lastpc` (via `genop_1`). + * + * @param s The current code generation scope. + * @param i The opcode to generate. + * @param a The first 16-bit operand. + * @param b The second 16-bit operand (emitted as a short). + */ static void genop_2S(codegen_scope *s, mrb_code i, uint16_t a, uint16_t b) { @@ -300,6 +479,17 @@ genop_2S(codegen_scope *s, mrb_code i, uint16_t a, uint16_t b) gen_S(s, b); } +/* + * Generates an opcode `i` with a 16-bit operand `a` and a 32-bit operand `b`. + * Operand `a` is emitted using `genop_1` (handling `OP_EXT1`). + * Operand `b` is emitted as two 2-byte shorts (high word then low word). + * Updates `s->lastpc` (via `genop_1`). + * + * @param s The current code generation scope. + * @param i The opcode to generate. + * @param a The first 16-bit operand. + * @param b The 32-bit operand (emitted as two shorts). + */ static void genop_2SS(codegen_scope *s, mrb_code i, uint16_t a, uint32_t b) { @@ -308,6 +498,15 @@ genop_2SS(codegen_scope *s, mrb_code i, uint16_t a, uint32_t b) gen_S(s, b&0xffff); } +/* + * Generates an opcode `i` followed by a 3-byte "wide" operand `a`. + * The 3-byte operand is emitted as three separate bytes (a1, a2, a3). + * Updates `s->lastpc`. + * + * @param s The current code generation scope. + * @param i The opcode to generate. + * @param a The 32-bit operand, of which the lower 24 bits are used. + */ static void genop_W(codegen_scope *s, mrb_code i, uint32_t a) { @@ -322,6 +521,7 @@ genop_W(codegen_scope *s, mrb_code i, uint32_t a) gen_B(s, a3); } +/* Indicates whether a codegen function should produce a value on the stack (VAL) or not (NOVAL). */ #define NOVAL 0 #define VAL 1 @@ -333,6 +533,19 @@ no_optimize(codegen_scope *s) return FALSE; } +/* + * Decodes a mruby bytecode instruction starting at the given program counter `pc`. + * + * It reads the opcode and its operands from the bytecode stream and populates + * a `mrb_insn_data` structure. This function handles standard opcodes as well + * as extended opcodes (OP_EXT1, OP_EXT2, OP_EXT3) to correctly parse operands + * of varying sizes. This is primarily used by the peephole optimizer and + * instruction analysis utilities. + * + * @param pc Pointer to the start of the instruction in the bytecode. + * @return A `mrb_insn_data` struct containing the decoded instruction, + * its operands (a, b, c), and the original address. + */ struct mrb_insn_data mrb_decode_insn(const mrb_code *pc) { @@ -347,7 +560,7 @@ mrb_decode_insn(const mrb_code *pc) switch (insn) { #define FETCH_Z() /* empty */ #define OPCODE(i,x) case OP_ ## i: FETCH_ ## x (); break; -#include "mruby/ops.h" +#include #undef OPCODE } switch (insn) { @@ -355,7 +568,7 @@ mrb_decode_insn(const mrb_code *pc) insn = READ_B(); switch (insn) { #define OPCODE(i,x) case OP_ ## i: FETCH_ ## x ## _1 (); break; -#include "mruby/ops.h" +#include #undef OPCODE } break; @@ -363,7 +576,7 @@ mrb_decode_insn(const mrb_code *pc) insn = READ_B(); switch (insn) { #define OPCODE(i,x) case OP_ ## i: FETCH_ ## x ## _2 (); break; -#include "mruby/ops.h" +#include #undef OPCODE } break; @@ -371,7 +584,7 @@ mrb_decode_insn(const mrb_code *pc) insn = READ_B(); switch (insn) { #define OPCODE(i,x) case OP_ ## i: FETCH_ ## x ## _3 (); break; -#include "mruby/ops.h" +#include #undef OPCODE } break; @@ -397,7 +610,7 @@ static uint8_t mrb_insn_size[] = { #define BBB 4 #define BS 4 #define BSS 6 -#include "mruby/ops.h" +#include #undef B #undef BB #undef BBB @@ -411,7 +624,7 @@ static uint8_t mrb_insn_size1[] = { #define BBB 5 #define BS 5 #define BSS 7 -#include "mruby/ops.h" +#include #undef B #undef BS #undef BSS @@ -421,7 +634,7 @@ static uint8_t mrb_insn_size2[] = { #define B 2 #define BS 4 #define BSS 6 -#include "mruby/ops.h" +#include #undef B #undef BB #undef BBB @@ -435,7 +648,7 @@ static uint8_t mrb_insn_size2[] = { #define BS 5 #define BSS 7 static uint8_t mrb_insn_size3[] = { -#include "mruby/ops.h" +#include }; #undef B #undef BB @@ -444,16 +657,29 @@ static uint8_t mrb_insn_size3[] = { #undef BSS #undef OPCODE +/* + * Finds the program counter (PC) of the instruction immediately preceding + * the instruction at the given `pc`. + * + * It iterates backward through the already generated instruction sequence (`s->iseq`) + * from its beginning up to `pc`, decoding each instruction to determine its size + * and thus find the start of the previous instruction. + * + * @param s The current code generation scope. + * @param pc Pointer to an instruction in `s->iseq`. + * @return Pointer to the start of the instruction preceding the one at `pc`, + * or NULL if `pc` is at the beginning of `s->iseq`. + */ static const mrb_code* mrb_prev_pc(codegen_scope *s, const mrb_code *pc) { const mrb_code *prev_pc = NULL; const mrb_code *i = s->iseq; + mrb_assert(pc < s->iseq + s->icapa); while (iiseq[(s)->pc]) +/* Converts an instruction memory address to a program counter (pc) offset. */ #define addr_pc(s, addr) (uint32_t)((addr) - s->iseq) +/* Resets the program counter (pc) to the address of the previously generated instruction. Used in peephole optimizations. */ #define rewind_pc(s) s->pc = s->lastpc +/* + * Decodes and returns the last instruction that was emitted into the + * instruction sequence (`s->iseq`). + * It uses `mrb_decode_insn` on the instruction at `s->iseq[s->lastpc]`. + * If no instructions have been emitted (`s->pc == 0`), it returns a NOP. + * + * @param s The current code generation scope. + * @return A `mrb_insn_data` struct for the last emitted instruction. + */ static struct mrb_insn_data mrb_last_insn(codegen_scope *s) { @@ -485,14 +723,40 @@ mrb_last_insn(codegen_scope *s) return mrb_decode_insn(&s->iseq[s->lastpc]); } +/* + * Determines if peephole optimizations should be disabled for the current instruction. + * Peephole optimization is disabled if: + * - General optimization is off (`no_optimize(s)` is true). + * - The current program counter (`s->pc`) is the target of a label (`s->lastlabel == s->pc`). + * - It's the beginning of the bytecode (`s->pc == 0`). + * - The current PC is the same as the PC of the last emitted instruction (`s->pc == s->lastpc`), + * which can happen after a `rewind_pc`. + * + * @param s The current code generation scope. + * @return TRUE if peephole optimizations should be skipped, FALSE otherwise. + */ static mrb_bool no_peephole(codegen_scope *s) { return no_optimize(s) || s->lastlabel == s->pc || s->pc == 0 || s->pc == s->lastpc; } +/* Sentinel value for jump offsets that are not yet determined and need to be linked later. */ #define JMPLINK_START UINT32_MAX +/* + * Generates the 2-byte signed offset for a jump instruction. + * + * The `pc` argument is the absolute target program counter for the jump. + * The function calculates the relative offset from the instruction *after* + * the current jump instruction (i.e., `s->pc + 2` for the jump opcode and its offset) + * to the target `pc`. This offset is then emitted as a 16-bit signed integer. + * If the offset is too large to fit in 16 bits, it calls `codegen_error`. + * If `pc` is `JMPLINK_START`, it emits an offset of 0 (placeholder for later patching). + * + * @param s The current code generation scope. + * @param pc The absolute target program counter for the jump. + */ static void gen_jmpdst(codegen_scope *s, uint32_t pc) { @@ -509,6 +773,18 @@ gen_jmpdst(codegen_scope *s, uint32_t pc) gen_S(s, (uint16_t)off); } +/* + * Generates an unconditional jump instruction `i` (e.g., OP_JMP) + * that jumps to the absolute target program counter `pc`. + * + * It first emits the jump opcode `i` using `genop_0`, then emits + * the calculated jump offset using `gen_jmpdst`. + * + * @param s The current code generation scope. + * @param i The jump opcode to generate (e.g., OP_JMP). + * @param pc The absolute target program counter. + * @return The program counter where the jump offset was written. This is used for jump linking. + */ static uint32_t genjmp(codegen_scope *s, mrb_code i, uint32_t pc) { @@ -522,6 +798,32 @@ genjmp(codegen_scope *s, mrb_code i, uint32_t pc) #define genjmp_0(s,i) genjmp(s,i,JMPLINK_START) +/* + * Generates a conditional jump instruction `i` (e.g., OP_JMPNOT, OP_JMPIF) + * based on the value in register `a`, targeting the absolute program counter `pc`. + * + * This function includes several peephole optimizations: + * - If the last instruction was a MOVE to register `a` from another temporary register, + * it rewinds and uses the source of the MOVE as the condition register. + * - If the last instruction loaded a constant (nil, false, true, integer) into register `a`, + * it may optimize the jump: + * - If the condition is known at compile time (e.g., JMPNOT after LOADF), it can + * transform the conditional jump into an unconditional OP_JMP. + * - If the condition is known and makes the jump always/never taken, it can + * remove the jump entirely (returning JMPLINK_START to signify this). + * The `val` parameter influences these optimizations: if `val` is false (NOVAL), + * it implies the preceding instruction producing `a` might be removable if the jump + * itself is optimized away. + * + * @param s The current code generation scope. + * @param i The conditional jump opcode. + * @param a The register index holding the condition value. + * @param pc The absolute target program counter for the jump. + * @param val Indicates if the value in register `a` from a previous instruction is needed + * beyond this conditional jump. + * @return The program counter where the jump offset was written, or `JMPLINK_START` if the + * jump was optimized away. + */ static uint32_t genjmp2(codegen_scope *s, mrb_code i, uint16_t a, uint32_t pc, int val) { @@ -538,7 +840,7 @@ genjmp2(codegen_scope *s, mrb_code i, uint16_t a, uint32_t pc, int val) } break; case OP_LOADNIL: - case OP_LOADF: + case OP_LOADFALSE: if (data.a == a || data.a > s->nlocals) { s->pc = addr_pc(s, data.addr); if (i == OP_JMPNOT || (i == OP_JMPNIL && data.insn == OP_LOADNIL)) { @@ -549,7 +851,7 @@ genjmp2(codegen_scope *s, mrb_code i, uint16_t a, uint32_t pc, int val) } } break; - case OP_LOADT: case OP_LOADI: case OP_LOADINEG: case OP_LOADI__1: + case OP_LOADTRUE: case OP_LOADI8: case OP_LOADINEG: case OP_LOADI__1: case OP_LOADI_0: case OP_LOADI_1: case OP_LOADI_2: case OP_LOADI_3: case OP_LOADI_4: case OP_LOADI_5: case OP_LOADI_6: case OP_LOADI_7: if (data.a == a || data.a > s->nlocals) { @@ -585,12 +887,34 @@ genjmp2(codegen_scope *s, mrb_code i, uint16_t a, uint32_t pc, int val) static mrb_bool get_int_operand(codegen_scope *s, struct mrb_insn_data *data, mrb_int *ns); static void gen_int(codegen_scope *s, uint16_t dst, mrb_int i); +/* + * Generates an OP_MOVE instruction to copy the value from register `src` to register `dst`. + * + * This function incorporates several peephole optimizations to avoid redundant moves or + * to combine the move with preceding operations: + * - If `dst` and `src` are the same, the function does nothing. + * - If the previous instruction was also an `OP_MOVE` involving `src` or `dst`, + * it might combine or reorder them to eliminate redundant operations. + * - If the previous instruction loaded a literal (nil, self, true, false, integer, + * symbol, string, etc.) into `src`, and `src` is a temporary register, + * this function can rewind the program counter and generate the load operation + * directly into `dst`, effectively eliminating the `OP_MOVE`. + * - It can also perform constant folding for `OP_ADDI`/`OP_SUBI` if a sequence of + * `LOADI`, `MOVE`, `ADDI`/`SUBI` can be resolved at compile time. + * + * The `nopeep` parameter, if true, disables these peephole optimizations, forcing + * the generation of a direct `OP_MOVE` instruction. + * + * @param s The current code generation scope. + * @param dst The destination register index. + * @param src The source register index. + * @param nopeep If non-zero, disables peephole optimizations for this move. + */ static void gen_move(codegen_scope *s, uint16_t dst, uint16_t src, int nopeep) { - if (nopeep || no_peephole(s)) goto normal; - else if (dst == src) return; - else { + if (dst == src) return; + if (!(nopeep || no_peephole(s))) { struct mrb_insn_data data = mrb_last_insn(s); switch (data.insn) { @@ -599,7 +923,7 @@ gen_move(codegen_scope *s, uint16_t dst, uint16_t src, int nopeep) if (data.a == src) { if (data.b == dst) /* skip swapping MOVE */ return; - if (data.a < s->nlocals) goto normal; + if (data.a < s->nlocals) break; rewind_pc(s); s->lastpc = addr_pc(s, mrb_prev_pc(s, data.addr)); gen_move(s, dst, data.b, FALSE); @@ -611,34 +935,34 @@ gen_move(codegen_scope *s, uint16_t dst, uint16_t src, int nopeep) gen_move(s, dst, src, FALSE); return; } - goto normal; - case OP_LOADNIL: case OP_LOADSELF: case OP_LOADT: case OP_LOADF: + break; + case OP_LOADNIL: case OP_LOADSELF: case OP_LOADTRUE: case OP_LOADFALSE: case OP_LOADI__1: case OP_LOADI_0: case OP_LOADI_1: case OP_LOADI_2: case OP_LOADI_3: case OP_LOADI_4: case OP_LOADI_5: case OP_LOADI_6: case OP_LOADI_7: - if (data.a != src || data.a < s->nlocals) goto normal; + if (data.a != src || data.a < s->nlocals) break; rewind_pc(s); genop_1(s, data.insn, dst); return; case OP_HASH: - if (data.b != 0) goto normal; + if (data.b != 0) break; /* fall through */ - case OP_LOADI: case OP_LOADINEG: + case OP_LOADI8: case OP_LOADINEG: case OP_LOADL: case OP_LOADSYM: case OP_GETGV: case OP_GETSV: case OP_GETIV: case OP_GETCV: case OP_GETCONST: case OP_STRING: case OP_LAMBDA: case OP_BLOCK: case OP_METHOD: case OP_BLKPUSH: - if (data.a != src || data.a < s->nlocals) goto normal; + if (data.a != src || data.a < s->nlocals) break; rewind_pc(s); genop_2(s, data.insn, dst, data.b); return; case OP_LOADI16: - if (data.a != src || data.a < s->nlocals) goto normal; + if (data.a != src || data.a < s->nlocals) break; rewind_pc(s); genop_2S(s, data.insn, dst, data.b); return; case OP_LOADI32: - if (data.a != src || data.a < s->nlocals) goto normal; + if (data.a != src || data.a < s->nlocals) break; else { uint32_t i = (uint32_t)data.b<<16|data.c; rewind_pc(s); @@ -646,7 +970,7 @@ gen_move(codegen_scope *s, uint16_t dst, uint16_t src, int nopeep) } return; case OP_ARRAY: - if (data.a != src || data.a < s->nlocals || data.a < dst) goto normal; + if (data.a != src || data.a < s->nlocals || data.a < dst) break; rewind_pc(s); if (data.b == 0 || dst == data.a) genop_2(s, OP_ARRAY, dst, 0); @@ -654,49 +978,92 @@ gen_move(codegen_scope *s, uint16_t dst, uint16_t src, int nopeep) genop_3(s, OP_ARRAY2, dst, data.a, data.b); return; case OP_ARRAY2: - if (data.a != src || data.a < s->nlocals || data.a < dst) goto normal; + if (data.a != src || data.a < s->nlocals || data.a < dst) break; rewind_pc(s); genop_3(s, OP_ARRAY2, dst, data.b, data.c); return; case OP_AREF: case OP_GETUPVAR: - if (data.a != src || data.a < s->nlocals) goto normal; + if (data.a != src || data.a < s->nlocals) break; rewind_pc(s); genop_3(s, data.insn, dst, data.b, data.c); return; case OP_ADDI: case OP_SUBI: - if (addr_pc(s, data.addr) == s->lastlabel || data.a != src || data.a < s->nlocals) goto normal; + if (addr_pc(s, data.addr) == s->lastlabel || data.a != src || data.a < s->nlocals) break; else { struct mrb_insn_data data0 = mrb_decode_insn(mrb_prev_pc(s, data.addr)); - if (data0.insn != OP_MOVE || data0.a != data.a || data0.b != dst) goto normal; - s->pc = addr_pc(s, data0.addr); + if (data0.insn != OP_MOVE || data0.a != data.a || data0.b != dst) break; if (addr_pc(s, data0.addr) != s->lastlabel) { /* constant folding */ - data0 = mrb_decode_insn(mrb_prev_pc(s, data0.addr)); + struct mrb_insn_data data1 = mrb_decode_insn(mrb_prev_pc(s, data0.addr)); mrb_int n; - if (data0.a == dst && get_int_operand(s, &data0, &n)) { + if (data1.a == dst && get_int_operand(s, &data1, &n)) { if ((data.insn == OP_ADDI && !mrb_int_add_overflow(n, data.b, &n)) || (data.insn == OP_SUBI && !mrb_int_sub_overflow(n, data.b, &n))) { - s->pc = addr_pc(s, data0.addr); + s->pc = addr_pc(s, data1.addr); gen_int(s, dst, n); return; } } } + /* ADDILV/SUBILV fusion: MOVE temp local; ADDI temp imm; MOVE local temp */ + /* -> ADDILV local temp imm (temp is working space for method fallback) */ + s->pc = addr_pc(s, data0.addr); + genop_3(s, data.insn == OP_ADDI ? OP_ADDILV : OP_SUBILV, dst, data.a, data.b); + return; } - genop_2(s, data.insn, dst, data.b); - return; + break; default: break; } } - normal: + genop_2(s, OP_MOVE, dst, src); return; } +/* + * Searches for a local variable `id` in outer lexical scopes (upvalues). + * + * It first traverses the chain of enclosing `codegen_scope` structures + * (linked by `s->prev`). If not found, it then traverses the chain of + * `upper` RProc structures stored in the parser state. + * + * If the variable `id` is found in an outer scope: + * - It returns `lv`, the number of lexical levels (scopes) to go up + * to find the variable. + * - It sets the `*idx` output parameter to the variable's index within + * that outer scope's local variable table. + * + * If the variable is not found in any outer scope, it calls `codegen_error` + * to report an error (e.g., "No anonymous block parameter", "Can't find local variables"). + * + * @param s The current code generation scope from which the search begins. + * @param id The `mrb_sym` (symbol) of the local variable to search for. + * @param idx Output parameter: pointer to an integer where the index of the + * variable in its defining scope will be stored. + * @return The lexical distance (number of scopes upwards) to the variable's + * defining scope. + */ static int search_upvar(codegen_scope *s, mrb_sym id, int *idx); +/* + * Generates an `OP_GETUPVAR` instruction to retrieve an upvalue. + * + * The upvalue `id` is first located using `search_upvar` to determine its + * lexical level (`lv`) and index (`idx`) within that outer scope. + * Then, an `OP_GETUPVAR` instruction is generated to load this upvalue + * into the destination register `dst`. + * + * Peephole Optimization: + * - If the immediately preceding instruction was an `OP_SETUPVAR` for the + * same upvalue (`id`), lexical level (`lv`), and destination register (`dst`), + * this `OP_GETUPVAR` is skipped as the value is already in the target register. + * + * @param s The current code generation scope. + * @param dst The destination register index where the upvalue will be loaded. + * @param id The `mrb_sym` (symbol) of the upvalue to retrieve. + */ static void gen_getupvar(codegen_scope *s, uint16_t dst, mrb_sym id) { @@ -713,6 +1080,24 @@ gen_getupvar(codegen_scope *s, uint16_t dst, mrb_sym id) genop_3(s, OP_GETUPVAR, dst, idx, lv); } +/* + * Generates an `OP_SETUPVAR` instruction to set an upvalue. + * + * The upvalue `id` is first located using `search_upvar` to determine its + * lexical level (`lv`) and index (`idx`) within that outer scope. + * Then, an `OP_SETUPVAR` instruction is generated to set this upvalue + * using the value from register `dst`. + * + * Peephole Optimization: + * - If the immediately preceding instruction was an `OP_MOVE` where register `dst` + * was the destination (`data.a == dst`), this function will rewind the program + * counter and use the source register of that `OP_MOVE` (`data.b`) as the source + * for `OP_SETUPVAR` instead. This effectively uses the original value before the move. + * + * @param s The current code generation scope. + * @param dst The register index holding the value to set the upvalue to. + * @param id The `mrb_sym` (symbol) of the upvalue to set. + */ static void gen_setupvar(codegen_scope *s, uint16_t dst, mrb_sym id) { @@ -729,6 +1114,24 @@ gen_setupvar(codegen_scope *s, uint16_t dst, mrb_sym id) genop_3(s, OP_SETUPVAR, dst, idx, lv); } +/* + * Generates a return instruction (e.g., `OP_RETURN`, `OP_RETURN_BLK`). + * + * This function emits the specified return opcode `op` with the source register `src` + * containing the value to be returned. + * + * Peephole Optimization: + * - If peephole optimization is enabled and the immediately preceding instruction + * was an `OP_MOVE` into the `src` register (`data.insn == OP_MOVE && src == data.a`), + * this function will rewind the program counter and generate the return instruction + * using the original source register of that `OP_MOVE` (`data.b`). This avoids + * a redundant move before returning. + * - It also avoids emitting multiple consecutive `OP_RETURN` instructions. + * + * @param s The current code generation scope. + * @param op The specific return opcode to generate (e.g., `OP_RETURN`, `OP_RETURN_BLK`). + * @param src The register index holding the value to be returned. + */ static void gen_return(codegen_scope *s, uint8_t op, uint16_t src) { @@ -742,12 +1145,50 @@ gen_return(codegen_scope *s, uint8_t op, uint16_t src) rewind_pc(s); genop_1(s, op, data.b); } - else if (data.insn != OP_RETURN) { + else if (data.insn == OP_LOADSELF && src == data.a && op == OP_RETURN) { + /* LOADSELF + RETURN -> RETSELF */ + rewind_pc(s); + genop_0(s, OP_RETSELF); + } + else if (data.insn == OP_LOADNIL && src == data.a && op == OP_RETURN) { + /* LOADNIL + RETURN -> RETNIL */ + rewind_pc(s); + genop_0(s, OP_RETNIL); + } + else if (data.insn == OP_LOADTRUE && src == data.a && op == OP_RETURN) { + /* LOADTRUE + RETURN -> RETTRUE */ + rewind_pc(s); + genop_0(s, OP_RETTRUE); + } + else if (data.insn == OP_LOADFALSE && src == data.a && op == OP_RETURN) { + /* LOADFALSE + RETURN -> RETFALSE */ + rewind_pc(s); + genop_0(s, OP_RETFALSE); + } + else if (data.insn != OP_RETURN && data.insn != OP_RETSELF && data.insn != OP_RETNIL && + data.insn != OP_RETTRUE && data.insn != OP_RETFALSE) { genop_1(s, op, src); } } } +/* + * Attempts to extract a compile-time integer value from a given instruction. + * + * This function checks if the instruction described by `data` is one of + * the integer loading opcodes (e.g., `OP_LOADI__1`, `OP_LOADINEG`, `OP_LOADI_0` + * through `OP_LOADI_7`, `OP_LOADI8`, `OP_LOADI16`, `OP_LOADI32`) or `OP_LOADL` + * where the literal pool entry is an integer. + * + * If successful, it stores the extracted integer value into the output + * parameter `*n` and returns `TRUE`. Otherwise, it returns `FALSE`. + * + * @param s The current code generation scope (used to access the literal pool for `OP_LOADL`). + * @param data Pointer to an `mrb_insn_data` structure describing the instruction. + * @param n Output parameter: pointer to an `mrb_int` where the extracted integer + * value will be stored if successful. + * @return `TRUE` if an integer value was successfully extracted, `FALSE` otherwise. + */ static mrb_bool get_int_operand(codegen_scope *s, struct mrb_insn_data *data, mrb_int *n) { @@ -765,25 +1206,25 @@ get_int_operand(codegen_scope *s, struct mrb_insn_data *data, mrb_int *n) *n = data->insn - OP_LOADI_0; return TRUE; - case OP_LOADI: + case OP_LOADI8: case OP_LOADI16: *n = (int16_t)data->b; return TRUE; case OP_LOADI32: - *n = (mrb_int)((uint32_t)data->b<<16)+data->c; + *n = (int32_t)((uint32_t)data->b<<16)+data->c; return TRUE; case OP_LOADL: { - mrb_pool_value *pv = &s->pool[data->b]; + mrb_irep_pool *p = &s->pool[data->b]; - if (pv->tt == IREP_TT_INT32) { - *n = (mrb_int)pv->u.i32; + if (p->tt == IREP_TT_INT32) { + *n = (mrb_int)p->u.i32; } #ifdef MRB_INT64 - else if (pv->tt == IREP_TT_INT64) { - *n = (mrb_int)pv->u.i64; + else if (p->tt == IREP_TT_INT64) { + *n = (mrb_int)p->u.i64; } #endif else { @@ -797,6 +1238,199 @@ get_int_operand(codegen_scope *s, struct mrb_insn_data *data, mrb_int *n) } } +static int new_lit_str2(codegen_scope *s, const char *str1, mrb_int len1, const char *str2, mrb_int len2); +static int find_pool_str(codegen_scope *s, const char *str1, mrb_int len1, const char *str2, mrb_int len2); +static void gen_string(codegen_scope *s, node *list, int val); + +/* + * Reallocates or allocates memory for a string literal in the IREP's literal pool. + * + * This function is used when a string literal needs to be resized, typically + * during string concatenation optimizations (`merge_op_string`). + * + * - If the original pool entry `p` pointed to a shared string (e.g., a string + * from read-only data, `IREP_TT_SSTR`), new memory is allocated for the resized string. + * - If `p` was already a dynamically allocated string (`IREP_TT_STR`), its buffer + * is reallocated to the new `len`. + * + * After allocation/reallocation, the pool entry `p` is updated: + * - Its type `tt` is set to `IREP_TT_STR` (or kept as `IREP_TT_STR`). + * - The length in `tt` is updated to the new `len`. + * - The string is null-terminated. + * - `p->u.str` points to the new or reallocated buffer. + * + * @param s The current code generation scope. + * @param p Pointer to the `mrb_irep_pool` entry for the string literal. + * @param len The new length of the string (excluding the null terminator). + */ +static void +realloc_pool_str(codegen_scope *s, mrb_irep_pool *p, mrb_int len) +{ + char *str; + mrb_int olen = p->tt >> 2; /* original length */ + if ((p->tt & 3) == IREP_TT_SSTR) { /* Check if it's a shared/static string */ + const char *old = p->u.str; + str = (char*)mrbc_malloc(len+1); /* Allocate new memory if it was shared */ + memcpy(str, old, olen); /* Copy original content */ + } + else { /* It's already a heap-allocated string */ + str = (char*)p->u.str; + str = (char*)mrbc_realloc(str, len+1); + } + p->tt = (uint32_t)(len<<2 | IREP_TT_STR); + str[len] = '\0'; + p->u.str = (const char*)str; +} + +/* + * Frees the memory associated with a string literal in the IREP's literal pool, + * if it's not a shared (static) string. + * + * This function is typically called when a string literal pool entry is being + * effectively removed or replaced due to optimizations like string merging. + * + * - It checks if the pool entry `p`'s type `tt` indicates it's a dynamically + * allocated string (not `IREP_TT_SSTR`). + * - If so, it frees the memory pointed to by `p->u.str`. + * - It then sets `p->u.str` to `NULL` and decrements the total count of literals + * in the pool (`s->irep->plen`). Note: This decrement might be problematic if + * pool entries are not compacted, as it could lead to an incorrect `plen`. + * + * @param s The current code generation scope. + * @param p Pointer to the `mrb_irep_pool` entry of the string to be freed. + */ +static void +free_pool_str(codegen_scope *s, mrb_irep_pool *p) +{ + if ((p->tt & 3) != IREP_TT_SSTR) { /* Only free if not a shared/static string */ + mrbc_free((char*)p->u.str); + } + p->u.str = NULL; + s->irep->plen--; /* Decrements the count of pool entries. */ +} + +/* + * Performs a peephole optimization for string concatenation. + * + * This function is called when an `OP_ADD` (string concatenation) instruction + * is encountered. It checks if the two operands to `OP_ADD` were themselves + * loaded by `OP_STRING` instructions (i.e., string literals from the pool + * at indices `b1` and `b2`). + * + * If this pattern is found, `merge_op_string` attempts to: + * 1. Determine if the literal pool entries `b1` and `b2` are used by any other + * `OP_STRING` instructions prior to the instruction at `pc` (the start of the + * first `OP_STRING` in the sequence). + * 2. Based on this usage (`used` flags), it decides on a strategy to merge + * the string content of `b1` and `b2`: + * - If neither `b1` nor `b2` is otherwise referenced, or only `b2` is, it reuses + * and resizes pool entry `b1` to hold the concatenated string. If `b2` was + * the last entry in the pool and not shared, `b2` is freed. + * - If only `b1` is referenced, it reuses and resizes pool entry `b2`. + * - If both `b1` and `b2` are referenced by other instructions, it creates a + * new literal pool entry for the concatenated string. + * 3. If an existing pool entry already matches the concatenated string, that entry is used. + * 4. Finally, it rewinds the program counter to `pc` (the location of the original + * first `OP_STRING`) and generates a single `OP_STRING` instruction to load the + * merged/reused literal into the destination register `dst`. + * + * @param s The current code generation scope. + * @param dst The destination register for the result of the concatenation. + * @param b1 The pool index of the first string literal. + * @param b2 The pool index of the second string literal. + * @param pc The program counter of the instruction that loaded the first string literal (`b1`). + * This is where the new merged `OP_STRING` will be generated. + */ +static void +merge_op_string(codegen_scope *s, uint16_t dst, uint16_t b1, uint16_t b2, const mrb_code *pc) +{ + int used = 0; + const mrb_code *i = s->iseq; + + /* scan OP_STRING that refers b1 or b2 */ + mrb_assert(pc < s->iseq + s->icapa); + while (ipool[b1]; + mrb_irep_pool *p2 = &s->pool[b2]; + mrb_int len1 = p1->tt>>2; + mrb_int len2 = p2->tt>>2; + int off = find_pool_str(s, p1->u.str, len1, p2->u.str, len2); + + if (off < 0) { + switch (used) { + case 0: /* both pools are free */ + case 2: /* b2 is referenced */ + /* overwrite p1; free b2 if possible */ + off = b1; + realloc_pool_str(s, p1, len1+len2); + memcpy((void*)(p1->u.str+len1), (void*)p2->u.str, len2); + if (b1 != b2 && used == 0 && b2+1 == s->irep->plen) { + free_pool_str(s, p2); + } + break; + case 1: /* b1 is referenced */ + /* overwrite p2 */ + off = b2; + realloc_pool_str(s, p2, len1+len2); + memmove((void*)(p2->u.str+len1), (void*)p2->u.str, len2); + memcpy((void*)p2->u.str, p1->u.str, len1); + break; + case 3: /* both b1&b2 are referenced */ + /* create new pool */ + off = new_lit_str2(s, p1->u.str, len1, p2->u.str, len2); + break; + } + } + s->pc = addr_pc(s, pc); + genop_2(s, OP_STRING, dst, off); +} + +/* + * Generates code for addition (`OP_ADD`) or subtraction (`OP_SUB`) + * operations, storing the result in register `dst`. + * + * This function includes several peephole optimizations: + * 1. String Concatenation: If `op` is `OP_ADD` and the two preceding instructions + * were `OP_STRING` (loading string literals), it calls `merge_op_string` + * to attempt compile-time concatenation of these literals. + * 2. Immediate Operations: If the last instruction loaded an integer literal (`n`) + * and the instruction before that loaded another integer (`n0`), but `n0` is not + * suitable for further folding (e.g., it's at a label, or the instruction + * before it isn't an integer load), it attempts to convert the operation to + * `OP_ADDI` or `OP_SUBI` if `n` fits within an 8-bit signed integer. + * 3. Constant Folding: If both the last two instructions loaded integer literals + * (`n0` and `n`), it performs the addition or subtraction at compile time. + * The program counter is rewound to the location of the first literal load, + * and code is generated to load the folded result directly using `gen_int`. + * + * If no optimizations are applicable, it generates the standard `OP_ADD` or `OP_SUB` + * instruction. + * + * @param s The current code generation scope. + * @param op The operation code, either `OP_ADD` or `OP_SUB`. + * @param dst The destination register index for the result. + */ static void gen_addsub(codegen_scope *s, uint8_t op, uint16_t dst) { @@ -811,24 +1445,25 @@ gen_addsub(codegen_scope *s, uint8_t op, uint16_t dst) if (!get_int_operand(s, &data, &n)) { /* not integer immediate */ + if (op == OP_ADD && data.insn == OP_STRING) { + struct mrb_insn_data data0 = mrb_decode_insn(mrb_prev_pc(s, data.addr)); + if (data0.insn == OP_STRING) { + merge_op_string(s, dst, data0.b, data.b, data0.addr); + return; + } + } goto normal; } struct mrb_insn_data data0 = mrb_decode_insn(mrb_prev_pc(s, data.addr)); mrb_int n0; if (addr_pc(s, data.addr) == s->lastlabel || !get_int_operand(s, &data0, &n0)) { - /* OP_ADDI/OP_SUBI takes upto 8bits */ - if (n > INT8_MAX || n < INT8_MIN) goto normal; + /* Fold to OP_ADDI/OP_SUBI only for non-negative 8-bit n; flipping op + for negative n would change the method sent on user override (#2557). */ + if (n < 0 || n > UINT8_MAX) goto normal; rewind_pc(s); if (n == 0) return; - if (n > 0) { - if (op == OP_ADD) genop_2(s, OP_ADDI, dst, (uint16_t)n); - else genop_2(s, OP_SUBI, dst, (uint16_t)n); - } - else { /* n < 0 */ - n = -n; - if (op == OP_ADD) genop_2(s, OP_SUBI, dst, (uint16_t)n); - else genop_2(s, OP_ADDI, dst, (uint16_t)n); - } + if (op == OP_ADD) genop_2(s, OP_ADDI, dst, (uint16_t)n); + else genop_2(s, OP_SUBI, dst, (uint16_t)n); return; } if (op == OP_ADD) { @@ -842,6 +1477,27 @@ gen_addsub(codegen_scope *s, uint8_t op, uint16_t dst) } } +/* + * Generates code for multiplication (`OP_MUL`) or division (`OP_DIV`) + * operations, storing the result in register `dst`. + * + * Peephole Optimization (Constant Folding): + * - If peephole optimization is enabled and the two immediately preceding + * instructions loaded integer literals (into registers that are operands + * for this multiplication/division), this function performs the operation + * at compile time. + * - The program counter is rewound to the location of the first literal load, + * and code is generated to load the folded result directly using `gen_int`. + * - For division, if the divisor is zero or if it's `MRB_INT_MIN / -1` (which + * would overflow), the optimization is skipped. + * + * If no optimization is applicable, it generates the standard `OP_MUL` or `OP_DIV` + * instruction. + * + * @param s The current code generation scope. + * @param op The operation code, either `OP_MUL` or `OP_DIV`. + * @param dst The destination register index for the result. + */ static void gen_muldiv(codegen_scope *s, uint8_t op, uint16_t dst) { @@ -876,11 +1532,47 @@ gen_muldiv(codegen_scope *s, uint8_t op, uint16_t dst) mrb_bool mrb_num_shift(mrb_state *mrb, mrb_int val, mrb_int width, mrb_int *num); +/* + * Generates code for various binary operations, identified by `sym_op`, + * storing the result in register `dst`. + * + * This function handles specific binary operations and includes peephole + * optimizations for constant folding when operands are integer literals. + * + * Operations Handled & Optimizations: + * - `aref` (`[]`): Generates `OP_GETIDX`. + * - Bitwise shifts (`<<`, `>>`): If both operands are integer literals, + * performs the shift at compile time using `mrb_num_shift` and loads the result. + * - Modulo (`%`): If both operands are integer literals, performs modulo + * at compile time and loads the result. Handles `MRB_INT_MIN % -1`. + * - Bitwise AND (`&`), OR (`|`), XOR (`^`): If both operands are integer + * literals, performs the operation at compile time and loads the result. + * + * If an optimization is applied (e.g., constant folding), the program counter + * is rewound, and `gen_int` is used to load the computed result. + * + * @param s The current code generation scope. + * @param op The `mrb_sym` representing the binary operator (e.g., `MRB_OPSYM_LSHIFT`). + * @param dst The destination register index for the result. + * @return `TRUE` if a specific optimization was applied (like `OP_GETIDX` or constant folding), + * `FALSE` otherwise. A `FALSE` return typically indicates that a generic + * `OP_SEND` instruction should be generated for the operation. + */ static mrb_bool gen_binop(codegen_scope *s, mrb_sym op, uint16_t dst) { if (no_peephole(s)) return FALSE; - else if (op == MRB_OPSYM_2(s->mrb, aref)) { + else if (op == MRB_OPSYM(aref)) { + /* GETIDX0 fusion: MOVE dst arr; LOADI_0 dst+1 -> GETIDX0 dst arr */ + struct mrb_insn_data data = mrb_last_insn(s); + if (data.insn == OP_LOADI_0 && data.a == (uint32_t)dst+1 && addr_pc(s, data.addr) != s->lastlabel) { + struct mrb_insn_data data0 = mrb_decode_insn(mrb_prev_pc(s, data.addr)); + if (data0.insn == OP_MOVE && data0.a == dst && data0.b != dst) { + s->pc = addr_pc(s, data0.addr); + genop_2(s, OP_GETIDX0, dst, data0.b); + return TRUE; + } + } genop_1(s, OP_GETIDX, dst); return TRUE; } @@ -895,14 +1587,14 @@ gen_binop(codegen_scope *s, mrb_sym op, uint16_t dst) if (!get_int_operand(s, &data0, &n0)) { return FALSE; } - if (op == MRB_OPSYM_2(s->mrb, lshift)) { + if (op == MRB_OPSYM(lshift)) { if (!mrb_num_shift(s->mrb, n0, n, &n)) return FALSE; } - else if (op == MRB_OPSYM_2(s->mrb, rshift)) { + else if (op == MRB_OPSYM(rshift)) { if (n == MRB_INT_MIN) return FALSE; if (!mrb_num_shift(s->mrb, n0, -n, &n)) return FALSE; } - else if (op == MRB_OPSYM_2(s->mrb, mod) && n != 0) { + else if (op == MRB_OPSYM(mod) && n != 0) { if (n0 == MRB_INT_MIN && n == -1) { n = 0; } @@ -914,13 +1606,13 @@ gen_binop(codegen_scope *s, mrb_sym op, uint16_t dst) n = n1; } } - else if (op == MRB_OPSYM_2(s->mrb, and)) { + else if (op == MRB_OPSYM(and)) { n = n0 & n; } - else if (op == MRB_OPSYM_2(s->mrb, or)) { + else if (op == MRB_OPSYM(or)) { n = n0 | n; } - else if (op == MRB_OPSYM_2(s->mrb, xor)) { + else if (op == MRB_OPSYM(xor)) { n = n0 ^ n; } else { @@ -932,6 +1624,32 @@ gen_binop(codegen_scope *s, mrb_sym op, uint16_t dst) } } +/* + * Resolves the target address of a previously generated jump instruction. + * + * Jump instructions are often generated with placeholder offsets (e.g., 0 or a + * link to another jump) when their final target is not yet known. This function + * patches such a jump. + * + * `pos0` is the program counter (address) of the 2-byte field within a jump + * instruction that holds its offset (or a link in a jump chain). + * + * The function calculates the correct relative offset from the instruction + * *after* the jump's offset field (`pos0 + 2`) to the current program + * counter (`s->pc`), which is the actual target of the jump. This calculated + * offset is then written back into the bytecode at `pos0`. + * + * If the original value at `pos0` was not 0 (i.e., it was part of a jump chain, + * pointing to the next jump to patch), this original value (which is an offset + * relative to `pos0 + 2`) is returned so that `dispatch_linked` can continue + * patching the chain. If the original value was 0, it signifies the end of a chain, + * and 0 is returned. + * + * @param s The current code generation scope. + * @param pos0 The address of the 2-byte offset field within a jump instruction. + * @return The next position in a jump chain to dispatch (calculated from the + * original offset stored at `pos0`), or 0 if it's the end of a chain. + */ static uint32_t dispatch(codegen_scope *s, uint32_t pos0) { @@ -953,6 +1671,25 @@ dispatch(codegen_scope *s, uint32_t pos0) return pos1+newpos; } +/* + * Patches a chain of linked jump instructions to all point to the current + * program counter (`s->pc`). + * + * Jump instructions whose targets are not yet known can be linked together. + * Each jump's offset field initially stores the relative offset to the next + * jump in the chain (or 0 if it's the last one). `pos` is the address of the + * first jump's offset field in such a chain. + * + * This function iterates through the chain: + * - It calls `dispatch(s, pos)` to patch the jump at `pos` to target the current `s->pc`. + * - `dispatch` returns the address of the next jump in the chain (or 0 if the end). + * - The process repeats until the end of the chain is reached. + * + * If `pos` is `JMPLINK_START`, it means there's no chain to dispatch, so it returns early. + * + * @param s The current code generation scope. + * @param pos The address of the offset field of the first jump instruction in a linked chain. + */ static void dispatch_linked(codegen_scope *s, uint32_t pos) { @@ -963,6 +1700,7 @@ dispatch_linked(codegen_scope *s, uint32_t pos) } } +/* Updates the nregs (number of registers used) if the current stack pointer (sp) exceeds it. */ #define nregs_update do {if (s->sp > s->nregs) s->nregs = s->sp;} while (0) static void push_n_(codegen_scope *s, int n) @@ -983,29 +1721,100 @@ pop_n_(codegen_scope *s, int n) s->sp-=n; } +/* Increments the stack pointer (sp) by 1 and updates nregs. */ #define push() push_n_(s,1) +/* Increments the stack pointer (sp) by n and updates nregs. */ #define push_n(n) push_n_(s,n) +/* Decrements the stack pointer (sp) by 1. */ #define pop() pop_n_(s,1) +/* Decrements the stack pointer (sp) by n. */ #define pop_n(n) pop_n_(s,n) +/* Returns the current stack pointer (sp) value. */ #define cursp() (s->sp) -static mrb_pool_value* +/* + * Extends the literal pool (`s->pool`) of the current IREP (`s->irep`) if necessary. + * + * If the number of literals currently in the pool (`s->irep->plen`) has reached + * the pool's capacity (`s->pcapa`), this function doubles the capacity by + * reallocating the `s->pool` array. + * After ensuring there's space, it increments `s->irep->plen` and returns a pointer + * to the newly available slot in the literal pool. + * + * @param s The current code generation scope. + * @return A pointer to the next available (or newly allocated) `mrb_irep_pool` entry. + */ +static mrb_irep_pool* lit_pool_extend(codegen_scope *s) { if (s->irep->plen == s->pcapa) { s->pcapa *= 2; - s->pool = (mrb_pool_value*)codegen_realloc(s, s->pool, sizeof(mrb_pool_value)*s->pcapa); + s->pool = (mrb_irep_pool*)mrbc_realloc(s->pool, sizeof(mrb_irep_pool)*s->pcapa); } return &s->pool[s->irep->plen++]; } +/* Helper functions for simple load operations that follow the pattern: + * if (!val) return; ; genop_X(...); push(); */ +static void +gen_load_op1(codegen_scope *s, mrb_code op, int val) +{ + if (!val) return; + genop_1(s, op, cursp()); + push(); +} + +static void +gen_load_op2(codegen_scope *s, mrb_code op, uint16_t arg, int val) +{ + if (!val) return; + genop_2(s, op, cursp(), arg); + push(); +} + +/* Helper function for conditional nil loading - loads nil only if val is needed */ +static void +gen_load_nil(codegen_scope *s, int val) +{ + if (!val) return; + genop_1(s, OP_LOADNIL, cursp()); + push(); +} + +/* Helper function for loading literal and pushing */ +static void +gen_load_lit(codegen_scope *s, int off) +{ + genop_2(s, OP_LOADL, cursp(), off); + push(); +} + +/* + * Adds a big integer literal (BigInt) to the IREP's literal pool. + * The BigInt is provided as a string `p` in the given `base`. + * + * - It first searches the existing literal pool to see if an identical BigInt + * (same string representation and base) already exists. If so, its index is returned. + * - If not found, a new entry is created in the pool: + * - The pool is extended if necessary using `lit_pool_extend`. + * - The new pool entry's type `tt` is set to `IREP_TT_BIGINT`. + * - Memory is allocated to store the BigInt's string representation, its length (1 byte), + * and its base (1 byte). The string `p` is copied into this buffer. + * - `pv->u.str` points to this allocated buffer. + * - If the length of the string `p` exceeds 255, a "integer too big" error is raised. + * + * @param s The current code generation scope. + * @param p A string representing the big integer. + * @param base The base of the string representation (e.g., 10 for decimal). + * @return The index of the BigInt literal in the pool. + */ static int -new_litbint(codegen_scope *s, const char *p, int base, mrb_bool neg) +new_litbint(codegen_scope *s, const char *p, int base) { int i; size_t plen; - mrb_pool_value *pv; + mrb_irep_pool *pv; plen = strlen(p); if (plen > 255) { @@ -1024,10 +1833,9 @@ new_litbint(codegen_scope *s, const char *p, int base, mrb_bool neg) char *buf; pv->tt = IREP_TT_BIGINT; - buf = (char*)codegen_realloc(s, NULL, plen+3); + buf = (char*)mrbc_malloc(plen+3); buf[0] = (char)plen; - if (neg) buf[1] = -base; - else buf[1] = base; + buf[1] = base; memcpy(buf+2, p, plen); buf[plen+2] = '\0'; pv->u.str = buf; @@ -1035,103 +1843,231 @@ new_litbint(codegen_scope *s, const char *p, int base, mrb_bool neg) return i; } +/* + * Searches the IREP's literal pool for an existing string that is identical + * to the concatenation of `str1` (of length `len1`) and `str2` (of length `len2`). + * + * It iterates through the existing literal pool entries: + * - Skips entries that are not strings or are marked with `IREP_TT_NFLAG`. + * - Compares the total length (`len1 + len2`) with the length of the pool string. + * - If lengths match, it performs a `memcmp` to check if the content is identical + * to the concatenation of `str1` and `str2`. + * + * @param s The current code generation scope. + * @param str1 Pointer to the first part of the string to find. + * @param len1 Length of `str1`. + * @param str2 Pointer to the second part of the string to find (can be NULL if `len2` is 0). + * @param len2 Length of `str2`. + * @return The index of the matching string literal in the pool if found, otherwise -1. + */ static int -new_lit_str(codegen_scope *s, const char *str, mrb_int len) +find_pool_str(codegen_scope *s, const char *str1, mrb_int len1, const char *str2, mrb_int len2) { + mrb_irep_pool *pool; + mrb_int len = len1 + len2; int i; - mrb_pool_value *pv; for (i=0; iirep->plen; i++) { - pv = &s->pool[i]; - if (pv->tt & IREP_TT_NFLAG) continue; - mrb_int plen = pv->tt>>2; + pool = &s->pool[i]; + if (pool->tt & IREP_TT_NFLAG) continue; + mrb_int plen = pool->tt>>2; if (len != plen) continue; - if (memcmp(pv->u.str, str, plen) == 0) + if (memcmp(pool->u.str, str1, len1) == 0 && + (len2 == 0 || memcmp(pool->u.str + len1, str2, len2) == 0)) return i; } + return -1; +} - pv = lit_pool_extend(s); +/* + * Adds a string literal, potentially formed by concatenating `str1` and `str2`, + * to the IREP's literal pool. + * + * - It first calls `find_pool_str` to check if an identical concatenated string + * already exists in the pool. If so, its index is returned. + * - If not found: + * - A new slot in the literal pool is obtained using `lit_pool_extend`. + * - If `str1` points to read-only data (`mrb_ro_data_p(str1)`) and `str2` is NULL + * (meaning `str1` is the complete string and it's from a static source), + * the pool entry is marked as `IREP_TT_SSTR` (shared string) and `pool->u.str` + * points directly to `str1`. + * - Otherwise (if the string needs to be dynamically created or is not from + * read-only data), memory is allocated for the combined length of `str1` and + * `str2` plus a null terminator. `str1` and `str2` (if present) are copied + * into this new buffer. The pool entry is marked as `IREP_TT_STR`, and + * `pool->u.str` points to this newly allocated buffer. + * - The index of the new or found literal is returned. + * + * @param s The current code generation scope. + * @param str1 Pointer to the first part of the string. + * @param len1 Length of `str1`. + * @param str2 Pointer to the second part of the string (can be NULL if `len2` is 0). + * @param len2 Length of `str2`. + * @return The index of the string literal in the pool. + */ +static int +new_lit_str2(codegen_scope *s, const char *str1, mrb_int len1, const char *str2, mrb_int len2) +{ + int i = find_pool_str(s, str1, len1, str2, len2); + + if (i >= 0) return i; + i = s->irep->plen; + + mrb_irep_pool *pool = lit_pool_extend(s); + mrb_int len = len1 + len2; - if (mrb_ro_data_p(str)) { - pv->tt = (uint32_t)(len<<2) | IREP_TT_SSTR; - pv->u.str = str; + if (mrb_ro_data_p(str1) && !str2) { + pool->tt = (uint32_t)(len<<2) | IREP_TT_SSTR; + pool->u.str = str1; } else { char *p; - pv->tt = (uint32_t)(len<<2) | IREP_TT_STR; - p = (char*)codegen_realloc(s, NULL, len+1); - memcpy(p, str, len); + pool->tt = (uint32_t)(len<<2) | IREP_TT_STR; + p = (char*)mrbc_malloc(len+1); + memcpy(p, str1, len1); + if (str2) memcpy(p+len1, str2, len2); p[len] = '\0'; - pv->u.str = p; + pool->u.str = p; } return i; } +/* + * Adds a string literal (from `str` with length `len`) to the IREP's literal pool. + * This is a wrapper around `new_lit_str2`, passing NULL for `str2` and 0 for `len2`. + * + * @param s The current code generation scope. + * @param str Pointer to the string. + * @param len Length of the string. + * @return The index of the string literal in the pool. + */ +static int +new_lit_str(codegen_scope *s, const char *str, mrb_int len) +{ + return new_lit_str2(s, str, len, NULL, 0); +} + +/* + * Adds a C-string literal (null-terminated string `str`) to the IREP's literal pool. + * This is a wrapper around `new_lit_str`, calculating the length of `str` using `strlen`. + * + * @param s The current code generation scope. + * @param str Pointer to the null-terminated C-string. + * @return The index of the string literal in the pool. + */ static int new_lit_cstr(codegen_scope *s, const char *str) { return new_lit_str(s, str, (mrb_int)strlen(str)); } +/* + * Adds an integer literal `num` to the IREP's literal pool. + * + * - It first searches the existing literal pool to see if an identical integer + * value already exists. If so, its index is returned. + * - If not found, a new entry is created: + * - The pool is extended if necessary using `lit_pool_extend`. + * - The new pool entry's type `tt` is set to `IREP_TT_INT32` or `IREP_TT_INT64` + * depending on whether `MRB_INT64` is defined. + * - The integer `num` is stored in `pool->u.i32` or `pool->u.i64`. + * + * @param s The current code generation scope. + * @param num The `mrb_int` value to add to the pool. + * @return The index of the integer literal in the pool. + */ static int new_lit_int(codegen_scope *s, mrb_int num) { int i; - mrb_pool_value *pv; + mrb_irep_pool *pool; for (i=0; iirep->plen; i++) { - pv = &s->pool[i]; - if (pv->tt == IREP_TT_INT32) { - if (num == pv->u.i32) return i; + pool = &s->pool[i]; + if (pool->tt == IREP_TT_INT32) { + if (num == pool->u.i32) return i; } #ifdef MRB_64BIT - else if (pv->tt == IREP_TT_INT64) { - if (num == pv->u.i64) return i; + else if (pool->tt == IREP_TT_INT64) { + if (num == pool->u.i64) return i; } continue; #endif } - pv = lit_pool_extend(s); + pool = lit_pool_extend(s); #ifdef MRB_INT64 - pv->tt = IREP_TT_INT64; - pv->u.i64 = num; + pool->tt = IREP_TT_INT64; + pool->u.i64 = num; #else - pv->tt = IREP_TT_INT32; - pv->u.i32 = num; + pool->tt = IREP_TT_INT32; + pool->u.i32 = num; #endif return i; } #ifndef MRB_NO_FLOAT +/* + * Adds a float literal `num` to the IREP's literal pool. + * This function is only compiled if `MRB_NO_FLOAT` is not defined. + * + * - It first searches the existing literal pool to see if an identical float + * value (considering both value and sign bit) already exists. If so, its + * index is returned. + * - If not found, a new entry is created: + * - The pool is extended if necessary using `lit_pool_extend`. + * - The new pool entry's type `tt` is set to `IREP_TT_FLOAT`. + * - The float `num` is stored in `pool->u.f`. + * + * @param s The current code generation scope. + * @param num The `mrb_float` value to add to the pool. + * @return The index of the float literal in the pool. + */ static int new_lit_float(codegen_scope *s, mrb_float num) { int i; - mrb_pool_value *pv; + mrb_irep_pool *pool; for (i=0; iirep->plen; i++) { mrb_float f; - pv = &s->pool[i]; - if (pv->tt != IREP_TT_FLOAT) continue; - f = pv->u.f; + pool = &s->pool[i]; + if (pool->tt != IREP_TT_FLOAT) continue; + f = pool->u.f; if (f == num && !signbit(f) == !signbit(num)) return i; } - pv = lit_pool_extend(s); + pool = lit_pool_extend(s); - pv->tt = IREP_TT_FLOAT; - pv->u.f = num; + pool->tt = IREP_TT_FLOAT; + pool->u.f = num; return i; } #endif +/* + * Adds a symbol `sym` to the IREP's symbol list (`s->syms`). + * + * - It first iterates through the existing symbols in `s->syms` (up to `s->irep->slen`) + * to check if the symbol `sym` already exists. If found, its index is returned. + * - If the symbol is not found: + * - It checks if the current symbol list capacity (`s->scapa`) is sufficient. + * If not, `s->scapa` is doubled, and `s->syms` is reallocated. + * If the new capacity would exceed 0xFFFF, a "too many symbols" error is raised. + * - The symbol `sym` is added to `s->syms` at the current end of the list (`s->irep->slen`). + * - `s->irep->slen` is incremented. + * - The index of the (newly added or existing) symbol is returned. + * + * @param s The current code generation scope. + * @param sym The `mrb_sym` to add to the symbol list. + * @return The index of the symbol in the IREP's symbol list. + */ static int -new_sym(codegen_scope *s, mrb_sym sym) +sym_idx(codegen_scope *s, mrb_sym sym) { int i, len; @@ -1146,16 +2082,40 @@ new_sym(codegen_scope *s, mrb_sym sym) if (s->scapa > 0xffff) { codegen_error(s, "too many symbols"); } - s->syms = (mrb_sym*)codegen_realloc(s, s->syms, sizeof(mrb_sym)*s->scapa); + s->syms = (mrb_sym*)mrbc_realloc(s->syms, sizeof(mrb_sym)*s->scapa); } s->syms[s->irep->slen] = sym; return s->irep->slen++; } +/* + * Generates an instruction to set a variable, where the variable is identified by a symbol. + * This is a generic helper for opcodes like `OP_SETGV`, `OP_SETIV`, `OP_SETCV`, `OP_SETCONST`. + * + * - It first ensures the symbol `sym` is in the IREP's symbol list by calling `sym_idx`, + * obtaining its index `idx`. + * - Peephole Optimization: If `val` is `NOVAL` (false) and peephole optimization is enabled, + * it checks if the immediately preceding instruction was an `OP_MOVE` into the `dst` + * register. If so, it means the value intended for the variable assignment was moved + * into `dst`. In this case, it rewinds the program counter and uses the original source + * register of that `OP_MOVE` as the source for the set operation, effectively using + * the value before it was moved to `dst`. + * - Finally, it generates the specified opcode `op` with operands `dst` (the source + * register for the value, possibly modified by peephole optimization) and `idx` + * (the symbol index) using `genop_2`. + * + * @param s The current code generation scope. + * @param op The specific set variable opcode (e.g., `OP_SETGV`, `OP_SETIV`). + * @param dst The register index holding the value to be assigned to the variable. + * @param sym The `mrb_sym` (symbol) identifying the variable. + * @param val A flag indicating context (often whether the value in `dst` is from an + * expression that should be preserved if the set operation is part of a larger one). + * If `NOVAL`, it enables the peephole optimization. + */ static void gen_setxv(codegen_scope *s, uint8_t op, uint16_t dst, mrb_sym sym, int val) { - int idx = new_sym(s, sym); + int idx = sym_idx(s, sym); if (!val && !no_peephole(s)) { struct mrb_insn_data data = mrb_last_insn(s); if (data.insn == OP_MOVE && data.a == dst) { @@ -1166,6 +2126,29 @@ gen_setxv(codegen_scope *s, uint8_t op, uint16_t dst, mrb_sym sym, int val) genop_2(s, op, dst, idx); } +/* + * Generates the most compact instruction(s) to load an integer literal `i` + * into the destination register `dst`. + * + * It employs a series of checks to use specialized, shorter opcodes for common integer values: + * - `OP_LOADI__1` for -1. + * - `OP_LOADINEG` for negative integers between -255 and -2 (operand is positive magnitude). + * - `OP_LOADI16` for negative integers fitting in a signed 16-bit integer (INT16_MIN to -256). + * - `OP_LOADI32` for negative integers fitting in a signed 32-bit integer (INT32_MIN to not fitting in 16-bit). + * - `OP_LOADI_0` through `OP_LOADI_7` for integers 0 through 7. + * - `OP_LOADI8` for positive integers between 8 and 255. + * - `OP_LOADI16` for positive integers fitting in a signed 16-bit integer (256 to INT16_MAX). + * - `OP_LOADI32` for positive integers fitting in a signed 32-bit integer (not fitting in 16-bit to INT32_MAX). + * + * If the integer `i` does not fit any of these specialized opcodes (i.e., it's too large + * or too small for `OP_LOADI32`), it falls back to `OP_LOADL`. This involves adding + * the integer to the IREP's literal pool using `new_lit_int` and then generating + * `OP_LOADL` with the resulting pool index. + * + * @param s The current code generation scope. + * @param dst The destination register index where the integer will be loaded. + * @param i The `mrb_int` value to load. + */ static void gen_int(codegen_scope *s, uint16_t dst, mrb_int i) { @@ -1177,7 +2160,7 @@ gen_int(codegen_scope *s, uint16_t dst, mrb_int i) else goto int_lit; } else if (i < 8) genop_1(s, OP_LOADI_0 + (uint8_t)i, dst); - else if (i <= 0xff) genop_2(s, OP_LOADI, dst, (uint16_t)i); + else if (i <= 0xff) genop_2(s, OP_LOADI8, dst, (uint16_t)i); else if (i <= INT16_MAX) genop_2S(s, OP_LOADI16, dst, (uint16_t)i); else if (i <= INT32_MAX) genop_2SS(s, OP_LOADI32, dst, (uint32_t)i); else { @@ -1186,6 +2169,36 @@ gen_int(codegen_scope *s, uint16_t dst, mrb_int i) } } +/* + * Generates code for a unary operation specified by `sym`, operating on the + * value in register `dst`, and storing the result back into `dst`. + * + * Supported unary operations: + * - Unary plus (`+`): This is a no-op in terms of value change, but the function + * still processes it. + * - Unary minus (`-`): Negates the integer value. + * - Bitwise NOT (`~`): Performs a bitwise complement on the integer value. + * + * Peephole Optimization (Constant Folding): + * - If peephole optimization is enabled and the immediately preceding instruction + * loaded an integer literal into register `dst` (which is also the operand + * register for this unary operation), this function performs the unary operation + * at compile time. + * - The program counter is rewound to the location of the literal load, and code + * is generated to load the folded result directly using `gen_int`. + * - For unary minus, if the original integer is `MRB_INT_MIN`, constant folding + * is skipped to avoid overflow. + * + * If the operation is not one of the recognized unary ops or if constant folding + * is not applicable, the function returns `FALSE`. + * + * @param s The current code generation scope. + * @param sym The `mrb_sym` representing the unary operator (e.g., `MRB_OPSYM_PLUS`, `MRB_OPSYM_MINUS`). + * @param dst The register index which holds the operand and will store the result. + * @return `TRUE` if a constant folding optimization was successfully applied, + * `FALSE` otherwise (e.g., if the operation is not supported for folding, + * or if the preceding instruction was not a suitable integer load). + */ static mrb_bool gen_uniop(codegen_scope *s, mrb_sym sym, uint16_t dst) { @@ -1194,14 +2207,14 @@ gen_uniop(codegen_scope *s, mrb_sym sym, uint16_t dst) mrb_int n; if (!get_int_operand(s, &data, &n)) return FALSE; - if (sym == MRB_OPSYM_2(s->mrb, plus)) { + if (sym == MRB_OPSYM(plus)) { /* unary plus does nothing */ } - else if (sym == MRB_OPSYM_2(s->mrb, minus)) { + else if (sym == MRB_OPSYM(minus)) { if (n == MRB_INT_MIN) return FALSE; n = -n; } - else if (sym == MRB_OPSYM_2(s->mrb, neg)) { + else if (sym == MRB_OPSYM(neg)) { n = ~n; } else { @@ -1212,11 +2225,23 @@ gen_uniop(codegen_scope *s, mrb_sym sym, uint16_t dst) return TRUE; } +/* + * Calculates and returns the number of elements in a linked list of AST nodes. + * The list is traversed via the `cdr` field of each `node`. + * + * @param tree Pointer to the head of the AST node list. + * @return The number of nodes in the list. + */ static int node_len(node *tree) { int n = 0; + /* Validate pointer before using it */ + if (!tree || ((uintptr_t)tree < 0x1000)) { + return 0; + } + while (tree) { n++; tree = tree->cdr; @@ -1224,12 +2249,26 @@ node_len(node *tree) return n; } -#define nint(x) ((int)(intptr_t)(x)) -#define nchar(x) ((char)(intptr_t)(x)) -#define nsym(x) ((mrb_sym)(intptr_t)(x)) +/* Casts a void* (typically from an AST node part) to an int. */ +#define node_to_sym(x) ((mrb_sym)(intptr_t)(x)) +#define node_to_int(x) ((int)(intptr_t)(x)) +/* Casts a void* (typically from an AST node part) to a char. */ +#define node_to_char(x) ((char)(intptr_t)(x)) +/* Casts a void* (typically from an AST node part) to an mrb_sym. */ -#define lv_name(lv) nsym((lv)->car) +/* Extracts the symbol (name) of a local variable from its AST node representation. */ +#define lv_name(lv) node_to_sym((lv)->car) +/* + * Searches for a local variable `id` within the current scope's local variable list (`s->lv`). + * The local variable list `s->lv` is a linked list of AST nodes, where each node's + * `car` holds the symbol of the local variable. + * + * @param s The current code generation scope. + * @param id The `mrb_sym` (symbol) of the local variable to search for. + * @return The 1-based index of the local variable in the current scope if found; + * otherwise, returns 0. + */ static int lv_idx(codegen_scope *s, mrb_sym id) { @@ -1256,7 +2295,7 @@ search_upvar(codegen_scope *s, mrb_sym id, int *idx) if (*idx > 0) { return lv; } - lv ++; + lv++; up = up->prev; } @@ -1278,16 +2317,16 @@ search_upvar(codegen_scope *s, mrb_sym id, int *idx) } if (MRB_PROC_SCOPE_P(u)) break; u = u->upper; - lv ++; + lv++; } - if (id == MRB_OPSYM_2(s->mrb, and)) { + if (id == MRB_OPSYM(and)) { codegen_error(s, "No anonymous block parameter"); } - else if (id == MRB_OPSYM_2(s->mrb, mul)) { + else if (id == MRB_OPSYM(mul)) { codegen_error(s, "No anonymous rest parameter"); } - else if (id == MRB_OPSYM_2(s->mrb, pow)) { + else if (id == MRB_OPSYM(pow)) { codegen_error(s, "No anonymous keyword rest parameter"); } else { @@ -1296,161 +2335,136 @@ search_upvar(codegen_scope *s, mrb_sym id, int *idx) return -1; /* not reached */ } -static void -for_body(codegen_scope *s, node *tree) +/* + * Generates the bytecode for the body of a lambda or a block. + * This function is responsible for creating a new scope, handling arguments + * (including optional, rest, keyword, and block arguments), generating code + * for the body's expressions, and finalizing the resulting `mrb_irep`. + * + * @param s The parent code generation scope. + * @param tree The AST node representing the lambda or block. + * `tree->car` contains the argument list AST. + * `tree->cdr->car` is the body of the lambda/block. + * @param blk A flag indicating if this is a block (`TRUE`) or a lambda (`FALSE`). + * This affects `s->mscope` and loop setup. + * @return The index of the newly created `mrb_irep` in the parent scope's `reps` array. + */ +static int +lambda_body(codegen_scope *s, node *locals, struct mrb_ast_args *args, node *body, int blk) { - codegen_scope *prev = s; - int idx; - struct loopinfo *lp; - node *n2; - - /* generate receiver */ - codegen(s, tree->cdr->car, VAL); - /* generate loop-block */ - s = scope_new(s->mrb, s, NULL); + codegen_scope *parent = s; + /* Create a new scope for the lambda/block body. */ + s = scope_new(s->mrb, s, locals); - push(); /* push for a block parameter */ + /* `mscope` is false for blocks, true for lambdas/methods. */ + s->mscope = !blk; - /* generate loop variable */ - n2 = tree->car; - genop_W(s, OP_ENTER, 0x40000); - if (n2->car && !n2->car->cdr && !n2->cdr) { - gen_assignment(s, n2->car->car, NULL, 1, NOVAL); + /* If it's a block, push a LOOP_BLOCK structure for break/next/return handling. */ + if (blk) { + struct loopinfo *lp = loop_push(s, LOOP_BLOCK); + lp->pc0 = new_label(s); /* Mark entry point for potential retry/redo. */ } - else { - gen_massignment(s, n2, 1, VAL); + + /* Argument processing */ + if (args == NULL) { /* No arguments */ + genop_W(s, OP_ENTER, 0); /* Generate OP_ENTER with no argument specification. */ + s->ainfo = 0; } - /* construct loop */ - lp = loop_push(s, LOOP_FOR); - lp->pc1 = new_label(s); + else { /* Has arguments */ + mrb_aspec a; + int ma, oa, ra, pa, ka, kd, ba, i; + uint32_t pos; + node *opt; + node *margs, *pargs; - /* loop body */ - codegen(s, tree->cdr->cdr->car, VAL); - pop(); - gen_return(s, OP_RETURN, cursp()); - loop_pop(s, NOVAL); - scope_finish(s); - s = prev; - genop_2(s, OP_BLOCK, cursp(), s->irep->rlen-1); - push();pop(); /* space for a block */ - pop(); - idx = new_sym(s, MRB_SYM_2(s->mrb, each)); - genop_3(s, OP_SENDB, cursp(), idx, 0); -} - -static int -lambda_body(codegen_scope *s, node *tree, int blk) -{ - codegen_scope *parent = s; - s = scope_new(s->mrb, s, tree->car); - - s->mscope = !blk; - - if (blk) { - struct loopinfo *lp = loop_push(s, LOOP_BLOCK); - lp->pc0 = new_label(s); - } - tree = tree->cdr; - if (tree->car == NULL) { - genop_W(s, OP_ENTER, 0); - s->ainfo = 0; - } - else { - mrb_aspec a; - int ma, oa, ra, pa, ka, kd, ba, i; - uint32_t pos; - node *opt; - node *margs, *pargs; - node *tail; + /* args is already struct mrb_ast_args * */ /* mandatory arguments */ - ma = node_len(tree->car->car); - margs = tree->car->car; - tail = tree->car->cdr->cdr->cdr->cdr; + ma = node_len(args->mandatory_args); + margs = args->mandatory_args; /* optional arguments */ - oa = node_len(tree->car->cdr->car); + oa = node_len(args->optional_args); /* rest argument? */ - ra = tree->car->cdr->cdr->car ? 1 : 0; + ra = args->rest_arg ? 1 : 0; /* mandatory arguments after rest argument */ - pa = node_len(tree->car->cdr->cdr->cdr->car); - pargs = tree->car->cdr->cdr->cdr->car; + pa = node_len(args->post_mandatory_args); + pargs = args->post_mandatory_args; + /* keyword arguments */ - ka = tail? node_len(tail->cdr->car) : 0; - /* keyword dictionary? */ - kd = tail && tail->cdr->cdr->car? 1 : 0; - /* block argument? */ - ba = tail && tail->cdr->cdr->cdr->car ? 1 : 0; + ka = args->keyword_args ? node_len(args->keyword_args) : 0; + kd = args->kwrest_arg ? 1 : 0; + /* &nil: no block accepted (noblock flag in aspec) */ + mrb_bool noblock = args->block_arg == MRB_SYM(nil); + ba = (args->block_arg && !noblock) ? 1 : 0; if (ma > 0x1f || oa > 0x1f || pa > 0x1f || ka > 0x1f) { codegen_error(s, "too many formal arguments"); } - /* (23bits = 5:5:1:5:5:1:1) */ - a = MRB_ARGS_REQ(ma) + /* (24bits = 1:5:5:1:5:5:1:1) */ + a = (noblock ? MRB_ARGS_NOBLOCK() : 0) + | MRB_ARGS_REQ(ma) | MRB_ARGS_OPT(oa) - | (ra? MRB_ARGS_REST() : 0) + | (ra ? MRB_ARGS_REST() : 0) | MRB_ARGS_POST(pa) | MRB_ARGS_KEY(ka, kd) - | (ba? MRB_ARGS_BLOCK() : 0); + | (ba ? MRB_ARGS_BLOCK() : 0); genop_W(s, OP_ENTER, a); - /* (12bits = 5:1:5:1) */ + /* (13bits = 6:1:5:1:1) - Store argument counts for block argument passing (OP_BLKPUSH) */ s->ainfo = (((ma+oa) & 0x3f) << 7) | ((ra & 0x1) << 6) | ((pa & 0x1f) << 1) - | ((ka | kd) ? 1 : 0); - /* generate jump table for optional arguments initializer */ - pos = new_label(s); + | (ka || kd) + | ((ba & 0x1) << 13); + + /* Optional argument default value initialization */ + pos = new_label(s); /* Start of the optional argument jump table. */ for (i=0; i 0) { - genjmp_0(s, OP_JMP); + genjmp_0(s, OP_JMP); /* Jump to skip all default assignments if all optional args are provided. */ } - opt = tree->car->cdr->car; + opt = args->optional_args; /* AST node for optional arguments. */ i = 0; - while (opt) { + while (opt) { /* Iterate through optional arguments. */ int idx; - mrb_sym id = nsym(opt->car->car); + mrb_sym id = node_to_sym(opt->car->car); /* Symbol of the optional argument. */ - dispatch(s, pos+i*3+1); - codegen(s, opt->car->cdr, VAL); + dispatch(s, pos+i*3+1); /* Patch the jump to this argument's default value code. */ + codegen(s, opt->car->cdr, VAL); /* Generate code for the default value expression. */ pop(); - idx = lv_idx(s, id); + idx = lv_idx(s, id); /* Get local variable index. */ if (idx > 0) { - gen_move(s, idx, cursp(), 0); + gen_move(s, idx, cursp(), 0); /* Move default value to the local variable. */ } - else { + else { /* Should not happen for optional args, but handle as upvar if it does. */ gen_getupvar(s, cursp(), id); } i++; opt = opt->cdr; } if (oa > 0) { - dispatch(s, pos+i*3+1); + dispatch(s, pos+i*3+1); /* Patch the final jump to after all default assignments. */ } - /* keyword arguments */ - if (tail) { - node *kwds = tail->cdr->car; - int kwrest = 0; + /* Keyword argument processing */ + if (ka > 0 || kd > 0) { /* Has keyword arguments or keyword rest */ + node *kwds; + int kwrest = kd; /* Flag for keyword rest argument (e.g., **kwargs) */ - if (tail->cdr->cdr->car) { - kwrest = 1; - } - mrb_assert(nint(tail->car) == NODE_ARGS_TAIL); - mrb_assert(node_len(tail) == 4); + kwds = args->keyword_args; while (kwds) { int jmpif_key_p, jmp_def_set = -1; - node *kwd = kwds->car, *def_arg = kwd->cdr->cdr->car; - mrb_sym kwd_sym = nsym(kwd->cdr->car); - - mrb_assert(nint(kwd->car) == NODE_KW_ARG); + node *kwd = kwds->car; + mrb_sym kwd_sym = node_to_sym(kwd->car); /* Direct access to key */ + node *def_arg = kwd->cdr; /* Direct access to value */ if (def_arg) { int idx; - genop_2(s, OP_KEY_P, lv_idx(s, kwd_sym), new_sym(s, kwd_sym)); + genop_2(s, OP_KEY_P, lv_idx(s, kwd_sym), sym_idx(s, kwd_sym)); jmpif_key_p = genjmp2_0(s, OP_JMPIF, lv_idx(s, kwd_sym), NOVAL); codegen(s, def_arg, VAL); pop(); @@ -1464,7 +2478,7 @@ lambda_body(codegen_scope *s, node *tree, int blk) jmp_def_set = genjmp_0(s, OP_JMP); dispatch(s, jmpif_key_p); } - genop_2(s, OP_KARG, lv_idx(s, kwd_sym), new_sym(s, kwd_sym)); + genop_2(s, OP_KARG, lv_idx(s, kwd_sym), sym_idx(s, kwd_sym)); if (jmp_def_set != -1) { dispatch(s, jmp_def_set); } @@ -1472,31 +2486,113 @@ lambda_body(codegen_scope *s, node *tree, int blk) kwds = kwds->cdr; } - if (tail->cdr->car && !kwrest) { - genop_0(s, OP_KEYEND); + /* Check if there are keyword args but no keyword rest */ + int has_keywords = args->keyword_args != NULL; + + if (has_keywords && !kwrest) { /* If there are keyword args but no keyword rest. */ + genop_0(s, OP_KEYEND); /* Signal end of keyword arguments. */ + + /* Reconstruct keyword hash for super to use */ + /* After KEYEND, the hash at kw_pos is empty (all keys deleted by KARG) */ + /* Build a fresh hash from the extracted keyword local variables */ + int kw_dict_pos = ma + oa + ra + pa + 1; + int sp_save = cursp(); + + /* Load key-value pairs for each keyword argument starting at stack position */ + node *kw_list = args->keyword_args; + int num_pairs = 0; + while (kw_list) { + node *kw = kw_list->car; + mrb_sym kw_sym = node_to_sym(kw->car); + + /* Load symbol (key) */ + genop_2(s, OP_LOADSYM, cursp(), sym_idx(s, kw_sym)); + push(); + + /* Load keyword local variable value */ + genop_2(s, OP_MOVE, cursp(), lv_idx(s, kw_sym)); + push(); + + num_pairs++; + kw_list = kw_list->cdr; + } + + /* Create hash at current stack position, then move to keyword dict position */ + if (num_pairs > 0) { + genop_2(s, OP_HASH, sp_save, num_pairs); + genop_2(s, OP_MOVE, kw_dict_pos, sp_save); + } + else { + /* No keyword args, create empty hash */ + genop_2(s, OP_HASH, sp_save, 0); + genop_2(s, OP_MOVE, kw_dict_pos, sp_save); + } + + /* Restore stack pointer */ + s->sp = sp_save; } } - /* argument destructuring */ - if (margs) { - node *n = margs; + /* Block argument processing */ + if (ba) { /* If a block argument (e.g., &blk) is present. */ + mrb_sym bparam = args->block_arg; + pos = ma+oa+ra+pa+(ka||kd); /* Calculate register offset for the block parameter. */ + if (bparam) { /* If it's a named block parameter. */ + int idx = lv_idx(s, bparam); + genop_2(s, OP_MOVE, idx, pos+1); /* Move the block from its argument slot to the local variable. */ + } + } - pos = 1; + /* Argument destructuring for mandatory and post-mandatory arguments */ + if (margs) { /* Mandatory arguments */ + node *n = margs; + pos = 1; /* Start from register 1 (after self). */ while (n) { - if (nint(n->car->car) == NODE_MASGN) { - gen_massignment(s, n->car->cdr->car, pos, NOVAL); + if (node_type(n->car) == NODE_MARG) { /* If the argument is a mass assignment (e.g., |(a,b)| ). */ + struct mrb_ast_masgn_node *masgn_n = (struct mrb_ast_masgn_node*)n->car; + /* Use dedicated parameter destructuring logic instead of general codegen_masgn */ + int nn = 0; + /* Handle pre variables */ + if (masgn_n->pre) { + node *pre = masgn_n->pre; + while (pre) { + int sp = cursp(); + genop_3(s, OP_AREF, sp, pos, nn); + push(); + gen_assignment(s, pre->car, NULL, sp, NOVAL); + pop(); + nn++; + pre = pre->cdr; + } + } + /* For now, only handle simple pre variables - rest/post would need more complex logic */ } pos++; n = n->cdr; } } - if (pargs) { + if (pargs) { /* Post-mandatory arguments */ node *n = pargs; - - pos = ma+oa+ra+1; + pos = ma+oa+ra+1; /* Calculate starting register for post-mandatory args. */ while (n) { - if (nint(n->car->car) == NODE_MASGN) { - gen_massignment(s, n->car->cdr->car, pos, NOVAL); + if (node_type(n->car) == NODE_MARG) { /* If argument is a mass assignment. */ + struct mrb_ast_masgn_node *masgn_n = (struct mrb_ast_masgn_node*)n->car; + /* Use dedicated parameter destructuring logic instead of general codegen_masgn */ + int nn = 0; + /* Handle pre variables */ + if (masgn_n->pre) { + node *pre = masgn_n->pre; + while (pre) { + int sp = cursp(); + genop_3(s, OP_AREF, sp, pos, nn); + push(); + gen_assignment(s, pre->car, NULL, sp, NOVAL); + pop(); + nn++; + pre = pre->cdr; + } + } + /* For now, only handle simple pre variables - rest/post would need more complex logic */ } pos++; n = n->cdr; @@ -1504,59 +2600,166 @@ lambda_body(codegen_scope *s, node *tree, int blk) } } - codegen(s, tree->cdr->car, VAL); - pop(); - if (s->pc > 0) { + /* Generate code for the actual body of the lambda/block. */ + codegen(s, body, VAL); + pop(); /* Pop the result of the body. */ + + /* Implicit return of the last evaluated expression. */ + if (s->pc > 0) { /* Ensure there's some code before adding return. */ gen_return(s, OP_RETURN, cursp()); } + if (blk) { - loop_pop(s, NOVAL); + loop_pop(s, NOVAL); /* Pop the LOOP_BLOCK structure. */ } - scope_finish(s); - return parent->irep->rlen - 1; + scope_finish(s); /* Finalize the IREP for this lambda/block. */ + return parent->irep->rlen - 1; /* Return the index of this IREP in the parent's REP list. */ } +/* + * Generates code for a new lexical scope, typically for class/module definitions + * or the top-level script. + * + * This function handles the creation of a new `codegen_scope`, recursively + * generates code for the body of that scope, and then finalizes the scope + * to produce an `mrb_irep`. + * + * @param s The parent code generation scope. + * @param tree The AST node representing the scope. + * `tree->car` contains the list of local variables for the new scope. + * `tree->cdr` is the body (sequence of expressions) of the scope. + * @param val Unused in this specific function's direct logic for return value, + * but passed to `codegen` for the body. + * @return The index of the newly created `mrb_irep` in the parent scope's `reps` array. + * Returns 0 if `s->irep` is NULL (should not happen in normal operation). + */ static int -scope_body(codegen_scope *s, node *tree, int val) +scope_body(codegen_scope *s, node *locals, node *body, int val) { - codegen_scope *scope = scope_new(s->mrb, s, tree->car); + /* Create a new scope, inheriting from `s`, with local variables from `locals`. */ + codegen_scope *scope = scope_new(s->mrb, s, locals); - codegen(scope, tree->cdr, VAL); - gen_return(scope, OP_RETURN, scope->sp-1); - if (!s->iseq) { + /* Generate code for the body of the scope. */ + codegen(scope, body, val); + + /* If this is the outermost scope (e.g., top-level script), add OP_STOP. */ + if (!s->iseq) { /* s->iseq would be NULL for the initial dummy scope. */ + if (val) { + gen_return(scope, OP_RETURN, scope->sp-1); + } + /* skip RETURN when no_return_value; STOP will terminate VM */ genop_0(scope, OP_STOP); } + else { + /* Ensure the scope returns the value of its last expression. */ + if (val) { + gen_return(scope, OP_RETURN, scope->sp-1); + } + else { + gen_return(scope, OP_RETURN, 0); /* return nil */ + } + } + + /* Finalize the IREP for this scope. */ scope_finish(scope); + if (!s->irep) { - /* should not happen */ + /* This case should ideally not be reached in normal compilation. */ return 0; } + /* Return the index of the newly created IREP in the parent's list of REPs. */ return s->irep->rlen - 1; } +static struct mrb_ast_var_header* +get_var_header(node *n) +{ + if (!n) return NULL; + + /* Try to interpret as variable-sized node */ + struct mrb_ast_var_header *header = (struct mrb_ast_var_header*)n; + return header; +} + +/* Helper to detect splat nodes in variable-sized format */ +static mrb_bool +is_splat_node(node *n) +{ + return (node_type(n) == NODE_SPLAT); +} + static mrb_bool nosplat(node *t) { while (t) { - if (nint(t->car->car) == NODE_SPLAT) return FALSE; + if (is_splat_node(t->car)) return FALSE; t = t->cdr; } return TRUE; } +/* Check if node is a simple literal that can be generated into any register */ +static mrb_bool +is_simple_literal(node *n) +{ + switch (node_type(n)) { + case NODE_INT: + case NODE_NIL: + case NODE_TRUE: + case NODE_FALSE: + return TRUE; + default: + return FALSE; + } +} + +/* Check if all lhs are local variables and get their registers */ +static mrb_bool +all_lvar_pre(codegen_scope *s, node *pre, int *regs, int max) +{ + int i = 0; + while (pre && i < max) { + if (node_type(pre->car) != NODE_LVAR) return FALSE; + int idx = lv_idx(s, var_node(pre->car)->symbol); + if (idx <= 0) return FALSE; /* not a local variable */ + regs[i++] = idx; + pre = pre->cdr; + } + return pre == NULL; /* all processed */ +} + +/* Generate a simple literal directly into a specific register */ +static void +gen_literal_to_reg(codegen_scope *s, node *n, int reg) +{ + switch (node_type(n)) { + case NODE_INT: + gen_int(s, reg, int_node(n)->value); + break; + case NODE_NIL: + genop_1(s, OP_LOADNIL, reg); + break; + case NODE_TRUE: + genop_1(s, OP_LOADTRUE, reg); + break; + case NODE_FALSE: + genop_1(s, OP_LOADFALSE, reg); + break; + default: + break; + } +} + static mrb_sym attrsym(codegen_scope *s, mrb_sym a) { - const char *name; mrb_int len; - char *name2; - - name = mrb_sym_name_len(s->mrb, a, &len); - name2 = (char*)codegen_palloc(s, - (size_t)len - + 1 /* '=' */ - + 1 /* '\0' */ - ); + const char *name = mrb_sym_name_len(s->mrb, a, &len); + char *name2 = (char*)codegen_palloc(s, + (size_t)len + + 1 /* '=' */ + + 1 /* '\0' */ + ); mrb_assert_int_fit(mrb_int, len, size_t, SIZE_MAX); memcpy(name2, name, (size_t)len); name2[len] = '='; @@ -1565,8 +2768,11 @@ attrsym(codegen_scope *s, mrb_sym a) return mrb_intern(s->mrb, name2, len+1); } +/* Maximum number of arguments for a call that can be encoded directly in some opcodes (e.g. OP_SEND). */ #define CALL_MAXARGS 15 +/* Maximum number of elements in a literal array/hash handled by simpler opcodes before needing OP_ARYPUSH/OP_HASHADD. */ #define GEN_LIT_ARY_MAX 64 +/* Stack pointer threshold during value sequence generation; if cursp() exceeds this, intermediate arrays might be formed. */ #define GEN_VAL_STACK_MAX 99 static int @@ -1589,7 +2795,45 @@ gen_values(codegen_scope *s, node *t, int val, int limit) } while (t) { - int is_splat = nint(t->car->car) == NODE_SPLAT; + int is_splat = is_splat_node(t->car); + + /* Optimization: skip or inline literal splat arrays + * - Empty splat (`*[]`/`*zarray`): contributes nothing; skip. + * - Non-empty literal array with no inner splat (`*[a,b]`): inline + * as normal positional args to avoid building/concatenating arrays. + */ + if (is_splat) { + struct mrb_ast_splat_node *splat = splat_node(t->car); + node *sv = splat->value; + if (sv) { + enum node_type nt = node_type(sv); + if (nt == NODE_ARRAY) { + struct mrb_ast_array_node *an = array_node(sv); + if (an->elements == NULL) { + /* empty splat; contributes nothing */ + t = t->cdr; + continue; + } + else if (nosplat(an->elements)) { + /* Inline non-empty literal array elements as regular args */ + node *e = an->elements; + while (e) { + /* Honor evaluation order */ + codegen(s, e->car, val); + n++; + e = e->cdr; + } + t = t->cdr; + continue; + } + } + else if (nt == NODE_ZARRAY) { + /* explicit empty array literal */ + t = t->cdr; + continue; + } + } + } if (is_splat || cursp() >= slimit) { /* flush stack */ pop_n(n); @@ -1648,7 +2892,7 @@ gen_hash(codegen_scope *s, node *tree, int val, int limit) mrb_bool first = TRUE; while (tree) { - if (nint(tree->car->car->car) == NODE_KW_REST_ARGS) { + if (node_to_sym(tree->car->car) == MRB_OPSYM(pow)) { if (val && first) { genop_2(s, OP_HASH, cursp(), 0); push(); @@ -1712,292 +2956,143 @@ gen_hash(codegen_scope *s, node *tree, int val, int limit) return len; } + +static void +gen_colon_assign_common(codegen_scope *s, node *rhs, int sp, int val, int idx, int final_op) +{ + if (rhs) { + codegen(s, rhs, VAL); + pop(); + gen_move(s, sp, cursp(), 0); + } + pop(); pop(); + genop_2(s, final_op, cursp(), idx); + if (val) push(); +} + static void -gen_call(codegen_scope *s, node *tree, int val, int safe) +gen_colon2_assign(codegen_scope *s, node *varnode, node *rhs, int sp, int val) { - mrb_sym sym = nsym(tree->cdr->car); - int skip = 0, n = 0, nk = 0, noop = no_optimize(s), noself = 0, blk = 0, sp_save = cursp(); + struct mrb_ast_colon2_node *n = (struct mrb_ast_colon2_node*)varnode; + int idx; - if (!tree->car) { - noself = noop = 1; - push(); - } - else { - codegen(s, tree->car, VAL); /* receiver */ - } - if (safe) { - int recv = cursp()-1; - gen_move(s, cursp(), recv, 1); - skip = genjmp2_0(s, OP_JMPNIL, cursp(), val); + if (sp) { + gen_move(s, cursp(), sp, 0); } - tree = tree->cdr->cdr->car; - if (tree) { - if (tree->car) { /* positional arguments */ - n = gen_values(s, tree->car, VAL, 14); - if (n < 0) { /* variable length */ - noop = 1; /* not operator */ - n = 15; - push(); - } - } - if (tree->cdr->car) { /* keyword arguments */ - noop = 1; - nk = gen_hash(s, tree->cdr->car->cdr, VAL, 14); - if (nk < 0) nk = 15; - } + sp = cursp(); + push(); + codegen(s, n->base, VAL); + idx = sym_idx(s, n->name); + gen_colon_assign_common(s, rhs, sp, val, idx, OP_SETMCNST); +} + +static void +gen_colon3_assign(codegen_scope *s, node *varnode, node *rhs, int sp, int val) +{ + struct mrb_ast_colon3_node *n = (struct mrb_ast_colon3_node*)varnode; + int idx; + + if (sp) { + gen_move(s, cursp(), sp, 0); } - if (tree && tree->cdr && tree->cdr->cdr) { - codegen(s, tree->cdr->cdr, VAL); + sp = cursp(); + push(); + genop_1(s, OP_OCLASS, cursp()); + push(); + idx = sym_idx(s, n->name); + gen_colon_assign_common(s, rhs, sp, val, idx, OP_SETCONST); +} + +static void +gen_xvar_assignment(codegen_scope *s, node *tree, node *rhs, int sp, int val, uint8_t op) +{ + struct mrb_ast_var_node *var = (struct mrb_ast_var_node*)tree; + if (rhs) { + codegen(s, rhs, VAL); pop(); - noop = 1; - blk = 1; - } - push();pop(); - s->sp = sp_save; - if (!noop && sym == MRB_OPSYM_2(s->mrb, add) && n == 1) { - gen_addsub(s, OP_ADD, cursp()); - } - else if (!noop && sym == MRB_OPSYM_2(s->mrb, sub) && n == 1) { - gen_addsub(s, OP_SUB, cursp()); - } - else if (!noop && sym == MRB_OPSYM_2(s->mrb, mul) && n == 1) { - gen_muldiv(s, OP_MUL, cursp()); - } - else if (!noop && sym == MRB_OPSYM_2(s->mrb, div) && n == 1) { - gen_muldiv(s, OP_DIV, cursp()); - } - else if (!noop && sym == MRB_OPSYM_2(s->mrb, lt) && n == 1) { - genop_1(s, OP_LT, cursp()); - } - else if (!noop && sym == MRB_OPSYM_2(s->mrb, le) && n == 1) { - genop_1(s, OP_LE, cursp()); - } - else if (!noop && sym == MRB_OPSYM_2(s->mrb, gt) && n == 1) { - genop_1(s, OP_GT, cursp()); - } - else if (!noop && sym == MRB_OPSYM_2(s->mrb, ge) && n == 1) { - genop_1(s, OP_GE, cursp()); - } - else if (!noop && sym == MRB_OPSYM_2(s->mrb, eq) && n == 1) { - genop_1(s, OP_EQ, cursp()); - } - else if (!noop && sym == MRB_OPSYM_2(s->mrb, aset) && n == 2) { - genop_1(s, OP_SETIDX, cursp()); - } - else if (!noop && n == 0 && gen_uniop(s, sym, cursp())) { - /* constant folding succeeded */ - } - else if (!noop && n == 1 && gen_binop(s, sym, cursp())) { - /* constant folding succeeded */ - } - else if (noself){ - genop_3(s, blk ? OP_SSENDB : OP_SSEND, cursp(), new_sym(s, sym), n|(nk<<4)); - } - else { - genop_3(s, blk ? OP_SENDB : OP_SEND, cursp(), new_sym(s, sym), n|(nk<<4)); - } - if (safe) { - dispatch(s, skip); - } - if (val) { - push(); + sp = cursp(); } + gen_setxv(s, op, sp, var->symbol, val); +} + +static void +gen_xvar(codegen_scope *s, mrb_sym sym, int val, uint8_t op) +{ + if (!val) return; + int i = sym_idx(s, sym); + + genop_2(s, op, cursp(), i); + push(); } static void gen_assignment(codegen_scope *s, node *tree, node *rhs, int sp, int val) { int idx; - int type = nint(tree->car); - switch (type) { - case NODE_GVAR: - case NODE_ARG: - case NODE_LVAR: - case NODE_IVAR: - case NODE_CVAR: - case NODE_CONST: + /* Check if this is a variable-sized node first */ + enum node_type var_type = node_type(tree); + switch (var_type) { case NODE_NIL: - case NODE_MASGN: if (rhs) { codegen(s, rhs, VAL); pop(); sp = cursp(); } + /* NODE_NIL assignment is complete - just break (splat without assignment) */ break; - case NODE_COLON2: + gen_colon2_assign(s, tree, rhs, sp, val); + return; case NODE_COLON3: - case NODE_CALL: - case NODE_SCALL: - /* keep evaluation order */ - break; - - case NODE_NVAR: - /* never happens; should have already checked in the parser */ - codegen_error(s, "Can't assign to numbered parameter"); - break; - - default: - codegen_error(s, "unknown lhs"); - break; - } - - tree = tree->cdr; - switch (type) { + gen_colon3_assign(s, tree, rhs, sp, val); + return; case NODE_GVAR: - gen_setxv(s, OP_SETGV, sp, nsym(tree), val); - break; - case NODE_ARG: - case NODE_LVAR: - idx = lv_idx(s, nsym(tree)); - if (idx > 0) { - if (idx != sp) { - gen_move(s, idx, sp, val); - } - break; - } - else { /* upvar */ - gen_setupvar(s, sp, nsym(tree)); - } + gen_xvar_assignment(s, tree, rhs, sp, val, OP_SETGV); break; case NODE_IVAR: - gen_setxv(s, OP_SETIV, sp, nsym(tree), val); + gen_xvar_assignment(s, tree, rhs, sp, val, OP_SETIV); break; case NODE_CVAR: - gen_setxv(s, OP_SETCV, sp, nsym(tree), val); + gen_xvar_assignment(s, tree, rhs, sp, val, OP_SETCV); break; case NODE_CONST: - gen_setxv(s, OP_SETCONST, sp, nsym(tree), val); - break; - case NODE_COLON2: - case NODE_COLON3: - if (sp) { - gen_move(s, cursp(), sp, 0); - } - sp = cursp(); - push(); - if (type == NODE_COLON2) { - codegen(s, tree->car, VAL); - idx = new_sym(s, nsym(tree->cdr)); - } - else { /* NODE_COLON3 */ - genop_1(s, OP_OCLASS, cursp()); - push(); - idx = new_sym(s, nsym(tree)); - } - if (rhs) { - codegen(s, rhs, VAL); pop(); - gen_move(s, sp, cursp(), 0); - } - pop_n(2); - genop_2(s, OP_SETMCNST, sp, idx); + gen_xvar_assignment(s, tree, rhs, sp, val, OP_SETCONST); break; - - case NODE_CALL: - case NODE_SCALL: + case NODE_MASGN: + case NODE_MARG: + /* Multiple assignment: expressions (MASGN) and parameter destructuring (MARG) */ + codegen_masgn(s, tree, rhs, sp, val); + return; + case NODE_LVAR: { - int noself = 0, safe = (type == NODE_SCALL), skip = 0, top, call, n = 0; - mrb_sym mid = nsym(tree->cdr->car); - - top = cursp(); - if (val || sp == cursp()) { - push(); /* room for retval */ - } - call = cursp(); - if (!tree->car) { - noself = 1; - push(); + mrb_sym sym = var_node(tree)->symbol; + if (rhs) { + codegen(s, rhs, VAL); + pop(); + sp = cursp(); } - else { - codegen(s, tree->car, VAL); /* receiver */ - } - if (safe) { - int recv = cursp()-1; - gen_move(s, cursp(), recv, 1); - skip = genjmp2_0(s, OP_JMPNIL, cursp(), val); - } - tree = tree->cdr->cdr->car; - if (tree) { - if (tree->car) { /* positional arguments */ - n = gen_values(s, tree->car, VAL, (tree->cdr->car)?13:14); - if (n < 0) { /* variable length */ - n = 15; - push(); - } - } - if (tree->cdr->car) { /* keyword arguments */ - if (n == 13 || n == 14) { - pop_n(n); - genop_2(s, OP_ARRAY, cursp(), n); - push(); - n = 15; - } - gen_hash(s, tree->cdr->car->cdr, VAL, 0); - if (n < 14) { - n++; - } - else { - pop_n(2); - genop_2(s, OP_ARYPUSH, cursp(), 1); - } - push(); - } - } - if (rhs) { - codegen(s, rhs, VAL); - pop(); - } - else { - gen_move(s, cursp(), sp, 0); - } - if (val) { - gen_move(s, top, cursp(), 1); - } - if (n < 15) { - n++; - if (n == 15) { - pop_n(14); - genop_2(s, OP_ARRAY, cursp(), 15); + idx = lv_idx(s, sym); + if (idx > 0) { + if (idx != sp) { + gen_move(s, idx, sp, val); } + break; } else { - pop(); - genop_2(s, OP_ARYPUSH, cursp(), 1); - } - push(); pop(); - s->sp = call; - if (mid == MRB_OPSYM_2(s->mrb, aref) && n == 2) { - push_n(4); pop_n(4); /* self + idx + value + (invisible block for OP_SEND) */ - genop_1(s, OP_SETIDX, cursp()); - } - else { - int st = 2 /* self + block */ + - (((n >> 0) & 0x0f) < 15 ? ((n >> 0) & 0x0f) : 1) + - (((n >> 4) & 0x0f) < 15 ? ((n >> 4) & 0x0f) * 2 : 1); - push_n(st); pop_n(st); - genop_3(s, noself ? OP_SSEND : OP_SEND, cursp(), new_sym(s, attrsym(s, mid)), n); - } - if (safe) { - dispatch(s, skip); + gen_setupvar(s, sp, sym); } - s->sp = top; } break; - - case NODE_MASGN: - gen_massignment(s, tree->car, sp, val); - break; - - /* splat without assignment */ - case NODE_NIL: - break; - + case NODE_CALL: + codegen_call_assign(s, tree, rhs, sp, val); + return; default: - codegen_error(s, "unknown lhs"); + codegen_error(s, "unsupported variable-sized lhs"); break; } if (val) push(); + return; } static void @@ -2073,71 +3168,136 @@ static void gen_literal_array(codegen_scope *s, node *tree, mrb_bool sym, int val) { if (val) { - int i = 0, j = 0, gen = 0; - - while (tree) { - switch (nint(tree->car->car)) { - case NODE_STR: - if ((tree->cdr == NULL) && (nint(tree->car->cdr->cdr) == 0)) - break; - /* fall through */ - case NODE_BEGIN: - codegen(s, tree->car, VAL); - ++j; - break; + int array_size = 0; + int first = 1; + int slimit = GEN_LIT_ARY_MAX; + node *current = tree; + + if (cursp() >= slimit) slimit = GEN_VAL_STACK_MAX; + + /* Process each segment separated by NODE_LITERAL_DELIM */ + while (current) { + /* Find the segment boundaries without allocating */ + node *segment_start = current; + node *segment_prev = NULL; + + /* Find end of segment (delimiter or end of list) */ + while (current && !IS_LITERAL_DELIM(current)) { + segment_prev = current; + current = current->cdr; + } + + /* Process the segment if it has content */ + if (segment_start != current) { + /* Check if this is an empty string segment (for %w[] case) */ + mrb_bool is_empty_segment = TRUE; + node *check = segment_start; + while (check != current) { + if (check->car) { + mrb_int len = node_to_int(check->car->car); + if (len > 0) { + is_empty_segment = FALSE; + break; + } + else if (len < 0) { + /* Expression node - not empty */ + is_empty_segment = FALSE; + break; + } + /* len == 0 means empty string, continue checking */ + } + check = check->cdr; + } + + /* Only process non-empty segments */ + if (!is_empty_segment) { + /* Flush accumulated elements when stack is full */ + if (cursp() >= slimit) { + if (array_size > 0) { + pop_n(array_size); + if (first) { + genop_2(s, OP_ARRAY, cursp(), array_size); + push(); + first = 0; + } + else { + pop(); + genop_2(s, OP_ARYPUSH, cursp(), array_size); + push(); + } + array_size = 0; + } + } + + /* Temporarily terminate the segment by saving and clearing the cdr */ + node *saved_cdr = NULL; + if (segment_prev) { + saved_cdr = segment_prev->cdr; + segment_prev->cdr = NULL; + } + + /* Use gen_string for this segment */ + gen_string(s, segment_start, VAL); + + /* Restore the original cdr */ + if (segment_prev) { + segment_prev->cdr = saved_cdr; + } - case NODE_LITERAL_DELIM: - if (j > 0) { - j = 0; - ++i; - if (sym) + /* Apply symbol conversion if needed */ + if (sym) { gen_intern(s); + } + + array_size++; } - break; - } - while (j >= 2) { - pop(); pop(); - genop_1(s, OP_STRCAT, cursp()); - push(); - j--; } - if (i > GEN_LIT_ARY_MAX) { - pop_n(i); - if (gen) { - pop(); - genop_2(s, OP_ARYPUSH, cursp(), i); - } - else { - genop_2(s, OP_ARRAY, cursp(), i); - gen = 1; - } - push(); - i = 0; + + /* Skip the delimiter if present */ + if (current && IS_LITERAL_DELIM(current)) { + current = current->cdr; } - tree = tree->cdr; } - if (j > 0) { - ++i; - if (sym) - gen_intern(s); + + /* Handle remaining elements */ + if (!first) { + if (array_size > 0) { + pop_n(array_size + 1); + genop_2(s, OP_ARYPUSH, cursp(), array_size); + } } - pop_n(i); - if (gen) { - pop(); - genop_2(s, OP_ARYPUSH, cursp(), i); + else if (array_size > 0) { + pop_n(array_size); + genop_2(s, OP_ARRAY, cursp(), array_size); } else { - genop_2(s, OP_ARRAY, cursp(), i); + genop_2(s, OP_ARRAY, cursp(), 0); } push(); } else { - while (tree) { - switch (nint(tree->car->car)) { - case NODE_BEGIN: case NODE_BLOCK: - codegen(s, tree->car, NOVAL); + /* NOVAL case: only evaluate expressions for side effects */ + node *current = tree; + + while (current) { + /* Process nodes until delimiter */ + while (current && !IS_LITERAL_DELIM(current)) { + node *elem = current->car; + if (elem) { + mrb_int len = node_to_int(elem->car); + if (len < 0) { + /* Expression: (-1 . node) - evaluate for side effects */ + codegen(s, (node*)elem->cdr, NOVAL); + } + /* String literals: (len . str) - no side effects, skip */ + } + current = current->cdr; + } + + /* Skip delimiter */ + if (current && IS_LITERAL_DELIM(current)) { + current = current->cdr; } - tree = tree->cdr; } } } @@ -2150,59 +3310,13 @@ raise_error(codegen_scope *s, const char *msg) genop_1(s, OP_ERR, idx); } -static mrb_int -readint(codegen_scope *s, const char *p, int base, mrb_bool neg, mrb_bool *overflow) -{ - const char *e = p + strlen(p); - mrb_int result = 0; - - mrb_assert(base >= 2 && base <= 16); - if (*p == '+') p++; - while (p < e) { - int n; - char c = *p; - switch (c) { - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - n = c - '0'; break; - case '8': case '9': - n = c - '0'; break; - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - n = c - 'a' + 10; break; - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - n = c - 'A' + 10; break; - default: - codegen_error(s, "malformed readint input"); - *overflow = TRUE; - /* not reached */ - return result; - } - if (mrb_int_mul_overflow(result, base, &result)) { - overflow: - *overflow = TRUE; - return 0; - } - mrb_uint tmp = ((mrb_uint)result)+n; - if (neg && tmp == (mrb_uint)MRB_INT_MAX+1) { - *overflow = FALSE; - return MRB_INT_MIN; - } - if (tmp > MRB_INT_MAX) goto overflow; - result = (mrb_int)tmp; - p++; - } - *overflow = FALSE; - if (neg) return -result; - return result; -} - static void gen_retval(codegen_scope *s, node *tree) { - if (nint(tree->car) == NODE_SPLAT) { + if (is_splat_node(tree)) { codegen(s, tree, VAL); pop(); - genop_1(s, OP_ARYDUP, cursp()); + genop_1(s, OP_ARYSPLAT, cursp()); } else { codegen(s, tree, VAL); @@ -2213,11 +3327,13 @@ gen_retval(codegen_scope *s, node *tree) static mrb_bool true_always(node *tree) { - switch (nint(tree->car)) { - case NODE_TRUE: + /* Check if this is a variable-sized node first */ + enum node_type var_type = node_type(tree); + switch (var_type) { case NODE_INT: - case NODE_STR: - case NODE_SYM: + case NODE_BIGINT: + case NODE_FLOAT: + case NODE_TRUE: return TRUE; default: return FALSE; @@ -2227,7 +3343,8 @@ true_always(node *tree) static mrb_bool false_always(node *tree) { - switch (nint(tree->car)) { + /* Check variable-sized nodes that are always false */ + switch (node_type(tree)) { case NODE_FALSE: case NODE_NIL: return TRUE; @@ -2254,1545 +3371,3579 @@ gen_blkmove(codegen_scope *s, uint16_t ainfo, int lv) } static void -codegen(codegen_scope *s, node *tree, int val) +gen_lvar(codegen_scope *s, mrb_sym sym, int val) { - int nt; - int rlev = s->rlev; + if (!val) return; + int idx = lv_idx(s, sym); - if (!tree) { - if (val) { - genop_1(s, OP_LOADNIL, cursp()); - push(); - } - return; + if (idx > 0) { + gen_move(s, cursp(), idx, val); } - - s->rlev++; - if (s->rlev > MRB_CODEGEN_LEVEL_MAX) { - codegen_error(s, "too complex expression"); + else { + gen_getupvar(s, cursp(), sym); } - if (s->irep && s->filename_index != tree->filename_index) { - mrb_sym fname = mrb_parser_get_filename(s->parser, s->filename_index); - const char *filename = mrb_sym_name_len(s->mrb, fname, NULL); + push(); +} - mrb_debug_info_append_file(s->mrb, s->irep->debug_info, - filename, s->lines, s->debug_start_pos, s->pc); - s->debug_start_pos = s->pc; - s->filename_index = tree->filename_index; - s->filename_sym = mrb_parser_get_filename(s->parser, tree->filename_index); - } +static void +codegen_hash(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_hash_node *hash = hash_node(varnode); + node *pairs = hash->pairs; + int regular_pairs = 0; + mrb_bool update = FALSE; + mrb_bool first = TRUE; - nt = nint(tree->car); - s->lineno = tree->lineno; - tree = tree->cdr; - switch (nt) { - case NODE_BEGIN: - if (val && !tree) { - genop_1(s, OP_LOADNIL, cursp()); - push(); - } - while (tree) { - codegen(s, tree->car, tree->cdr ? NOVAL : val); - tree = tree->cdr; - } - break; + if (!val) return; - case NODE_RESCUE: - { - int noexc; - uint32_t exend, pos1, pos2, tmp; - struct loopinfo *lp; - int catch_entry, begin, end; + if (!pairs) { + genop_2(s, OP_HASH, cursp(), 0); + push(); + return; + } - if (tree->car == NULL) goto exit; - lp = loop_push(s, LOOP_BEGIN); - lp->pc0 = new_label(s); - catch_entry = catch_handler_new(s); - begin = s->pc; - codegen(s, tree->car, VAL); - pop(); - lp->type = LOOP_RESCUE; - end = s->pc; - noexc = genjmp_0(s, OP_JMP); - catch_handler_set(s, catch_entry, MRB_CATCH_RESCUE, begin, end, s->pc); - tree = tree->cdr; - exend = JMPLINK_START; - pos1 = JMPLINK_START; - if (tree->car) { - node *n2 = tree->car; - int exc = cursp(); - - genop_1(s, OP_EXCEPT, exc); - push(); - while (n2) { - node *n3 = n2->car; - node *n4 = n3->car; - - dispatch(s, pos1); - pos2 = JMPLINK_START; - do { - if (n4 && n4->car && nint(n4->car->car) == NODE_SPLAT) { - codegen(s, n4->car, VAL); - gen_move(s, cursp(), exc, 0); - push_n(2); pop_n(2); /* space for one arg and a block */ - pop(); - genop_3(s, OP_SEND, cursp(), new_sym(s, MRB_SYM_2(s->mrb, __case_eqq)), 1); - } - else { - if (n4) { - codegen(s, n4->car, VAL); - } - else { - genop_2(s, OP_GETCONST, cursp(), new_sym(s, MRB_SYM_2(s->mrb, StandardError))); - push(); - } - pop(); - genop_2(s, OP_RESCUE, exc, cursp()); - } - tmp = genjmp2(s, OP_JMPIF, cursp(), pos2, val); - pos2 = tmp; - if (n4) { - n4 = n4->cdr; - } - } while (n4); - pos1 = genjmp_0(s, OP_JMP); - dispatch_linked(s, pos2); + /* Process each key-value pair using cons-list iteration, handling double-splat (**) cases */ + node *current = pairs; + while (current) { + /* Each current->car is a cons (key . value) */ + node *pair = current->car; + struct mrb_ast_node *key = pair->car; + struct mrb_ast_node *value = pair->cdr; - pop(); - if (n3->cdr->car) { - gen_assignment(s, n3->cdr->car, NULL, exc, NOVAL); - } - if (n3->cdr->cdr->car) { - codegen(s, n3->cdr->cdr->car, val); - if (val) pop(); - } - tmp = genjmp(s, OP_JMP, exend); - exend = tmp; - n2 = n2->cdr; - push(); + /* Check if this is a double-splat (**kwargs) */ + if (node_to_sym(key) == MRB_OPSYM(pow)) { + /* Flush any accumulated regular pairs first */ + if (val && first && regular_pairs == 0) { + /* First element is splat - create empty hash */ + genop_2(s, OP_HASH, cursp(), 0); + push(); + update = TRUE; + } + else if (val && regular_pairs > 0) { + /* Create/add hash from accumulated pairs */ + pop_n(regular_pairs * 2); + if (!update) { + genop_2(s, OP_HASH, cursp(), regular_pairs); } - if (pos1 != JMPLINK_START) { - dispatch(s, pos1); - genop_1(s, OP_RAISEIF, exc); + else { + pop(); + genop_2(s, OP_HASHADD, cursp(), regular_pairs); } + push(); } - pop(); - tree = tree->cdr; - dispatch(s, noexc); - if (tree->car) { - codegen(s, tree->car, val); - } - else if (val) { + + /* Generate the splat hash */ + codegen(s, value, val); + + /* Merge the splat hash */ + if (val && (regular_pairs > 0 || update)) { + pop(); pop(); + genop_1(s, OP_HASHCAT, cursp()); push(); } - dispatch_linked(s, exend); - loop_pop(s, NOVAL); + + update = TRUE; + regular_pairs = 0; } - break; + else { + /* Regular key-value pair */ + codegen(s, key, val); + codegen(s, value, val); + regular_pairs++; + } + first = FALSE; - case NODE_ENSURE: - if (!tree->cdr || !tree->cdr->cdr || - (nint(tree->cdr->cdr->car) == NODE_BEGIN && - tree->cdr->cdr->cdr)) { - int catch_entry, begin, end, target; - int idx; + current = current->cdr; + } - catch_entry = catch_handler_new(s); - begin = s->pc; - codegen(s, tree->car, val); - end = target = s->pc; - push(); - idx = cursp(); - genop_1(s, OP_EXCEPT, idx); + /* Handle any remaining regular pairs */ + if (val) { + if (!update && regular_pairs > 0) { + /* Simple case: no splats, just create hash */ + pop_n(regular_pairs * 2); + genop_2(s, OP_HASH, cursp(), regular_pairs); push(); - codegen(s, tree->cdr->cdr, NOVAL); - pop(); - genop_1(s, OP_RAISEIF, idx); - pop(); - catch_handler_set(s, catch_entry, MRB_CATCH_ENSURE, begin, end, target); } - else { /* empty ensure ignored */ - codegen(s, tree->car, val); + else if (update && regular_pairs > 0) { + /* Add remaining pairs to existing hash */ + pop_n(regular_pairs * 2 + 1); + genop_2(s, OP_HASHADD, cursp(), regular_pairs); + push(); } - break; + } +} - case NODE_LAMBDA: - if (val) { - int idx = lambda_body(s, tree, 1); - genop_2(s, OP_LAMBDA, cursp(), idx); - push(); - } - break; - case NODE_BLOCK: - if (val) { - int idx = lambda_body(s, tree, 1); +/* Common function to generate bytecode for cons list string representation + * Handles list of elements where each element is either: + * - (len . str) for string literals + * - (-1 . node) for expressions that need evaluation + */ +/* Common function to generate bytecode for cons list string representation + * Handles list of elements where each element is either: + * - (len . str) for string literals + * - (-1 . node) for expressions that need evaluation + */ +/* Common function to generate bytecode for cons list string representation + * Handles list of elements where each element is either: + * - (len . str) for string literals + * - (-1 . node) for expressions that need evaluation + */ +/* Common function to generate bytecode for cons list string representation + * Handles list of elements where each element is either: + * - (len . str) for string literals + * - (-1 . node) for expressions that need evaluation + */ +static void +gen_string(codegen_scope *s, node *list, int val) +{ + if (val) { + /* Handle as cons list of string parts with safety checks */ + node *n = list; + mrb_bool first = TRUE; - genop_2(s, OP_BLOCK, cursp(), idx); - push(); - } - break; + while (n) { + node *elem = n->car; + if (!elem) break; - case NODE_IF: - { - uint32_t pos1, pos2; - mrb_bool nil_p = FALSE; - node *elsepart = tree->cdr->cdr->car; - - if (!tree->car) { - codegen(s, elsepart, val); - goto exit; - } - if (true_always(tree->car)) { - codegen(s, tree->cdr->car, val); - goto exit; - } - if (false_always(tree->car)) { - codegen(s, elsepart, val); - goto exit; - } - if (nint(tree->car->car) == NODE_CALL) { - node *n = tree->car->cdr; - mrb_sym mid = nsym(n->cdr->car); - mrb_sym sym_nil_p = MRB_SYM_Q_2(s->mrb, nil); - if (mid == sym_nil_p && n->cdr->cdr->car == NULL) { - nil_p = TRUE; - codegen(s, n->car, VAL); - } - } - if (!nil_p) { - codegen(s, tree->car, VAL); - } - pop(); - if (val || tree->cdr->car) { - if (nil_p) { - pos2 = genjmp2_0(s, OP_JMPNIL, cursp(), val); - pos1 = genjmp_0(s, OP_JMP); - dispatch(s, pos2); - } - else { - pos1 = genjmp2_0(s, OP_JMPNOT, cursp(), val); - } - codegen(s, tree->cdr->car, val); - if (val) pop(); - if (elsepart || val) { - pos2 = genjmp_0(s, OP_JMP); - dispatch(s, pos1); - codegen(s, elsepart, val); - dispatch(s, pos2); - } - else { - dispatch(s, pos1); - } + mrb_int len = node_to_int(elem->car); + + if (len >= 0) { + /* String literal: (len . str) */ + const char *str = (char*)elem->cdr; + if (!str) {str = ""; len = 0;} + int off = new_lit_str(s, str, len); + genop_2(s, OP_STRING, cursp(), off); + push(); } - else { /* empty then-part */ - if (elsepart) { - if (nil_p) { - pos1 = genjmp2_0(s, OP_JMPNIL, cursp(), val); - } - else { - pos1 = genjmp2_0(s, OP_JMPIF, cursp(), val); - } - codegen(s, elsepart, val); - dispatch(s, pos1); - } - else if (val && !nil_p) { - genop_1(s, OP_LOADNIL, cursp()); - push(); - } + else { + /* Expression: (-1 . node) */ + codegen(s, (node*)elem->cdr, VAL); } - } - break; - - case NODE_AND: - { - uint32_t pos; - if (true_always(tree->car)) { - codegen(s, tree->cdr, val); - goto exit; + /* Concatenate with previous parts (except for first element) */ + if (!first) { + pop(); pop(); + genop_1(s, OP_STRCAT, cursp()); + push(); } - if (false_always(tree->car)) { - codegen(s, tree->car, val); - goto exit; + else { + first = FALSE; } - codegen(s, tree->car, VAL); - pop(); - pos = genjmp2_0(s, OP_JMPNOT, cursp(), val); - codegen(s, tree->cdr, val); - dispatch(s, pos); - } - break; - case NODE_OR: - { - uint32_t pos; + n = n->cdr; + } - if (true_always(tree->car)) { - codegen(s, tree->car, val); - goto exit; - } - if (false_always(tree->car)) { - codegen(s, tree->cdr, val); - goto exit; - } - codegen(s, tree->car, VAL); - pop(); - pos = genjmp2_0(s, OP_JMPIF, cursp(), val); - codegen(s, tree->cdr, val); - dispatch(s, pos); + /* Handle empty list case */ + if (first) { + gen_load_nil(s, 1); } - break; + } + else { + /* NOVAL case: only evaluate expressions for side effects */ + node *n = list; + while (n) { + node *elem = n->car; + if (!elem) break; + if (node_to_int(elem->car) < 0) { + /* Expression: (-1 . node) - evaluate for side effects */ + codegen(s, (node*)elem->cdr, NOVAL); + } + /* String literals: (len . str) - no side effects, skip */ + n = n->cdr; + } + } +} - case NODE_WHILE: - case NODE_UNTIL: - { - if (true_always(tree->car)) { - if (nt == NODE_UNTIL) { - if (val) { + +/* Handle variable-sized node types */ +static void +codegen_call(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_call_node *call = call_node(varnode); + mrb_sym sym = call->method_name; + int skip = 0, n = 0, nk = 0, noop = no_optimize(s), noself = 0, blk = 0, sp_save = cursp(); + enum mrb_insn opt_op = OP_NOP; + int safe = call->safe_call; + node *args = call->args; + + if (!noop) { + if (sym == MRB_OPSYM(add)) opt_op = OP_ADD; + else if (sym == MRB_OPSYM(sub)) opt_op = OP_SUB; + else if (sym == MRB_OPSYM(mul)) opt_op = OP_MUL; + else if (sym == MRB_OPSYM(div)) opt_op = OP_DIV; + else if (sym == MRB_OPSYM(lt)) opt_op = OP_LT; + else if (sym == MRB_OPSYM(le)) opt_op = OP_LE; + else if (sym == MRB_OPSYM(gt)) opt_op = OP_GT; + else if (sym == MRB_OPSYM(ge)) opt_op = OP_GE; + else if (sym == MRB_OPSYM(eq)) opt_op = OP_EQ; + else if (sym == MRB_OPSYM(aref)) opt_op = OP_GETIDX; + else if (sym == MRB_OPSYM(aset)) opt_op = OP_SETIDX; + } + + if (!call->receiver || (opt_op == OP_NOP && node_type(call->receiver) == NODE_SELF)) { + noself = 1; + push(); + } + else { + codegen(s, call->receiver, VAL); /* receiver */ + } + + if (safe) { + int recv = cursp()-1; + gen_move(s, cursp(), recv, 1); + skip = genjmp2_0(s, OP_JMPNIL, cursp(), val); + } + + /* Generate arguments - use gen_values to properly handle splat */ + if (args) { + struct mrb_ast_callargs *callargs = (struct mrb_ast_callargs*)args; + if (callargs->regular_args) { + n = gen_values(s, callargs->regular_args, VAL, 14); + if (n < 0) { /* variable length (contains splat) */ + n = 15; + push(); + noop = 1; + } + } + + /* Handle keyword arguments if present */ + if (callargs->keyword_args) { + nk = gen_hash(s, callargs->keyword_args, VAL, 14); + if (nk < 0) { + nk = 15; + } + noop = 1; + } + + /* Handle block if present */ + if (callargs->block_arg) { + codegen(s, callargs->block_arg, VAL); + pop(); + blk = 1; + noop = 1; + } + } + + push(); + s->sp = sp_save; + + /* Apply optimizations */ + if (opt_op == OP_ADD && n == 1) { + gen_addsub(s, OP_ADD, cursp()); + } + else if (opt_op == OP_SUB && n == 1) { + gen_addsub(s, OP_SUB, cursp()); + } + else if (opt_op == OP_MUL && n == 1) { + gen_muldiv(s, OP_MUL, cursp()); + } + else if (opt_op == OP_DIV && n == 1) { + gen_muldiv(s, OP_DIV, cursp()); + } + else if (opt_op == OP_LT && n == 1) { + genop_1(s, OP_LT, cursp()); + } + else if (opt_op == OP_LE && n == 1) { + genop_1(s, OP_LE, cursp()); + } + else if (opt_op == OP_GT && n == 1) { + genop_1(s, OP_GT, cursp()); + } + else if (opt_op == OP_GE && n == 1) { + genop_1(s, OP_GE, cursp()); + } + else if (opt_op == OP_EQ && n == 1) { + genop_1(s, OP_EQ, cursp()); + } + else if (opt_op == OP_SETIDX && n == 2) { + genop_1(s, OP_SETIDX, cursp()); + } + else if (!noop && n == 0 && gen_uniop(s, sym, cursp())) { + /* constant folding succeeded */ + } + else if (!noop && n == 1 && gen_binop(s, sym, cursp())) { + /* constant folding succeeded */ + } + else if (noself) { + if (!blk && n == 0 && nk == 0) { + genop_2(s, OP_SSEND0, cursp(), sym_idx(s, sym)); + } + else { + genop_3(s, blk ? OP_SSENDB : OP_SSEND, cursp(), sym_idx(s, sym), n|(nk<<4)); + } + } + else if (!blk && n == 0 && nk == 0) { + genop_2(s, OP_SEND0, cursp(), sym_idx(s, sym)); + } + else { + genop_3(s, blk ? OP_SENDB : OP_SEND, cursp(), sym_idx(s, sym), n|(nk<<4)); + } + + if (safe) { + dispatch(s, skip); + } + if (!val) return; + push(); +} + +static void +codegen_call_assign(codegen_scope *s, node *varnode, node *rhs, int sp, int val) +{ + enum node_type var_type = NODE_TYPE(varnode); + int noself = 0, safe = 0, skip = 0, top, callsp, n = 0, nk = 0; + mrb_sym mid = 0; + node *args = NULL; + node *receiver = NULL; + enum mrb_insn opt_op = OP_NOP; + int noop = no_optimize(s); + + /* Extract information based on node type */ + if (var_type == NODE_CALL) { + struct mrb_ast_call_node *call = call_node(varnode); + mid = call->method_name; + args = call->args; + receiver = call->receiver; + safe = call->safe_call; + } + else { + codegen_error(s, "unsupported call type in assignment"); + return; + } + + /* Convert method name to assignment form (e.g., [] -> []=) */ + mrb_sym assign_mid = attrsym(s, mid); + + /* Check for optimizable operations */ + if (!noop) { + if (mid == MRB_OPSYM(aref)) opt_op = OP_SETIDX; + } + + top = cursp(); + if (val || sp == cursp()) { + push(); /* room for retval */ + } + callsp = cursp(); + + /* Generate receiver */ + if (!receiver) { + noself = 1; + push(); + } + else { + codegen(s, receiver, VAL); /* receiver */ + } + + /* Handle safe navigation */ + if (safe) { + int recv = cursp()-1; + gen_move(s, cursp(), recv, 1); + skip = genjmp2_0(s, OP_JMPNIL, cursp(), val); + } + + /* Generate arguments from original call */ + if (args) { + struct mrb_ast_callargs *callargs = (struct mrb_ast_callargs*)args; + if (callargs->regular_args) { + node *regular_args = callargs->regular_args; + node *arg_iter = regular_args; + while (arg_iter) { + codegen(s, arg_iter->car, VAL); + n++; + arg_iter = arg_iter->cdr; + } + if (n > 13) { /* leave room for rhs */ + pop_n(n); + genop_2(s, OP_ARRAY, cursp(), n); + push(); + n = 15; + noop = 1; + } + } + + /* Handle keyword arguments if present */ + if (callargs->keyword_args) { + node *kwargs = callargs->keyword_args; + if (n == 13 || n == 14) { + pop_n(n); + genop_2(s, OP_ARRAY, cursp(), n); + push(); + n = 15; + } + gen_hash(s, kwargs->cdr, VAL, 0); + if (n < 14) { + n++; + } + else { + pop_n(2); + genop_2(s, OP_ARYPUSH, cursp(), 1); + } + push(); + noop = 1; + } + } + + /* Generate rhs (the assigned value) */ + if (rhs) { + codegen(s, rhs, VAL); + pop(); + } + else { + /* For compound assignments, move the computed value from sp to cursp() */ + gen_move(s, cursp(), sp, 0); + } + if (val) { + gen_move(s, top, cursp(), 1); + } + /* Account for the value being assigned (either from rhs or already on stack) */ + if (n < 14) { + n++; + } + else { + if (rhs) { + pop_n(2); + genop_2(s, OP_ARYPUSH, cursp(), 1); + push(); + } + } + + /* Generate the optimized instruction or method call */ + push(); push(); + s->sp = callsp; + + if (opt_op == OP_SETIDX && n == 2) { + /* Always preserve return value for SETIDX - assignments return the assigned value */ + genop_1(s, OP_SETIDX, cursp()); + } + else if (noself) { + genop_3(s, OP_SSEND, cursp(), sym_idx(s, assign_mid), n|(nk<<4)); + } + else { + genop_3(s, OP_SEND, cursp(), sym_idx(s, assign_mid), n|(nk<<4)); + } + + if (safe) { + dispatch(s, skip); + } + + /* Restore stack pointer like legacy code */ + s->sp = top; + + if (val) { + push(); + } +} + +static void +codegen_array(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_array_node *array = array_node(varnode); + node *elements = array->elements; + int regular_elements = 0; + int first = 1; + int slimit = GEN_LIT_ARY_MAX; + + if (!val) return; + + if (!elements) { + genop_2(s, OP_ARRAY, cursp(), 0); + push(); + return; + } + + if (cursp() >= slimit) slimit = GEN_VAL_STACK_MAX; + + /* Process each element using cons-list iteration, handling splats */ + node *current = elements; + while (current) { + struct mrb_ast_node *element = current->car; + int is_splat = is_splat_node(element); + + /* Skip splat of an empty literal array: [*[]] => [] without ARYCAT noise */ + if (is_splat) { + struct mrb_ast_splat_node *splat = splat_node(element); + node *sv = splat->value; + if (sv) { + enum node_type nt = node_type(sv); + if (nt == NODE_ARRAY) { + struct mrb_ast_array_node *an = array_node(sv); + if (an->elements == NULL) { + current = current->cdr; + continue; + } + } + else if (nt == NODE_ZARRAY) { + current = current->cdr; + continue; + } + } + } + + if (is_splat || cursp() >= slimit) { /* flush accumulated elements */ + if (regular_elements > 0) { + pop_n(regular_elements); + if (first) { + genop_2(s, OP_ARRAY, cursp(), regular_elements); + push(); + first = 0; + } + else { + pop(); + genop_2(s, OP_ARYPUSH, cursp(), regular_elements); + push(); + } + regular_elements = 0; + } + else if (first && is_splat) { + /* First element is splat - create empty array */ + genop_1(s, OP_LOADNIL, cursp()); + genop_2(s, OP_ARRAY, cursp(), 0); + push(); + first = 0; + } + } + + codegen(s, element, val); + + if (is_splat) { + /* Concatenate splat array */ + pop(); pop(); + genop_1(s, OP_ARYCAT, cursp()); + push(); + } + else { + regular_elements++; + } + + current = current->cdr; + } + + /* Handle any remaining regular elements */ + if (!first) { + /* Variable length - we have an array from splats */ + if (regular_elements > 0) { + pop_n(regular_elements + 1); + genop_2(s, OP_ARYPUSH, cursp(), regular_elements); + push(); + } + } + else { + /* Simple case: no splats, just create array */ + pop_n(regular_elements); + genop_2(s, OP_ARRAY, cursp(), regular_elements); + push(); + } +} + +/* Control flow and definition node codegen functions */ +static mrb_bool +callargs_empty(node *n) +{ + if (!n) return TRUE; + return (callargs_node(n)->regular_args == 0 && callargs_node(n)->keyword_args == 0 && callargs_node(n)->block_arg == 0); +} + +static void +codegen_if(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_if_node *if_n = if_node(varnode); + node *condition = if_n->condition; + node *then_body = if_n->then_body; + node *else_body = if_n->else_body; + uint32_t pos1, pos2; + mrb_bool nil_p = FALSE; + + if (!condition) { + codegen(s, else_body, val); + return; + } + if (true_always(condition)) { + codegen(s, then_body, val); + return; + } + if (false_always(condition)) { + codegen(s, else_body, val); + return; + } + + /* Check for nil? optimization */ + if (node_type(condition) == NODE_CALL) { + /* Variable-sized NODE_CALL */ + struct mrb_ast_call_node *call_n = (struct mrb_ast_call_node*)condition; + mrb_sym sym_nil_p = MRB_SYM_Q(nil); + if (call_n->method_name == sym_nil_p && callargs_empty(call_n->args)) { + nil_p = TRUE; + if (call_n->receiver) { + codegen(s, call_n->receiver, VAL); + } + else { + /* implicit receiver: bare `nil?` means `self.nil?` */ + gen_load_op1(s, OP_LOADSELF, VAL); + } + } + } + + if (!nil_p) { + /* Generate condition code */ + codegen(s, condition, VAL); + } + pop(); + + if (val || then_body) { + if (nil_p) { + pos2 = genjmp2_0(s, OP_JMPNIL, cursp(), val); + pos1 = genjmp_0(s, OP_JMP); + dispatch(s, pos2); + } + else { + pos1 = genjmp2_0(s, OP_JMPNOT, cursp(), val); + } + codegen(s, then_body, val); + if (val) pop(); + if (else_body || val) { + pos2 = genjmp_0(s, OP_JMP); + dispatch(s, pos1); + codegen(s, else_body, val); + dispatch(s, pos2); + } + else { + dispatch(s, pos1); + } + } + else { /* empty then-part */ + if (else_body) { + if (nil_p) { + pos1 = genjmp2_0(s, OP_JMPNIL, cursp(), val); + } + else { + pos1 = genjmp2_0(s, OP_JMPIF, cursp(), val); + } + codegen(s, else_body, val); + dispatch(s, pos1); + } + else if (val && !nil_p) { + gen_load_nil(s, 1); + } + } +} + +/* Shared codegen for while/until pre-tested loops. + is_until: FALSE for while (exit on false), TRUE for until (exit on true) */ +static void +codegen_loop(codegen_scope *s, node *varnode, int val, mrb_bool is_until) +{ + struct mrb_ast_while_node *loop_n = while_node(varnode); + node *condition = loop_n->condition; + node *body = loop_n->body; + + if (is_until ? false_always(condition) : true_always(condition)) { + struct loopinfo *lp = loop_push(s, LOOP_NORMAL); + if (!val) lp->reg = -1; + lp->pc0 = new_label(s); + lp->pc1 = new_label(s); + genop_0(s, OP_NOP); /* for redo */ + codegen(s, body, NOVAL); + genjmp(s, OP_JMP, lp->pc0); + loop_pop(s, val); + return; + } + if (is_until ? true_always(condition) : false_always(condition)) { + if (val) { + gen_load_nil(s, 1); + } + return; + } + + struct loopinfo *lp = loop_push(s, LOOP_NORMAL); + uint32_t pos; + + if (!val) lp->reg = -1; + lp->pc0 = new_label(s); + codegen(s, condition, VAL); + pop(); + pos = genjmp2_0(s, is_until ? OP_JMPIF : OP_JMPNOT, cursp(), NOVAL); + lp->pc1 = new_label(s); + genop_0(s, OP_NOP); /* for redo */ + codegen(s, body, NOVAL); + genjmp(s, OP_JMP, lp->pc0); + dispatch(s, pos); + loop_pop(s, val); +} + +/* Shared codegen for while/until post-tested (modifier) loops. + is_until: FALSE for while (exit on false), TRUE for until (exit on true) */ +static void +codegen_loop_mod(codegen_scope *s, node *varnode, int val, mrb_bool is_until) +{ + struct mrb_ast_while_node *loop_n = while_node(varnode); + node *condition = loop_n->condition; + node *body = loop_n->body; + + if (is_until ? true_always(condition) : false_always(condition)) { + /* execute body once then exit */ + codegen(s, body, val); + if (val) push(); + return; + } + if (is_until ? false_always(condition) : true_always(condition)) { + /* infinite loop after first execution */ + struct loopinfo *lp = loop_push(s, LOOP_NORMAL); + if (!val) lp->reg = -1; + + uint32_t pos0 = genjmp_0(s, OP_JMP); + lp->pc0 = new_label(s); + lp->pc1 = new_label(s); + genop_0(s, OP_NOP); /* for redo */ + dispatch(s, pos0); + codegen(s, body, NOVAL); + genjmp(s, OP_JMP, lp->pc0); + loop_pop(s, val); + return; + } + + struct loopinfo *lp = loop_push(s, LOOP_NORMAL); + if (!val) lp->reg = -1; + + uint32_t pos0 = genjmp_0(s, OP_JMP); + lp->pc0 = new_label(s); + codegen(s, condition, VAL); + pop(); + uint32_t pos = genjmp2_0(s, is_until ? OP_JMPIF : OP_JMPNOT, cursp(), NOVAL); + lp->pc1 = new_label(s); + genop_0(s, OP_NOP); /* for redo */ + dispatch(s, pos0); + codegen(s, body, NOVAL); + genjmp(s, OP_JMP, lp->pc0); + dispatch(s, pos); + loop_pop(s, val); +} + +static void +codegen_for(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_for_node *for_n = for_node(varnode); + node *var = for_n->var; + node *iterable = for_n->iterable; + node *body = for_n->body; + + codegen_scope *prev = s; + int idx; + struct loopinfo *lp; + + /* generate receiver */ + codegen(s, iterable, VAL); + /* generate loop-block */ + s = scope_new(s->mrb, s, NULL); + + push(); /* push for a block parameter */ + + /* generate loop variable */ + genop_W(s, OP_ENTER, 0x40000); + if (var->car && !var->car->cdr && !var->cdr) { + gen_assignment(s, var->car->car, NULL, 1, NOVAL); + } + else { + gen_massignment(s, var, 1, VAL); + } + /* construct loop */ + lp = loop_push(s, LOOP_FOR); + lp->pc1 = new_label(s); + genop_0(s, OP_NOP); /* for redo */ + + /* loop body */ + codegen(s, body, VAL); + pop(); + gen_return(s, OP_RETURN, cursp()); + loop_pop(s, NOVAL); + scope_finish(s); + s = prev; + genop_2(s, OP_BLOCK, cursp(), s->irep->rlen-1); + push();pop(); /* space for a block */ + pop(); + idx = sym_idx(s, MRB_SYM(each)); + genop_3(s, OP_SENDB, cursp(), idx, 0); + if (val) push(); +} + +static void +codegen_case(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_case_node *case_n = case_node(varnode); + node *value = case_n->value; + node *body = case_n->body; + + int head = 0; + uint32_t case_end_jumps, tmp; + uint32_t next_when_pos = JMPLINK_START; + node *n; + + case_end_jumps = JMPLINK_START; + + /* Handle case value exactly like original */ + if (value) { + head = cursp(); + codegen(s, value, VAL); + } + + /* Iterate through when clauses list with JMPNOT optimization */ + node *current_when = body; + while (current_when) { + node *when_clause = current_when->car; + + /* Dispatch previous when's "next" jump to this location */ + if (next_when_pos != JMPLINK_START) { + dispatch_linked(s, next_when_pos); + next_when_pos = JMPLINK_START; + } + + /* when_clause is (condition . body) cons node */ + node *args = when_clause->car; /* when conditions */ + node *when_body = when_clause->cdr; /* when body */ + + /* Process when conditions with JMPNOT optimization */ + n = args; + uint32_t condition_success_pos = JMPLINK_START; + + while (n) { + codegen(s, n->car, VAL); + if (head) { + gen_move(s, cursp(), head, 0); + push(); push(); pop(); pop(); pop(); + if (is_splat_node(n->car)) { + genop_3(s, OP_SEND, cursp(), sym_idx(s, MRB_SYM(__case_eqq)), 1); + } + else { + genop_3(s, OP_SEND, cursp(), sym_idx(s, MRB_OPSYM(eqq)), 1); + } + } + else { + pop(); + } + + if (n->cdr) { + /* More conditions in this when - use JMPIF to success handler */ + tmp = genjmp2(s, OP_JMPIF, cursp(), condition_success_pos, !head); + condition_success_pos = tmp; + } + else { + /* Last condition - use JMPNOT to next when clause */ + tmp = genjmp2(s, OP_JMPNOT, cursp(), next_when_pos, !head); + next_when_pos = tmp; + } + n = n->cdr; + } + + /* Dispatch multiple condition success jumps to body */ + if (condition_success_pos != JMPLINK_START) { + dispatch_linked(s, condition_success_pos); + } + + /* Generate when body */ + codegen(s, when_body, val); + if (val) pop(); + + /* Check if this is the last when clause before else, or if there's no else clause */ + node *next_node = current_when->cdr; + + tmp = genjmp(s, OP_JMP, case_end_jumps); + case_end_jumps = tmp; + + current_when = next_node; + } + + /* Handle case where no else clause was found */ + if (next_when_pos != JMPLINK_START) { + dispatch_linked(s, next_when_pos); + /* No else clause, generate LOADNIL for VAL case */ + if (val) { + genop_1(s, OP_LOADNIL, cursp()); + } + } + + /* Apply stack management strategy for cases without else clause */ + if (val) { + /* Dispatch remaining case_end_jumps */ + if (case_end_jumps != JMPLINK_START) { + dispatch_linked(s, case_end_jumps); + } + if (head) { + /* Move result to original case value position */ + gen_move(s, head, cursp(), 0); + pop(); + } + /* Always push to maintain stack alignment */ + push(); + } + else { + /* NOVAL case */ + if (case_end_jumps != JMPLINK_START) { + dispatch_linked(s, case_end_jumps); + } + if (head) { + pop(); + } + } +} + +/* Forward declaration for pattern matching code generation + * known_array_len: -1 if unknown, >= 0 if target is known to be an array of that length + */ +static void codegen_pattern(codegen_scope *s, node *pattern, int target, uint32_t *fail_pos, int known_array_len); + +/* Return the static element count of an array literal AST node, or -1 if any + * element is a splat (whose runtime length is unknown). + */ +static int +array_literal_known_len(node *value) +{ + if (node_type(value) != NODE_ARRAY) return -1; + struct mrb_ast_array_node *arr = array_node(value); + int len = 0; + for (node *elem = arr->elements; elem; elem = elem->cdr) { + if (is_splat_node(elem->car)) return -1; + len++; + } + return len; +} + +/* Pattern matching case/in expression */ +static void +codegen_case_match(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_case_match_node *case_match_n = case_match_node(varnode); + node *value = case_match_n->value; + node *in_clauses = case_match_n->in_clauses; + + int head = cursp(); + uint32_t case_end_jumps = JMPLINK_START; + uint32_t tmp; + + /* Check if value is an array literal - allows optimizations in pattern matching */ + int known_array_len = array_literal_known_len(value); + + /* Generate code for the case value */ + codegen(s, value, VAL); + + /* Iterate through in clauses */ + node *current_in = in_clauses; + while (current_in) { + struct mrb_ast_in_node *in_n = in_node(current_in->car); + node *pattern = in_n->pattern; + node *guard = in_n->guard; + mrb_bool guard_is_unless = in_n->guard_is_unless; + node *body = in_n->body; + + uint32_t fail_pos = JMPLINK_START; + + if (pattern) { + /* Generate pattern matching code */ + codegen_pattern(s, pattern, head, &fail_pos, known_array_len); + } + + /* Generate guard clause if present */ + if (guard) { + codegen(s, guard, VAL); + pop(); /* pop before jump - cursp() now points to guard result */ + if (guard_is_unless) { + /* unless guard: fail if guard is true */ + tmp = genjmp2(s, OP_JMPIF, cursp(), fail_pos, 0); + } + else { + /* if guard: fail if guard is false */ + tmp = genjmp2(s, OP_JMPNOT, cursp(), fail_pos, 0); + } + fail_pos = tmp; + } + + /* Generate in-clause body */ + codegen(s, body, val); + if (val) pop(); + + /* Jump to end of case/in */ + tmp = genjmp(s, OP_JMP, case_end_jumps); + case_end_jumps = tmp; + + /* Dispatch fail jumps to next in-clause */ + if (fail_pos != JMPLINK_START) { + dispatch_linked(s, fail_pos); + } + + current_in = current_in->cdr; + } + + /* No pattern matched - raise NoMatchingPatternError */ + genop_1(s, OP_LOADFALSE, cursp()); + genop_1(s, OP_MATCHERR, cursp()); + + /* Dispatch all end jumps */ + if (case_end_jumps != JMPLINK_START) { + dispatch_linked(s, case_end_jumps); + } + + if (val) { + /* Move result to original case value position */ + gen_move(s, head, cursp(), 0); + pop(); + push(); + } + else { + pop(); /* pop the case value */ + } +} + +/* Generate pattern matching code for a single pattern. + * target: stack position of the value being matched + * fail_pos: linked list of jump positions for pattern match failure + * known_array_len: -1 if unknown, >= 0 if target is known to be an array of that length + */ +/* generate code to load a hash pattern key onto the stack */ +static void +gen_pat_key(codegen_scope *s, node *key) +{ + if (node_type(key) == NODE_SYM) { + genop_2(s, OP_LOADSYM, cursp(), sym_idx(s, sym_node(key)->symbol)); + } + else { + codegen(s, key, VAL); + } +} + +/* generate OP_ARRAY of hash pattern keys on the stack */ +static void +gen_pat_keys_ary(codegen_scope *s, node *pairs, int num_keys) +{ + int i = 0; + node *pair; + for (pair = pairs; pair; pair = pair->cdr, i++) { + gen_pat_key(s, pair->car->car); + push(); + } + genop_2(s, OP_ARRAY, cursp() - num_keys, num_keys); + for (i = 1; i < num_keys; i++) pop(); +} + +static void +codegen_pattern(codegen_scope *s, node *pattern, int target, uint32_t *fail_pos, int known_array_len) +{ + uint32_t tmp; + + switch (node_type(pattern)) { + case NODE_PAT_VALUE: + { + struct mrb_ast_pat_value_node *pat_val = pat_value_node(pattern); + /* Generate: pattern_value === target */ + codegen(s, pat_val->value, VAL); + gen_move(s, cursp(), target, 0); + push(); push(); pop(); pop(); pop(); + genop_3(s, OP_SEND, cursp(), sym_idx(s, MRB_OPSYM(eqq)), 1); + /* Jump to fail if not matched */ + tmp = genjmp2(s, OP_JMPNOT, cursp(), *fail_pos, 1); + *fail_pos = tmp; + } + break; + + case NODE_PAT_VAR: + { + struct mrb_ast_pat_var_node *pat_var = pat_var_node(pattern); + if (pat_var->name) { + /* Bind the matched value to the variable */ + int idx = lv_idx(s, pat_var->name); + if (idx > 0) { + gen_move(s, idx, target, 1); /* nopeep=1 to prevent optimization */ + } + } + /* Variable pattern always matches (wildcard if name is 0) */ + } + break; + + case NODE_PAT_ALT: + { + struct mrb_ast_pat_alt_node *pat_alt = pat_alt_node(pattern); + uint32_t left_fail = JMPLINK_START; + uint32_t success_pos = JMPLINK_START; + + /* Try left pattern */ + codegen_pattern(s, pat_alt->left, target, &left_fail, known_array_len); + + /* Optimize JMPNOT+JMP to JMPIF when: + * 1. Left pattern is not another NODE_PAT_ALT (avoid recursion issues) + * 2. Left pattern generated at least one JMPNOT + * 3. The last JMPNOT is immediately before current position + * 4. The instruction is actually OP_JMPNOT (not OP_JMP which has + * different format S vs BS - converting OP_JMP would corrupt bytecode) + * In this case, convert JMPNOT to JMPIF and skip generating JMP */ + if (node_type(pat_alt->left) != NODE_PAT_ALT && + left_fail != JMPLINK_START && left_fail + 2 == s->pc && + s->iseq[left_fail - 2] == OP_JMPNOT) { + /* Extract the previous link from the JMPNOT chain. + * The chain uses relative offsets where the end is marked by + * an offset that points to address 0 (i.e., (pos+2)+offset == 0) */ + int16_t prev_offset = (int16_t)PEEK_S(s->iseq + left_fail); + int32_t next_addr = (int32_t)(left_fail + 2) + prev_offset; + uint32_t prev_link = (next_addr == 0) ? JMPLINK_START : (uint32_t)next_addr; + /* Convert JMPNOT to JMPIF */ + s->iseq[left_fail - 2] = OP_JMPIF; + /* Clear offset to mark end of success chain */ + emit_S(s, left_fail, 0); + success_pos = left_fail; + /* Continue with remaining fail chain */ + left_fail = prev_link; + } + else { + /* Left succeeded - jump to success */ + tmp = genjmp(s, OP_JMP, success_pos); + success_pos = tmp; + } + + /* Left failed - try right pattern */ + if (left_fail != JMPLINK_START) { + dispatch_linked(s, left_fail); + } + codegen_pattern(s, pat_alt->right, target, fail_pos, known_array_len); + + /* Dispatch success jumps */ + if (success_pos != JMPLINK_START) { + dispatch_linked(s, success_pos); + } + } + break; + + case NODE_PAT_AS: + { + struct mrb_ast_pat_as_node *pat_as = pat_as_node(pattern); + /* First match the pattern */ + codegen_pattern(s, pat_as->pattern, target, fail_pos, known_array_len); + /* Then bind the value to the variable */ + int idx = lv_idx(s, pat_as->name); + if (idx > 0) { + gen_move(s, idx, target, 0); + } + } + break; + + case NODE_PAT_PIN: + { + struct mrb_ast_pat_pin_node *pat_pin = pat_pin_node(pattern); + /* Get the current value of the pinned variable */ + int idx = lv_idx(s, pat_pin->name); + if (idx > 0) { + /* Compare: pinned_value === target */ + gen_move(s, cursp(), idx, 0); /* Load pinned variable */ + push(); + gen_move(s, cursp(), target, 0); /* Load target */ + push(); push(); pop(); pop(); pop(); + genop_3(s, OP_SEND, cursp(), sym_idx(s, MRB_OPSYM(eqq)), 1); + /* Jump to fail if not matched */ + tmp = genjmp2(s, OP_JMPNOT, cursp(), *fail_pos, 1); + *fail_pos = tmp; + } + else { + /* Variable not found - raise compile error like CRuby */ + codegen_error(s, "no such local variable for pin operator"); + } + } + break; + + case NODE_PAT_ARRAY: + { + struct mrb_ast_pat_array_node *pat_arr = pat_array_node(pattern); + int pre_len = 0, post_len = 0; + int arr_reg; + node *elem; + int i; + + /* Count pre and post elements */ + for (elem = pat_arr->pre; elem; elem = elem->cdr) pre_len++; + for (elem = pat_arr->post; elem; elem = elem->cdr) post_len++; + + /* Optimization: if we know the target is an array, skip deconstruct */ + if (known_array_len >= 0) { + /* Use target directly as array register */ + arr_reg = target; + /* Compile-time size check */ + if (pat_arr->rest == 0) { + /* No rest: exact length match required */ + if (known_array_len != pre_len) { + /* Size mismatch - always fail */ + tmp = genjmp(s, OP_JMP, *fail_pos); + *fail_pos = tmp; + break; + } + /* Size matches, no runtime check needed */ + } + else { + /* Has rest: minimum length check */ + int min_len = pre_len + post_len; + if (known_array_len < min_len) { + /* Size too small - always fail */ + tmp = genjmp(s, OP_JMP, *fail_pos); + *fail_pos = tmp; + break; + } + /* Size sufficient, no runtime check needed */ + } + + /* Match pre-rest elements using GETIDX (faster than SEND :[]) */ + i = 0; + for (elem = pat_arr->pre; elem; elem = elem->cdr, i++) { + /* Get arr[i] using GETIDX */ + gen_move(s, cursp(), arr_reg, 0); + push(); + gen_int(s, cursp(), i); + genop_1(s, OP_GETIDX, cursp() - 1); /* R[cursp-1] = R[cursp-1][R[cursp]] */ + /* Element is now at cursp-1 */ + /* Match element pattern (elements are not known arrays) */ + codegen_pattern(s, elem->car, cursp() - 1, fail_pos, -1); + pop(); /* Clean up element slot */ + } + + /* Bind rest elements if rest is a variable */ + if (pat_arr->rest && pat_arr->rest != (node*)-1) { + struct mrb_ast_pat_var_node *rest_var = pat_var_node(pat_arr->rest); + if (rest_var->name) { + int var_idx = lv_idx(s, rest_var->name); + /* Generate: arr[pre_len..-(post_len+1)] or arr[pre_len..-1] if no post */ + gen_move(s, cursp(), arr_reg, 0); /* arr at cursp */ + push(); + gen_int(s, cursp(), pre_len); /* start at cursp */ + push(); + if (post_len > 0) { + gen_int(s, cursp(), -(post_len + 1)); /* end at cursp */ + } + else { + gen_int(s, cursp(), -1); /* end at cursp */ + } + /* start at cursp-1, end at cursp; create inclusive range at cursp-1 */ + genop_1(s, OP_RANGE_INC, cursp() - 1); + /* arr at cursp-2, range at cursp-1 */ + pop(); /* cursp now at range position */ + pop(); /* cursp now at arr position */ + genop_3(s, OP_SEND, cursp(), sym_idx(s, MRB_OPSYM(aref)), 1); + if (var_idx > 0) { + gen_move(s, var_idx, cursp(), 1); + } + } + } + + /* Match post-rest elements using GETIDX */ + i = -post_len; + for (elem = pat_arr->post; elem; elem = elem->cdr, i++) { + /* Get arr[i] using GETIDX (negative index from end) */ + gen_move(s, cursp(), arr_reg, 0); + push(); + gen_int(s, cursp(), i); + genop_1(s, OP_GETIDX, cursp() - 1); + /* Match element pattern */ + codegen_pattern(s, elem->car, cursp() - 1, fail_pos, -1); + pop(); /* Clean up element slot */ + } + /* No arr_reg to pop since we used target directly */ + } + else { + /* General case: need to call deconstruct and check size at runtime */ + arr_reg = cursp(); + + /* Call deconstruct on target */ + gen_move(s, cursp(), target, 0); + push(); + genop_3(s, OP_SEND, arr_reg, sym_idx(s, MRB_SYM(deconstruct)), 0); + + /* Check length constraints */ + if (pat_arr->rest == 0) { + /* No rest: exact length match */ + /* Generate: arr.size == pre_len using EQ opcode */ + gen_move(s, cursp(), arr_reg, 0); + push(); + genop_3(s, OP_SEND, cursp() - 1, sym_idx(s, MRB_SYM(size)), 0); + gen_int(s, cursp(), pre_len); + /* EQ: R[a] = R[a] == R[a+1]; size at cursp-1, pre_len at cursp */ + genop_1(s, OP_EQ, cursp() - 1); + tmp = genjmp2(s, OP_JMPNOT, cursp() - 1, *fail_pos, 1); + *fail_pos = tmp; + pop(); + } + else { + /* Has rest: minimum length check */ + int min_len = pre_len + post_len; + if (min_len > 0) { + /* Generate: arr.size >= min_len using GE opcode */ + gen_move(s, cursp(), arr_reg, 0); + push(); + genop_3(s, OP_SEND, cursp() - 1, sym_idx(s, MRB_SYM(size)), 0); + gen_int(s, cursp(), min_len); + /* GE: R[a] = R[a] >= R[a+1]; size at cursp-1, min_len at cursp */ + genop_1(s, OP_GE, cursp() - 1); + tmp = genjmp2(s, OP_JMPNOT, cursp() - 1, *fail_pos, 1); + *fail_pos = tmp; + pop(); + } + } + + /* Match pre-rest elements */ + i = 0; + for (elem = pat_arr->pre; elem; elem = elem->cdr, i++) { + /* Get arr[i] */ + gen_move(s, cursp(), arr_reg, 0); + push(); + gen_int(s, cursp(), i); + push(); push(); pop(); pop(); pop(); + genop_3(s, OP_SEND, cursp(), sym_idx(s, MRB_OPSYM(aref)), 1); + push(); /* Preserve element result for codegen_pattern */ + /* Match element pattern */ + codegen_pattern(s, elem->car, cursp() - 1, fail_pos, -1); + pop(); /* Clean up element slot */ + } + + /* Bind rest elements if rest is a variable */ + if (pat_arr->rest && pat_arr->rest != (node*)-1) { + struct mrb_ast_pat_var_node *rest_var = pat_var_node(pat_arr->rest); + if (rest_var->name) { + int var_idx = lv_idx(s, rest_var->name); + /* Generate: arr[pre_len..-(post_len+1)] or arr[pre_len..-1] if no post */ + gen_move(s, cursp(), arr_reg, 0); /* arr at cursp */ + push(); + gen_int(s, cursp(), pre_len); /* start at cursp */ + push(); + if (post_len > 0) { + gen_int(s, cursp(), -(post_len + 1)); /* end at cursp */ + } + else { + gen_int(s, cursp(), -1); /* end at cursp */ + } + /* start at cursp-1, end at cursp; create inclusive range at cursp-1 */ + genop_1(s, OP_RANGE_INC, cursp() - 1); + /* arr at cursp-2, range at cursp-1 */ + pop(); /* cursp now at range position */ + pop(); /* cursp now at arr position */ + genop_3(s, OP_SEND, cursp(), sym_idx(s, MRB_OPSYM(aref)), 1); + if (var_idx > 0) { + gen_move(s, var_idx, cursp(), 1); + } + } + } + + /* Match post-rest elements */ + i = -post_len; + for (elem = pat_arr->post; elem; elem = elem->cdr, i++) { + /* Get arr[i] (negative index from end) */ + gen_move(s, cursp(), arr_reg, 0); + push(); + gen_int(s, cursp(), i); + push(); push(); pop(); pop(); pop(); + genop_3(s, OP_SEND, cursp(), sym_idx(s, MRB_OPSYM(aref)), 1); + push(); /* Preserve element result for codegen_pattern */ + /* Match element pattern */ + codegen_pattern(s, elem->car, cursp() - 1, fail_pos, -1); + pop(); /* Clean up element slot */ + } + + pop(); /* Pop arr_reg */ + } + } + break; + + case NODE_PAT_FIND: + { + /* Find pattern: [*pre, elem1, elem2, ..., *post] + * Searches for elems anywhere in the array. + * + * Stack layout: + * arr_reg: deconstructed array (stable) + * idx_reg: current search index (stable) + * + * Loop bound is recomputed each iteration since OP_SEND clobbers registers. + */ + struct mrb_ast_pat_find_node *pat_find = pat_find_node(pattern); + int elems_len = 0; + node *elem; + int arr_reg = cursp(); + int idx_reg; + uint32_t loop_start, match_fail, loop_end; + + /* Count middle elements */ + for (elem = pat_find->elems; elem; elem = elem->cdr) elems_len++; + + /* Call deconstruct on target */ + gen_move(s, cursp(), target, 0); + push(); + genop_3(s, OP_SEND, arr_reg, sym_idx(s, MRB_SYM(deconstruct)), 0); + + /* Check minimum length: arr.size >= elems_len */ + gen_move(s, cursp(), arr_reg, 0); + push(); + genop_3(s, OP_SEND, cursp() - 1, sym_idx(s, MRB_SYM(size)), 0); + gen_int(s, cursp(), elems_len); + push(); push(); pop(); pop(); pop(); + genop_3(s, OP_SEND, cursp(), sym_idx(s, MRB_OPSYM(ge)), 1); + tmp = genjmp2(s, OP_JMPNOT, cursp(), *fail_pos, 1); + *fail_pos = tmp; + + /* Initialize index to 0 */ + idx_reg = cursp(); + gen_int(s, idx_reg, 0); + push(); + + /* Loop: try matching at each position */ + loop_start = s->pc; + match_fail = JMPLINK_START; + + /* Check if idx <= arr.size - elems_len (i.e., idx < arr.size - elems_len + 1) */ + /* Compute: arr.size - elems_len */ + gen_move(s, cursp(), arr_reg, 0); + push(); + genop_3(s, OP_SEND, cursp() - 1, sym_idx(s, MRB_SYM(size)), 0); + gen_int(s, cursp(), elems_len); + push(); push(); pop(); pop(); pop(); + genop_3(s, OP_SEND, cursp(), sym_idx(s, MRB_OPSYM(sub)), 1); + /* Now cursp() has (size - elems_len), compare: idx <= (size - elems_len) */ + gen_move(s, cursp() + 1, idx_reg, 0); + push(); + push(); push(); pop(); pop(); pop(); + genop_3(s, OP_SEND, cursp(), sym_idx(s, MRB_OPSYM(ge)), 1); + tmp = genjmp2(s, OP_JMPNOT, cursp(), *fail_pos, 1); + *fail_pos = tmp; + + /* Try to match each middle element at idx+offset */ + int offset = 0; + for (elem = pat_find->elems; elem; elem = elem->cdr, offset++) { + /* Get arr[idx + offset] */ + gen_move(s, cursp(), arr_reg, 0); + push(); + if (offset == 0) { + gen_move(s, cursp(), idx_reg, 0); + } + else { + gen_move(s, cursp(), idx_reg, 0); + push(); + gen_int(s, cursp(), offset); + push(); push(); pop(); pop(); pop(); + genop_3(s, OP_SEND, cursp(), sym_idx(s, MRB_OPSYM(add)), 1); + } + push(); push(); pop(); pop(); pop(); + genop_3(s, OP_SEND, cursp(), sym_idx(s, MRB_OPSYM(aref)), 1); + push(); /* Preserve element result for codegen_pattern */ + /* Match element pattern - on fail, try next index */ + codegen_pattern(s, elem->car, cursp() - 1, &match_fail, -1); + pop(); /* Clean up element slot */ + } + + /* All elements matched - bind pre and post if named */ + if (pat_find->pre && pat_find->pre != (node*)-1) { + struct mrb_ast_pat_var_node *pre_var = pat_var_node(pat_find->pre); + if (pre_var->name) { + int var_idx = lv_idx(s, pre_var->name); + /* pre = arr[0...idx] (exclusive range) */ + /* Following the NODE_PAT_ARRAY pattern exactly */ + gen_move(s, cursp(), arr_reg, 0); /* arr at cursp */ + push(); + gen_int(s, cursp(), 0); /* start=0 at cursp */ + push(); + gen_move(s, cursp(), idx_reg, 0); /* end=idx at cursp */ + /* start at cursp-1, end at cursp; create exclusive range at cursp-1 */ + genop_1(s, OP_RANGE_EXC, cursp() - 1); + /* arr at cursp-2, range at cursp-1 */ + pop(); /* cursp now at range position */ + pop(); /* cursp now at arr position */ + genop_3(s, OP_SEND, cursp(), sym_idx(s, MRB_OPSYM(aref)), 1); + if (var_idx > 0) { + gen_move(s, var_idx, cursp(), 1); + } + } + } + + if (pat_find->post && pat_find->post != (node*)-1) { + struct mrb_ast_pat_var_node *post_var = pat_var_node(pat_find->post); + if (post_var->name) { + int var_idx = lv_idx(s, post_var->name); + /* post = arr[(idx+elems_len)..-1] (inclusive range) */ + /* Following the NODE_PAT_ARRAY pattern exactly */ + gen_move(s, cursp(), arr_reg, 0); /* arr at cursp */ + push(); + /* Compute idx + elems_len for start index */ + gen_move(s, cursp(), idx_reg, 0); /* idx at cursp */ + push(); + gen_int(s, cursp(), elems_len); /* elems_len at cursp */ + push(); push(); pop(); pop(); pop(); + genop_3(s, OP_SEND, cursp(), sym_idx(s, MRB_OPSYM(add)), 1); + /* start index (idx+elems_len) now at cursp */ + push(); + gen_int(s, cursp(), -1); /* end=-1 at cursp */ + /* start at cursp-1, end at cursp; create inclusive range at cursp-1 */ + genop_1(s, OP_RANGE_INC, cursp() - 1); + /* arr at cursp-2, range at cursp-1 */ + pop(); /* cursp now at range position */ + pop(); /* cursp now at arr position */ + genop_3(s, OP_SEND, cursp(), sym_idx(s, MRB_OPSYM(aref)), 1); + if (var_idx > 0) { + gen_move(s, var_idx, cursp(), 1); + } + } + } + + /* Jump to success (end of find pattern) */ + loop_end = genjmp(s, OP_JMP, JMPLINK_START); + + /* Match failed - increment index and try again */ + dispatch_linked(s, match_fail); + gen_move(s, cursp(), idx_reg, 0); + push(); + gen_int(s, cursp(), 1); + push(); push(); pop(); pop(); pop(); + genop_3(s, OP_SEND, cursp(), sym_idx(s, MRB_OPSYM(add)), 1); + gen_move(s, idx_reg, cursp(), 0); + genjmp(s, OP_JMP, loop_start); + + /* Success exit point */ + dispatch(s, loop_end); + + pop(); /* idx_reg */ + pop(); /* arr_reg */ + } + break; + + case NODE_PAT_HASH: + { + struct mrb_ast_pat_hash_node *pat_hash = pat_hash_node(pattern); + int hash_reg = cursp(); + node *pair; + int num_keys = 0; + + /* Count keys */ + for (pair = pat_hash->pairs; pair; pair = pair->cdr) num_keys++; + + /* Call deconstruct_keys. + * Pass nil when all keys are needed (rest or exact match). + * Pass keys array only for partial match (optimization for custom classes). */ + gen_move(s, cursp(), target, 0); + push(); + if (pat_hash->rest == NULL && num_keys > 0) { + /* Partial match: pass keys array */ + gen_pat_keys_ary(s, pat_hash->pairs, num_keys); + } + else { + genop_1(s, OP_LOADNIL, cursp()); + push(); + } + genop_3(s, OP_SEND, hash_reg, sym_idx(s, MRB_SYM(deconstruct_keys)), 1); + pop(); + + /* Check all keys exist and get values via __pat_values */ + if (num_keys > 0) { + int vals_reg = cursp(); + gen_move(s, vals_reg, hash_reg, 0); + push(); + gen_pat_keys_ary(s, pat_hash->pairs, num_keys); + genop_3(s, OP_SEND, vals_reg, sym_idx(s, MRB_SYM(__pat_values)), 1); + pop(); /* keys_ary */ + /* vals_reg = values array or false; fail if false */ + tmp = genjmp2(s, OP_JMPNOT, vals_reg, *fail_pos, 1); + *fail_pos = tmp; + + /* Match each value against its pattern */ + int i = 0; + for (pair = pat_hash->pairs; pair; pair = pair->cdr, i++) { + node *pat = pair->car->cdr; + + gen_move(s, cursp(), vals_reg, 0); + push(); + gen_int(s, cursp(), i); + push(); push(); pop(); pop(); pop(); + genop_3(s, OP_SEND, cursp(), sym_idx(s, MRB_OPSYM(aref)), 1); + push(); + + codegen_pattern(s, pat, cursp() - 1, fail_pos, -1); + pop(); + } + pop(); /* vals_reg */ + } + + /* Handle rest pattern */ + if (pat_hash->rest == (node*)-1 || (num_keys == 0 && pat_hash->rest == NULL)) { + /* **nil or empty {}: exact match - verify hash.size == num_keys */ + gen_move(s, cursp(), hash_reg, 0); + push(); + genop_3(s, OP_SEND, cursp() - 1, sym_idx(s, MRB_SYM(size)), 0); + gen_int(s, cursp(), num_keys); + genop_1(s, OP_EQ, cursp() - 1); + tmp = genjmp2(s, OP_JMPNOT, cursp() - 1, *fail_pos, 1); + *fail_pos = tmp; + pop(); + } + else if (pat_hash->rest && pat_hash->rest != (node*)-2) { + /* **var: capture remaining keys via hash.__except(keys_array) */ + struct mrb_ast_pat_var_node *rest_var = pat_var_node(pat_hash->rest); + if (rest_var->name) { + int var_idx = lv_idx(s, rest_var->name); + int recv = cursp(); + gen_move(s, recv, hash_reg, 0); + push(); + if (num_keys > 0) { + gen_pat_keys_ary(s, pat_hash->pairs, num_keys); + genop_3(s, OP_SEND, recv, sym_idx(s, MRB_SYM(__except)), 1); + pop(); + } + else { + genop_3(s, OP_SEND, recv, sym_idx(s, MRB_SYM(dup)), 0); + } + if (var_idx > 0) { + gen_move(s, var_idx, recv, 1); + } + pop(); + } + } + + pop(); /* hash_reg */ + } + break; + + default: + raise_error(s, "unsupported pattern type"); + break; + } +} + +/* Definition node codegen functions */ + +static void +codegen_def(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_def_node *def_n = def_node(varnode); + int sym = sym_idx(s, def_n->name); + + /* Call lambda_body directly with individual parameters */ + /* For NODE_DEF, args should contain the full locals structure from defn_setup */ + int idx = lambda_body(s, def_n->locals, def_n->args, def_n->body, 0); + + if (idx <= 0xff) { + /* TDEF fusion: TCLASS + METHOD + DEF -> TDEF */ + genop_3(s, OP_TDEF, cursp(), sym, idx); + } + else { + genop_1(s, OP_TCLASS, cursp()); + push(); + genop_2(s, OP_METHOD, cursp(), idx); + push(); pop(); + pop(); + genop_2(s, OP_DEF, cursp(), sym); + } + if (val) push(); +} + +/* Helper function for generating class/module/singleton class body */ +/* Forward declaration */ +static mrb_bool is_empty_stmts(node *stmt_node); + +static void +gen_class_body(codegen_scope *s, node *body, int val) +{ + int idx; + + if (body && body->cdr) { + /* Extract locals and body from the cons structure: (locals . body) */ + node *locals = body->car; + node *body_stmts = body->cdr; + + /* Check for empty body case */ + if (is_empty_stmts(body_stmts)) { + genop_1(s, OP_LOADNIL, cursp()); + } + else { + /* Generate proper scope with locals and body */ + idx = scope_body(s, locals, body_stmts, val); + genop_2(s, OP_EXEC, cursp(), idx); + } + } + else { + /* No body - load nil */ + genop_1(s, OP_LOADNIL, cursp()); + } +} + +/* Helper function for generating namespace/parent for class/module */ +static void +gen_namespace(codegen_scope *s, node *name) +{ + if (name->car == (node*)0) { + genop_1(s, OP_LOADNIL, cursp()); + push(); + } + else if (name->car == (node*)1) { + genop_1(s, OP_OCLASS, cursp()); + push(); + } + else { + codegen(s, name->car, VAL); + } +} + +static void +codegen_class(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_class_node *class_n = class_node(varnode); + node *name = class_n->name; + node *superclass = class_n->superclass; + node *body = class_n->body; + int idx; + + /* Handle class namespace */ + gen_namespace(s, name); + + /* Handle superclass */ + if (superclass) { + codegen(s, superclass, VAL); + } + else { + genop_1(s, OP_LOADNIL, cursp()); + push(); + } + + pop(); pop(); + + /* Create class with name symbol */ + idx = sym_idx(s, node_to_sym(name->cdr)); + genop_2(s, OP_CLASS, cursp(), idx); + + /* Generate class body */ + gen_class_body(s, body, val); + + if (val) { + push(); + } +} + +static void +codegen_module(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_module_node *module_n = module_node(varnode); + node *name = module_n->name; + node *body = module_n->body; + int idx; + + /* Handle module namespace */ + gen_namespace(s, name); + pop(); + + /* Create module with name symbol */ + idx = sym_idx(s, node_to_sym(name->cdr)); + genop_2(s, OP_MODULE, cursp(), idx); + + /* Generate module body */ + gen_class_body(s, body, val); + + if (val) { + push(); + } +} + +static void +codegen_sclass(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_sclass_node *sclass_n = sclass_node(varnode); + node *obj = sclass_n->obj; + node *body = sclass_n->body; + + /* Generate code for the singleton object */ + codegen(s, obj, VAL); + pop(); + + /* Enter singleton class scope */ + genop_1(s, OP_SCLASS, cursp()); + + /* Generate singleton class body */ + gen_class_body(s, body, val); + + if (val) { + push(); + } +} + +/* Variable-sized assignment codegen functions */ +static void +codegen_asgn(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_asgn_node *asgn_n = asgn_node(varnode); + node *lhs = asgn_n->lhs; + node *rhs = asgn_n->rhs; + + gen_assignment(s, lhs, rhs, 0, val); +} + +static void +codegen_masgn(codegen_scope *s, node *varnode, node *rhs, int sp, int val) +{ + struct mrb_ast_masgn_node *masgn_n = (struct mrb_ast_masgn_node*)varnode; + + /* If called from codegen_variable_node context, use the embedded rhs */ + if (!rhs && sp == 0) { + rhs = masgn_n->rhs; + sp = 0; /* Use register 0 as base for standalone assignment */ + } + + int len = 0, n = 0, post = 0; + node *t = rhs ? rhs : masgn_n->rhs, *p; + int rhs_reg = sp; + + if (!val && t && node_type(t) == NODE_ARRAY) { + struct mrb_ast_array_node *an = array_node(t); + if (an->elements && nosplat(an->elements)) { + /* fixed rhs */ + t = an->elements; + + /* Optimization: direct generation for simple cases */ + /* When all lhs are local vars and all rhs are simple literals, */ + /* generate directly into target registers (no temporaries) */ + if (masgn_n->pre && !masgn_n->rest && !masgn_n->post) { + int regs[16]; /* support up to 16 variables */ + node *lhs = masgn_n->pre; + node *rhs_elem = t; + int rhs_count = 0, lhs_count = 0; + mrb_bool all_simple = TRUE; + + /* Count lhs variables */ + while (lhs && lhs_count < 16) { + lhs_count++; + lhs = lhs->cdr; + } + + /* Count and check rhs are all simple literals */ + while (rhs_elem && rhs_count < 16) { + if (!is_simple_literal(rhs_elem->car)) { + all_simple = FALSE; + break; + } + rhs_count++; + rhs_elem = rhs_elem->cdr; + } + /* Only apply when lhs and rhs counts match exactly */ + lhs = masgn_n->pre; + if (all_simple && lhs_count > 0 && lhs_count == rhs_count && + all_lvar_pre(s, lhs, regs, lhs_count)) { + /* Direct generation: generate literals into target registers */ + rhs_elem = t; + for (int i = 0; i < lhs_count; i++) { + gen_literal_to_reg(s, rhs_elem->car, regs[i]); + rhs_elem = rhs_elem->cdr; + } + return; + } + } + + rhs_reg = cursp(); /* Save register where values will be pushed */ + while (t) { + codegen(s, t->car, VAL); + len++; + t = t->cdr; + } + if (masgn_n->pre) { /* pre */ + t = masgn_n->pre; + n = 0; + while (t) { + if (n < len) { + gen_assignment(s, t->car, NULL, rhs_reg+n, NOVAL); + n++; + } + else { + genop_1(s, OP_LOADNIL, rhs_reg+n); + gen_assignment(s, t->car, NULL, rhs_reg+n, NOVAL); + } + t = t->cdr; + } + } + /* Count post variables */ + if (masgn_n->post) { + p = masgn_n->post; + while (p) { + post++; + p = p->cdr; + } + } + /* Handle rest variable */ + if (masgn_n->rest && (intptr_t)masgn_n->rest != -1) { + int rn; + + if (len < post + n) { + rn = 0; + } + else { + rn = len - post - n; + } + if (cursp() == rhs_reg+n) { + genop_2(s, OP_ARRAY, cursp(), rn); + } + else { + genop_3(s, OP_ARRAY2, cursp(), rhs_reg+n, rn); + } + gen_assignment(s, masgn_n->rest, NULL, cursp(), NOVAL); + n += rn; + } + /* Handle post variables */ + if (masgn_n->post) { + t = masgn_n->post; + while (t) { + if (ncar, NULL, rhs_reg+n, NOVAL); + } + else { genop_1(s, OP_LOADNIL, cursp()); - push(); + gen_assignment(s, t->car, NULL, cursp(), NOVAL); } - goto exit; + t = t->cdr; + n++; + } + } + pop_n(len); + return; + } + } + + { + /* variable rhs - implement gen_massignment logic directly for variable-sized nodes */ + + /* Check if this is parameter destructuring (called from lambda_body) */ + if (!rhs && sp > 0) { + /* Parameter destructuring: value is already in register sp */ + rhs_reg = sp; + } + else if (t) { + codegen(s, t, VAL); + rhs_reg = cursp() - 1; /* rhs is now at cursp()-1 */ + } + else { + /* No rhs and no sp value - should not happen in normal cases */ + return; + } + + /* Handle the lhs structure directly */ + n = 0; + post = 0; + + if (masgn_n->pre) { /* pre */ + node *pre = masgn_n->pre; + n = 0; + while (pre) { + int sp = cursp(); + genop_3(s, OP_AREF, sp, rhs_reg, n); + push(); + gen_assignment(s, pre->car, NULL, sp, NOVAL); + pop(); + n++; + pre = pre->cdr; + } + } + + /* Count post variables */ + if (masgn_n->post) { + node *p = masgn_n->post; + while (p) { + post++; + p = p->cdr; + } + } + + /* Only generate APOST if there's rest or post variables */ + if ((masgn_n->rest && (intptr_t)masgn_n->rest != -1) || masgn_n->post) { + gen_move(s, cursp(), rhs_reg, val); + push_n(post+1); + pop_n(post+1); + genop_3(s, OP_APOST, cursp(), n, post); + int nn = 1; + if (masgn_n->rest && (intptr_t)masgn_n->rest != -1) { /* rest */ + gen_assignment(s, masgn_n->rest, NULL, cursp(), NOVAL); + } + if (masgn_n->post) { + node *post_part = masgn_n->post; + while (post_part) { + gen_assignment(s, post_part->car, NULL, cursp()+nn, NOVAL); + post_part = post_part->cdr; + nn++; } } - else if (false_always(tree->car)) { - if (nt == NODE_WHILE) { - if (val) { - genop_1(s, OP_LOADNIL, cursp()); + } + + if (!val && t) { + pop(); /* pop the rhs value */ + } + } +} + +static void +codegen_op_asgn(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_op_asgn_node *op_asgn_n = op_asgn_node(varnode); + node *lhs = op_asgn_n->lhs; + node *rhs = op_asgn_n->rhs; + mrb_sym sym = op_asgn_n->op; + mrb_int len; + const char *name = mrb_sym_name_len(s->mrb, sym, &len); + int vsp = -1; + + /* Handle ||= and &&= operators */ + if (len == 2 && + ((name[0] == '|' && name[1] == '|') || + (name[0] == '&' && name[1] == '&'))) { + uint32_t pos; + enum node_type lhs_type = node_type(lhs); + + /* For ||= on class variables and constants, wrap read in exception handling */ + if (name[0] == '|' && (lhs_type == NODE_CVAR || lhs_type == NODE_CONST)) { + int catch_entry, begin, end; + int noexc, exc; + struct loopinfo *lp; + + lp = loop_push(s, LOOP_BEGIN); + lp->pc0 = new_label(s); + catch_entry = catch_handler_new(s); + begin = s->pc; + exc = cursp(); + codegen(s, lhs, VAL); + end = s->pc; + noexc = genjmp_0(s, OP_JMP); + lp->type = LOOP_RESCUE; + catch_handler_set(s, catch_entry, MRB_CATCH_RESCUE, begin, end, s->pc); + genop_1(s, OP_EXCEPT, exc); + genop_1(s, OP_LOADFALSE, exc); + dispatch(s, noexc); + loop_pop(s, NOVAL); + } + else { + /* Generate code to get current value of LHS */ + codegen(s, lhs, VAL); + } + + pop(); + if (val) { + if (vsp >= 0) { + gen_move(s, vsp, cursp(), 1); + } + pos = genjmp2_0(s, name[0]=='|'?OP_JMPIF:OP_JMPNOT, cursp(), val); + } + else { + pos = genjmp2_0(s, name[0]=='|'?OP_JMPIF:OP_JMPNOT, cursp(), val); + } + codegen(s, rhs, VAL); + pop(); + if (val && vsp >= 0) { + gen_move(s, vsp, cursp(), 1); + } + gen_assignment(s, lhs, NULL, cursp(), val); + dispatch(s, pos); + return; + } + + /* For other operators, generate: lhs = lhs op rhs */ + codegen(s, lhs, VAL); + codegen(s, rhs, VAL); + push(); pop(); + pop(); pop(); + + /* Apply the operator */ + if (len == 1 && name[0] == '+') { + gen_addsub(s, OP_ADD, cursp()); + } + else if (len == 1 && name[0] == '-') { + gen_addsub(s, OP_SUB, cursp()); + } + else if (len == 1 && name[0] == '*') { + genop_1(s, OP_MUL, cursp()); + } + else if (len == 1 && name[0] == '/') { + genop_1(s, OP_DIV, cursp()); + } + else if (len == 1 && name[0] == '<') { + genop_1(s, OP_LT, cursp()); + } + else if (len == 2 && name[0] == '<' && name[1] == '=') { + genop_1(s, OP_LE, cursp()); + } + else if (len == 1 && name[0] == '>') { + genop_1(s, OP_GT, cursp()); + } + else if (len == 2 && name[0] == '>' && name[1] == '=') { + genop_1(s, OP_GE, cursp()); + } + else { + int idx = sym_idx(s, sym); + genop_3(s, OP_SEND, cursp(), idx, 1); + } + + /* Assign the result back to LHS */ + gen_assignment(s, lhs, NULL, cursp(), val); +} + +/* Variable-sized expression codegen functions */ +static void +codegen_and(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_and_node *and_n = (struct mrb_ast_and_node*)varnode; + node *left = and_n->left; + node *right = and_n->right; + uint32_t pos; + + if (true_always(left)) { + codegen(s, right, val); + return; + } + if (false_always(left)) { + codegen(s, left, val); + return; + } + codegen(s, left, VAL); + pop(); + pos = genjmp2_0(s, OP_JMPNOT, cursp(), val); + codegen(s, right, val); + dispatch(s, pos); +} + +static void +codegen_or(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_or_node *or_n = (struct mrb_ast_or_node*)varnode; + node *left = or_n->left; + node *right = or_n->right; + uint32_t pos; + + if (true_always(left)) { + codegen(s, left, val); + return; + } + if (false_always(left)) { + codegen(s, right, val); + return; + } + codegen(s, left, VAL); + pop(); + pos = genjmp2_0(s, OP_JMPIF, cursp(), val); + codegen(s, right, val); + dispatch(s, pos); +} + +static void +codegen_return(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_return_node *return_n = return_node(varnode); + node *args = return_n->args; + + if (args) { + gen_retval(s, args); + } + else { + genop_1(s, OP_LOADNIL, cursp()); + } + if (s->loop) { + gen_return(s, OP_RETURN_BLK, cursp()); + } + else { + gen_return(s, OP_RETURN, cursp()); + } + if (!val) return; + push(); +} + +static void +codegen_yield(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_yield_node *yield_n = yield_node(varnode); + node *args = yield_n->args; + codegen_scope *s2 = s; + int lv = 0, ainfo = -1; + int n = 0, nk = 0, sendv = 0; + + while (!s2->mscope) { + lv++; + s2 = s2->prev; + if (!s2) break; + } + if (s2) { + ainfo = (int)s2->ainfo; + } + if (ainfo < 0) codegen_error(s, "invalid yield (SyntaxError)"); + if (lv > 0xf) codegen_error(s, "too deep nesting"); + push(); + if (args) { + struct mrb_ast_callargs *callargs = (struct mrb_ast_callargs*)args; + if (callargs->regular_args) { + n = gen_values(s, callargs->regular_args, VAL, 14); + if (n < 0) { + n = sendv = 1; + push(); + } + } + if (callargs->keyword_args) { + nk = gen_hash(s, callargs->keyword_args, VAL, 14); + if (nk < 0) { + nk = 15; + } + } + } + push();pop(); /* space for a block */ + pop_n(n + (nk == 15 ? 1 : nk * 2) + 1); + genop_2S(s, OP_BLKPUSH, cursp(), (ainfo<<4)|(lv & 0xf)); + if (sendv) n = CALL_MAXARGS; + if (nk == 0 && n < 15) { + /* fast path: direct block call without method dispatch */ + genop_2(s, OP_BLKCALL, cursp(), n); + } + else { + /* fallback: use SEND for keyword args or splat */ + genop_3(s, OP_SEND, cursp(), sym_idx(s, MRB_SYM(call)), n|(nk<<4)); + } + if (val) push(); +} + +static void +codegen_super(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_super_node *super_n = super_node(varnode); + node *tree = super_n->args; + + codegen_scope *s2 = s; + int lv = 0; + int n = 0, nk = 0, st = 0; + + push(); + while (!s2->mscope) { + lv++; + s2 = s2->prev; + if (!s2) break; + } + if (tree) { + /* Handle callargs structure - direct casting like new_args() */ + struct mrb_ast_callargs *callargs = (struct mrb_ast_callargs*)tree; + + /* Regular arguments */ + if (callargs->regular_args) { + st = n = gen_values(s, callargs->regular_args, VAL, 14); + if (n < 0) { + st = 1; n = 15; + push(); + } + } + + /* Keyword arguments */ + if (callargs->keyword_args) { + nk = gen_hash(s, callargs->keyword_args, VAL, 14); + if (nk < 0) {st++; nk = 15;} + else st += nk*2; + n |= nk<<4; + } + + /* Block arguments */ + if (callargs->block_arg) { + codegen(s, callargs->block_arg, VAL); + } + else if (s2) gen_blkmove(s, s2->ainfo, lv); + else { + genop_1(s, OP_LOADNIL, cursp()); + push(); + } + } + else { + if (s2) gen_blkmove(s, s2->ainfo, lv); + else { + genop_1(s, OP_LOADNIL, cursp()); + push(); + } + } + st++; + pop_n(st+1); + genop_2(s, OP_SUPER, cursp(), n); + if (val) push(); +} + +/* Variable-sized literal node generation functions */ +static void +codegen_str(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_str_node *str_n = str_node(varnode); + node *list = str_n->list; + + /* Use common cons list string codegen */ + gen_string(s, list, val); +} + +static void +codegen_range(codegen_scope *s, node *varnode, int val, mrb_bool exclusive) +{ + node *left = dot2_node(varnode)->left; + node *right = dot2_node(varnode)->right; + + codegen(s, left, val); + codegen(s, right, val); + if (!val) return; + pop(); pop(); + genop_1(s, exclusive ? OP_RANGE_EXC : OP_RANGE_INC, cursp()); + push(); +} + +static void +codegen_float(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_float_node *float_n = (struct mrb_ast_float_node*)varnode; + const char *value = float_n->value; + double f; + + mrb_read_float(value, NULL, &f); + int off = new_lit_float(s, (mrb_float)f); + + gen_load_op2(s, OP_LOADL, off, val); +} + +/* Variable-sized simple node generation functions */ +static void +codegen_self(codegen_scope *s, node *varnode, int val) +{ + /* Use traditional self codegen logic */ + gen_load_op1(s, OP_LOADSELF, val); +} + +static void +codegen_nil(codegen_scope *s, node *varnode, int val) +{ + /* Use traditional nil codegen logic */ + gen_load_op1(s, OP_LOADNIL, val); +} + +static void +codegen_true(codegen_scope *s, node *varnode, int val) +{ + /* Generate OP_LOADTRUE instruction for true literal */ + gen_load_op1(s, OP_LOADTRUE, val); +} + +static void +codegen_false(codegen_scope *s, node *varnode, int val) +{ + /* Generate OP_LOADFALSE instruction for false literal */ + gen_load_op1(s, OP_LOADFALSE, val); +} + +static void +codegen_const(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_const_node *const_n = const_node(varnode); + mrb_sym symbol = const_n->symbol; + + int i = sym_idx(s, symbol); + genop_2(s, OP_GETCONST, cursp(), i); + if (val) push(); +} + +static void +codegen_rescue(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_rescue_node *rescue = rescue_node(varnode); + node *body = rescue->body; + node *rescue_clauses = rescue->rescue_clauses; + node *else_clause = rescue->else_clause; + + int noexc; + uint32_t exend, pos1, pos2, tmp; + struct loopinfo *lp; + int catch_entry, begin, end; + + if (body == NULL) return; + lp = loop_push(s, LOOP_BEGIN); + lp->pc0 = new_label(s); + catch_entry = catch_handler_new(s); + begin = s->pc; + codegen(s, body, VAL); + pop(); + lp->type = LOOP_RESCUE; + end = s->pc; + noexc = genjmp_0(s, OP_JMP); + catch_handler_set(s, catch_entry, MRB_CATCH_RESCUE, begin, end, s->pc); + exend = JMPLINK_START; + pos1 = JMPLINK_START; + if (rescue_clauses) { + node *n2 = rescue_clauses; + int exc = cursp(); + + genop_1(s, OP_EXCEPT, exc); + push(); + while (n2) { + node *n3 = n2->car; + node *n4 = n3->car; + + dispatch(s, pos1); + pos2 = JMPLINK_START; + do { + if (n4 && n4->car && is_splat_node(n4->car)) { + codegen(s, n4->car, VAL); + gen_move(s, cursp(), exc, 0); + push_n(2); pop_n(2); /* space for one arg and a block */ + pop(); + genop_3(s, OP_SEND, cursp(), sym_idx(s, MRB_SYM(__case_eqq)), 1); + } + else { + if (n4) { + codegen(s, n4->car, VAL); + } + else { + genop_2(s, OP_GETCONST, cursp(), sym_idx(s, MRB_SYM(StandardError))); push(); } - goto exit; + pop(); + genop_2(s, OP_RESCUE, exc, cursp()); + } + tmp = genjmp2(s, OP_JMPIF, cursp(), pos2, val); + pos2 = tmp; + if (n4) { + n4 = n4->cdr; } + } while (n4); + pos1 = genjmp_0(s, OP_JMP); + dispatch_linked(s, pos2); + + pop(); + if (n3->cdr->car) { + gen_assignment(s, n3->cdr->car, NULL, exc, NOVAL); + } + if (n3->cdr->cdr->car) { + codegen(s, n3->cdr->cdr->car, val); + if (val) pop(); + } + tmp = genjmp(s, OP_JMP, exend); + exend = tmp; + n2 = n2->cdr; + push(); + } + if (pos1 != JMPLINK_START) { + dispatch(s, pos1); + genop_1(s, OP_RAISEIF, exc); + } + } + pop(); + dispatch(s, noexc); + if (else_clause) { + codegen(s, else_clause, val); + } + else if (val) { + push(); + } + dispatch_linked(s, exend); + loop_pop(s, NOVAL); +} + +static void +codegen_block(codegen_scope *s, node *varnode, int val) +{ + if (!val) return; + + struct mrb_ast_block_node *n = block_node(varnode); + + /* Call lambda_body directly with individual parameters */ + int idx = lambda_body(s, n->locals, n->args, n->body, 1); + genop_2(s, OP_BLOCK, cursp(), idx); + push(); +} + +static void +codegen_break(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_break_node *n = (struct mrb_ast_break_node*)varnode; + loop_break(s, n->value); + if (!val) return; + push(); +} + +static void +codegen_next(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_next_node *n = (struct mrb_ast_next_node*)varnode; + if (!s->loop) { + raise_error(s, "unexpected next"); + } + else if (s->loop->type == LOOP_NORMAL) { + codegen(s, n->value, NOVAL); + genjmp(s, OP_JMPUW, s->loop->pc0); + } + else { + if (n->value) { + codegen(s, n->value, VAL); + pop(); + } + else { + genop_1(s, OP_LOADNIL, cursp()); + } + gen_return(s, OP_RETURN, cursp()); + } + if (!val) return; + push(); +} + +static void +codegen_redo(codegen_scope *s, node *varnode, int val) +{ + for (const struct loopinfo *lp = s->loop; ; lp = lp->prev) { + if (!lp) { + raise_error(s, "unexpected redo"); + break; + } + if (lp->type != LOOP_BEGIN && lp->type != LOOP_RESCUE) { + genjmp(s, OP_JMPUW, lp->pc1); + break; + } + } + if (!val) return; + push(); +} + +static void +codegen_retry(codegen_scope *s, node *varnode, int val) +{ + const struct loopinfo *lp = s->loop; + + while (lp && lp->type != LOOP_RESCUE) { + lp = lp->prev; + } + if (!lp) { + raise_error(s, "unexpected retry"); + } + else { + genjmp(s, OP_JMPUW, lp->pc0); + } + if (!val) return; + push(); +} + +static void +codegen_xstr(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_xstr_node *n = xstr_node(varnode); + node *list = n->list; + int sym; + + /* Always execute backtick command for side effects, even in NOVAL mode */ + push(); + /* Generate string using common function */ + gen_string(s, list, VAL); + + push(); /* for block */ + pop_n(3); + sym = sym_idx(s, MRB_OPSYM(tick)); /* ` */ + genop_3(s, OP_SSEND, cursp(), sym, 1); + + if (val) { + push(); /* Keep result on stack if needed */ + } + /* If val=0, the result is discarded but the method was still called */ +} + +static void +codegen_regx(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_regx_node *n = regx_node(varnode); + + if (val) { + int sym = sym_idx(s, mrb_intern_lit(s->mrb, REGEXP_CLASS)); + int argc = 1; + int off; + + genop_1(s, OP_OCLASS, cursp()); + genop_2(s, OP_GETMCNST, cursp(), sym); + push(); + + /* Generate regex pattern using common cons list function */ + gen_string(s, n->list, VAL); + + /* Add flags and/or encoding if present */ + if ((n->flags && *n->flags) || (n->encoding && *n->encoding)) { + /* Add flags (or nil if not present but encoding is) */ + if (n->flags && *n->flags) { + off = new_lit_cstr(s, n->flags); + genop_2(s, OP_STRING, cursp(), off); + } + else { + genop_1(s, OP_LOADNIL, cursp()); + } + push(); + argc++; + + /* Add encoding if present */ + if (n->encoding && *n->encoding) { + off = new_lit_cstr(s, n->encoding); + genop_2(s, OP_STRING, cursp(), off); + push(); + argc++; + } + } + + push(); /* space for a block */ + pop_n(argc+2); + sym = sym_idx(s, MRB_SYM(compile)); + genop_3(s, OP_SEND, cursp(), sym, argc); + push(); + } + else { + /* NOVAL case: still need to evaluate expressions for side effects */ + gen_string(s, n->list, NOVAL); + } +} + +static void +codegen_heredoc(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_heredoc_node *n = heredoc_node(varnode); + // Process heredoc doc field as cons list string + gen_string(s, n->info.doc, val); +} + +static void +codegen_dsym(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_str_node *n = dsym_node(varnode); + // Generate the list content, then intern to symbol + gen_string(s, n->list, val); + if (val) { + gen_intern(s); + } +} + +static void +codegen_nth_ref(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_nth_ref_node *n = (struct mrb_ast_nth_ref_node*)varnode; + mrb_state *mrb = s->mrb; + mrb_value str; + int sym; + + str = mrb_format(mrb, "$%d", n->nth); + sym = sym_idx(s, mrb_intern_str(mrb, str)); + gen_load_op2(s, OP_GETGV, sym, val); +} + +static void +codegen_back_ref(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_back_ref_node *n = (struct mrb_ast_back_ref_node*)varnode; + char buf[] = {'$', (char)n->type}; + int sym = sym_idx(s, mrb_intern(s->mrb, buf, sizeof(buf))); + gen_load_op2(s, OP_GETGV, sym, val); +} + +static void +codegen_nvar(codegen_scope *s, node *varnode, int val) +{ + if (!val) return; + struct mrb_ast_nvar_node *n = (struct mrb_ast_nvar_node*)varnode; + + gen_move(s, cursp(), n->num, val); + push(); +} + +static void +codegen_dvar(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_dvar_node *n = (struct mrb_ast_dvar_node*)varnode; + // DVAR nodes are not currently used in mruby, but provide basic implementation + if (val) { + gen_lvar(s, n->name, val); + } +} + +/* Unary operator codegen functions */ +static void +codegen_not(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_not_node *n = (struct mrb_ast_not_node*)varnode; + // NOT nodes are rarely used - generate method call to ! + if (val) { + codegen(s, n->operand, TRUE); + pop(); + mrb_sym sym = sym_idx(s, mrb_intern_lit(s->mrb, "!")); + genop_3(s, OP_SEND, cursp(), sym, 0); + push(); + } +} + +static void +codegen_negate(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_negate_node *n = (struct mrb_ast_negate_node*)varnode; + node *tree = n->operand; + + /* Check if the operand is a variable-sized node */ + enum node_type vnt = node_type(tree); + switch (vnt) { +#ifndef MRB_NO_FLOAT + case NODE_FLOAT: + if (val) { + struct mrb_ast_float_node *float_n = (struct mrb_ast_float_node*)tree; + const char *value = float_n->value; + double f; + + mrb_read_float(value, NULL, &f); + int off = new_lit_float(s, (mrb_float)-f); + + gen_load_lit(s, off); + } + break; +#endif + + case NODE_INT: + if (val) { + int32_t value = int_node(tree)->value; + if (value == INT32_MIN) { + /* -INT32_MIN overflows, use bigint */ + int off = new_litbint(s, "2147483648", -10); + genop_2(s, OP_LOADL, cursp(), off); + } + else { + gen_int(s, cursp(), -value); } + push(); + } + break; + + case NODE_BIGINT: + if (val) { + char *str = bigint_node(tree)->string; + int base = bigint_node(tree)->base; + /* Negate base to indicate negative number */ + int off = new_litbint(s, str, -base); + genop_2(s, OP_LOADL, cursp(), off); + push(); + } + break; + + default: + codegen(s, tree, VAL); + pop(); + push_n(2);pop_n(2); /* space for receiver&block */ + mrb_sym minus = MRB_OPSYM(minus); + if (!gen_uniop(s, minus, cursp())) { + genop_3(s, OP_SEND, cursp(), sym_idx(s, minus), 0); + } + if (val) push(); + break; + } +} + +static void +codegen_colon2(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_colon2_node *n = (struct mrb_ast_colon2_node*)varnode; + // Generate COLON2 (::) access manually + int sym = sym_idx(s, n->name); + codegen(s, n->base, VAL); + pop(); + genop_2(s, OP_GETMCNST, cursp(), sym); + if (val) push(); +} + +static void +codegen_colon3(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_colon3_node *n = (struct mrb_ast_colon3_node*)varnode; + int sym = sym_idx(s, n->name); + genop_1(s, OP_OCLASS, cursp()); + genop_2(s, OP_GETMCNST, cursp(), sym); + if (val) push(); +} - uint32_t pos = JMPLINK_START; - struct loopinfo *lp = loop_push(s, LOOP_NORMAL); +static void +codegen_defined(codegen_scope *s, node *varnode, int val) +{ + // DEFINED nodes are rarely used - generate basic implementation + (void)varnode; // suppress unused warning + if (val) { + // For now, just return nil (defined? is complex to implement correctly) + genop_1(s, OP_LOADNIL, cursp()); + push(); + } +} - if (!val) lp->reg = -1; - lp->pc0 = new_label(s); - codegen(s, tree->car, VAL); - pop(); - if (nt == NODE_WHILE) { - pos = genjmp2_0(s, OP_JMPNOT, cursp(), NOVAL); +static void +codegen_zsuper(codegen_scope *s, node *varnode, int val) +{ + /* NODE_ZSUPER now uses mrb_ast_super_node, which may have args */ + struct mrb_ast_super_node *zsuper_n = super_node(varnode); + node *tree = zsuper_n->args; /* May be NULL or args added by call_with_block */ + + codegen_scope *s2 = s; + int lv = 0; + uint16_t ainfo = 0; + int n = CALL_MAXARGS; + int sp = cursp(); + mrb_bool has_block_arg = FALSE; + + push(); /* room for receiver */ + int argary_pos = cursp(); + while (!s2->mscope) { + lv++; + s2 = s2->prev; + if (!s2) break; + } + if (s2 && s2->ainfo > 0) { + ainfo = s2->ainfo; + has_block_arg = (ainfo >> 13) & 0x1; + } + if (lv > 0xf) codegen_error(s, "too deep nesting"); + if (ainfo > 0) { + genop_2S(s, OP_ARGARY, argary_pos, (ainfo<<4)|(lv & 0xf)); + push(); push(); push(); /* ARGARY pushes 3 values at most */ + pop(); pop(); pop(); + /* keyword arguments */ + if (ainfo & 0x1) { + n |= CALL_MAXARGS<<4; + push(); + /* If parent has keywords but no block parameter, ARGARY reads garbage for block */ + if (!has_block_arg) { + genop_1(s, OP_LOADNIL, argary_pos+2); } - else { - pos = genjmp2_0(s, OP_JMPIF, cursp(), NOVAL); + } + /* block argument - tree here is args, so check for block */ + if (tree) { + struct mrb_ast_callargs *callargs = (struct mrb_ast_callargs*)tree; + if (callargs->block_arg) { + push(); + codegen(s, callargs->block_arg, VAL); } - lp->pc1 = new_label(s); - codegen(s, tree->cdr, NOVAL); - genjmp(s, OP_JMP, lp->pc0); - dispatch(s, pos); - loop_pop(s, val); } - break; + } + else { + /* block argument */ + if (tree) { + struct mrb_ast_callargs *callargs = (struct mrb_ast_callargs*)tree; + if (callargs->block_arg) { + codegen(s, callargs->block_arg, VAL); + } + } + else if (s2) { + gen_blkmove(s, 0, lv); + } + else { + genop_1(s, OP_LOADNIL, cursp()); + } + n = 0; + } + s->sp = sp; + genop_2(s, OP_SUPER, cursp(), n); + if (val) push(); +} - case NODE_FOR: - for_body(s, tree); +static void +codegen_lambda(codegen_scope *s, node *varnode, int val) +{ + if (!val) return; + + struct mrb_ast_lambda_node *n = lambda_node(varnode); + + /* Call lambda_body directly with individual parameters */ + int idx = lambda_body(s, n->locals, n->args, n->body, 1); + genop_2(s, OP_LAMBDA, cursp(), idx); + push(); +} + +static void +codegen_words(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_words_node *n = words_node(varnode); + gen_literal_array(s, n->args, FALSE, val); +} + +static void +codegen_symbols(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_symbols_node *n = symbols_node(varnode); + gen_literal_array(s, n->args, TRUE, val); +} + +static void +codegen_splat(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_splat_node *n = splat_node(varnode); + // Generate code for the splat value directly + codegen(s, n->value, val); +} + +static void +codegen_block_arg(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_block_arg_node *n = block_arg_node(varnode); + + if (!n->value) { + int idx = lv_idx(s, MRB_OPSYM(and)); + + if (idx == 0) { + gen_getupvar(s, cursp(), MRB_OPSYM(and)); + } + else { + gen_move(s, cursp(), idx, val); + } if (val) push(); - break; + } + else { + codegen(s, n->value, val); + } +} - case NODE_CASE: - { - int head = 0; - uint32_t pos1, pos2, pos3, tmp; - node *n; - - pos3 = JMPLINK_START; - if (tree->car) { - head = cursp(); - codegen(s, tree->car, VAL); - } - tree = tree->cdr; - while (tree) { - n = tree->car->car; - pos1 = pos2 = JMPLINK_START; - while (n) { - codegen(s, n->car, VAL); - if (head) { - gen_move(s, cursp(), head, 0); - push(); push(); pop(); pop(); pop(); - if (nint(n->car->car) == NODE_SPLAT) { - genop_3(s, OP_SEND, cursp(), new_sym(s, MRB_SYM_2(s->mrb, __case_eqq)), 1); - } - else { - genop_3(s, OP_SEND, cursp(), new_sym(s, MRB_OPSYM_2(s->mrb, eqq)), 1); - } - } - else { - pop(); - } - tmp = genjmp2(s, OP_JMPIF, cursp(), pos2, NOVAL); - pos2 = tmp; - n = n->cdr; - } - if (tree->car->car) { - pos1 = genjmp_0(s, OP_JMP); - dispatch_linked(s, pos2); - } - codegen(s, tree->car->cdr, val); - if (val) pop(); - tmp = genjmp(s, OP_JMP, pos3); - pos3 = tmp; - dispatch(s, pos1); - tree = tree->cdr; - } - if (val) { - uint32_t pos = cursp(); - genop_1(s, OP_LOADNIL, cursp()); - if (pos3 != JMPLINK_START) dispatch_linked(s, pos3); - if (head) pop(); - if (cursp() != pos) { - gen_move(s, cursp(), pos, 0); - } - push(); - } - else { - if (pos3 != JMPLINK_START) { - dispatch_linked(s, pos3); - } - if (head) { - pop(); - } - } +static void +codegen_scope_node(codegen_scope *s, const node *varnode, int val) +{ + struct mrb_ast_scope_node *scope = scope_node(varnode); + + /* Pass locals and body directly to scope_body() */ + scope_body(s, scope->locals, scope->body, val); +} + +static void +codegen_begin(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_begin_node *begin = begin_node(varnode); + node *body = begin->body; + + codegen(s, body, val); +} + +static void +codegen_ensure(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_ensure_node *ensure = ensure_node(varnode); + node *body = ensure->body; + node *ensure_clause = ensure->ensure_clause; + + if (!ensure_clause || !is_empty_stmts(ensure_clause)) { + int catch_entry, begin, end, target; + int idx; + + catch_entry = catch_handler_new(s); + begin = s->pc; + codegen(s, body, val); + end = target = s->pc; + push(); + idx = cursp(); + genop_1(s, OP_EXCEPT, idx); + push(); + codegen(s, ensure_clause, NOVAL); + pop(); + genop_1(s, OP_RAISEIF, idx); + pop(); + catch_handler_set(s, catch_entry, MRB_CATCH_ENSURE, begin, end, target); + } + else { /* empty ensure ignored */ + codegen(s, body, val); + } +} + +static void +codegen_stmts(codegen_scope *s, node *varnode, int val) +{ + struct mrb_ast_stmts_node *stmts = stmts_node(varnode); + node *tree = stmts_node(stmts)->stmts; + + if (val && !tree) { + gen_load_nil(s, 1); + } + while (tree) { + codegen(s, tree->car, tree->cdr ? NOVAL : val); + tree = tree->cdr; + } +} + +static mrb_bool +is_empty_stmts(node *stmt_node) +{ + if (!stmt_node) return TRUE; + + if (node_type(stmt_node) == NODE_STMTS) { + /* Variable-sized NODE_STMTS with internal cons-list */ + struct mrb_ast_stmts_node *stmts = (struct mrb_ast_stmts_node*)stmt_node; + return stmts->stmts == NULL; + } + + return FALSE; +} + +/* Declaration codegen functions */ + +static void +codegen_alias(codegen_scope *s, const node *varnode, int val) +{ + struct mrb_ast_alias_node *alias = alias_node(varnode); + + int a = sym_idx(s, alias->new_name); + int b = sym_idx(s, alias->old_name); + + genop_2(s, OP_ALIAS, a, b); + gen_load_nil(s, val); +} + +static void +codegen_undef(codegen_scope *s, const node *varnode, int val) +{ + struct mrb_ast_undef_node *undef = undef_node(varnode); + node *t = undef->syms; + + while (t) { + int symbol = sym_idx(s, node_to_sym(t->car)); + genop_1(s, OP_UNDEF, symbol); + t = t->cdr; + } + gen_load_nil(s, val); +} + +static void +codegen_sdef(codegen_scope *s, const node *varnode, int val) +{ + struct mrb_ast_sdef_node *sdef = sdef_node(varnode); + node *recv = sdef->obj; + int sym = sym_idx(s, sdef->name); + + /* Call lambda_body directly with individual parameters */ + /* For NODE_SDEF, args should contain the full locals structure from defs_setup */ + int idx = lambda_body(s, sdef->locals, sdef->args, sdef->body, 0); + + codegen(s, recv, VAL); + pop(); + if (idx <= 0xff) { + /* SDEF fusion: SCLASS + METHOD + DEF -> SDEF */ + genop_3(s, OP_SDEF, cursp(), sym, idx); + } + else { + genop_1(s, OP_SCLASS, cursp()); + push(); + genop_2(s, OP_METHOD, cursp(), idx); + push(); pop(); + pop(); + genop_2(s, OP_DEF, cursp(), sym); + } + if (val) push(); +} + + +static void +codegen(codegen_scope *s, node *tree, int val) +{ + int rlev = s->rlev; + + if (!tree) { + if (val) { + genop_1(s, OP_LOADNIL, cursp()); + push(); } - break; + return; + } - case NODE_SCOPE: - scope_body(s, tree, NOVAL); - break; + s->rlev++; + if (s->rlev > MRB_CODEGEN_LEVEL_MAX) { + codegen_error(s, "too complex expression"); + } - case NODE_FCALL: - case NODE_CALL: - gen_call(s, tree, val, 0); - break; - case NODE_SCALL: - gen_call(s, tree, val, 1); - break; + /* Check if this is a variable-sized node */ + /* For variable-sized nodes, get filename/lineno from the variable node header */ + struct mrb_ast_var_header *var_head = get_var_header(tree); - case NODE_DOT2: - codegen(s, tree->car, val); - codegen(s, tree->cdr, val); + if (s->irep && s->filename_index != var_head->filename_index) { + mrb_sym fname = mrb_parser_get_filename(s->parser, s->filename_index); + const char *filename = mrb_sym_name_len(s->mrb, fname, NULL); + + if (filename) { + mrb_debug_info_append_file(s->mrb, s->irep->debug_info, + filename, s->lines, s->debug_start_pos, s->pc); + } + s->debug_start_pos = s->pc; + s->filename_index = var_head->filename_index; + s->filename_sym = mrb_parser_get_filename(s->parser, var_head->filename_index); + } + s->lineno = var_head->lineno; + + /* Process variable-sized node directly */ + enum node_type var_type = (enum node_type)var_head->node_type; + + switch (var_type) { + case NODE_INT: if (val) { - pop(); pop(); - genop_1(s, OP_RANGE_INC, cursp()); + gen_int(s, cursp(), int_node(tree)->value); push(); } break; - case NODE_DOT3: - codegen(s, tree->car, val); - codegen(s, tree->cdr, val); + case NODE_BIGINT: if (val) { - pop(); pop(); - genop_1(s, OP_RANGE_EXC, cursp()); + char *str = bigint_node(tree)->string; + int base = bigint_node(tree)->base; + int off = new_litbint(s, str, base); + genop_2(s, OP_LOADL, cursp(), off); push(); } break; - case NODE_COLON2: + case NODE_SYM: { - int sym = new_sym(s, nsym(tree->cdr)); - - codegen(s, tree->car, VAL); - pop(); - genop_2(s, OP_GETMCNST, cursp(), sym); - if (val) push(); + int i = sym_idx(s, sym_node(tree)->symbol); + gen_load_op2(s, OP_LOADSYM, i, val); } break; - case NODE_COLON3: - { - int sym = new_sym(s, nsym(tree)); + case NODE_LVAR: + gen_lvar(s, var_node(tree)->symbol, val); + break; - genop_1(s, OP_OCLASS, cursp()); - genop_2(s, OP_GETMCNST, cursp(), sym); - if (val) push(); - } + case NODE_GVAR: + gen_xvar(s, var_node(tree)->symbol, val, OP_GETGV); break; - case NODE_ARRAY: - { - int n; + case NODE_IVAR: + gen_xvar(s, var_node(tree)->symbol, val, OP_GETIV); + break; - n = gen_values(s, tree, val, 0); - if (val) { - if (n >= 0) { - pop_n(n); - genop_2(s, OP_ARRAY, cursp(), n); - } - push(); - } - } + case NODE_CVAR: + gen_xvar(s, var_node(tree)->symbol, val, OP_GETCV); + break; + + case NODE_CALL: + codegen_call(s, tree, val); + break; + + case NODE_ARRAY: + codegen_array(s, tree, val); break; case NODE_HASH: - case NODE_KW_HASH: - { - int nk = gen_hash(s, tree, val, GEN_LIT_ARY_MAX); - if (val && nk >= 0) { - pop_n(nk*2); - genop_2(s, OP_HASH, cursp(), nk); - push(); - } - } + codegen_hash(s, tree, val); break; - case NODE_SPLAT: - codegen(s, tree, val); + case NODE_IF: + codegen_if(s, tree, val); break; - case NODE_ASGN: - gen_assignment(s, tree->car, tree->cdr, 0, val); + case NODE_WHILE: + codegen_loop(s, tree, val, FALSE); break; - case NODE_MASGN: - { - int len = 0, n = 0, post = 0; - node *t = tree->cdr, *p; - int rhs = cursp(); + case NODE_UNTIL: + codegen_loop(s, tree, val, TRUE); + break; - if (nint(t->car) == NODE_ARRAY && t->cdr && nosplat(t->cdr)) { - /* fixed rhs */ - t = t->cdr; - while (t) { - codegen(s, t->car, VAL); - len++; - t = t->cdr; - } - tree = tree->car; - if (tree->car) { /* pre */ - t = tree->car; - n = 0; - while (t) { - if (n < len) { - gen_assignment(s, t->car, NULL, rhs+n, NOVAL); - n++; - } - else { - genop_1(s, OP_LOADNIL, rhs+n); - gen_assignment(s, t->car, NULL, rhs+n, NOVAL); - } - t = t->cdr; + case NODE_FOR: + codegen_for(s, tree, val); + break; + + case NODE_CASE: + codegen_case(s, tree, val); + break; + + case NODE_CASE_MATCH: + codegen_case_match(s, tree, val); + break; + + case NODE_MATCH_PAT: + { + /* One-line pattern matching: expr in pattern / expr => pattern */ + struct mrb_ast_match_pat_node *mp = match_pat_node(tree); + int head; + int known_array_len = -1; + uint32_t fail_pos = JMPLINK_START; + + /* Optimize: for simple variable pattern, generate value directly into variable */ + if (node_type(mp->pattern) == NODE_PAT_VAR) { + struct mrb_ast_pat_var_node *pat_var = pat_var_node(mp->pattern); + if (pat_var->name) { + int idx = lv_idx(s, pat_var->name); + if (idx > 0) { + codegen(s, mp->value, VAL); + pop(); + gen_move(s, idx, cursp(), 0); /* peephole optimizes LOADI+MOVE */ + goto match_pat_push_result; } } - t = tree->cdr; - if (t) { - if (t->cdr) { /* post count */ - p = t->cdr->car; - while (p) { - post++; - p = p->cdr; - } + /* Wildcard pattern - just evaluate value for side effects */ + codegen(s, mp->value, NOVAL); + match_pat_push_result: + if (val) { + /* 'in' pattern returns true, '=>' pattern returns nil */ + if (mp->raise_on_fail) { + gen_load_nil(s, 1); } - if (t->car) { /* rest (len - pre - post) */ - int rn; + else { + genop_1(s, OP_LOADTRUE, cursp()); + push(); + } + } + break; + } - if (len < post + n) { - rn = 0; - } - else { - rn = len - post - n; - } - if (cursp() == rhs+n) { - genop_2(s, OP_ARRAY, cursp(), rn); + /* Optimize: array literal => array pattern with matching sizes */ + if (node_type(mp->value) == NODE_ARRAY && + node_type(mp->pattern) == NODE_PAT_ARRAY) { + struct mrb_ast_pat_array_node *pat = pat_array_node(mp->pattern); + /* Only optimize for exact match (no rest, no post) */ + if (pat->rest == 0 && pat->post == NULL) { + /* Count array elements (bail if splat present) and pattern pre elements */ + int arr_len = array_literal_known_len(mp->value); + int pat_len = 0; + node *e; + for (e = pat->pre; e; e = e->cdr) pat_len++; + if (arr_len >= 0 && arr_len == pat_len) { + /* Sizes match - skip deconstruct and size check */ + int arr_reg = cursp(); + int i = 0; + codegen(s, mp->value, VAL); /* Generate array */ + /* Extract elements directly with GETIDX */ + for (e = pat->pre; e; e = e->cdr, i++) { + gen_move(s, cursp(), arr_reg, 0); + push(); + gen_int(s, cursp(), i); + push(); + genop_1(s, OP_GETIDX, cursp() - 2); /* R[a] = R[a][R[a+1]] */ + pop(); + /* Match element pattern (element is now at cursp()-1) */ + codegen_pattern(s, e->car, cursp() - 1, &fail_pos, -1); + pop(); /* clean up array copy slot */ } - else { - genop_3(s, OP_ARRAY2, cursp(), rhs+n, rn); + pop(); /* pop array */ + if (fail_pos != JMPLINK_START) { + goto pattern_fail_handling; } - gen_assignment(s, t->car, NULL, cursp(), NOVAL); - n += rn; - } - if (t->cdr && t->cdr->car) { - t = t->cdr->car; - while (t) { - if (ncar, NULL, rhs+n, NOVAL); + /* Pattern always matches - push result if needed */ + if (val) { + if (mp->raise_on_fail) { + gen_load_nil(s, 1); /* '=>' pattern returns nil */ } else { - genop_1(s, OP_LOADNIL, cursp()); - gen_assignment(s, t->car, NULL, cursp(), NOVAL); + genop_1(s, OP_LOADTRUE, cursp()); /* 'in' pattern returns true */ + push(); } - t = t->cdr; - n++; } + break; } } - pop_n(len); - if (val) { - genop_2(s, OP_ARRAY, rhs, len); - push(); - } - } - else { - /* variable rhs */ - codegen(s, t, VAL); - gen_massignment(s, tree->car, rhs, val); - if (!val) { - pop(); - } } - } - break; - case NODE_OP_ASGN: - { - mrb_sym sym = nsym(tree->cdr->car); - mrb_int len; - const char *name = mrb_sym_name_len(s->mrb, sym, &len); - int idx, callargs = -1, vsp = -1; - - if ((len == 2 && name[0] == '|' && name[1] == '|') && - (nint(tree->car->car) == NODE_CONST || - nint(tree->car->car) == NODE_CVAR)) { - int catch_entry, begin, end; - int noexc, exc; - struct loopinfo *lp; - - lp = loop_push(s, LOOP_BEGIN); - lp->pc0 = new_label(s); - catch_entry = catch_handler_new(s); - begin = s->pc; - exc = cursp(); - codegen(s, tree->car, VAL); - end = s->pc; - noexc = genjmp_0(s, OP_JMP); - lp->type = LOOP_RESCUE; - catch_handler_set(s, catch_entry, MRB_CATCH_RESCUE, begin, end, s->pc); - genop_1(s, OP_EXCEPT, exc); - genop_1(s, OP_LOADF, exc); - dispatch(s, noexc); - loop_pop(s, NOVAL); - } - else if (nint(tree->car->car) == NODE_CALL) { - node *n = tree->car->cdr; - int base, i, nargs = 0; - callargs = 0; + head = cursp(); + + /* Check if value is array literal for optimization */ + known_array_len = array_literal_known_len(mp->value); + + /* Evaluate the value */ + codegen(s, mp->value, VAL); + + /* Generate pattern matching code */ + codegen_pattern(s, mp->pattern, head, &fail_pos, known_array_len); + pattern_fail_handling: + if (fail_pos != JMPLINK_START) { + /* Pattern can fail - generate failure handling code */ + uint32_t match_pos; + int saved_sp = cursp(); /* save stack pointer before branching */ + + /* Success path: pattern matched */ + pop(); /* pop the value */ if (val) { - vsp = cursp(); - push(); - } - codegen(s, n->car, VAL); /* receiver */ - idx = new_sym(s, nsym(n->cdr->car)); - base = cursp()-1; - if (n->cdr->cdr->car) { - nargs = gen_values(s, n->cdr->cdr->car->car, VAL, 13); - if (nargs >= 0) { - callargs = nargs; + /* 'in' pattern returns true, '=>' pattern returns nil */ + if (mp->raise_on_fail) { + gen_load_nil(s, 1); } - else { /* varargs */ + else { + genop_1(s, OP_LOADTRUE, cursp()); push(); - nargs = 1; - callargs = CALL_MAXARGS; } } - /* copy receiver and arguments */ - gen_move(s, cursp(), base, 1); - for (i=0; icar, VAL); - } - if (len == 2 && - ((name[0] == '|' && name[1] == '|') || - (name[0] == '&' && name[1] == '&'))) { - uint32_t pos; - pop(); - if (val) { - if (vsp >= 0) { - gen_move(s, vsp, cursp(), 1); - } - pos = genjmp2_0(s, name[0]=='|'?OP_JMPIF:OP_JMPNOT, cursp(), val); - } - else { - pos = genjmp2_0(s, name[0]=='|'?OP_JMPIF:OP_JMPNOT, cursp(), val); - } - codegen(s, tree->cdr->cdr->car, VAL); - pop(); - if (val && vsp >= 0) { - gen_move(s, vsp, cursp(), 1); - } - if (nint(tree->car->car) == NODE_CALL) { - if (callargs == CALL_MAXARGS) { - pop(); - genop_2(s, OP_ARYPUSH, cursp(), 1); - } - else { - pop_n(callargs); - callargs++; + /* Optimize: single JMPNOT can be replaced with MATCHERR for raise_on_fail */ + /* Conditions: (1) single entry in fail_pos chain, + * (2) JMPNOT is immediately before current position (no code between), and + * (3) the instruction is actually JMPNOT (not JMP from undefined pinned var) */ + if ((int32_t)(fail_pos + 2) + (int16_t)PEEK_S(s->iseq+fail_pos) == 0 && + fail_pos + 2 == s->pc && + s->iseq[fail_pos - 2] == OP_JMPNOT) { + if (mp->raise_on_fail) { + /* Replace JMPNOT(BS,4bytes) with MATCHERR(B,2bytes)+NOP+NOP; + * keep the same size so that any jump targeting s->pc stays valid */ + s->iseq[fail_pos - 2] = OP_MATCHERR; + /* fail_pos-1 already holds the register operand */ + s->iseq[fail_pos] = OP_NOP; + s->iseq[fail_pos + 1] = OP_NOP; + s->sp = saved_sp - 1; + if (val) push(); + break; /* Pattern matching complete */ } - pop(); - idx = new_sym(s, attrsym(s, nsym(tree->car->cdr->cdr->car))); - genop_3(s, OP_SEND, cursp(), idx, callargs); + /* Single failure point with 'in' pattern - invert JMPNOT to JMPIF */ + s->iseq[fail_pos - 2] = OP_JMPIF; + match_pos = fail_pos; } else { - gen_assignment(s, tree->car, NULL, cursp(), val); + /* Multiple failure points - need JMP to skip error handling */ + match_pos = genjmp(s, OP_JMP, JMPLINK_START); + dispatch_linked(s, fail_pos); } - dispatch(s, pos); - goto exit; - } - codegen(s, tree->cdr->cdr->car, VAL); - push(); pop(); - pop(); pop(); - if (len == 1 && name[0] == '+') { - gen_addsub(s, OP_ADD, cursp()); - } - else if (len == 1 && name[0] == '-') { - gen_addsub(s, OP_SUB, cursp()); - } - else if (len == 1 && name[0] == '*') { - genop_1(s, OP_MUL, cursp()); - } - else if (len == 1 && name[0] == '/') { - genop_1(s, OP_DIV, cursp()); - } - else if (len == 1 && name[0] == '<') { - genop_1(s, OP_LT, cursp()); - } - else if (len == 2 && name[0] == '<' && name[1] == '=') { - genop_1(s, OP_LE, cursp()); - } - else if (len == 1 && name[0] == '>') { - genop_1(s, OP_GT, cursp()); - } - else if (len == 2 && name[0] == '>' && name[1] == '=') { - genop_1(s, OP_GE, cursp()); - } - else { - idx = new_sym(s, sym); - genop_3(s, OP_SEND, cursp(), idx, 1); - } - if (callargs < 0) { - gen_assignment(s, tree->car, NULL, cursp(), val); - } - else { - if (val && vsp >= 0) { - gen_move(s, vsp, cursp(), 0); - } - if (callargs == CALL_MAXARGS) { - pop(); - genop_2(s, OP_ARYPUSH, cursp(), 1); + /* Failure path: restore stack pointer (value still on stack at runtime) */ + s->sp = saved_sp; + pop(); /* pop the value */ + if (mp->raise_on_fail) { + /* expr => pattern: raise NoMatchingPatternError */ + genop_1(s, OP_LOADFALSE, cursp()); /* Load false for MATCHERR */ + genop_1(s, OP_MATCHERR, cursp()); } else { - pop_n(callargs); - callargs++; - } - pop(); - idx = new_sym(s, attrsym(s,nsym(tree->car->cdr->cdr->car))); - genop_3(s, OP_SEND, cursp(), idx, callargs); - } - } - break; - - case NODE_SUPER: - { - codegen_scope *s2 = s; - int lv = 0; - int n = 0, nk = 0, st = 0; - - push(); - while (!s2->mscope) { - lv++; - s2 = s2->prev; - if (!s2) break; - } - if (tree) { - node *args = tree->car; - if (args) { - st = n = gen_values(s, args, VAL, 14); - if (n < 0) { - st = 1; n = 15; + /* expr in pattern: return false */ + if (val) { + genop_1(s, OP_LOADFALSE, cursp()); push(); } } - /* keyword arguments */ - if (tree->cdr->car) { - nk = gen_hash(s, tree->cdr->car->cdr, VAL, 14); - if (nk < 0) {st++; nk = 15;} - else st += nk*2; - n |= nk<<4; - } - /* block arguments */ - if (tree->cdr->cdr) { - codegen(s, tree->cdr->cdr, VAL); - } - else if (s2) gen_blkmove(s, s2->ainfo, lv); - else { - genop_1(s, OP_LOADNIL, cursp()); - push(); - } - } - else { - if (s2) gen_blkmove(s, s2->ainfo, lv); - else { - genop_1(s, OP_LOADNIL, cursp()); - push(); - } - } - st++; - pop_n(st+1); - genop_2(s, OP_SUPER, cursp(), n); - if (val) push(); - } - break; - - case NODE_ZSUPER: - { - codegen_scope *s2 = s; - int lv = 0; - uint16_t ainfo = 0; - int n = CALL_MAXARGS; - int sp = cursp(); - push(); /* room for receiver */ - while (!s2->mscope) { - lv++; - s2 = s2->prev; - if (!s2) break; - } - if (s2 && s2->ainfo > 0) { - ainfo = s2->ainfo; - } - if (ainfo > 0) { - genop_2S(s, OP_ARGARY, cursp(), (ainfo<<4)|(lv & 0xf)); - push(); push(); push(); /* ARGARY pushes 3 values at most */ - pop(); pop(); pop(); - /* keyword arguments */ - if (ainfo & 0x1) { - n |= CALL_MAXARGS<<4; - push(); - } - /* block argument */ - if (tree && tree->cdr && tree->cdr->cdr) { - push(); - codegen(s, tree->cdr->cdr, VAL); - } + /* End of pattern matching */ + dispatch(s, match_pos); + /* Restore sp to match success path value */ + s->sp = saved_sp - 1; + if (val) push(); } else { - /* block argument */ - if (tree && tree->cdr && tree->cdr->cdr) { - codegen(s, tree->cdr->cdr, VAL); - } - else { - gen_blkmove(s, 0, lv); + /* Pattern always matches - pop value and push result if needed */ + pop(); /* pop the value */ + if (val) { + if (mp->raise_on_fail) { + gen_load_nil(s, 1); /* '=>' pattern returns nil */ + } + else { + genop_1(s, OP_LOADTRUE, cursp()); /* 'in' pattern returns true */ + push(); + } } - n = 0; } - s->sp = sp; - genop_2(s, OP_SUPER, cursp(), n); - if (val) push(); - } - break; - - case NODE_RETURN: - if (tree) { - gen_retval(s, tree); } - else { - genop_1(s, OP_LOADNIL, cursp()); - } - if (s->loop) { - gen_return(s, OP_RETURN_BLK, cursp()); - } - else { - gen_return(s, OP_RETURN, cursp()); - } - if (val) push(); break; - case NODE_YIELD: - { - codegen_scope *s2 = s; - int lv = 0, ainfo = -1; - int n = 0, sendv = 0; - - while (!s2->mscope) { - lv++; - s2 = s2->prev; - if (!s2) break; - } - if (s2) { - ainfo = (int)s2->ainfo; - } - if (ainfo < 0) codegen_error(s, "invalid yield (SyntaxError)"); - push(); - if (tree) { - n = gen_values(s, tree, VAL, 14); - if (n < 0) { - n = sendv = 1; - push(); - } - } - push();pop(); /* space for a block */ - pop_n(n+1); - genop_2S(s, OP_BLKPUSH, cursp(), (ainfo<<4)|(lv & 0xf)); - if (sendv) n = CALL_MAXARGS; - genop_3(s, OP_SEND, cursp(), new_sym(s, MRB_SYM_2(s->mrb, call)), n); - if (val) push(); - } + case NODE_DEF: + codegen_def(s, tree, val); break; - case NODE_BREAK: - loop_break(s, tree); - if (val) push(); + case NODE_CLASS: + codegen_class(s, tree, val); break; - case NODE_NEXT: - if (!s->loop) { - raise_error(s, "unexpected next"); - } - else if (s->loop->type == LOOP_NORMAL) { - codegen(s, tree, NOVAL); - genjmp(s, OP_JMPUW, s->loop->pc0); - } - else { - if (tree) { - codegen(s, tree, VAL); - pop(); - } - else { - genop_1(s, OP_LOADNIL, cursp()); - } - gen_return(s, OP_RETURN, cursp()); - } - if (val) push(); + case NODE_MODULE: + codegen_module(s, tree, val); break; - case NODE_REDO: - if (!s->loop || s->loop->type == LOOP_BEGIN || s->loop->type == LOOP_RESCUE) { - raise_error(s, "unexpected redo"); - } - else { - genjmp(s, OP_JMPUW, s->loop->pc1); - } - if (val) push(); + case NODE_SCLASS: + codegen_sclass(s, tree, val); break; - case NODE_RETRY: - { - const char *msg = "unexpected retry"; - const struct loopinfo *lp = s->loop; - - while (lp && lp->type != LOOP_RESCUE) { - lp = lp->prev; - } - if (!lp) { - raise_error(s, msg); - } - else { - genjmp(s, OP_JMPUW, lp->pc0); - } - if (val) push(); - } + case NODE_ASGN: + codegen_asgn(s, tree, val); break; - case NODE_LVAR: - if (val) { - int idx = lv_idx(s, nsym(tree)); - - if (idx > 0) { - gen_move(s, cursp(), idx, val); - } - else { - gen_getupvar(s, cursp(), nsym(tree)); - } - push(); - } + case NODE_MASGN: + codegen_masgn(s, tree, NULL, 0, val); break; - case NODE_NVAR: - if (val) { - int idx = nint(tree); - - gen_move(s, cursp(), idx, val); - - push(); - } + case NODE_MARG: + /* Parameter destructuring should be handled inline by lambda_body */ + /* This case should not be reached in normal execution */ break; - case NODE_GVAR: - { - int sym = new_sym(s, nsym(tree)); - - genop_2(s, OP_GETGV, cursp(), sym); - if (val) push(); - } + case NODE_OP_ASGN: + codegen_op_asgn(s, tree, val); break; - case NODE_IVAR: - { - int sym = new_sym(s, nsym(tree)); - - genop_2(s, OP_GETIV, cursp(), sym); - if (val) push(); - } + case NODE_AND: + codegen_and(s, tree, val); break; - case NODE_CVAR: - { - int sym = new_sym(s, nsym(tree)); - - genop_2(s, OP_GETCV, cursp(), sym); - if (val) push(); - } + case NODE_OR: + codegen_or(s, tree, val); break; - case NODE_CONST: - { - int sym = new_sym(s, nsym(tree)); - - genop_2(s, OP_GETCONST, cursp(), sym); - if (val) push(); - } + case NODE_RETURN: + codegen_return(s, tree, val); break; - case NODE_BACK_REF: - if (val) { - char buf[] = {'$', nchar(tree)}; - int sym = new_sym(s, mrb_intern(s->mrb, buf, sizeof(buf))); - - genop_2(s, OP_GETGV, cursp(), sym); - push(); - } + case NODE_YIELD: + codegen_yield(s, tree, val); break; - case NODE_NTH_REF: - if (val) { - mrb_state *mrb = s->mrb; - mrb_value str; - int sym; - - str = mrb_format(mrb, "$%d", nint(tree)); - sym = new_sym(s, mrb_intern_str(mrb, str)); - genop_2(s, OP_GETGV, cursp(), sym); - push(); - } + case NODE_SUPER: + codegen_super(s, tree, val); break; - case NODE_ARG: - /* should not happen */ + case NODE_STR: + codegen_str(s, tree, val); break; - case NODE_BLOCK_ARG: - if (!tree) { - int idx = lv_idx(s, MRB_OPSYM_2(s->mrb, and)); - - if (idx == 0) { - gen_getupvar(s, cursp(), MRB_OPSYM_2(s->mrb, and)); - } - else { - gen_move(s, cursp(), idx, val); - } - if (val) push(); - } - else { - codegen(s, tree, val); - } + case NODE_DOT2: + codegen_range(s, tree, val, FALSE); break; - case NODE_INT: - if (val) { - char *p = (char*)tree->car; - int base = nint(tree->cdr->car); - mrb_int i; - mrb_bool overflow; - - i = readint(s, p, base, FALSE, &overflow); - if (overflow) { - int off = new_litbint(s, p, base, FALSE); - genop_2(s, OP_LOADL, cursp(), off); - } - else { - gen_int(s, cursp(), i); - } - push(); - } + case NODE_DOT3: + codegen_range(s, tree, val, TRUE); break; -#ifndef MRB_NO_FLOAT case NODE_FLOAT: - if (val) { - char *p = (char*)tree; - mrb_float f = mrb_float_read(p, NULL); - int off = new_lit_float(s, f); - - genop_2(s, OP_LOADL, cursp(), off); - push(); - } + codegen_float(s, tree, val); break; -#endif - case NODE_NEGATE: - { - nt = nint(tree->car); - switch (nt) { -#ifndef MRB_NO_FLOAT - case NODE_FLOAT: - if (val) { - char *p = (char*)tree->cdr; - mrb_float f = mrb_float_read(p, NULL); - int off = new_lit_float(s, -f); + case NODE_SELF: + codegen_self(s, tree, val); + break; - genop_2(s, OP_LOADL, cursp(), off); - push(); - } - break; -#endif + case NODE_NIL: + codegen_nil(s, tree, val); + break; - case NODE_INT: - if (val) { - char *p = (char*)tree->cdr->car; - int base = nint(tree->cdr->cdr->car); - mrb_int i; - mrb_bool overflow; - - i = readint(s, p, base, TRUE, &overflow); - if (overflow) { - int off = new_litbint(s, p, base, TRUE); - genop_2(s, OP_LOADL, cursp(), off); - } - else { - gen_int(s, cursp(), i); - } - push(); - } - break; + case NODE_TRUE: + codegen_true(s, tree, val); + break; - default: - if (val) { - codegen(s, tree, VAL); - pop(); - push_n(2);pop_n(2); /* space for receiver&block */ - mrb_sym minus = MRB_OPSYM_2(s->mrb, minus); - if (!gen_uniop(s, minus, cursp())) { - genop_3(s, OP_SEND, cursp(), new_sym(s, minus), 0); - } - push(); - } - else { - codegen(s, tree, NOVAL); - } - break; - } - } + case NODE_FALSE: + codegen_false(s, tree, val); break; - case NODE_STR: - if (val) { - char *p = (char*)tree->car; - mrb_int len = nint(tree->cdr); - int off = new_lit_str(s, p, len); + case NODE_CONST: + codegen_const(s, tree, val); + break; - genop_2(s, OP_STRING, cursp(), off); - push(); - } + case NODE_RESCUE: + codegen_rescue(s, tree, val); break; - case NODE_HEREDOC: - tree = ((struct mrb_parser_heredoc_info*)tree)->doc; - /* fall through */ - case NODE_DSTR: - if (val) { - node *n = tree; + case NODE_BLOCK: + codegen_block(s, tree, val); + break; - if (!n) { - genop_1(s, OP_LOADNIL, cursp()); - push(); - break; - } - codegen(s, n->car, VAL); - n = n->cdr; - while (n) { - codegen(s, n->car, VAL); - pop(); pop(); - genop_1(s, OP_STRCAT, cursp()); - push(); - n = n->cdr; - } - } - else { - node *n = tree; + case NODE_BREAK: + codegen_break(s, tree, val); + break; - while (n) { - if (nint(n->car->car) != NODE_STR) { - codegen(s, n->car, NOVAL); - } - n = n->cdr; - } - } + case NODE_NEXT: + codegen_next(s, tree, val); break; - case NODE_WORDS: - gen_literal_array(s, tree, FALSE, val); + case NODE_REDO: + codegen_redo(s, tree, val); break; - case NODE_SYMBOLS: - gen_literal_array(s, tree, TRUE, val); + case NODE_RETRY: + codegen_retry(s, tree, val); break; - case NODE_DXSTR: - { - node *n; - int sym = new_sym(s, MRB_SYM_2(s->mrb, Kernel)); + case NODE_WHILE_MOD: + codegen_loop_mod(s, tree, val, FALSE); + break; - genop_1(s, OP_LOADSELF, cursp()); - push(); - codegen(s, tree->car, VAL); - n = tree->cdr; - while (n) { - if (nint(n->car->car) == NODE_XSTR) { - n->car->car = (struct mrb_ast_node*)(intptr_t)NODE_STR; - mrb_assert(!n->cdr); /* must be the end */ - } - codegen(s, n->car, VAL); - pop(); pop(); - genop_1(s, OP_STRCAT, cursp()); - push(); - n = n->cdr; - } - push(); /* for block */ - pop_n(3); - sym = new_sym(s, MRB_OPSYM_2(s->mrb, tick)); /* ` */ - genop_3(s, OP_SEND, cursp(), sym, 1); - if (val) push(); - } + case NODE_UNTIL_MOD: + codegen_loop_mod(s, tree, val, TRUE); break; case NODE_XSTR: - { - char *p = (char*)tree->car; - mrb_int len = nint(tree->cdr); - int off = new_lit_str(s, p, len); - int sym; - - genop_1(s, OP_LOADSELF, cursp()); - push(); - genop_2(s, OP_STRING, cursp(), off); - push(); push(); - pop_n(3); - sym = new_sym(s, MRB_OPSYM_2(s->mrb, tick)); /* ` */ - genop_3(s, OP_SEND, cursp(), sym, 1); - if (val) push(); - } + codegen_xstr(s, tree, val); break; case NODE_REGX: - if (val) { - char *p1 = (char*)tree->car; - char *p2 = (char*)tree->cdr->car; - char *p3 = (char*)tree->cdr->cdr; - int sym = new_sym(s, mrb_intern_lit(s->mrb, REGEXP_CLASS)); - int off = new_lit_cstr(s, p1); - int argc = 1; - - genop_1(s, OP_OCLASS, cursp()); - genop_2(s, OP_GETMCNST, cursp(), sym); - push(); - genop_2(s, OP_STRING, cursp(), off); - push(); - if (p2 || p3) { - if (p2) { /* opt */ - off = new_lit_cstr(s, p2); - genop_2(s, OP_STRING, cursp(), off); - } - else { - genop_1(s, OP_LOADNIL, cursp()); - } - push(); - argc++; - if (p3) { /* enc */ - off = new_lit_str(s, p3, 1); - genop_2(s, OP_STRING, cursp(), off); - push(); - argc++; - } - } - push(); /* space for a block */ - pop_n(argc+2); - sym = new_sym(s, MRB_SYM_2(s->mrb, compile)); - genop_3(s, OP_SEND, cursp(), sym, argc); - push(); - } + codegen_regx(s, tree, val); break; - case NODE_DREGX: - if (val) { - node *n = tree->car; - int sym = new_sym(s, mrb_intern_lit(s->mrb, REGEXP_CLASS)); - int argc = 1; - int off; - char *p; - - genop_1(s, OP_OCLASS, cursp()); - genop_2(s, OP_GETMCNST, cursp(), sym); - push(); - codegen(s, n->car, VAL); - n = n->cdr; - while (n) { - codegen(s, n->car, VAL); - pop(); pop(); - genop_1(s, OP_STRCAT, cursp()); - push(); - n = n->cdr; - } - n = tree->cdr->cdr; - if (n->car) { /* tail */ - p = (char*)n->car; - off = new_lit_cstr(s, p); - codegen(s, tree->car, VAL); - genop_2(s, OP_STRING, cursp(), off); - pop(); - genop_1(s, OP_STRCAT, cursp()); - push(); - } - if (n->cdr->car) { /* opt */ - char *p2 = (char*)n->cdr->car; - off = new_lit_cstr(s, p2); - genop_2(s, OP_STRING, cursp(), off); - push(); - argc++; - } - if (n->cdr->cdr) { /* enc */ - char *p2 = (char*)n->cdr->cdr; - off = new_lit_cstr(s, p2); - genop_2(s, OP_STRING, cursp(), off); - push(); - argc++; - } - push(); /* space for a block */ - pop_n(argc+2); - sym = new_sym(s, MRB_SYM_2(s->mrb, compile)); - genop_3(s, OP_SEND, cursp(), sym, argc); - push(); - } - else { - node *n = tree->car; + case NODE_HEREDOC: + codegen_heredoc(s, tree, val); + break; - while (n) { - if (nint(n->car->car) != NODE_STR) { - codegen(s, n->car, NOVAL); - } - n = n->cdr; - } - } + case NODE_DSYM: + codegen_dsym(s, tree, val); break; - case NODE_SYM: - if (val) { - int sym = new_sym(s, nsym(tree)); + case NODE_NTH_REF: + codegen_nth_ref(s, tree, val); + break; - genop_2(s, OP_LOADSYM, cursp(), sym); - push(); - } + case NODE_BACK_REF: + codegen_back_ref(s, tree, val); break; - case NODE_DSYM: - codegen(s, tree, val); - if (val) { - gen_intern(s); - } + case NODE_NVAR: + codegen_nvar(s, tree, val); break; - case NODE_SELF: - if (val) { - genop_1(s, OP_LOADSELF, cursp()); - push(); - } + case NODE_DVAR: + codegen_dvar(s, tree, val); break; - case NODE_NIL: - if (val) { - genop_1(s, OP_LOADNIL, cursp()); - push(); - } + case NODE_NOT: + codegen_not(s, tree, val); break; - case NODE_TRUE: - if (val) { - genop_1(s, OP_LOADT, cursp()); - push(); - } + case NODE_NEGATE: + codegen_negate(s, tree, val); break; - case NODE_FALSE: - if (val) { - genop_1(s, OP_LOADF, cursp()); - push(); - } + case NODE_COLON2: + codegen_colon2(s, tree, val); break; - case NODE_ALIAS: - { - int a = new_sym(s, nsym(tree->car)); - int b = new_sym(s, nsym(tree->cdr)); + case NODE_COLON3: + codegen_colon3(s, tree, val); + break; - genop_2(s, OP_ALIAS, a, b); - if (val) { - genop_1(s, OP_LOADNIL, cursp()); - push(); - } - } - break; + case NODE_DEFINED: + codegen_defined(s, tree, val); + break; - case NODE_UNDEF: - { - node *t = tree; + case NODE_ZSUPER: + codegen_zsuper(s, tree, val); + break; - while (t) { - int symbol = new_sym(s, nsym(t->car)); - genop_1(s, OP_UNDEF, symbol); - t = t->cdr; - } - if (val) { - genop_1(s, OP_LOADNIL, cursp()); - push(); - } - } + case NODE_LAMBDA: + codegen_lambda(s, tree, val); break; - case NODE_CLASS: - { - int idx; - node *body; + case NODE_WORDS: + codegen_words(s, tree, val); + break; - if (tree->car->car == (node*)0) { - genop_1(s, OP_LOADNIL, cursp()); - push(); - } - else if (tree->car->car == (node*)1) { - genop_1(s, OP_OCLASS, cursp()); - push(); - } - else { - codegen(s, tree->car->car, VAL); - } - if (tree->cdr->car) { - codegen(s, tree->cdr->car, VAL); - } - else { - genop_1(s, OP_LOADNIL, cursp()); - push(); - } - pop(); pop(); - idx = new_sym(s, nsym(tree->car->cdr)); - genop_2(s, OP_CLASS, cursp(), idx); - body = tree->cdr->cdr->car; - if (nint(body->cdr->car) == NODE_BEGIN && body->cdr->cdr == NULL) { - genop_1(s, OP_LOADNIL, cursp()); - } - else { - idx = scope_body(s, body, val); - genop_2(s, OP_EXEC, cursp(), idx); - } - if (val) { - push(); - } - } + case NODE_SYMBOLS: + codegen_symbols(s, tree, val); break; - case NODE_MODULE: - { - int idx; + case NODE_SPLAT: + codegen_splat(s, tree, val); + break; - if (tree->car->car == (node*)0) { - genop_1(s, OP_LOADNIL, cursp()); - push(); - } - else if (tree->car->car == (node*)1) { - genop_1(s, OP_OCLASS, cursp()); - push(); - } - else { - codegen(s, tree->car->car, VAL); - } - pop(); - idx = new_sym(s, nsym(tree->car->cdr)); - genop_2(s, OP_MODULE, cursp(), idx); - if (nint(tree->cdr->car->cdr->car) == NODE_BEGIN && - tree->cdr->car->cdr->cdr == NULL) { - genop_1(s, OP_LOADNIL, cursp()); - } - else { - idx = scope_body(s, tree->cdr->car, val); - genop_2(s, OP_EXEC, cursp(), idx); - } - if (val) { - push(); - } - } + case NODE_BLOCK_ARG: + codegen_block_arg(s, tree, val); break; - case NODE_SCLASS: - { - int idx; + case NODE_SCOPE: + codegen_scope_node(s, tree, val); + break; - codegen(s, tree->car, VAL); - pop(); - genop_1(s, OP_SCLASS, cursp()); - if (nint(tree->cdr->car->cdr->car) == NODE_BEGIN && - tree->cdr->car->cdr->cdr == NULL) { - genop_1(s, OP_LOADNIL, cursp()); - } - else { - idx = scope_body(s, tree->cdr->car, val); - genop_2(s, OP_EXEC, cursp(), idx); - } - if (val) { - push(); - } - } + case NODE_BEGIN: + codegen_begin(s, tree, val); break; - case NODE_DEF: - { - int sym = new_sym(s, nsym(tree->car)); - int idx = lambda_body(s, tree->cdr, 0); + case NODE_ENSURE: + codegen_ensure(s, tree, val); + break; - genop_1(s, OP_TCLASS, cursp()); - push(); - genop_2(s, OP_METHOD, cursp(), idx); - push(); pop(); - pop(); - genop_2(s, OP_DEF, cursp(), sym); - if (val) push(); - } + case NODE_STMTS: + codegen_stmts(s, tree, val); break; - case NODE_SDEF: - { - node *recv = tree->car; - int sym = new_sym(s, nsym(tree->cdr->car)); - int idx = lambda_body(s, tree->cdr->cdr, 0); + case NODE_ALIAS: + codegen_alias(s, tree, val); + break; - codegen(s, recv, VAL); - pop(); - genop_1(s, OP_SCLASS, cursp()); - push(); - genop_2(s, OP_METHOD, cursp(), idx); - push(); pop(); - pop(); - genop_2(s, OP_DEF, cursp(), sym); - if (val) push(); - } + case NODE_UNDEF: + codegen_undef(s, tree, val); break; case NODE_POSTEXE: - codegen(s, tree, NOVAL); + { + struct mrb_ast_postexe_node *postexe = postexe_node(tree); + codegen(s, postexe->body, NOVAL); + } + break; + + case NODE_SDEF: + codegen_sdef(s, tree, val); break; default: + /* Unhandled variable-sized node type - should not occur with current AST */ break; } - exit: s->rlev = rlev; } @@ -3814,7 +6965,7 @@ scope_add_irep(codegen_scope *s) s->irep = irep = mrb_add_irep(s->mrb); if (prev->irep->rlen == prev->rcapa) { prev->rcapa *= 2; - prev->reps = (mrb_irep**)codegen_realloc(s, prev->reps, sizeof(mrb_irep*)*prev->rcapa); + prev->reps = (mrb_irep**)mrbc_realloc(prev->reps, sizeof(mrb_irep*)*prev->rcapa); } prev->reps[prev->irep->rlen] = irep; prev->irep->rlen++; @@ -3825,8 +6976,8 @@ static codegen_scope* scope_new(mrb_state *mrb, codegen_scope *prev, node *nlv) { static const codegen_scope codegen_scope_zero = { 0 }; - mrb_pool *pool = mrb_pool_open(mrb); - codegen_scope *s = (codegen_scope*)mrb_pool_alloc(pool, sizeof(codegen_scope)); + mempool *pool = mempool_open(); + codegen_scope *s = (codegen_scope*)mempool_alloc(pool, sizeof(codegen_scope)); if (!s) { if (prev) @@ -3844,26 +6995,26 @@ scope_new(mrb_state *mrb, codegen_scope *prev, node *nlv) scope_add_irep(s); s->rcapa = 8; - s->reps = (mrb_irep**)mrb_malloc(mrb, sizeof(mrb_irep*)*s->rcapa); + s->reps = (mrb_irep**)mrbc_malloc(sizeof(mrb_irep*)*s->rcapa); s->icapa = 1024; - s->iseq = (mrb_code*)mrb_malloc(mrb, sizeof(mrb_code)*s->icapa); + s->iseq = (mrb_code*)mrbc_malloc(sizeof(mrb_code)*s->icapa); s->pcapa = 32; - s->pool = (mrb_pool_value*)mrb_malloc(mrb, sizeof(mrb_pool_value)*s->pcapa); + s->pool = (mrb_irep_pool*)mrbc_malloc(sizeof(mrb_irep_pool)*s->pcapa); s->scapa = 256; - s->syms = (mrb_sym*)mrb_malloc(mrb, sizeof(mrb_sym)*s->scapa); + s->syms = (mrb_sym*)mrbc_malloc(sizeof(mrb_sym)*s->scapa); s->lv = nlv; - s->sp += node_len(nlv)+1; /* add self */ - s->nlocals = s->sp; + s->sp += (nlv ? node_len(nlv) : 0) + 1; /* add self */ + s->nlocals = s->nregs = s->sp; if (nlv) { mrb_sym *lv; node *n = nlv; size_t i = 0; - s->irep->lv = lv = (mrb_sym*)mrb_malloc(mrb, sizeof(mrb_sym)*(s->nlocals-1)); + s->irep->lv = lv = (mrb_sym*)mrbc_malloc(sizeof(mrb_sym)*(s->nlocals-1)); for (i=0, n=nlv; n; i++,n=n->cdr) { lv[i] = lv_name(n); } @@ -3873,7 +7024,7 @@ scope_new(mrb_state *mrb, codegen_scope *prev, node *nlv) s->filename_sym = prev->filename_sym; if (s->filename_sym) { - s->lines = (uint16_t*)mrb_malloc(mrb, sizeof(short)*s->icapa); + s->lines = (uint16_t*)mrbc_malloc(sizeof(short)*s->icapa); } s->lineno = prev->lineno; @@ -3905,7 +7056,7 @@ scope_finish(codegen_scope *s) irep->flags = 0; if (s->iseq) { size_t catchsize = sizeof(struct mrb_irep_catch_handler) * irep->clen; - irep->iseq = (const mrb_code*)codegen_realloc(s, s->iseq, sizeof(mrb_code)*s->pc + catchsize); + irep->iseq = (const mrb_code*)mrbc_realloc(s->iseq, sizeof(mrb_code)*s->pc + catchsize); irep->ilen = s->pc; if (irep->clen > 0) { memcpy((void*)(irep->iseq + irep->ilen), s->catch_table, catchsize); @@ -3914,11 +7065,11 @@ scope_finish(codegen_scope *s) else { irep->clen = 0; } - mrb_free(s->mrb, s->catch_table); + mrbc_free(s->catch_table); s->catch_table = NULL; - irep->pool = (const mrb_pool_value*)codegen_realloc(s, s->pool, sizeof(mrb_pool_value)*irep->plen); - irep->syms = (const mrb_sym*)codegen_realloc(s, s->syms, sizeof(mrb_sym)*irep->slen); - irep->reps = (const mrb_irep**)codegen_realloc(s, s->reps, sizeof(mrb_irep*)*irep->rlen); + irep->pool = (const mrb_irep_pool*)mrbc_realloc(s->pool, sizeof(mrb_irep_pool)*irep->plen); + irep->syms = (const mrb_sym*)mrbc_realloc(s->syms, sizeof(mrb_sym)*irep->slen); + irep->reps = (const mrb_irep**)mrbc_realloc(s->reps, sizeof(mrb_irep*)*irep->rlen); if (s->filename_sym) { mrb_sym fname = mrb_parser_get_filename(s->parser, s->filename_index); const char *filename = mrb_sym_name_len(s->mrb, fname, NULL); @@ -3926,13 +7077,13 @@ scope_finish(codegen_scope *s) mrb_debug_info_append_file(s->mrb, s->irep->debug_info, filename, s->lines, s->debug_start_pos, s->pc); } - mrb_free(s->mrb, s->lines); + mrbc_free(s->lines); irep->nlocals = s->nlocals; irep->nregs = s->nregs; mrb_gc_arena_restore(mrb, s->ai); - mrb_pool_close(s->mpool); + mempool_close(s->mpool); } static struct loopinfo* @@ -3959,7 +7110,6 @@ loop_break(codegen_scope *s, node *tree) else { struct loopinfo *loop; - loop = s->loop; if (tree) { if (loop->reg < 0) { @@ -4023,8 +7173,8 @@ static int catch_handler_new(codegen_scope *s) { size_t newsize = sizeof(struct mrb_irep_catch_handler) * (s->irep->clen + 1); - s->catch_table = (struct mrb_irep_catch_handler*)codegen_realloc(s, (void*)s->catch_table, newsize); - return s->irep->clen ++; + s->catch_table = (struct mrb_irep_catch_handler*)mrbc_realloc((void*)s->catch_table, newsize); + return s->irep->clen++; } static void @@ -4061,7 +7211,7 @@ generate_code(mrb_state *mrb, parser_state *p, int val) codegen(scope, p->tree, val); proc = mrb_proc_new(mrb, scope->irep); mrb_irep_decref(mrb, scope->irep); - mrb_pool_close(scope->mpool); + mempool_close(scope->mpool); proc->c = NULL; if (mrb->c->cibase && mrb->c->cibase->proc == proc->upper) { proc->upper = NULL; @@ -4071,31 +7221,16 @@ generate_code(mrb_state *mrb, parser_state *p, int val) } MRB_CATCH(mrb->jmp) { mrb_irep_decref(mrb, scope->irep); - mrb_pool_close(scope->mpool); + mempool_close(scope->mpool); mrb->jmp = prev_jmp; return NULL; } MRB_END_EXC(mrb->jmp); } + MRB_API struct RProc* mrb_generate_code(mrb_state *mrb, parser_state *p) { - return generate_code(mrb, p, VAL); -} - -void -mrb_irep_remove_lv(mrb_state *mrb, mrb_irep *irep) -{ - int i; - - if (irep->flags & MRB_IREP_NO_FREE) return; - if (irep->lv) { - mrb_free(mrb, (void*)irep->lv); - irep->lv = NULL; - } - if (!irep->reps) return; - for (i = 0; i < irep->rlen; ++i) { - mrb_irep_remove_lv(mrb, (mrb_irep*)irep->reps[i]); - } + return generate_code(mrb, p, p->no_return_value ? NOVAL : VAL); } diff --git a/mrbgems/mruby-compiler/core/node.h b/mrbgems/mruby-compiler/core/node.h index a57b7bdf72..beb507bbc8 100644 --- a/mrbgems/mruby-compiler/core/node.h +++ b/mrbgems/mruby-compiler/core/node.h @@ -8,20 +8,20 @@ #define MRUBY_COMPILER_NODE_H enum node_type { - NODE_METHOD, NODE_SCOPE, NODE_BLOCK, NODE_IF, NODE_CASE, - NODE_WHEN, NODE_WHILE, NODE_UNTIL, - NODE_ITER, + NODE_WHILE_MOD, + NODE_UNTIL_MOD, NODE_FOR, NODE_BREAK, NODE_NEXT, NODE_REDO, NODE_RETRY, + NODE_STMTS, NODE_BEGIN, NODE_RESCUE, NODE_ENSURE, @@ -29,20 +29,15 @@ enum node_type { NODE_OR, NODE_NOT, NODE_MASGN, + NODE_MARG, NODE_ASGN, - NODE_CDECL, - NODE_CVASGN, - NODE_CVDECL, NODE_OP_ASGN, NODE_CALL, - NODE_SCALL, - NODE_FCALL, NODE_SUPER, NODE_ZSUPER, NODE_ARRAY, NODE_ZARRAY, NODE_HASH, - NODE_KW_HASH, NODE_RETURN, NODE_YIELD, NODE_LVAR, @@ -54,26 +49,16 @@ enum node_type { NODE_NVAR, NODE_NTH_REF, NODE_BACK_REF, - NODE_MATCH, NODE_INT, + NODE_BIGINT, NODE_FLOAT, NODE_NEGATE, NODE_LAMBDA, NODE_SYM, NODE_STR, - NODE_DSTR, NODE_XSTR, - NODE_DXSTR, NODE_REGX, - NODE_DREGX, - NODE_DREGX_ONCE, - NODE_ARG, - NODE_ARGS_TAIL, - NODE_KW_ARG, - NODE_KW_REST_ARGS, NODE_SPLAT, - NODE_TO_ARY, - NODE_SVALUE, NODE_BLOCK_ARG, NODE_DEF, NODE_SDEF, @@ -94,10 +79,706 @@ enum node_type { NODE_POSTEXE, NODE_DSYM, NODE_HEREDOC, - NODE_LITERAL_DELIM, NODE_WORDS, NODE_SYMBOLS, + /* Pattern matching nodes */ + NODE_CASE_MATCH, /* case/in pattern matching expression */ + NODE_IN, /* in-clause node */ + NODE_PAT_VALUE, /* value pattern (literal, constant) */ + NODE_PAT_VAR, /* variable pattern */ + NODE_PAT_PIN, /* pin operator ^var */ + NODE_PAT_AS, /* as pattern (pattern => var) */ + NODE_PAT_ALT, /* alternative pattern (pat1 | pat2) */ + NODE_PAT_ARRAY, /* array pattern [a, b, *rest] */ + NODE_PAT_FIND, /* find pattern [*pre, elem, *post] */ + NODE_PAT_HASH, /* hash pattern {a:, b: x} */ + NODE_MATCH_PAT, /* one-line pattern matching (expr in pat / expr => pat) */ NODE_LAST }; +#define STR_FUNC_PARSING 0x01 +#define STR_FUNC_EXPAND 0x02 +#define STR_FUNC_REGEXP 0x04 +#define STR_FUNC_WORD 0x08 +#define STR_FUNC_SYMBOL 0x10 +#define STR_FUNC_ARRAY 0x20 +#define STR_FUNC_HEREDOC 0x40 +#define STR_FUNC_XQUOTE 0x80 + +enum mrb_string_type { + str_not_parsing = (0), + str_squote = (STR_FUNC_PARSING), + str_dquote = (STR_FUNC_PARSING|STR_FUNC_EXPAND), + str_regexp = (STR_FUNC_PARSING|STR_FUNC_REGEXP|STR_FUNC_EXPAND), + str_sword = (STR_FUNC_PARSING|STR_FUNC_WORD|STR_FUNC_ARRAY), + str_dword = (STR_FUNC_PARSING|STR_FUNC_WORD|STR_FUNC_ARRAY|STR_FUNC_EXPAND), + str_ssym = (STR_FUNC_PARSING|STR_FUNC_SYMBOL), + str_ssymbols = (STR_FUNC_PARSING|STR_FUNC_SYMBOL|STR_FUNC_ARRAY), + str_dsymbols = (STR_FUNC_PARSING|STR_FUNC_SYMBOL|STR_FUNC_ARRAY|STR_FUNC_EXPAND), + str_heredoc = (STR_FUNC_PARSING|STR_FUNC_HEREDOC), + str_xquote = (STR_FUNC_PARSING|STR_FUNC_XQUOTE|STR_FUNC_EXPAND), +}; + +/* heredoc structure */ +struct mrb_parser_heredoc_info { + mrb_bool allow_indent:1; + mrb_bool remove_indent:1; + mrb_bool line_head:1; + size_t indent; + mrb_ast_node *indented; + enum mrb_string_type type; + const char *term; + int term_len; + mrb_ast_node *doc; +}; + +/* AST node structures - Head-only location optimization */ + +/* Structure nodes - only car/cdr, ignores location fields */ +struct mrb_ast_node { + struct mrb_ast_node *car, *cdr; + /* No location fields - saves 4 bytes per structure node */ +}; + +/* Variable-sized AST nodes */ + +/* Variable node header - common to all variable-sized nodes */ +struct mrb_ast_var_header { + uint16_t lineno; /* Line number information */ + uint16_t filename_index; /* File index information */ + uint8_t node_type; /* NODE_INT, NODE_SYM, NODE_STR, etc. */ + /* Total: 6 bytes header for all variable nodes */ +}; + +/* Literal value nodes */ + +/* Variable-sized symbol node */ +struct mrb_ast_sym_node { + struct mrb_ast_var_header header; /* 8 bytes */ + mrb_sym symbol; /* Direct symbol reference */ + /* Total: 12-16 bytes vs previous 20+ bytes + indirection */ +}; + +/* Variable-sized string node with cons list */ +struct mrb_ast_str_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *list; +}; + +/* Variable-sized integer node */ +struct mrb_ast_int_node { + struct mrb_ast_var_header header; /* 8 bytes */ + int32_t value; /* Direct 32-bit integer storage */ +}; + +/* Variable-sized big integer node */ +struct mrb_ast_bigint_node { + struct mrb_ast_var_header header; /* 8 bytes */ + char *string; /* String representation of big number */ + int base; /* Number base (8, 10, 16) */ +}; + +/* Variable-sized node for variables (lvar, ivar, etc.) */ +struct mrb_ast_var_node { + struct mrb_ast_var_header header; + mrb_sym symbol; +}; + +/* Expression and operation nodes */ + +/* Variable-sized call node with inline argument storage */ +struct mrb_ast_call_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *receiver; /* Receiver object */ + mrb_sym method_name; /* Method name symbol */ + uint8_t safe_call:1; /* Safe navigation (&.) */ + struct mrb_ast_node *args; /* Arguments Information */ +}; + +/* Variable-sized array node */ +struct mrb_ast_array_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *elements; /* Elements list (cons list) */ +}; + +/* Variable-sized hash node */ +struct mrb_ast_hash_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *pairs; /* Key-value pairs (cons list) */ +}; + +/* Control flow and definition nodes */ + +/* Variable-sized method definition node */ +struct mrb_ast_def_node { + struct mrb_ast_var_header header; /* 8 bytes */ + mrb_sym name; /* Method name */ + struct mrb_ast_args *args; /* Method arguments */ + struct mrb_ast_node *body; /* Method body */ + struct mrb_ast_node *locals; /* Local Variables */ +} ; + +/* Variable-sized singleton method definition node */ +struct mrb_ast_sdef_node { + struct mrb_ast_var_header header; /* 8 bytes */ + mrb_sym name; /* Method name */ + struct mrb_ast_args *args; /* Method arguments */ + struct mrb_ast_node *body; /* Method body */ + struct mrb_ast_node *locals; /* Local Variables */ + struct mrb_ast_node *obj; /* receiver */ +}; + +/* variable-sized class definition node */ +struct mrb_ast_class_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *name; /* Class name (NODE_CONST or NODE_COLON2) */ + struct mrb_ast_node *superclass; /* Superclass (can be NULL) */ + struct mrb_ast_node *body; /* Class body */ +}; + +/* Variable-sized module definition node */ +struct mrb_ast_module_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *name; /* Module name (NODE_CONST or NODE_COLON2) */ + struct mrb_ast_node *body; /* Module body */ +}; + +/* Variable-sized singleton class definition node */ +struct mrb_ast_sclass_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *obj; /* Object for singleton class */ + struct mrb_ast_node *body; /* Singleton class body */ +}; + +/* Variable-sized if node */ +struct mrb_ast_if_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *condition; /* Condition expression */ + struct mrb_ast_node *then_body; /* Then branch */ + struct mrb_ast_node *else_body; /* Else branch (can be NULL) */ +}; + +/* Variable-sized while node */ +struct mrb_ast_while_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *condition; /* Loop condition */ + struct mrb_ast_node *body; /* Loop body */ +}; + +/* Variable-sized until node */ +struct mrb_ast_until_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *condition; /* Loop condition */ + struct mrb_ast_node *body; /* Loop body */ +}; + +/* Variable-sized case node */ +struct mrb_ast_case_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *value; /* Case value expression */ + struct mrb_ast_node *body; /* When/else clauses (cons list) */ +}; + +/* Pattern matching case node (case/in) */ +struct mrb_ast_case_match_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *value; /* Case value expression */ + struct mrb_ast_node *in_clauses; /* In clause list (cons list) */ +}; + +/* In clause node */ +struct mrb_ast_in_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *pattern; /* Pattern to match */ + struct mrb_ast_node *guard; /* Guard expression (optional) */ + struct mrb_ast_node *body; /* Body to execute on match */ + mrb_bool guard_is_unless; /* TRUE if 'unless', FALSE if 'if' */ +}; + +/* Value pattern node (literal, constant) */ +struct mrb_ast_pat_value_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *value; /* Literal or constant node */ +}; + +/* Variable pattern node */ +struct mrb_ast_pat_var_node { + struct mrb_ast_var_header header; /* 8 bytes */ + mrb_sym name; /* Variable name (0 for wildcard _) */ +}; + +/* Pin pattern node (^var) */ +struct mrb_ast_pat_pin_node { + struct mrb_ast_var_header header; /* 8 bytes */ + mrb_sym name; /* Variable name to pin */ +}; + +/* As pattern node (pattern => var) */ +struct mrb_ast_pat_as_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *pattern; /* Pattern to match */ + mrb_sym name; /* Variable to bind */ +}; + +/* Alternative pattern node (pat1 | pat2) */ +struct mrb_ast_pat_alt_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *left; /* Left pattern */ + struct mrb_ast_node *right; /* Right pattern */ +}; + +/* Array pattern node [a, b, *rest] */ +struct mrb_ast_pat_array_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *pre; /* Pre-rest patterns (cons list) */ + struct mrb_ast_node *rest; /* Rest pattern (NULL if none, -1 if anonymous) */ + struct mrb_ast_node *post; /* Post-rest patterns (cons list) */ +}; + +/* Find pattern node [*pre, elem, *post] - searches for elem anywhere in array */ +struct mrb_ast_pat_find_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *pre; /* Pre rest pattern (NULL or -1 if anonymous) */ + struct mrb_ast_node *elems; /* Middle patterns to find (cons list) */ + struct mrb_ast_node *post; /* Post rest pattern (NULL or -1 if anonymous) */ +}; + +/* Hash pattern node {a:, b: x} */ +struct mrb_ast_pat_hash_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *pairs; /* Key-pattern pairs (cons list) */ + struct mrb_ast_node *rest; /* Rest pattern (NULL if none, -1 if **nil) */ +}; + +/* One-line pattern matching: expr in pattern / expr => pattern */ +struct mrb_ast_match_pat_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *value; /* Expression to match */ + struct mrb_ast_node *pattern; /* Pattern */ + mrb_bool raise_on_fail; /* TRUE for =>, FALSE for in */ +}; + +/* Variable-sized for node */ +struct mrb_ast_for_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *var; /* Loop variable */ + struct mrb_ast_node *iterable; /* Object to iterate over */ + struct mrb_ast_node *body; /* Loop body */ +}; + +/* Assignment Node Structures */ + +/* Variable-sized assignment node */ +struct mrb_ast_asgn_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *lhs; /* Left-hand side (target) */ + struct mrb_ast_node *rhs; /* Right-hand side (value) */ +}; + +/* Variable-sized multiple assignment node */ +struct mrb_ast_masgn_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *pre; /* Pre-splat variables (cons list) */ + struct mrb_ast_node *rest; /* Splat variable (single node or -1) */ + struct mrb_ast_node *post; /* Post-splat variables (cons list) */ + struct mrb_ast_node *rhs; /* Right-hand side (values) */ +}; + +/* Variable-sized operator assignment node */ +struct mrb_ast_op_asgn_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *lhs; /* Left-hand side (target) */ + mrb_sym op; /* Assignment operator (e.g., +=, -=, etc.) */ + struct mrb_ast_node *rhs; /* Right-hand side (value) */ +}; + +/* Expression Node Structures */ + +/* Variable-sized AND node */ +struct mrb_ast_and_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *left; /* Left operand */ + struct mrb_ast_node *right; /* Right operand */ +}; + +/* Variable-sized OR node */ +struct mrb_ast_or_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *left; /* Left operand */ + struct mrb_ast_node *right; /* Right operand */ +}; + +/* Variable-sized RETURN node */ +struct mrb_ast_return_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *args; /* Return arguments (can be NULL) */ +}; + +/* Variable-sized YIELD node */ +struct mrb_ast_yield_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *args; /* Yield arguments (can be NULL) */ +}; + +/* Variable-sized SUPER node */ +struct mrb_ast_super_node { + struct mrb_ast_var_header header; /* 8 bytes */ + struct mrb_ast_node *args; /* Super arguments (can be NULL) */ +}; + +#define NODE_TYPE(n) ((enum node_type)(((struct mrb_ast_var_header*)(n))->node_type)) + +/* Type-safe casting macros */ +#define var_header(n) ((struct mrb_ast_var_header*)(n)) + +/* Common type casting macros used by parser and codegen */ +#define node_to_sym(x) ((mrb_sym)(intptr_t)(x)) +#define sym_to_node(x) ((node*)(intptr_t)(x)) +#define int_to_node(x) ((node*)(intptr_t)(x)) +#define node_to_int(x) ((int)(intptr_t)(x)) +#define node_to_type(x) ((enum node_type)(intptr_t)(x)) +#define node_to_char(x) ((char)(intptr_t)(x)) + +/* Literal value node casting macros */ +#define sym_node(n) ((struct mrb_ast_sym_node*)(n)) +#define str_node(n) ((struct mrb_ast_str_node*)(n)) +#define int_node(n) ((struct mrb_ast_int_node*)(n)) +#define bigint_node(n) ((struct mrb_ast_bigint_node*)(n)) +#define var_node(n) ((struct mrb_ast_var_node*)(n)) + +/* Expression and operation node casting macros */ +#define call_node(n) ((struct mrb_ast_call_node*)(n)) +#define array_node(n) ((struct mrb_ast_array_node*)(n)) +#define hash_node(n) ((struct mrb_ast_hash_node*)(n)) + +/* Control flow and definition node casting macros */ +#define def_node(n) ((struct mrb_ast_def_node*)(n)) +#define class_node(n) ((struct mrb_ast_class_node*)(n)) +#define module_node(n) ((struct mrb_ast_module_node*)(n)) +#define sclass_node(n) ((struct mrb_ast_sclass_node*)(n)) +#define if_node(n) ((struct mrb_ast_if_node*)(n)) +#define while_node(n) ((struct mrb_ast_while_node*)(n)) +#define until_node(n) ((struct mrb_ast_until_node*)(n)) +#define case_node(n) ((struct mrb_ast_case_node*)(n)) +#define case_match_node(n) ((struct mrb_ast_case_match_node*)(n)) +#define in_node(n) ((struct mrb_ast_in_node*)(n)) +#define pat_value_node(n) ((struct mrb_ast_pat_value_node*)(n)) +#define pat_var_node(n) ((struct mrb_ast_pat_var_node*)(n)) +#define pat_pin_node(n) ((struct mrb_ast_pat_pin_node*)(n)) +#define pat_as_node(n) ((struct mrb_ast_pat_as_node*)(n)) +#define pat_alt_node(n) ((struct mrb_ast_pat_alt_node*)(n)) +#define pat_array_node(n) ((struct mrb_ast_pat_array_node*)(n)) +#define pat_find_node(n) ((struct mrb_ast_pat_find_node*)(n)) +#define pat_hash_node(n) ((struct mrb_ast_pat_hash_node*)(n)) +#define match_pat_node(n) ((struct mrb_ast_match_pat_node*)(n)) +#define for_node(n) ((struct mrb_ast_for_node*)(n)) +#define asgn_node(n) ((struct mrb_ast_asgn_node*)(n)) +#define masgn_node(n) ((struct mrb_ast_masgn_node*)(n)) +#define op_asgn_node(n) ((struct mrb_ast_op_asgn_node*)(n)) +#define and_node(n) ((struct mrb_ast_and_node*)(n)) +#define or_node(n) ((struct mrb_ast_or_node*)(n)) +#define return_node(n) ((struct mrb_ast_return_node*)(n)) +#define yield_node(n) ((struct mrb_ast_yield_node*)(n)) +#define super_node(n) ((struct mrb_ast_super_node*)(n)) + +/* Variable-sized literal node structures */ + +struct mrb_ast_dot2_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *left; + struct mrb_ast_node *right; +}; + +struct mrb_ast_dot3_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *left; + struct mrb_ast_node *right; +}; + +struct mrb_ast_float_node { + struct mrb_ast_var_header header; + const char *value; +}; + +/* Literal node casting macros */ +#define dot2_node(n) ((struct mrb_ast_dot2_node*)(n)) +#define dot3_node(n) ((struct mrb_ast_dot3_node*)(n)) +#define float_node(n) ((struct mrb_ast_float_node*)(n)) + +/* Variable-sized simple node structures */ +struct mrb_ast_self_node { + struct mrb_ast_var_header header; +}; + +struct mrb_ast_nil_node { + struct mrb_ast_var_header header; +}; + +struct mrb_ast_true_node { + struct mrb_ast_var_header header; +}; + +struct mrb_ast_false_node { + struct mrb_ast_var_header header; +}; + +struct mrb_ast_const_node { + struct mrb_ast_var_header header; + mrb_sym symbol; +}; + +/* Simple node casting macros */ +#define self_node(n) ((struct mrb_ast_self_node*)(n)) +#define nil_node(n) ((struct mrb_ast_nil_node*)(n)) +#define true_node(n) ((struct mrb_ast_true_node*)(n)) +#define false_node(n) ((struct mrb_ast_false_node*)(n)) +#define const_node(n) ((struct mrb_ast_const_node*)(n)) + +/* Variable-sized advanced node structures */ +struct mrb_ast_rescue_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *body; + struct mrb_ast_node *rescue_clauses; + struct mrb_ast_node *else_clause; +}; + +struct mrb_ast_block_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *locals; + struct mrb_ast_args *args; + struct mrb_ast_node *body; +}; + +/* Unified argument structure - eliminates args_tail_node allocation */ +struct mrb_ast_args { + /* Core argument lists (parser builds these naturally) */ + struct mrb_ast_node *mandatory_args; /* Cons list of mandatory arguments */ + struct mrb_ast_node *optional_args; /* Cons list of optional arguments */ + struct mrb_ast_node *post_mandatory_args; /* Cons list of post-mandatory arguments */ + struct mrb_ast_node *keyword_args; /* Cons list of keyword arguments */ + + /* Special arguments (directly embedded) */ + mrb_sym rest_arg; /* Rest argument symbol (0 = none) */ + mrb_sym kwrest_arg; /* Keyword rest argument (0 = none) */ + mrb_sym block_arg; /* Block argument symbol (0 = none) */ +}; + +/* Call arguments structure - replaces cons-based new_callargs */ +struct mrb_ast_callargs { + struct mrb_ast_node *regular_args; /* Cons list of regular arguments (preserves splat compatibility) */ + struct mrb_ast_node *keyword_args; /* Keyword arguments hash node */ + struct mrb_ast_node *block_arg; /* Block argument node */ +}; + +/* Advanced node casting macros */ +#define rescue_node(n) ((struct mrb_ast_rescue_node*)(n)) +#define block_node(n) ((struct mrb_ast_block_node*)(n)) +#define callargs_node(n) ((struct mrb_ast_callargs*)(n)) + +/* Control flow statement nodes */ +struct mrb_ast_break_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *value; +}; + +struct mrb_ast_next_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *value; +}; + +struct mrb_ast_redo_node { + struct mrb_ast_var_header header; +}; + +struct mrb_ast_retry_node { + struct mrb_ast_var_header header; +}; + +#define break_node(n) ((struct mrb_ast_break_node*)(n)) +#define next_node(n) ((struct mrb_ast_next_node*)(n)) +#define redo_node(n) ((struct mrb_ast_redo_node*)(n)) +#define retry_node(n) ((struct mrb_ast_retry_node*)(n)) + +/* String and regex variant nodes */ +struct mrb_ast_xstr_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *list; +}; + +struct mrb_ast_regx_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *list; + const char *flags; + const char *encoding; +}; + +struct mrb_ast_heredoc_node { + struct mrb_ast_var_header header; + struct mrb_parser_heredoc_info info; +}; + +#define xstr_node(n) ((struct mrb_ast_xstr_node*)(n)) +#define regx_node(n) ((struct mrb_ast_regx_node*)(n)) +#define heredoc_node(n) ((struct mrb_ast_heredoc_node*)(n)) +#define dsym_node(n) ((struct mrb_ast_str_node*)(n)) + +/* Reference and special variable nodes */ +struct mrb_ast_nth_ref_node { + struct mrb_ast_var_header header; + int nth; +}; + +struct mrb_ast_back_ref_node { + struct mrb_ast_var_header header; + int type; +}; + +struct mrb_ast_nvar_node { + struct mrb_ast_var_header header; + int num; +}; + +struct mrb_ast_dvar_node { + struct mrb_ast_var_header header; + mrb_sym name; +}; + +#define nth_ref_node(n) ((struct mrb_ast_nth_ref_node*)(n)) +#define back_ref_node(n) ((struct mrb_ast_back_ref_node*)(n)) +#define nvar_node(n) ((struct mrb_ast_nvar_node*)(n)) +#define dvar_node(n) ((struct mrb_ast_dvar_node*)(n)) + +/* Unary operator and scope resolution nodes */ +struct mrb_ast_not_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *operand; +}; + +struct mrb_ast_negate_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *operand; +}; + +struct mrb_ast_colon2_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *base; + mrb_sym name; +}; + +struct mrb_ast_colon3_node { + struct mrb_ast_var_header header; + mrb_sym name; +}; + +struct mrb_ast_defined_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *expr; +}; + +#define not_node(n) ((struct mrb_ast_not_node*)(n)) +#define negate_node(n) ((struct mrb_ast_negate_node*)(n)) +#define colon2_node(n) ((struct mrb_ast_colon2_node*)(n)) +#define colon3_node(n) ((struct mrb_ast_colon3_node*)(n)) +#define defined_node(n) ((struct mrb_ast_defined_node*)(n)) + +/* Lambda nodes */ +struct mrb_ast_lambda_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *locals; + struct mrb_ast_args *args; + struct mrb_ast_node *body; +}; + +/* Array literal variant nodes */ +struct mrb_ast_zarray_node { + struct mrb_ast_var_header header; +}; + +struct mrb_ast_words_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *args; +}; + +struct mrb_ast_symbols_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *args; +}; + +/* Argument and parameter nodes */ + +struct mrb_ast_splat_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *value; +}; + +struct mrb_ast_block_arg_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *value; +}; + +/* Structural and block nodes */ + +struct mrb_ast_scope_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *locals; + struct mrb_ast_node *body; +}; + +struct mrb_ast_begin_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *body; +}; + +struct mrb_ast_ensure_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *body; + struct mrb_ast_node *ensure_clause; +}; + +struct mrb_ast_stmts_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *stmts; /* Cons-list of statements */ +}; + +struct mrb_ast_iter_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *vars; + struct mrb_ast_node *body; +}; + +/* Declaration nodes */ + +struct mrb_ast_alias_node { + struct mrb_ast_var_header header; + mrb_sym new_name; + mrb_sym old_name; +}; + +struct mrb_ast_undef_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *syms; +}; + +struct mrb_ast_postexe_node { + struct mrb_ast_var_header header; + struct mrb_ast_node *body; +}; + +#define zsuper_node(n) ((struct mrb_ast_super_node*)(n)) +#define lambda_node(n) ((struct mrb_ast_lambda_node*)(n)) +#define zarray_node(n) ((struct mrb_ast_zarray_node*)(n)) +#define words_node(n) ((struct mrb_ast_words_node*)(n)) +#define symbols_node(n) ((struct mrb_ast_symbols_node*)(n)) +#define splat_node(n) ((struct mrb_ast_splat_node*)(n)) +#define block_arg_node(n) ((struct mrb_ast_block_arg_node*)(n)) +#define scope_node(n) ((struct mrb_ast_scope_node*)(n)) +#define begin_node(n) ((struct mrb_ast_begin_node*)(n)) +#define ensure_node(n) ((struct mrb_ast_ensure_node*)(n)) +#define stmts_node(n) ((struct mrb_ast_stmts_node*)(n)) +#define iter_node(n) ((struct mrb_ast_iter_node*)(n)) +#define alias_node(n) ((struct mrb_ast_alias_node*)(n)) +#define undef_node(n) ((struct mrb_ast_undef_node*)(n)) +#define postexe_node(n) ((struct mrb_ast_postexe_node*)(n)) +#define sdef_node(n) ((struct mrb_ast_sdef_node*)(n)) + #endif /* MRUBY_COMPILER_NODE_H */ diff --git a/mrbgems/mruby-compiler/core/parse.y b/mrbgems/mruby-compiler/core/parse.y index 55de90ed88..a10a6df9cc 100644 --- a/mrbgems/mruby-compiler/core/parse.y +++ b/mrbgems/mruby-compiler/core/parse.y @@ -12,7 +12,8 @@ #define YYSTACK_USE_ALLOCA 1 #include -#include +#include +#include #include #include #include @@ -22,22 +23,34 @@ #include #include #include -#include #include "node.h" #define YYLEX_PARAM p +#define mrbc_malloc(s) mrb_basic_alloc_func(NULL,(s)) +#define mrbc_realloc(p,s) mrb_basic_alloc_func((p),(s)) +#define mrbc_free(p) mrb_basic_alloc_func((p),0) + typedef mrb_ast_node node; typedef struct mrb_parser_state parser_state; typedef struct mrb_parser_heredoc_info parser_heredoc_info; static int yyparse(parser_state *p); -static int yylex(void *lval, parser_state *p); -static void yyerror(parser_state *p, const char *s); +static int yylex(void *lval, void *lp, parser_state *p); +static void yyerror(void *lp, parser_state *p, const char *s); static void yywarning(parser_state *p, const char *s); static void backref_error(parser_state *p, node *n); static void void_expr_error(parser_state *p, node *n); static void tokadd(parser_state *p, int32_t c); +static const char* tok(parser_state *p); +static int toklen(parser_state *p); + +/* Forward declarations for variable-sized simple node functions */ + +/* Forward declarations for variable-sized advanced node functions */ + +/* Helper function to check node type for both traditional and variable-sized nodes */ +static mrb_bool node_type_p(node *n, enum node_type type); #define identchar(c) (ISALNUM(c) || (c) == '_' || !ISASCII(c)) @@ -58,19 +71,7 @@ typedef unsigned int stack_type; #define CMDARG_LEXPOP() BITSTACK_LEXPOP(p->cmdarg_stack) #define CMDARG_P() BITSTACK_SET_P(p->cmdarg_stack) -#define SET_LINENO(c,n) ((c)->lineno = (n)) -#define NODE_LINENO(c,n) do {\ - if (n) {\ - (c)->filename_index = (n)->filename_index;\ - (c)->lineno = (n)->lineno;\ - }\ -} while (0) - -#define sym(x) ((mrb_sym)(intptr_t)(x)) -#define nsym(x) ((node*)(intptr_t)(x)) -#define nint(x) ((node*)(intptr_t)(x)) -#define intn(x) ((int)(intptr_t)(x)) -#define typen(x) ((enum node_type)(intptr_t)(x)) +#define SET_LINENO(c,n) (((struct mrb_ast_var_header*)(c))->lineno = (n)) #define NUM_SUFFIX_R (1<<0) #define NUM_SUFFIX_I (1<<1) @@ -89,7 +90,7 @@ intern_gen(parser_state *p, const char *s, size_t len) } #define intern(s,len) intern_gen(p,(s),(len)) -#define intern_op(op) MRB_OPSYM_2(p->mrb, op) +#define intern_op(op) MRB_OPSYM(op) static mrb_sym intern_numparam_gen(parser_state *p, int num) @@ -111,7 +112,7 @@ cons_free_gen(parser_state *p, node *cons) static void* parser_palloc(parser_state *p, size_t size) { - void *m = mrb_pool_alloc(p->pool, size); + void *m = mempool_alloc(p->pool, size); if (!m) { MRB_THROW(p->mrb->jmp); @@ -124,27 +125,54 @@ parser_palloc(parser_state *p, size_t size) static node* cons_gen(parser_state *p, node *car, node *cdr) { - node *c; + struct mrb_ast_node *c; + /* Try to reuse from free list first - only for 16-byte nodes */ if (p->cells) { - c = p->cells; + c = (struct mrb_ast_node*)p->cells; p->cells = p->cells->cdr; } else { - c = (node*)parser_palloc(p, sizeof(mrb_ast_node)); + c = (struct mrb_ast_node*)parser_palloc(p, sizeof(struct mrb_ast_node)); } - c->car = car; c->cdr = cdr; - c->lineno = p->lineno; - c->filename_index = p->current_filename_index; - /* beginning of next partial file; need to point the previous file */ + /* Don't initialize location fields for structure nodes - saves CPU */ + return (node*)c; +} + +/* Head-only location optimization: separate functions for head vs structure nodes */ +#define cons(a,b) cons_gen(p,(a),(b)) /* Structure nodes - no location */ +/* Initialize variable node header */ +static void +init_var_header(struct mrb_ast_var_header *header, parser_state *p, enum node_type type) +{ + header->lineno = p->lineno; + header->filename_index = p->current_filename_index; + header->node_type = (uint8_t)type; + + /* Handle file boundary edge case: this node is reduced from a token that + was buffered by bison lookahead before partial_hook switched the file, + so attribute it to the previous file at its last known lineno rather + than the new file at lineno=0. */ if (p->lineno == 0 && p->current_filename_index > 0) { - c->filename_index-- ; + header->filename_index--; + header->lineno = p->prev_file_lineno; } - return c; } -#define cons(a,b) cons_gen(p,(a),(b)) + +/* Combined allocate + init header helper */ +static inline void* +new_node(parser_state *p, size_t size, enum node_type type) +{ + void *n = parser_palloc(p, size); + init_var_header((struct mrb_ast_var_header*)n, p, type); + return n; +} + +/* Type-safe macro wrapper for node allocation */ +#define NEW_NODE(type_name, node_type) \ + (struct mrb_ast_##type_name##_node*)new_node(p, sizeof(struct mrb_ast_##type_name##_node), node_type) static node* list1_gen(parser_state *p, node *a) @@ -156,38 +184,17 @@ list1_gen(parser_state *p, node *a) static node* list2_gen(parser_state *p, node *a, node *b) { - return cons(a, cons(b,0)); + return cons(a, cons(b, 0)); } #define list2(a,b) list2_gen(p, (a),(b)) static node* list3_gen(parser_state *p, node *a, node *b, node *c) { - return cons(a, cons(b, cons(c,0))); + return cons(a, cons(b, cons(c, 0))); } #define list3(a,b,c) list3_gen(p, (a),(b),(c)) -static node* -list4_gen(parser_state *p, node *a, node *b, node *c, node *d) -{ - return cons(a, cons(b, cons(c, cons(d, 0)))); -} -#define list4(a,b,c,d) list4_gen(p, (a),(b),(c),(d)) - -static node* -list5_gen(parser_state *p, node *a, node *b, node *c, node *d, node *e) -{ - return cons(a, cons(b, cons(c, cons(d, cons(e, 0))))); -} -#define list5(a,b,c,d,e) list5_gen(p, (a),(b),(c),(d),(e)) - -static node* -list6_gen(parser_state *p, node *a, node *b, node *c, node *d, node *e, node *f) -{ - return cons(a, cons(b, cons(c, cons(d, cons(e, cons(f, 0)))))); -} -#define list6(a,b,c,d,e,f) list6_gen(p, (a),(b),(c),(d),(e),(f)) - static node* append_gen(parser_state *p, node *a, node *b) { @@ -284,7 +291,7 @@ local_var_p(parser_state *p, mrb_sym sym) while (l) { node *n = l->car; while (n) { - if (sym(n->car) == sym) return TRUE; + if (node_to_sym(n->car) == sym) return TRUE; n = n->cdr; } l = l->cdr; @@ -313,17 +320,17 @@ local_add_f(parser_state *p, mrb_sym sym) if (p->locals) { node *n = p->locals->car; while (n) { - if (sym(n->car) == sym) { + if (node_to_sym(n->car) == sym) { mrb_int len; const char* name = mrb_sym_name_len(p->mrb, sym, &len); if (len > 0 && name[0] != '_') { - yyerror(p, "duplicated argument name"); + yyerror(NULL, p, "duplicated argument name"); return; } } n = n->cdr; } - p->locals->car = push(p->locals->car, nsym(sym)); + p->locals->car = push(p->locals->car, sym_to_node(sym)); } } @@ -335,12 +342,8 @@ local_add(parser_state *p, mrb_sym sym) } } -static void -local_add_blk(parser_state *p, mrb_sym blk) -{ - /* allocate register for block */ - local_add_f(p, blk ? blk : 0); -} +/* allocate register for block */ +#define local_add_blk(p) local_add_f(p, 0) static void local_add_kw(parser_state *p, mrb_sym kwd) @@ -355,16 +358,47 @@ locals_node(parser_state *p) return p->locals ? p->locals->car : NULL; } +/* Helper function to check node type for both traditional and variable-sized nodes */ +static mrb_bool +node_type_p(node *n, enum node_type type) +{ + if (!n) return FALSE; + + /* Check if this is a variable-sized node */ + struct mrb_ast_var_header *header = (struct mrb_ast_var_header*)n; + return ((enum node_type)header->node_type == type); +} + +/* Helper functions for variable-sized node detection */ +static enum node_type +node_type(node *n) +{ + if (!n) return (enum node_type)0; + + /* Try to interpret as variable-sized node */ + struct mrb_ast_var_header *header = (struct mrb_ast_var_header*)n; + enum node_type type = (enum node_type)header->node_type; + + /* Validate that the node type is within valid range for variable-sized nodes */ + if (type >= NODE_SCOPE && type < NODE_LAST) { + return type; + } + + /* If node type is invalid, this is likely a cons-list node */ + /* Return a special sentinel value to indicate cons-list fallback */ + return NODE_LAST; /* Use NODE_LAST as sentinel for cons-list nodes */ +} + static void nvars_nest(parser_state *p) { - p->nvars = cons(nint(0), p->nvars); + p->nvars = cons(int_to_node(0), p->nvars); } static void nvars_block(parser_state *p) { - p->nvars = cons(nint(-2), p->nvars); + p->nvars = cons(int_to_node(-2), p->nvars); } static void @@ -373,30 +407,56 @@ nvars_unnest(parser_state *p) p->nvars = p->nvars->cdr; } -/* (:scope (vars..) (prog...)) */ +/* struct: scope_node(locals, body) */ static node* new_scope(parser_state *p, node *body) { - return cons((node*)NODE_SCOPE, cons(locals_node(p), body)); + struct mrb_ast_scope_node *scope_node = NEW_NODE(scope, NODE_SCOPE); + scope_node->locals = locals_node(p); + scope_node->body = body; + return (node*)scope_node; +} + +/* struct: stmts_node(stmts) - uses cons list */ +static node* +new_stmts(parser_state *p, node *body) +{ + struct mrb_ast_stmts_node *n = NEW_NODE(stmts, NODE_STMTS); + n->stmts = body ? list1(body) : 0; /* Wrap single statement in cons-list */ + + return (node*)n; +} + +/* Helper: push statement to stmts node */ +static node* +stmts_push(parser_state *p, node *stmts, node *stmt) +{ + struct mrb_ast_stmts_node *n = stmts_node(stmts); + n->stmts = push(n->stmts, stmt); + return stmts; } -/* (:begin prog...) */ +/* struct: begin_node(body) */ static node* new_begin(parser_state *p, node *body) { - if (body) { - return list2((node*)NODE_BEGIN, body); - } - return cons((node*)NODE_BEGIN, 0); + struct mrb_ast_begin_node *begin_node = NEW_NODE(begin, NODE_BEGIN); + begin_node->body = body; + return (node*)begin_node; } #define newline_node(n) (n) -/* (:rescue body rescue else) */ +/* struct: rescue_node(body, rescue_clauses, else_clause) */ static node* new_rescue(parser_state *p, node *body, node *resq, node *els) { - return list4((node*)NODE_RESCUE, body, resq, els); + struct mrb_ast_rescue_node *n = NEW_NODE(rescue, NODE_RESCUE); + n->body = body; + n->rescue_clauses = resq; + n->else_clause = els; + + return (node*)n; } static node* @@ -405,435 +465,695 @@ new_mod_rescue(parser_state *p, node *body, node *resq) return new_rescue(p, body, list1(list3(0, 0, resq)), 0); } -/* (:ensure body ensure) */ +/* struct: ensure_node(body, ensure_clause) */ static node* new_ensure(parser_state *p, node *a, node *b) { - return cons((node*)NODE_ENSURE, cons(a, cons(0, b))); + struct mrb_ast_ensure_node *ensure_node = NEW_NODE(ensure, NODE_ENSURE); + ensure_node->body = a; + ensure_node->ensure_clause = b; + return (node*)ensure_node; } -/* (:nil) */ +/* struct: nil_node() */ static node* new_nil(parser_state *p) { - return list1((node*)NODE_NIL); + struct mrb_ast_nil_node *n = NEW_NODE(nil, NODE_NIL); + + return (node*)n; } -/* (:true) */ +/* struct: true_node() */ static node* new_true(parser_state *p) { - return list1((node*)NODE_TRUE); + struct mrb_ast_true_node *n = NEW_NODE(true, NODE_TRUE); + + return (node*)n; } -/* (:false) */ +/* struct: false_node() */ static node* new_false(parser_state *p) { - return list1((node*)NODE_FALSE); + struct mrb_ast_false_node *n = NEW_NODE(false, NODE_FALSE); + + return (node*)n; } -/* (:alias new old) */ +/* struct: alias_node(new_name, old_name) */ static node* new_alias(parser_state *p, mrb_sym a, mrb_sym b) { - return cons((node*)NODE_ALIAS, cons(nsym(a), nsym(b))); + struct mrb_ast_alias_node *alias_node = NEW_NODE(alias, NODE_ALIAS); + alias_node->new_name = a; + alias_node->old_name = b; + return (node*)alias_node; } -/* (:if cond then else) */ +/* struct: if_node(cond, then_body, else_body) */ static node* -new_if(parser_state *p, node *a, node *b, node *c) +new_if(parser_state *p, node *condition, node *then_body, node *else_body) { - void_expr_error(p, a); - return list4((node*)NODE_IF, a, b, c); + void_expr_error(p, condition); + + struct mrb_ast_if_node *n = NEW_NODE(if, NODE_IF); + n->condition = condition; + n->then_body = then_body; + n->else_body = else_body; + + return (node*)n; } -/* (:unless cond then else) */ +/* struct: while_node(cond, body) */ static node* -new_unless(parser_state *p, node *a, node *b, node *c) +new_while(parser_state *p, node *condition, node *body) { - void_expr_error(p, a); - return list4((node*)NODE_IF, a, c, b); + void_expr_error(p, condition); + + struct mrb_ast_while_node *n = NEW_NODE(while, NODE_WHILE); + n->condition = condition; + n->body = body; + + return (node*)n; } -/* (:while cond body) */ +/* struct: until_node(cond, body) */ static node* -new_while(parser_state *p, node *a, node *b) +new_until(parser_state *p, node *condition, node *body) { - void_expr_error(p, a); - return cons((node*)NODE_WHILE, cons(a, b)); + void_expr_error(p, condition); + + struct mrb_ast_until_node *n = NEW_NODE(until, NODE_UNTIL); + n->condition = condition; + n->body = body; + + return (node*)n; } -/* (:until cond body) */ +/* struct: while_node(cond, body) */ static node* -new_until(parser_state *p, node *a, node *b) +new_while_mod(parser_state *p, node *condition, node *body) { - void_expr_error(p, a); - return cons((node*)NODE_UNTIL, cons(a, b)); + node *while_node = new_while(p, condition, body); + struct mrb_ast_while_node *n = (struct mrb_ast_while_node*)while_node; + n->header.node_type = NODE_WHILE_MOD; + return while_node; +} + +/* struct: until_node(cond, body) */ +static node* +new_until_mod(parser_state *p, node *a, node *b) +{ + node *until_node = new_until(p, a, b); + struct mrb_ast_until_node *n = (struct mrb_ast_until_node*)until_node; + n->header.node_type = NODE_UNTIL_MOD; + return until_node; } -/* (:for var obj body) */ + +/* struct: for_node(var, obj, body) */ static node* new_for(parser_state *p, node *v, node *o, node *b) { void_expr_error(p, o); - return list4((node*)NODE_FOR, v, o, b); + + struct mrb_ast_for_node *n = NEW_NODE(for, NODE_FOR); + n->var = v; + n->iterable = o; + n->body = b; + + return (node*)n; } -/* (:case a ((when ...) body) ((when...) body)) */ +/* struct: case_node(expr, when_clauses) - uses cons list */ static node* new_case(parser_state *p, node *a, node *b) { - node *n = list2((node*)NODE_CASE, a); - node *n2 = n; - void_expr_error(p, a); - while (n2->cdr) { - n2 = n2->cdr; + + struct mrb_ast_case_node *n = NEW_NODE(case, NODE_CASE); + n->value = a; + n->body = b; + + return (node*)n; +} + +/* Pattern matching case/in expression */ +static node* +new_case_match(parser_state *p, node *val, node *in_clauses) +{ + void_expr_error(p, val); + + struct mrb_ast_case_match_node *n = NEW_NODE(case_match, NODE_CASE_MATCH); + n->value = val; + n->in_clauses = in_clauses; + + return (node*)n; +} + +/* Create value pattern node */ +static node* +new_pat_value(parser_state *p, node *val) +{ + struct mrb_ast_pat_value_node *n = NEW_NODE(pat_value, NODE_PAT_VALUE); + n->value = val; + return (node*)n; +} + +/* Create variable pattern node */ +static node* +new_pat_var(parser_state *p, mrb_sym name) +{ + struct mrb_ast_pat_var_node *n = NEW_NODE(pat_var, NODE_PAT_VAR); + n->name = name; + /* Register as local variable if not wildcard */ + if (name) { + local_add(p, name); } - n2->cdr = b; - return n; + return (node*)n; +} + +/* Create pin pattern node (^var) */ +static node* +new_pat_pin(parser_state *p, mrb_sym name) +{ + struct mrb_ast_pat_pin_node *n = NEW_NODE(pat_pin, NODE_PAT_PIN); + n->name = name; + /* Pin operator references existing variable, does not create new binding */ + return (node*)n; +} + +/* Create as pattern node (pattern => var) */ +static node* +new_pat_as(parser_state *p, node *pattern, mrb_sym name) +{ + struct mrb_ast_pat_as_node *n = NEW_NODE(pat_as, NODE_PAT_AS); + n->pattern = pattern; + n->name = name; + local_add(p, name); + return (node*)n; +} + +/* Create alternative pattern node (pat1 | pat2) */ +static node* +new_pat_alt(parser_state *p, node *left, node *right) +{ + struct mrb_ast_pat_alt_node *n = NEW_NODE(pat_alt, NODE_PAT_ALT); + n->left = left; + n->right = right; + return (node*)n; +} + +/* Create array pattern node [a, b, *rest, c] */ +static node* +new_pat_array(parser_state *p, node *pre, node *rest, node *post) +{ + struct mrb_ast_pat_array_node *n = NEW_NODE(pat_array, NODE_PAT_ARRAY); + n->pre = pre; + n->rest = rest; + n->post = post; + return (node*)n; +} + +/* Create find pattern node [*pre, elems, *post] */ +static node* +new_pat_find(parser_state *p, node *pre, node *elems, node *post) +{ + struct mrb_ast_pat_find_node *n = NEW_NODE(pat_find, NODE_PAT_FIND); + n->pre = pre; + n->elems = elems; + n->post = post; + return (node*)n; +} + +/* Create hash pattern node {a:, b: x, **rest} */ +static node* +new_pat_hash(parser_state *p, node *pairs, node *rest) +{ + struct mrb_ast_pat_hash_node *n = NEW_NODE(pat_hash, NODE_PAT_HASH); + n->pairs = pairs; + n->rest = rest; + return (node*)n; +} + +/* Create one-line pattern matching node (expr in pattern / expr => pattern) */ +static node* +new_match_pat(parser_state *p, node *value, node *pattern, mrb_bool raise_on_fail) +{ + struct mrb_ast_match_pat_node *n = NEW_NODE(match_pat, NODE_MATCH_PAT); + n->value = value; + n->pattern = pattern; + n->raise_on_fail = raise_on_fail; + return (node*)n; +} + +/* Create in-clause node for case/in */ +static node* +new_in(parser_state *p, node *pattern, node *guard, node *body, mrb_bool guard_is_unless) +{ + struct mrb_ast_in_node *n = NEW_NODE(in, NODE_IN); + n->pattern = pattern; + n->guard = guard; + n->body = body; + n->guard_is_unless = guard_is_unless; + return (node*)n; } -/* (:postexe a) */ +/* struct: postexe_node(body) */ static node* new_postexe(parser_state *p, node *a) { - return cons((node*)NODE_POSTEXE, a); + struct mrb_ast_postexe_node *postexe_node = NEW_NODE(postexe, NODE_POSTEXE); + postexe_node->body = a; + return (node*)postexe_node; } -/* (:self) */ +/* struct: self_node() */ static node* new_self(parser_state *p) { - return list1((node*)NODE_SELF); + struct mrb_ast_self_node *n = NEW_NODE(self, NODE_SELF); + + return (node*)n; } -/* (:call a b c) */ +/* struct: call_node(receiver, method, args) */ static node* -new_call(parser_state *p, node *a, mrb_sym b, node *c, int pass) +new_call(parser_state *p, node *receiver, mrb_sym method, node *args, int pass) { - node *n = list4(nint(pass?NODE_CALL:NODE_SCALL), a, nsym(b), c); - void_expr_error(p, a); - NODE_LINENO(n, a); - return n; + /* Calculate size needed (fixed size now) */ struct mrb_ast_call_node *n = NEW_NODE(call, NODE_CALL); + n->receiver = receiver; + n->method_name = method; + n->safe_call = (pass == 0); /* pass == 0 means safe call (&.) */ + + /* Store args pointer directly - no need to unpack and repack */ + n->args = args; + + void_expr_error(p, receiver); + return (node*)n; } -/* (:fcall self mid args) */ +/* struct: fcall_node(method, args) */ static node* new_fcall(parser_state *p, mrb_sym b, node *c) { - node *n = list4((node*)NODE_FCALL, 0, nsym(b), c); - NODE_LINENO(n, c); - return n; + return new_call(p, NULL, b, c, '.'); } /* (a b . c) */ static node* new_callargs(parser_state *p, node *a, node *b, node *c) { - return cons(a, cons(b, c)); + /* Allocate struct mrb_ast_callargs (fixed size, like new_args) */ + struct mrb_ast_callargs *callargs = (struct mrb_ast_callargs*)parser_palloc(p, sizeof(struct mrb_ast_callargs)); + + /* Initialize members directly */ + callargs->regular_args = a; /* Cons list of regular arguments (preserves splat compatibility) */ + callargs->keyword_args = b; /* Keyword arguments hash node */ + callargs->block_arg = c; /* Block argument node */ + + /* Return direct cast to node (like new_args) */ + return (node*)callargs; } -/* (:super . c) */ +/* struct: super_node(args) */ static node* new_super(parser_state *p, node *c) { - return cons((node*)NODE_SUPER, c); + struct mrb_ast_super_node *n = NEW_NODE(super, NODE_SUPER); + n->args = c; + + return (node*)n; } -/* (:zsuper) */ +/* struct: zsuper_node() */ static node* new_zsuper(parser_state *p) { - return cons((node*)NODE_ZSUPER, 0); + struct mrb_ast_super_node *n = NEW_NODE(super, NODE_ZSUPER); + n->args = NULL; /* zsuper initially has no args, but may be added by call_with_block */ + return (node*)n; } -/* (:yield . c) */ +/* struct: yield_node(args) */ static node* new_yield(parser_state *p, node *c) { + /* Handle callargs structure - direct casting like new_args() */ if (c) { - if (c->cdr) { - if (c->cdr->cdr) { - yyerror(p, "both block arg and actual block given"); - } - if (c->cdr->car) { - return cons((node*)NODE_YIELD, push(c->car, c->cdr->car)); - } + struct mrb_ast_callargs *callargs = (struct mrb_ast_callargs*)c; + if (callargs->block_arg) { + yyerror(NULL, p, "both block arg and actual block given"); } - return cons((node*)NODE_YIELD, c->car); - } - return cons((node*)NODE_YIELD, 0); + } struct mrb_ast_yield_node *n = NEW_NODE(yield, NODE_YIELD); + n->args = c; + + return (node*)n; } -/* (:return . c) */ +/* struct: return_node(value) */ static node* new_return(parser_state *p, node *c) { - return cons((node*)NODE_RETURN, c); + struct mrb_ast_return_node *n = NEW_NODE(return, NODE_RETURN); + n->args = c; + + return (node*)n; } -/* (:break . c) */ +/* struct: break_node(value) */ static node* new_break(parser_state *p, node *c) { - return cons((node*)NODE_BREAK, c); + struct mrb_ast_break_node *n = NEW_NODE(break, NODE_BREAK); + n->value = c; + return (node*)n; } -/* (:next . c) */ +/* struct: next_node(value) */ static node* new_next(parser_state *p, node *c) { - return cons((node*)NODE_NEXT, c); + struct mrb_ast_next_node *n = NEW_NODE(next, NODE_NEXT); + n->value = c; + return (node*)n; } -/* (:redo) */ +/* struct: redo_node() */ static node* new_redo(parser_state *p) { - return list1((node*)NODE_REDO); + struct mrb_ast_redo_node *n = NEW_NODE(redo, NODE_REDO); + return (node*)n; } -/* (:retry) */ +/* struct: retry_node() */ static node* new_retry(parser_state *p) { - return list1((node*)NODE_RETRY); + struct mrb_ast_retry_node *n = NEW_NODE(retry, NODE_RETRY); + return (node*)n; } -/* (:dot2 a b) */ +/* struct: dot2_node(beg, end) */ static node* new_dot2(parser_state *p, node *a, node *b) { - return cons((node*)NODE_DOT2, cons(a, b)); + struct mrb_ast_dot2_node *n = NEW_NODE(dot2, NODE_DOT2); + n->left = a; + n->right = b; + + return (node*)n; } -/* (:dot3 a b) */ +/* struct: dot3_node(beg, end) */ static node* new_dot3(parser_state *p, node *a, node *b) { - return cons((node*)NODE_DOT3, cons(a, b)); + struct mrb_ast_dot3_node *n = NEW_NODE(dot3, NODE_DOT3); + n->left = a; + n->right = b; + + return (node*)n; } -/* (:colon2 b c) */ +/* struct: colon2_node(base, name) */ static node* new_colon2(parser_state *p, node *b, mrb_sym c) { void_expr_error(p, b); - return cons((node*)NODE_COLON2, cons(b, nsym(c))); + + struct mrb_ast_colon2_node *colon2_node = NEW_NODE(colon2, NODE_COLON2); + colon2_node->base = b; + colon2_node->name = c; + return (node*)colon2_node; } -/* (:colon3 . c) */ +/* struct: colon3_node(name) */ static node* new_colon3(parser_state *p, mrb_sym c) { - return cons((node*)NODE_COLON3, nsym(c)); + struct mrb_ast_colon3_node *colon3_node = NEW_NODE(colon3, NODE_COLON3); + colon3_node->name = c; + return (node*)colon3_node; } -/* (:and a b) */ +/* struct: and_node(left, right) */ static node* new_and(parser_state *p, node *a, node *b) { void_expr_error(p, a); - return cons((node*)NODE_AND, cons(a, b)); + + struct mrb_ast_and_node *n = NEW_NODE(and, NODE_AND); + n->left = a; + n->right = b; + + return (node*)n; } -/* (:or a b) */ +/* struct: or_node(left, right) */ static node* new_or(parser_state *p, node *a, node *b) { void_expr_error(p, a); - return cons((node*)NODE_OR, cons(a, b)); + + struct mrb_ast_or_node *n = NEW_NODE(or, NODE_OR); + n->left = a; + n->right = b; + + return (node*)n; } -/* (:array a...) */ +/* struct: array_node(elements) - uses cons list */ static node* new_array(parser_state *p, node *a) { - return cons((node*)NODE_ARRAY, a); + struct mrb_ast_array_node *n = NEW_NODE(array, NODE_ARRAY); + n->elements = a; + + return (node*)n; } -/* (:splat . a) */ +/* struct: splat_node(value) */ static node* new_splat(parser_state *p, node *a) { void_expr_error(p, a); - return cons((node*)NODE_SPLAT, a); + + struct mrb_ast_splat_node *splat_node = NEW_NODE(splat, NODE_SPLAT); + splat_node->value = a; + return (node*)splat_node; } -/* (:hash (k . v) (k . v)...) */ +/* struct: hash_node(pairs) - uses cons list */ static node* new_hash(parser_state *p, node *a) { - return cons((node*)NODE_HASH, a); -} + struct mrb_ast_hash_node *n = NEW_NODE(hash, NODE_HASH); + n->pairs = a; -/* (:kw_hash (k . v) (k . v)...) */ -static node* -new_kw_hash(parser_state *p, node *a) -{ - return cons((node*)NODE_KW_HASH, a); + return (node*)n; } /* (:sym . a) */ +/* Symbol node creation - supports both variable and legacy modes */ static node* new_sym(parser_state *p, mrb_sym sym) { - return cons((node*)NODE_SYM, nsym(sym)); -} - -static mrb_sym -new_strsym(parser_state *p, node* str) -{ - const char *s = (const char*)str->cdr->car; - size_t len = (size_t)str->cdr->cdr; + struct mrb_ast_sym_node *n = NEW_NODE(sym, NODE_SYM); + n->symbol = sym; - return mrb_intern(p->mrb, s, len); + return (node*)n; } -/* (:lvar . a) */ static node* -new_lvar(parser_state *p, mrb_sym sym) +new_xvar(parser_state *p, mrb_sym sym, enum node_type type) { - return cons((node*)NODE_LVAR, nsym(sym)); -} + struct mrb_ast_var_node *n = NEW_NODE(var, type); + n->symbol = sym; -/* (:gvar . a) */ -static node* -new_gvar(parser_state *p, mrb_sym sym) -{ - return cons((node*)NODE_GVAR, nsym(sym)); + return (node*)n; } -/* (:ivar . a) */ -static node* -new_ivar(parser_state *p, mrb_sym sym) -{ - return cons((node*)NODE_IVAR, nsym(sym)); -} +#define new_lvar(p, sym) new_xvar(p, sym, NODE_LVAR) +#define new_ivar(p, sym) new_xvar(p, sym, NODE_IVAR) +#define new_gvar(p, sym) new_xvar(p, sym, NODE_GVAR) +#define new_cvar(p, sym) new_xvar(p, sym, NODE_CVAR) -/* (:cvar . a) */ -static node* -new_cvar(parser_state *p, mrb_sym sym) +static mrb_sym +new_strsym(parser_state *p, node* str) { - return cons((node*)NODE_CVAR, nsym(sym)); + size_t len = (size_t)str->car; + const char *s = (const char*)str->cdr; + + return mrb_intern(p->mrb, s, len); } /* (:nvar . a) */ static node* new_nvar(parser_state *p, int num) { - return cons((node*)NODE_NVAR, nint(num)); + int nvar; + node *nvars = p->nvars->cdr; + while (nvars) { + nvar = node_to_int(nvars->car); + if (nvar == -2) break; /* top of the scope */ + if (nvar > 0) { + yyerror(NULL, p, "numbered parameter used in outer block"); + break; + } + nvars->car = int_to_node(-1); + nvars = nvars->cdr; + } + nvar = node_to_int(p->nvars->car); + if (nvar == -1) { + yyerror(NULL, p, "numbered parameter used in inner block"); + } + else { + p->nvars->car = int_to_node(nvar > num ? nvar : num); + } + struct mrb_ast_nvar_node *n = NEW_NODE(nvar, NODE_NVAR); + n->num = num; + return (node*)n; } -/* (:const . a) */ +/* struct: const_node(name) */ static node* new_const(parser_state *p, mrb_sym sym) { - return cons((node*)NODE_CONST, nsym(sym)); + struct mrb_ast_const_node *n = NEW_NODE(const, NODE_CONST); + n->symbol = sym; + + return (node*)n; } -/* (:undef a...) */ +/* struct: undef_node(syms) - uses cons list */ static node* -new_undef(parser_state *p, mrb_sym sym) +new_undef(parser_state *p, node *syms) { - return list2((node*)NODE_UNDEF, nsym(sym)); + struct mrb_ast_undef_node *undef_node = NEW_NODE(undef, NODE_UNDEF); + undef_node->syms = syms; + return (node*)undef_node; } -/* (:class class super body) */ +/* struct: class_node(path, super, body) */ static node* new_class(parser_state *p, node *c, node *s, node *b) { void_expr_error(p, s); - return list4((node*)NODE_CLASS, c, s, cons(locals_node(p), b)); + + struct mrb_ast_class_node *n = NEW_NODE(class, NODE_CLASS); + n->name = c; + n->superclass = s; + n->body = cons(locals_node(p), b); + + return (node*)n; } -/* (:sclass obj body) */ +/* struct: sclass_node(obj, body) */ static node* new_sclass(parser_state *p, node *o, node *b) { void_expr_error(p, o); - return list3((node*)NODE_SCLASS, o, cons(locals_node(p), b)); + + struct mrb_ast_sclass_node *n = NEW_NODE(sclass, NODE_SCLASS); + n->obj = o; + n->body = cons(locals_node(p), b); + + return (node*)n; } -/* (:module module body) */ +/* struct: module_node(path, body) */ static node* new_module(parser_state *p, node *m, node *b) { - return list3((node*)NODE_MODULE, m, cons(locals_node(p), b)); + struct mrb_ast_module_node *n = NEW_NODE(module, NODE_MODULE); + n->name = m; + n->body = cons(locals_node(p), b); + + return (node*)n; } -/* (:def m lv (arg . body)) */ +/* struct: def_node(name, args, body) */ static node* -new_def(parser_state *p, mrb_sym m, node *a, node *b) +new_def(parser_state *p, mrb_sym name) { - return list5((node*)NODE_DEF, nsym(m), 0, a, b); + struct mrb_ast_def_node *n = NEW_NODE(def, NODE_DEF); + n->name = name; + n->args = (struct mrb_ast_args *)int_to_node(p->cmdarg_stack); + n->locals = local_switch(p); + n->body = NULL; + + return (node*)n; } static void defn_setup(parser_state *p, node *d, node *a, node *b) { - node *n = d->cdr->cdr; + struct mrb_ast_def_node *n = def_node(d); + node *locals = n->locals; - n->car = locals_node(p); - p->cmdarg_stack = intn(n->cdr->car); - n->cdr->car = a; - local_resume(p, n->cdr->cdr->car); - n->cdr->cdr->car = b; + n->locals = locals_node(p); + p->cmdarg_stack = node_to_int(n->args); + n->args = (struct mrb_ast_args *)a; + n->body = b; + local_resume(p, locals); } -/* (:sdef obj m lv (arg . body)) */ +/* struct: sdef_node(obj, name, args, body) */ static node* -new_sdef(parser_state *p, node *o, mrb_sym m, node *a, node *b) +new_sdef(parser_state *p, node *o, mrb_sym name) { void_expr_error(p, o); - return list6((node*)NODE_SDEF, o, nsym(m), 0, a, b); -} - -static void -defs_setup(parser_state *p, node *d, node *a, node *b) -{ - node *n = d->cdr->cdr->cdr; - - n->car = locals_node(p); - p->cmdarg_stack = intn(n->cdr->car); - n->cdr->car = a; - local_resume(p, n->cdr->cdr->car); - n->cdr->cdr->car = b; -} -/* (:arg . sym) */ -static node* -new_arg(parser_state *p, mrb_sym sym) -{ - return cons((node*)NODE_ARG, nsym(sym)); + struct mrb_ast_sdef_node *sdef_node = NEW_NODE(sdef, NODE_SDEF); + sdef_node->obj = o; + sdef_node->name = name; + sdef_node->args = (struct mrb_ast_args *)int_to_node(p->cmdarg_stack); + sdef_node->locals = local_switch(p); + sdef_node->body = NULL; + return (node*)sdef_node; } static void local_add_margs(parser_state *p, node *n) { while (n) { - if (typen(n->car->car) == NODE_MASGN) { - node *t = n->car->cdr->cdr; + if (node_type(n->car) == NODE_MARG) { + struct mrb_ast_masgn_node *masgn_n = (struct mrb_ast_masgn_node*)n->car; + node *rhs = masgn_n->rhs; + + /* For parameter destructuring, rhs contains the locals */ + if (rhs) { + node *t = rhs; + while (t) { + local_add_f(p, node_to_sym(t->car)); + t = t->cdr; + } + /* Clear cons list RHS immediately after use */ + masgn_n->rhs = NULL; + } - n->car->cdr->cdr = NULL; - while (t) { - local_add_f(p, sym(t->car)); - t = t->cdr; + /* Process nested destructuring in lhs components */ + if (masgn_n->pre) { + local_add_margs(p, masgn_n->pre); + } + if (masgn_n->post) { + local_add_margs(p, masgn_n->post); } - local_add_margs(p, n->car->cdr->car->car); - local_add_margs(p, n->car->cdr->car->cdr->cdr->car); } n = n->cdr; } } + static void local_add_lv(parser_state *p, node *lv) { while (lv) { - local_add_f(p, sym(lv->car)); + local_add_f(p, node_to_sym(lv->car)); lv = lv->cdr; } } @@ -847,65 +1167,91 @@ local_add_lv(parser_state *p, node *lv) static node* new_args(parser_state *p, node *m, node *opt, mrb_sym rest, node *m2, node *tail) { - node *n; - local_add_margs(p, m); local_add_margs(p, m2); - n = cons(m2, tail); - n = cons(nsym(rest), n); - n = cons(opt, n); + + /* Save original optional arguments before processing */ + node *orig_opt = opt; + + /* Process optional arguments (keep original side effects) */ while (opt) { /* opt: (sym . (opt . lv)) -> (sym . opt) */ local_add_lv(p, opt->car->cdr->cdr); opt->car->cdr = opt->car->cdr->car; opt = opt->cdr; } - return cons(m, n); + + /* Allocate struct mrb_ast_args (no hdr) */ + struct mrb_ast_args *args = (struct mrb_ast_args*)parser_palloc(p, sizeof(struct mrb_ast_args)); + + /* Initialize members */ + args->mandatory_args = m; + args->optional_args = orig_opt; + args->rest_arg = rest; + args->post_mandatory_args = m2; + + /* Deconstruct tail cons list: (kws . (kwrest . blk)) */ + if (tail) { + args->keyword_args = (node*)tail->car; /* kws */ + args->kwrest_arg = (mrb_sym)(intptr_t)tail->cdr->car; /* kwrest */ + args->block_arg = (mrb_sym)(intptr_t)tail->cdr->cdr; /* blk */ + cons_free(tail->cdr); + cons_free(tail); + } + else { + args->keyword_args = NULL; + args->kwrest_arg = 0; + args->block_arg = 0; + } + + return (node*)args; } -/* (:args_tail keywords rest_keywords_sym block_sym) */ +/* struct: args_tail_node(kwargs, kwrest, block) */ static node* -new_args_tail(parser_state *p, node *kws, node *kwrest, mrb_sym blk) +new_args_tail(parser_state *p, node *kws, mrb_sym kwrest, mrb_sym blk) { node *k; if (kws || kwrest) { - local_add_kw(p, (kwrest && kwrest->cdr)? sym(kwrest->cdr) : 0); + local_add_kw(p, kwrest); } - local_add_blk(p, blk); + local_add_blk(p); + if (blk && blk != MRB_SYM(nil)) local_add_f(p, blk); /* allocate register for keywords arguments */ /* order is for Proc#parameters */ for (k = kws; k; k = k->cdr) { - if (!k->car->cdr->cdr->car) { /* allocate required keywords */ - local_add_f(p, sym(k->car->cdr->car)); + if (!k->car->cdr) { /* allocate required keywords - simplified structure: (key . NULL) */ + local_add_f(p, node_to_sym(k->car->car)); } } for (k = kws; k; k = k->cdr) { - if (k->car->cdr->cdr->car) { /* allocate keywords with default */ - local_add_lv(p, k->car->cdr->cdr->car->cdr); - k->car->cdr->cdr->car = k->car->cdr->cdr->car->car; - local_add_f(p, sym(k->car->cdr->car)); + if (k->car->cdr) { /* allocate keywords with default - simplified structure: (key . value) */ + local_add_lv(p, k->car->cdr->cdr); /* value->cdr for default args */ + k->car->cdr = k->car->cdr->car; /* value->car for default args */ + local_add_f(p, node_to_sym(k->car->car)); } } - return list4((node*)NODE_ARGS_TAIL, kws, kwrest, nsym(blk)); + /* Return cons list: (keyword . (kwrest . blk)) */ + return cons(kws, cons(sym_to_node(kwrest), sym_to_node(blk))); } -/* (:kw_arg kw_sym def_arg) */ +/* (kw_sym . def_arg) - simplified from NODE_KW_ARG wrapper */ static node* new_kw_arg(parser_state *p, mrb_sym kw, node *def_arg) { mrb_assert(kw); - return list3((node*)NODE_KW_ARG, nsym(kw), def_arg); + return cons(sym_to_node(kw), def_arg); } /* (:kw_rest_args . a) */ static node* new_kw_rest_args(parser_state *p, mrb_sym sym) { - return cons((node*)NODE_KW_REST_ARGS, nsym(sym)); + return sym_to_node(intern_op(pow)); /* Use ** symbol as direct marker */ } static node* @@ -915,27 +1261,30 @@ new_args_dots(parser_state *p, node *m) mrb_sym k = intern_op(pow); mrb_sym b = intern_op(and); local_add_f(p, r); - return new_args(p, m, 0, r, 0, - new_args_tail(p, 0, new_kw_rest_args(p, k), b)); + return new_args(p, m, 0, r, 0, new_args_tail(p, NULL, k, b)); } -/* (:block_arg . a) */ +/* struct: block_arg_node(value) */ static node* new_block_arg(parser_state *p, node *a) { - return cons((node*)NODE_BLOCK_ARG, a); + struct mrb_ast_block_arg_node *block_arg_node = NEW_NODE(block_arg, NODE_BLOCK_ARG); + block_arg_node->value = a; + return (node*)block_arg_node; } static node* setup_numparams(parser_state *p, node *a) { - int nvars = intn(p->nvars->car); + int nvars = node_to_int(p->nvars->car); if (nvars > 0) { int i; mrb_sym sym; - // m || opt || rest || tail - if (a && (a->car || (a->cdr && a->cdr->car) || (a->cdr->cdr && a->cdr->cdr->car) || (a->cdr->cdr->cdr->cdr && a->cdr->cdr->cdr->cdr->car))) { - yyerror(p, "ordinary parameter is defined"); + // Check if any arguments are already defined + struct mrb_ast_args *args = (struct mrb_ast_args *)a; + if (a && (args->mandatory_args || args->optional_args || args->rest_arg || + args->post_mandatory_args || args->keyword_args || args->kwrest_arg)) { + yyerror(NULL, p, "ordinary parameter is defined"); } else if (p->locals) { /* p->locals should not be NULL unless error happens before the point */ @@ -947,8 +1296,8 @@ setup_numparams(parser_state *p, node *a) buf[1] = i+'0'; buf[2] = '\0'; sym = intern_cstr(buf); - args = cons(new_arg(p, sym), args); - p->locals->car = cons(nsym(sym), p->locals->car); + args = cons(new_lvar(p, sym), args); + p->locals->car = cons(sym_to_node(sym), p->locals->car); } a = new_args(p, args, 0, 0, 0, 0); } @@ -956,244 +1305,332 @@ setup_numparams(parser_state *p, node *a) return a; } -/* (:block arg body) */ +/* struct: block_node(args, body) */ static node* new_block(parser_state *p, node *a, node *b) { - a = setup_numparams(p, a); - return list4((node*)NODE_BLOCK, locals_node(p), a, b); + a = setup_numparams(p, a); struct mrb_ast_block_node *n = NEW_NODE(block, NODE_BLOCK); + n->locals = locals_node(p); + n->args = (struct mrb_ast_args *)a; + n->body = b; + + return (node*)n; } -/* (:lambda arg body) */ +/* struct: lambda_node(args, body) */ static node* new_lambda(parser_state *p, node *a, node *b) { - return list4((node*)NODE_LAMBDA, locals_node(p), a, b); + a = setup_numparams(p, a); struct mrb_ast_lambda_node *lambda_node = NEW_NODE(lambda, NODE_LAMBDA); + lambda_node->locals = locals_node(p); + lambda_node->args = (struct mrb_ast_args *)a; + lambda_node->body = b; + return (node*)lambda_node; } -/* (:asgn lhs rhs) */ +/* struct: asgn_node(lhs, rhs) */ static node* new_asgn(parser_state *p, node *a, node *b) { void_expr_error(p, b); - return cons((node*)NODE_ASGN, cons(a, b)); + + struct mrb_ast_asgn_node *n = NEW_NODE(asgn, NODE_ASGN); + n->lhs = a; + n->rhs = b; + + return (node*)n; +} + +/* Helper function to create MASGN/MARG nodes */ +static node* +new_masgn_helper(parser_state *p, node *a, node *b, enum node_type node_type) +{ + struct mrb_ast_masgn_node *n = NEW_NODE(masgn, node_type); + + /* Extract pre, rest, post from cons list structure (a b c) */ + if (a) { + n->pre = a->car; /* Pre-splat variables */ + if (a->cdr) { + n->rest = a->cdr->car; /* Splat variable (or -1 for anonymous) */ + if (a->cdr->cdr) { + n->post = a->cdr->cdr->car; /* Post-splat variables */ + cons_free(a->cdr->cdr); + } + else { + n->post = NULL; + } + cons_free(a->cdr); + } + else { + n->rest = NULL; + n->post = NULL; + } + cons_free(a); + } + else { + n->pre = NULL; + n->rest = NULL; + n->post = NULL; + } + n->rhs = b; + + return (node*)n; } -/* (:masgn mlhs=(pre rest post) mrhs) */ +/* struct: masgn_node(lhs, rhs) */ static node* new_masgn(parser_state *p, node *a, node *b) { void_expr_error(p, b); - return cons((node*)NODE_MASGN, cons(a, b)); + return new_masgn_helper(p, a, b, NODE_MASGN); } -/* (:masgn mlhs mrhs) no check */ +/* (:marg mlhs mrhs) no check - for parameter destructuring */ static node* -new_masgn_param(parser_state *p, node *a, node *b) +new_marg(parser_state *p, node *a) { - return cons((node*)NODE_MASGN, cons(a, b)); + return new_masgn_helper(p, a, p->locals->car, NODE_MARG); } -/* (:asgn lhs rhs) */ +/* struct: op_asgn_node(lhs, op, rhs) */ static node* new_op_asgn(parser_state *p, node *a, mrb_sym op, node *b) { void_expr_error(p, b); - return list4((node*)NODE_OP_ASGN, a, nsym(op), b); + + struct mrb_ast_op_asgn_node *n = NEW_NODE(op_asgn, NODE_OP_ASGN); + n->lhs = a; + n->op = op; + n->rhs = b; + return (node*)n; } static node* -new_imaginary(parser_state *p, node *imaginary) +new_int_n(parser_state *p, int32_t val) { - return new_call(p, new_const(p, MRB_SYM_2(p->mrb, Kernel)), MRB_SYM_2(p->mrb, Complex), - new_callargs(p, list2(list3((node*)NODE_INT, (node*)strdup("0"), nint(10)), imaginary), 0, 0), '.'); + struct mrb_ast_int_node *n = NEW_NODE(int, NODE_INT); + n->value = val; + + return (node*)n; } static node* -new_rational(parser_state *p, node *rational) +new_imaginary(parser_state *p, node *imaginary) { - return new_call(p, new_const(p, MRB_SYM_2(p->mrb, Kernel)), MRB_SYM_2(p->mrb, Rational), new_callargs(p, list1(rational), 0, 0), '.'); + return new_fcall(p, MRB_SYM(Complex), + new_callargs(p, list2(new_int_n(p, 0), imaginary), 0, 0)); } -/* (:int . i) */ static node* -new_int(parser_state *p, const char *s, int base, int suffix) +new_rational(parser_state *p, node *rational) { - node* result = list3((node*)NODE_INT, (node*)strdup(s), nint(base)); - if (suffix & NUM_SUFFIX_R) { - result = new_rational(p, result); - } - if (suffix & NUM_SUFFIX_I) { - result = new_imaginary(p, result); - } - return result; + return new_fcall(p, MRB_SYM(Rational), new_callargs(p, list1(rational), 0, 0)); } -#ifndef MRB_NO_FLOAT -/* (:float . i) */ -static node* -new_float(parser_state *p, const char *s, int suffix) +/* Read integer into int32_t with overflow detection */ +static mrb_bool +read_int32(const char *p, int base, int32_t *result) { - node* result = cons((node*)NODE_FLOAT, (node*)strdup(s)); - if (suffix & NUM_SUFFIX_R) { - result = new_rational(p, result); + const char *e = p + strlen(p); + int32_t value = 0; + mrb_bool neg = FALSE; + + if (base < 2 || base > 16) { + return FALSE; } - if (suffix & NUM_SUFFIX_I) { - result = new_imaginary(p, result); + + if (*p == '+') { + p++; + } + else if (*p == '-') { + neg = TRUE; + p++; } - return result; -} -#endif -/* (:str . (s . len)) */ -static node* -new_str(parser_state *p, const char *s, size_t len) -{ - return cons((node*)NODE_STR, cons((node*)strndup(s, len), nint(len))); -} + while (p < e) { + int n; + char c = *p; -/* (:dstr . a) */ -static node* -new_dstr(parser_state *p, node *a) -{ - return cons((node*)NODE_DSTR, a); -} + /* Skip underscores */ + if (c == '_') { + p++; + continue; + } -static int -string_node_p(node *n) -{ - return (int)(typen(n->car) == NODE_STR); -} + /* Parse digit */ + if (c >= '0' && c <= '9') { + n = c - '0'; + } + else if (c >= 'a' && c <= 'f') { + n = c - 'a' + 10; + } + else if (c >= 'A' && c <= 'F') { + n = c - 'A' + 10; + } + else { + /* Invalid character */ + return FALSE; + } -static node* -composite_string_node(parser_state *p, node *a, node *b) -{ - size_t newlen = (size_t)a->cdr + (size_t)b->cdr; - char *str = (char*)mrb_pool_realloc(p->pool, a->car, (size_t)a->cdr + 1, newlen + 1); - memcpy(str + (size_t)a->cdr, b->car, (size_t)b->cdr); - str[newlen] = '\0'; - a->car = (node*)str; - a->cdr = (node*)newlen; - cons_free(b); - return a; -} + if (n >= base) { + /* Digit not valid for this base */ + return FALSE; + } -static node* -concat_string(parser_state *p, node *a, node *b) -{ - if (string_node_p(a)) { - if (string_node_p(b)) { - /* a == NODE_STR && b == NODE_STR */ - composite_string_node(p, a->cdr, b->cdr); - cons_free(b); - return a; + /* Check for multiplication overflow */ + if (value > INT32_MAX / base) { + return FALSE; } - else { - /* a == NODE_STR && b == NODE_DSTR */ - if (string_node_p(b->cdr->car)) { - /* a == NODE_STR && b->[NODE_STR, ...] */ - composite_string_node(p, a->cdr, b->cdr->car->cdr); - cons_free(b->cdr->car); - b->cdr->car = a; - return b; + value *= base; + + /* Check for addition overflow */ + if (value > INT32_MAX - n) { + /* Special case: -INT32_MIN is valid */ + if (neg && value == (INT32_MAX - n + 1) && p + 1 == e) { + *result = INT32_MIN; + return TRUE; } + return FALSE; } + + value += n; + p++; + } + + *result = neg ? -value : value; + return TRUE; +} + +static node* +new_int(parser_state *p, const char *s, int base, int suffix) +{ + int32_t val; + node* result; + + /* Try to parse as int32_t first */ + if (read_int32(s, base, &val)) { + result = new_int_n(p, val); } else { - node *c; /* last node of a */ - for (c = a; c->cdr != NULL; c = c->cdr) ; + /* Big integer - create NODE_BIGINT */ + struct mrb_ast_bigint_node *n = NEW_NODE(bigint, NODE_BIGINT); + n->string = strdup(s); + n->base = base; - if (string_node_p(b)) { - /* a == NODE_DSTR && b == NODE_STR */ - if (string_node_p(c->car)) { - /* a->[..., NODE_STR] && b == NODE_STR */ - composite_string_node(p, c->car->cdr, b->cdr); - cons_free(b); - return a; - } + result = (node*)n; + } - push(a, b); - return a; - } - else { - /* a == NODE_DSTR && b == NODE_DSTR */ - if (string_node_p(c->car) && string_node_p(b->cdr->car)) { - /* a->[..., NODE_STR] && b->[NODE_STR, ...] */ - node *d = b->cdr; - cons_free(b); - composite_string_node(p, c->car->cdr, d->car->cdr); - cons_free(d->car); - c->cdr = d->cdr; - cons_free(d); - return a; - } - else { - c->cdr = b->cdr; - cons_free(b); - return a; - } - } + /* Handle suffix modifiers */ + if (suffix & NUM_SUFFIX_R) { + result = new_rational(p, result); + } + if (suffix & NUM_SUFFIX_I) { + result = new_imaginary(p, result); } - return new_dstr(p, list2(a, b)); + return result; } -/* (:str . (s . len)) */ +#ifndef MRB_NO_FLOAT +/* struct: float_node(value) */ static node* -new_xstr(parser_state *p, const char *s, int len) +new_float(parser_state *p, const char *s, int suffix) { - return cons((node*)NODE_XSTR, cons((node*)strndup(s, len), nint(len))); + struct mrb_ast_float_node *n = NEW_NODE(float, NODE_FLOAT); + n->value = strdup(s); + + node* result = (node*)n; + + if (suffix & NUM_SUFFIX_R) { + result = new_rational(p, result); + } + if (suffix & NUM_SUFFIX_I) { + result = new_imaginary(p, result); + } + return result; } +#endif -/* (:xstr . a) */ +/* Create string node from cons list */ +/* struct: str_node(str) */ static node* -new_dxstr(parser_state *p, node *a) +new_str(parser_state *p, node *a) { - return cons((node*)NODE_DXSTR, a); + struct mrb_ast_str_node *n = NEW_NODE(str, NODE_STR); + n->list = a; + + return (node*)n; } -/* (:dsym . a) */ +/* struct: xstr_node(str) */ static node* -new_dsym(parser_state *p, node *a) +new_xstr(parser_state *p, node *a) { - return cons((node*)NODE_DSYM, a); + struct mrb_ast_xstr_node *n = NEW_NODE(xstr, NODE_XSTR); + n->list = a; + return (node*)n; } -/* (:regx . (s . (opt . enc))) */ +/* struct: dsym_node(parts) - uses cons list */ static node* -new_regx(parser_state *p, const char *p1, const char* p2, const char* p3) +new_dsym(parser_state *p, node *a) { - return cons((node*)NODE_REGX, cons((node*)p1, cons((node*)p2, (node*)p3))); + struct mrb_ast_str_node *n = NEW_NODE(str, NODE_DSYM); + n->list = a; + return (node*)n; } -/* (:dregx . (a . b)) */ +/* struct: regx_node(pattern, flags, encoding) */ static node* -new_dregx(parser_state *p, node *a, node *b) +new_regx(parser_state *p, node *list, const char *flags, const char *encoding) { - return cons((node*)NODE_DREGX, cons(a, b)); + struct mrb_ast_regx_node *n = NEW_NODE(regx, NODE_REGX); + n->list = list; + n->flags = flags; + n->encoding = encoding; + return (node*)n; } -/* (:backref . n) */ +/* struct: back_ref_node(n) */ static node* new_back_ref(parser_state *p, int n) { - return cons((node*)NODE_BACK_REF, nint(n)); + struct mrb_ast_back_ref_node *backref_node = NEW_NODE(back_ref, NODE_BACK_REF); + backref_node->type = n; + return (node*)backref_node; } -/* (:nthref . n) */ +/* struct: nth_ref_node(n) */ static node* new_nth_ref(parser_state *p, int n) { - return cons((node*)NODE_NTH_REF, nint(n)); + struct mrb_ast_nth_ref_node *nthref_node = NEW_NODE(nth_ref, NODE_NTH_REF); + nthref_node->nth = n; + return (node*)nthref_node; } -/* (:heredoc . a) */ +/* struct: heredoc_node(str) */ static node* -new_heredoc(parser_state *p) +new_heredoc(parser_state *p, struct mrb_parser_heredoc_info **infop) { - parser_heredoc_info *inf = (parser_heredoc_info*)parser_palloc(p, sizeof(parser_heredoc_info)); - return cons((node*)NODE_HEREDOC, (node*)inf); + struct mrb_ast_heredoc_node *n = NEW_NODE(heredoc, NODE_HEREDOC); + + /* Initialize embedded heredoc info struct */ + n->info.allow_indent = FALSE; + n->info.remove_indent = FALSE; + n->info.line_head = FALSE; + n->info.indent = 0; + n->info.indented = NULL; + n->info.type = str_not_parsing; // Will be set by heredoc processing + n->info.term = NULL; // Will be set by heredoc processing + n->info.term_len = 0; + n->info.doc = NULL; + + /* Return pointer to embedded info if requested */ + *infop = &n->info; + + return (node*)n; } static void @@ -1204,21 +1641,46 @@ new_bv(parser_state *p, mrb_sym id) static node* new_literal_delim(parser_state *p) { - return cons((node*)NODE_LITERAL_DELIM, 0); + return cons((node*)0, (node*)0); +} + +/* Helper for creating string representation cons (length . string_ptr) */ +static node* +new_str_rep(parser_state *p, const char *str, int len) +{ + return cons(int_to_node(len), (node*)strndup(str, len)); +} + +/* Helper for creating string representation from current token */ +static node* +new_str_tok(parser_state *p) +{ + return new_str_rep(p, tok(p), toklen(p)); +} + +/* Helper for creating empty string representation */ +static node* +new_str_empty(parser_state *p) +{ + return new_str_rep(p, "", 0); } /* (:words . a) */ static node* new_words(parser_state *p, node *a) { - return cons((node*)NODE_WORDS, a); + struct mrb_ast_words_node *words_node = NEW_NODE(words, NODE_WORDS); + words_node->args = a; + return (node*)words_node; } /* (:symbols . a) */ static node* new_symbols(parser_state *p, node *a) { - return cons((node*)NODE_SYMBOLS, a); + struct mrb_ast_symbols_node *symbols_node = NEW_NODE(symbols, NODE_SYMBOLS); + symbols_node->args = a; + return (node*)symbols_node; } /* xxx ----------------------------- */ @@ -1242,17 +1704,20 @@ static void args_with_block(parser_state *p, node *a, node *b) { if (b) { - if (a->cdr && a->cdr->cdr) { - yyerror(p, "both block arg and actual block given"); + /* Handle callargs structure - direct casting like new_args() */ + struct mrb_ast_callargs *callargs = (struct mrb_ast_callargs*)a; + if (callargs->block_arg) { + yyerror(NULL, p, "both block arg and actual block given"); } - a->cdr->cdr = b; + callargs->block_arg = b; } } static void endless_method_name(parser_state *p, node *defn) { - mrb_sym sym = sym(defn->cdr->car); + struct mrb_ast_def_node *def = (struct mrb_ast_def_node*)defn; + mrb_sym sym = def->name; mrb_int len; const char *name = mrb_sym_name_len(p->mrb, sym, &len); @@ -1260,30 +1725,87 @@ endless_method_name(parser_state *p, node *defn) for (int i=0; icar)) { + /* Handle direct variable-sized nodes */ + struct mrb_ast_var_header *header = (struct mrb_ast_var_header*)a; + + enum node_type var_type = (enum node_type)header->node_type; + switch (var_type) { case NODE_SUPER: case NODE_ZSUPER: - if (!a->cdr) a->cdr = new_callargs(p, 0, 0, b); - else args_with_block(p, a->cdr, b); + /* For variable-sized super/zsuper nodes, update the args field directly */ + { + struct mrb_ast_super_node *super_n = super_node(a); + if (!super_n->args) { + super_n->args = new_callargs(p, 0, 0, b); + } + else { + args_with_block(p, super_n->args, b); + } + } + break; + case NODE_YIELD: + /* Variable-sized yield nodes should generate an error when given a block */ + yyerror(NULL, p, "block given to yield"); + break; + case NODE_RETURN: + /* Variable-sized return nodes - recursively call with args */ + { + struct mrb_ast_return_node *return_n = return_node(a); + if (return_n->args != NULL) { + call_with_block(p, return_n->args, b); + } + } + break; + case NODE_BREAK: + /* Variable-sized break nodes - recursively call with value */ + { + struct mrb_ast_break_node *break_n = (struct mrb_ast_break_node*)a; + if (break_n->value != NULL) { + call_with_block(p, break_n->value, b); + } + } + break; + case NODE_NEXT: + /* Variable-sized next nodes - recursively call with value */ + { + struct mrb_ast_next_node *next_n = (struct mrb_ast_next_node*)a; + if (next_n->value != NULL) { + call_with_block(p, next_n->value, b); + } + } break; case NODE_CALL: - case NODE_FCALL: - case NODE_SCALL: - /* (NODE_CALL recv mid (args kw . blk)) */ - n = a->cdr->cdr->cdr; /* (args kw . blk) */ - if (!n->car) n->car = new_callargs(p, 0, 0, b); - else args_with_block(p, n->car, b); + /* Variable-sized call nodes - add block to existing args */ + { + struct mrb_ast_call_node *call = call_node(a); + + if (call->args && callargs_node(call->args)->block_arg) { + yyerror(NULL, p, "both block arg and actual block given"); + return; + } + + /* Use existing args and add block */ + if (call->args) { + /* Modify existing callargs structure to add block */ + args_with_block(p, call->args, b); + } + else { + /* Create new callargs with just the block */ + call->args = new_callargs(p, NULL, NULL, b); + } + } break; default: + /* For other variable-sized nodes, do nothing */ break; } } @@ -1291,7 +1813,9 @@ call_with_block(parser_state *p, node *a, node *b) static node* new_negate(parser_state *p, node *n) { - return cons((node*)NODE_NEGATE, n); + struct mrb_ast_negate_node *negate_node = NEW_NODE(negate, NODE_NEGATE); + negate_node->operand = n; + return (node*)negate_node; } static node* @@ -1303,36 +1827,46 @@ cond(node *n) static node* ret_args(parser_state *p, node *n) { - if (n->cdr->cdr) { - yyerror(p, "block argument should not be given"); + /* Handle callargs structure - direct casting like new_args() */ + struct mrb_ast_callargs *callargs = (struct mrb_ast_callargs*)n; + if (callargs->block_arg) { + yyerror(NULL, p, "block argument should not be given"); return NULL; } - if (!n->car) return NULL; - if (!n->car->cdr) return n->car->car; - return new_array(p, n->car); + if (!callargs->regular_args) return NULL; + if (!callargs->regular_args->cdr) return callargs->regular_args->car; + return new_array(p, callargs->regular_args); } static void assignable(parser_state *p, node *lhs) { - if (intn(lhs->car) == NODE_LVAR) { - local_add(p, sym(lhs->cdr)); + switch (node_type(lhs)) { + case NODE_LVAR: + local_add(p, var_node(lhs)->symbol); + break; + case NODE_CONST: + if (p->in_def) + yyerror(NULL, p, "dynamic constant assignment"); + break; + default: + /* Other node types don't need special handling in assignable */ + break; } } static node* var_reference(parser_state *p, node *lhs) { - node *n; - - if (intn(lhs->car) == NODE_LVAR) { - if (!local_var_p(p, sym(lhs->cdr))) { - n = new_fcall(p, sym(lhs->cdr), 0); - cons_free(lhs); + /* Check if this is a variable-sized node */ + if (node_type_p(lhs, NODE_LVAR)) { + mrb_sym sym = var_node(lhs)->symbol; + if (!local_var_p(p, sym)) { + node *n = new_fcall(p, sym, 0); + /* Don't free variable-sized nodes - they're managed by the parser allocator */ return n; } } - return lhs; } @@ -1340,18 +1874,16 @@ static node* label_reference(parser_state *p, mrb_sym sym) { const char *name = mrb_sym_name(p->mrb, sym); - node *n; if (local_var_p(p, sym)) { - n = new_lvar(p, sym); + return new_lvar(p, sym); } else if (ISUPPER(name[0])) { - n = new_const(p, sym); + return new_const(p, sym); } else { - n = new_fcall(p, sym, 0); + return new_fcall(p, sym, 0); } - return n; } typedef enum mrb_string_type string_type; @@ -1404,9 +1936,13 @@ static parser_heredoc_info * parsing_heredoc_info(parser_state *p) { node *nd = p->parsing_heredoc; - if (nd == NULL) - return NULL; + if (nd == NULL) return NULL; /* mrb_assert(nd->car->car == NODE_HEREDOC); */ + if (node_type(nd->car) == NODE_HEREDOC) { + /* Variable-sized heredoc node - return address of embedded info struct */ + struct mrb_ast_heredoc_node *heredoc = (struct mrb_ast_heredoc_node*)nd->car; + return &heredoc->info; + } return (parser_heredoc_info*)nd->car->cdr; } @@ -1437,6 +1973,29 @@ heredoc_end(parser_state *p) } #define is_strterm_type(p,str_func) ((p)->lex_strterm->type & (str_func)) +static void +prohibit_literals(parser_state *p, node *n) +{ + if (n == 0) { + yyerror(NULL, p, "can't define singleton method for ()."); + } + else { + enum node_type nt = node_type(n); + switch (nt) { + case NODE_INT: + case NODE_STR: + case NODE_XSTR: + case NODE_REGX: + case NODE_FLOAT: + case NODE_ARRAY: + case NODE_HEREDOC: + yyerror(NULL, p, "can't define singleton method for literals"); + default: + break; + } + } +} + /* xxx ----------------------------- */ %} @@ -1455,56 +2014,56 @@ heredoc_end(parser_state *p) } %token - keyword_class - keyword_module - keyword_def - keyword_begin - keyword_if - keyword_unless - keyword_while - keyword_until - keyword_for + keyword_class "'class'" + keyword_module "'module'" + keyword_def "'def'" + keyword_begin "'begin'" + keyword_if "'if'" + keyword_unless "'unless'" + keyword_while "'while'" + keyword_until "'until'" + keyword_for "'for'" %token - keyword_undef - keyword_rescue - keyword_ensure - keyword_end - keyword_then - keyword_elsif - keyword_else - keyword_case - keyword_when - keyword_break - keyword_next - keyword_redo - keyword_retry - keyword_in - keyword_do - keyword_do_cond - keyword_do_block - keyword_do_LAMBDA - keyword_return - keyword_yield - keyword_super - keyword_self - keyword_nil - keyword_true - keyword_false - keyword_and - keyword_or - keyword_not - modifier_if - modifier_unless - modifier_while - modifier_until - modifier_rescue - keyword_alias - keyword_BEGIN - keyword_END - keyword__LINE__ - keyword__FILE__ - keyword__ENCODING__ + keyword_undef "'undef'" + keyword_rescue "'rescue'" + keyword_ensure "'ensure'" + keyword_end "'end'" + keyword_then "'then'" + keyword_elsif "'elsif'" + keyword_else "'else'" + keyword_case "'case'" + keyword_when "'when'" + keyword_break "'break'" + keyword_next "'next'" + keyword_redo "'redo'" + keyword_retry "'retry'" + keyword_in "'in'" + keyword_do "'do'" + keyword_do_cond "'do' for condition" + keyword_do_block "'do' for block" + keyword_do_LAMBDA "'do' for lambda" + keyword_return "'return'" + keyword_yield "'yield'" + keyword_super "'super'" + keyword_self "'self'" + keyword_nil "'nil'" + keyword_true "'true'" + keyword_false "'false'" + keyword_and "'and'" + keyword_or "'or'" + keyword_not "'not'" + modifier_if "'if' modifier" + modifier_unless "'unless' modifier" + modifier_while "'while' modifier" + modifier_until "'until' modifier" + modifier_rescue "'rescue' modifier" + keyword_alias "'alias'" + keyword_BEGIN "'BEGIN'" + keyword_END "'END'" + keyword__LINE__ "'__LINE__'" + keyword__FILE__ "'__FILE__'" + keyword__ENCODING__ "'__ENCODING__'" %token tIDENTIFIER "local variable or method" %token tFID "method" @@ -1545,9 +2104,12 @@ heredoc_end(parser_state *p) %type heredoc words symbols %type call_op call_op2 /* 0:'&.', 1:'.', 2:'::' */ -%type args_tail opt_args_tail f_kwarg f_kw f_kwrest +%type args_tail opt_args_tail f_kwarg f_kw %type f_block_kwarg f_block_kw block_args_tail opt_block_args_tail -%type f_label +%type f_label f_kwrest + +/* pattern matching */ +%type in_clauses p_expr p_alt p_value p_var p_as p_array p_array_body p_array_elems p_rest p_hash p_hash_body p_hash_elems p_hash_elem p_kwrest p_args_head p_args_post p_const %token tUPLUS "unary plus" %token tUMINUS "unary minus" @@ -1594,7 +2156,7 @@ heredoc_end(parser_state *p) %nonassoc tLOWEST %nonassoc tLBRACE_ARG -%nonassoc modifier_if modifier_unless modifier_while modifier_until +%nonassoc modifier_if modifier_unless modifier_while modifier_until keyword_in %left keyword_or keyword_and %right keyword_not %right '=' tOP_ASGN @@ -1624,7 +2186,6 @@ program : { top_compstmt { p->tree = new_scope(p, $2); - NODE_LINENO(p->tree, $2); } ; @@ -1636,20 +2197,19 @@ top_compstmt : top_stmts opt_terms top_stmts : none { - $$ = new_begin(p, 0); + $$ = new_stmts(p, 0); } | top_stmt { - $$ = new_begin(p, $1); - NODE_LINENO($$, $1); + $$ = new_stmts(p, $1); } | top_stmts terms top_stmt { - $$ = push($1, newline_node($3)); + $$ = stmts_push(p, $1, newline_node($3)); } | error top_stmt { - $$ = new_begin(p, 0); + $$ = new_stmts(p, 0); } ; @@ -1661,7 +2221,7 @@ top_stmt : stmt } '{' top_compstmt '}' { - yyerror(p, "BEGIN not supported"); + yyerror(&@1, p, "BEGIN not supported"); local_resume(p, $2); nvars_unnest(p); $$ = 0; @@ -1675,11 +2235,10 @@ bodystmt : compstmt { if ($2) { $$ = new_rescue(p, $1, $2, $3); - NODE_LINENO($$, $1); } else if ($3) { yywarning(p, "else without rescue is useless"); - $$ = push($1, $3); + $$ = stmts_push(p, $1, $3); } else { $$ = $1; @@ -1703,20 +2262,19 @@ compstmt : stmts opt_terms stmts : none { - $$ = new_begin(p, 0); + $$ = new_stmts(p, 0); } | stmt { - $$ = new_begin(p, $1); - NODE_LINENO($$, $1); + $$ = new_stmts(p, $1); } | stmts terms stmt { - $$ = push($1, newline_node($3)); + $$ = stmts_push(p, $1, newline_node($3)); } | error stmt { - $$ = new_begin(p, $2); + $$ = new_stmts(p, $2); } ; @@ -1726,7 +2284,7 @@ stmt : keyword_alias fsym {p->lstate = EXPR_FNAME;} fsym } | keyword_undef undef_list { - $$ = $2; + $$ = new_undef(p, $2); } | stmt modifier_if expr_value { @@ -1734,15 +2292,25 @@ stmt : keyword_alias fsym {p->lstate = EXPR_FNAME;} fsym } | stmt modifier_unless expr_value { - $$ = new_unless(p, cond($3), $1, 0); + $$ = new_if(p, cond($3), 0, $1); } | stmt modifier_while expr_value { - $$ = new_while(p, cond($3), $1); + if ($1 && node_type_p($1, NODE_BEGIN)) { + $$ = new_while_mod(p, cond($3), $1); + } + else { + $$ = new_while(p, cond($3), $1); + } } | stmt modifier_until expr_value { - $$ = new_until(p, cond($3), $1); + if ($1 && node_type_p($1, NODE_BEGIN)) { + $$ = new_until_mod(p, cond($3), $1); + } + else { + $$ = new_until(p, cond($3), $1); + } } | stmt modifier_rescue stmt { @@ -1750,7 +2318,7 @@ stmt : keyword_alias fsym {p->lstate = EXPR_FNAME;} fsym } | keyword_END '{' compstmt '}' { - yyerror(p, "END not supported"); + yyerror(&@1, p, "END not supported"); $$ = new_postexe(p, $3); } | command_asgn @@ -1770,12 +2338,6 @@ stmt : keyword_alias fsym {p->lstate = EXPR_FNAME;} fsym { $$ = new_masgn(p, $1, new_array(p, $3)); } - | arg tASSOC tIDENTIFIER - { - node *lhs = new_lvar(p, $3); - assignable(p, lhs); - $$ = new_asgn(p, lhs, $1); - } | expr ; @@ -1801,7 +2363,7 @@ command_asgn : lhs '=' command_rhs } | primary_value tCOLON2 tCONSTANT tOP_ASGN command_call { - yyerror(p, "constant re-assignment"); + yyerror(&@1, p, "constant re-assignment"); $$ = 0; } | primary_value tCOLON2 tIDENTIFIER tOP_ASGN command_rhs @@ -1830,7 +2392,7 @@ command_asgn : lhs '=' command_rhs { $$ = $1; void_expr_error(p, $4); - defs_setup(p, $$, $2, $4); + defn_setup(p, $$, $2, $4); nvars_unnest(p); p->in_def--; p->in_single--; @@ -1839,7 +2401,7 @@ command_asgn : lhs '=' command_rhs { $$ = $1; void_expr_error(p, $4); - defs_setup(p, $$, $2, new_mod_rescue(p, $4, $6)); + defn_setup(p, $$, $2, new_mod_rescue(p, $4, $6)); nvars_unnest(p); p->in_def--; p->in_single--; @@ -1847,7 +2409,7 @@ command_asgn : lhs '=' command_rhs | backref tOP_ASGN command_rhs { backref_error(p, $1); - $$ = new_begin(p, 0); + $$ = new_stmts(p, 0); } ; @@ -1859,7 +2421,6 @@ command_rhs : command_call %prec tOP_ASGN | command_asgn ; - expr : command_call | expr keyword_and expr { @@ -1877,13 +2438,24 @@ expr : command_call { $$ = call_uni_op(p, cond($2), "!"); } - | arg + | arg tASSOC {p->in_kwarg++;} p_expr + { + /* expr => pattern (raises NoMatchingPatternError on failure) */ + p->in_kwarg--; + $$ = new_match_pat(p, $1, $4, TRUE); + } + | arg keyword_in {p->in_kwarg++;} p_expr + { + /* expr in pattern (returns true/false) */ + p->in_kwarg--; + $$ = new_match_pat(p, $1, $4, FALSE); + } + | arg %prec tLOWEST ; - defn_head : keyword_def fname { - $$ = new_def(p, $2, nint(p->cmdarg_stack), local_switch(p)); + $$ = new_def(p, $2); p->cmdarg_stack = 0; p->in_def++; nvars_block(p); @@ -1896,7 +2468,7 @@ defs_head : keyword_def singleton dot_or_colon } fname { - $$ = new_sdef(p, $2, $5, nint(p->cmdarg_stack), local_switch(p)); + $$ = new_sdef(p, $2, $5); p->cmdarg_stack = 0; p->in_def++; p->in_single++; @@ -2098,13 +2670,13 @@ mlhs_node : variable | primary_value tCOLON2 tCONSTANT { if (p->in_def || p->in_single) - yyerror(p, "dynamic constant assignment"); + yyerror(&@1, p, "dynamic constant assignment"); $$ = new_colon2(p, $1, $3); } | tCOLON3 tCONSTANT { if (p->in_def || p->in_single) - yyerror(p, "dynamic constant assignment"); + yyerror(&@1, p, "dynamic constant assignment"); $$ = new_colon3(p, $2); } | backref @@ -2137,13 +2709,13 @@ lhs : variable | primary_value tCOLON2 tCONSTANT { if (p->in_def || p->in_single) - yyerror(p, "dynamic constant assignment"); + yyerror(&@1, p, "dynamic constant assignment"); $$ = new_colon2(p, $1, $3); } | tCOLON3 tCONSTANT { if (p->in_def || p->in_single) - yyerror(p, "dynamic constant assignment"); + yyerror(&@1, p, "dynamic constant assignment"); $$ = new_colon3(p, $2); } | backref @@ -2153,29 +2725,29 @@ lhs : variable } | tNUMPARAM { - yyerror(p, "can't assign to numbered parameter"); + yyerror(&@1, p, "can't assign to numbered parameter"); } ; cname : tIDENTIFIER { - yyerror(p, "class/module name must be CONSTANT"); + yyerror(&@1, p, "class/module name must be CONSTANT"); } | tCONSTANT ; cpath : tCOLON3 cname { - $$ = cons(nint(1), nsym($2)); + $$ = cons(int_to_node(1), sym_to_node($2)); } | cname { - $$ = cons(nint(0), nsym($1)); + $$ = cons(int_to_node(0), sym_to_node($1)); } | primary_value tCOLON2 cname { void_expr_error(p, $1); - $$ = cons($1, nsym($3)); + $$ = cons($1, sym_to_node($3)); } ; @@ -2200,11 +2772,11 @@ fsym : fname undef_list : fsym { - $$ = new_undef(p, $1); + $$ = cons(sym_to_node($1), 0); } | undef_list ',' {p->lstate = EXPR_FNAME;} fsym { - $$ = push($1, nsym($4)); + $$ = push($1, sym_to_node($4)); } ; @@ -2280,18 +2852,18 @@ arg : lhs '=' arg_rhs } | primary_value tCOLON2 tCONSTANT tOP_ASGN arg_rhs { - yyerror(p, "constant re-assignment"); - $$ = new_begin(p, 0); + yyerror(&@1, p, "constant re-assignment"); + $$ = new_stmts(p, 0); } | tCOLON3 tCONSTANT tOP_ASGN arg_rhs { - yyerror(p, "constant re-assignment"); - $$ = new_begin(p, 0); + yyerror(&@1, p, "constant re-assignment"); + $$ = new_stmts(p, 0); } | backref tOP_ASGN arg_rhs { backref_error(p, $1); - $$ = new_begin(p, 0); + $$ = new_stmts(p, 0); } | arg tDOT2 arg { @@ -2463,7 +3035,7 @@ arg : lhs '=' arg_rhs { $$ = $1; void_expr_error(p, $4); - defs_setup(p, $$, $2, $4); + defn_setup(p, $$, $2, $4); nvars_unnest(p); p->in_def--; p->in_single--; @@ -2472,7 +3044,7 @@ arg : lhs '=' arg_rhs { $$ = $1; void_expr_error(p, $4); - defs_setup(p, $$, $2, new_mod_rescue(p, $4, $6)); + defn_setup(p, $$, $2, new_mod_rescue(p, $4, $6)); nvars_unnest(p); p->in_def--; p->in_single--; @@ -2487,7 +3059,6 @@ aref_args : none | args trailer { $$ = $1; - NODE_LINENO($$, $1); } | args comma assocs trailer { @@ -2495,8 +3066,7 @@ aref_args : none } | assocs trailer { - $$ = cons(new_kw_hash(p, $1), 0); - NODE_LINENO($$, $1); + $$ = cons(new_hash(p, $1), 0); } ; @@ -2521,7 +3091,7 @@ paren_args : '(' opt_call_args ')' mrb_sym k = intern_op(pow); mrb_sym b = intern_op(and); $$ = new_callargs(p, push($2, new_splat(p, new_lvar(p, r))), - new_kw_hash(p, list1(cons(new_kw_rest_args(p, 0), new_lvar(p, k)))), + list1(cons(new_kw_rest_args(p, 0), new_lvar(p, k))), new_block_arg(p, new_lvar(p, b))); } | '(' tBDOT3 rparen @@ -2531,11 +3101,11 @@ paren_args : '(' opt_call_args ')' mrb_sym b = intern_op(and); if (local_var_p(p, r) && local_var_p(p, k) && local_var_p(p, b)) { $$ = new_callargs(p, list1(new_splat(p, new_lvar(p, r))), - new_kw_hash(p, list1(cons(new_kw_rest_args(p, 0), new_lvar(p, k)))), + list1(cons(new_kw_rest_args(p, 0), new_lvar(p, k))), new_block_arg(p, new_lvar(p, b))); } else { - yyerror(p, "unexpected argument forwarding ..."); + yyerror(&@1, p, "unexpected argument forwarding ..."); $$ = 0; } } @@ -2550,17 +3120,14 @@ opt_call_args : none | args comma { $$ = new_callargs(p,$1,0,0); - NODE_LINENO($$, $1); } | args comma assocs comma { - $$ = new_callargs(p,$1,new_kw_hash(p,$3),0); - NODE_LINENO($$, $1); + $$ = new_callargs(p,$1,$3,0); } | assocs comma { - $$ = new_callargs(p,0,new_kw_hash(p,$1),0); - NODE_LINENO($$, $1); + $$ = new_callargs(p,0,$1,0); } ; @@ -2568,27 +3135,22 @@ call_args : command { void_expr_error(p, $1); $$ = new_callargs(p, list1($1), 0, 0); - NODE_LINENO($$, $1); } | args opt_block_arg { $$ = new_callargs(p, $1, 0, $2); - NODE_LINENO($$, $1); } | assocs opt_block_arg { - $$ = new_callargs(p, 0, new_kw_hash(p, $1), $2); - NODE_LINENO($$, $1); + $$ = new_callargs(p, 0, $1, $2); } | args comma assocs opt_block_arg { - $$ = new_callargs(p, $1, new_kw_hash(p, $3), $4); - NODE_LINENO($$, $1); + $$ = new_callargs(p, $1, $3, $4); } | block_arg { $$ = new_callargs(p, 0, 0, $1); - NODE_LINENO($$, $1); } ; @@ -2630,7 +3192,6 @@ args : arg { void_expr_error(p, $1); $$ = list1($1); - NODE_LINENO($$, $1); } | tSTAR { @@ -2639,13 +3200,16 @@ args : arg | tSTAR arg { $$ = list1(new_splat(p, $2)); - NODE_LINENO($$, $2); } | args comma arg { void_expr_error(p, $3); $$ = push($1, $3); } + | args comma tSTAR + { + $$ = push($1, new_splat(p, new_lvar(p, intern_op(mul)))); + } | args comma tSTAR arg { $$ = push($1, new_splat(p, $4)); @@ -2669,15 +3233,17 @@ mrhs : args comma arg primary : literal | string + { + $$ = new_str(p, $1); + } | xstring + { + $$ = new_xstr(p, $1); + } | regexp | heredoc | var_ref | backref - | tNUMPARAM - { - $$ = new_nvar(p, $1); - } | tFID { $$ = new_fcall(p, $1, 0); @@ -2691,14 +3257,14 @@ primary : literal keyword_end { p->cmdarg_stack = $2; - $$ = $3; + $$ = new_begin(p, $3); } | tLPAREN_ARG { $$ = p->cmdarg_stack; p->cmdarg_stack = 0; } - stmt {p->lstate = EXPR_ENDARG;} rparen + compstmt {p->lstate = EXPR_ENDARG;} rparen { p->cmdarg_stack = $2; $$ = $3; @@ -2722,12 +3288,10 @@ primary : literal | tLBRACK aref_args ']' { $$ = new_array(p, $2); - NODE_LINENO($$, $2); } | tLBRACE assoc_list '}' { $$ = new_hash(p, $2); - NODE_LINENO($$, $2); } | keyword_return { @@ -2789,7 +3353,7 @@ primary : literal opt_else keyword_end { - $$ = new_unless(p, cond($2), $4, $5); + $$ = new_if(p, cond($2), $5, $4); SET_LINENO($$, $1); } | keyword_while {COND_PUSH(1);} expr_value do {COND_POP();} @@ -2816,6 +3380,33 @@ primary : literal { $$ = new_case(p, 0, $3); } + | keyword_case expr_value opt_terms + keyword_in p_expr then + compstmt + in_clauses + keyword_end + { + node *in_clause = new_in(p, $5, NULL, $7, FALSE); + $$ = new_case_match(p, $2, cons(in_clause, $8)); + } + | keyword_case expr_value opt_terms + keyword_in p_expr modifier_if expr_value then + compstmt + in_clauses + keyword_end + { + node *in_clause = new_in(p, $5, $7, $9, FALSE); + $$ = new_case_match(p, $2, cons(in_clause, $10)); + } + | keyword_case expr_value opt_terms + keyword_in p_expr modifier_unless expr_value then + compstmt + in_clauses + keyword_end + { + node *in_clause = new_in(p, $5, $7, $9, TRUE); + $$ = new_case_match(p, $2, cons(in_clause, $10)); + } | keyword_for for_var keyword_in {COND_PUSH(1);} expr_value do @@ -2830,7 +3421,7 @@ primary : literal cpath superclass { if (p->in_def || p->in_single) - yyerror(p, "class definition in method body"); + yyerror(&@1, p, "class definition in method body"); $$ = local_switch(p); nvars_block(p); } @@ -2850,7 +3441,7 @@ primary : literal } term { - $$ = cons(local_switch(p), nint(p->in_single)); + $$ = cons(local_switch(p), int_to_node(p->in_single)); nvars_block(p); p->in_single = 0; } @@ -2862,13 +3453,13 @@ primary : literal local_resume(p, $6->car); nvars_unnest(p); p->in_def = $4; - p->in_single = intn($6->cdr); + p->in_single = node_to_int($6->cdr); } | keyword_module cpath { if (p->in_def || p->in_single) - yyerror(p, "module definition in method body"); + yyerror(&@1, p, "module definition in method body"); $$ = local_switch(p); nvars_block(p); } @@ -2896,7 +3487,7 @@ primary : literal keyword_end { $$ = $1; - defs_setup(p, $$, $2, $3); + defn_setup(p, $$, $2, $3); nvars_unnest(p); p->in_def--; p->in_single--; @@ -2964,33 +3555,33 @@ f_margs : f_arg } | f_arg ',' tSTAR f_norm_arg { - $$ = list3($1, new_arg(p, $4), 0); + $$ = list3($1, new_lvar(p, $4), 0); } | f_arg ',' tSTAR f_norm_arg ',' f_arg { - $$ = list3($1, new_arg(p, $4), $6); + $$ = list3($1, new_lvar(p, $4), $6); } | f_arg ',' tSTAR { local_add_f(p, intern_op(mul)); - $$ = list3($1, nint(-1), 0); + $$ = list3($1, int_to_node(-1), 0); } | f_arg ',' tSTAR ',' f_arg { - $$ = list3($1, nint(-1), $5); + $$ = list3($1, int_to_node(-1), $5); } | tSTAR f_norm_arg { - $$ = list3(0, new_arg(p, $2), 0); + $$ = list3(0, new_lvar(p, $2), 0); } | tSTAR f_norm_arg ',' f_arg { - $$ = list3(0, new_arg(p, $2), $4); + $$ = list3(0, new_lvar(p, $2), $4); } | tSTAR { local_add_f(p, intern_op(mul)); - $$ = list3(0, nint(-1), 0); + $$ = list3(0, int_to_node(-1), 0); } | tSTAR ',' { @@ -2998,7 +3589,7 @@ f_margs : f_arg } f_arg { - $$ = list3(0, nint(-1), $4); + $$ = list3(0, int_to_node(-1), $4); } ; @@ -3094,7 +3685,7 @@ block_param : f_arg ',' f_block_optarg ',' f_rest_arg opt_block_args_tail opt_block_param : none { - local_add_blk(p, 0); + local_add_blk(p); $$ = 0; } | block_param_def @@ -3104,13 +3695,13 @@ opt_block_param : none } ; -block_param_def : '|' {local_add_blk(p, 0);} opt_bv_decl '|' +block_param_def : '|' {local_add_blk(p);} opt_bv_decl '|' { $$ = 0; } | tOROP { - local_add_blk(p, 0); + local_add_blk(p); $$ = 0; } | '|' block_param opt_bv_decl '|' @@ -3119,7 +3710,6 @@ block_param_def : '|' {local_add_blk(p, 0);} opt_bv_decl '|' } ; - opt_bv_decl : opt_nl { $$ = 0; @@ -3166,12 +3756,14 @@ do_block : keyword_do_block { local_nest(p); nvars_nest(p); + $$ = p->lineno; } opt_block_param bodystmt keyword_end { $$ = new_block(p,$3,$4); + SET_LINENO($$, $2); local_unnest(p); nvars_unnest(p); } @@ -3179,12 +3771,7 @@ do_block : keyword_do_block block_call : command do_block { - if (typen($1->car) == NODE_YIELD) { - yyerror(p, "block given to yield"); - } - else { - call_with_block(p, $1, $2); - } + call_with_block(p, $1, $2); $$ = $1; } | block_call call_op2 operation2 opt_paren_args @@ -3221,11 +3808,11 @@ method_call : operation paren_args } | primary_value call_op paren_args { - $$ = new_call(p, $1, MRB_SYM_2(p->mrb, call), $3, $2); + $$ = new_call(p, $1, MRB_SYM(call), $3, $2); } | primary_value tCOLON2 paren_args { - $$ = new_call(p, $1, MRB_SYM_2(p->mrb, call), $3, tCOLON2); + $$ = new_call(p, $1, MRB_SYM(call), $3, tCOLON2); } | keyword_super paren_args { @@ -3291,139 +3878,441 @@ cases : opt_else | case_body ; -opt_rescue : keyword_rescue exc_list exc_var then - compstmt - opt_rescue - { - $$ = list1(list3($2, $3, $5)); - if ($6) $$ = append($$, $6); - } - | none - ; - -exc_list : arg +/* Pattern matching in-clauses for case/in */ +/* in_kwarg is set by lexer when keyword_in is returned */ +in_clauses : opt_else { - $$ = list1($1); + $$ = $1 ? list1(new_in(p, NULL, NULL, $1, FALSE)) : 0; } - | mrhs - | none - ; - -exc_var : tASSOC lhs + | keyword_in p_expr {p->in_kwarg--;} then compstmt in_clauses { - $$ = $2; + node *in_clause = new_in(p, $2, NULL, $5, FALSE); + $$ = cons(in_clause, $6); } - | none - ; - -opt_ensure : keyword_ensure compstmt + | keyword_in p_expr {p->in_kwarg--;} modifier_if expr_value then compstmt in_clauses { - $$ = $2; + node *in_clause = new_in(p, $2, $5, $7, FALSE); + $$ = cons(in_clause, $8); } - | none - ; - -literal : numeric - | symbol - | words - | symbols - ; - -string : string_fragment - | string string_fragment + | keyword_in p_expr {p->in_kwarg--;} modifier_unless expr_value then compstmt in_clauses { - $$ = concat_string(p, $1, $2); + node *in_clause = new_in(p, $2, $5, $7, TRUE); + $$ = cons(in_clause, $8); } ; -string_fragment : tCHAR - | tSTRING - | tSTRING_BEG tSTRING +/* Pattern expressions for case/in */ +/* Bracket-less array patterns: in 1, 2, x is same as in [1, 2, x] */ +/* Brace-less hash patterns: in a:, b: x is same as in {a:, b: x} */ +p_expr : p_as + | p_args_head p_as { - $$ = $2; + $$ = new_pat_array(p, push($1, $2), 0, 0); } - | tSTRING_BEG string_rep tSTRING + | p_args_head p_rest { - node *n = $2; - if (intn($3->cdr->cdr) > 0) { - n = push(n, $3); - } - else { - cons_free($3); - } - $$ = new_dstr(p, n); + $$ = new_pat_array(p, $1, $2, 0); } - ; - -string_rep : string_interp - | string_rep string_interp + | p_args_head p_rest ',' p_args_post { - $$ = append($1, $2); + $$ = new_pat_array(p, $1, $2, $4); } - ; - -string_interp : tSTRING_MID + | p_rest { - $$ = list1($1); + $$ = new_pat_array(p, 0, $1, 0); } - | tSTRING_PART + | p_rest ',' p_args_post { - $$ = push_strterm(p); + $$ = new_pat_array(p, 0, $1, $3); } - compstmt - '}' + | p_hash_elems { - pop_strterm(p,$2); - $$ = list2($1, $3); + /* Brace-less hash pattern: in a:, b: x */ + $$ = new_pat_hash(p, $1, 0); } - | tLITERAL_DELIM + | p_hash_elems ',' p_kwrest { - $$ = list1(new_literal_delim(p)); + /* Brace-less hash pattern with kwrest: in a:, **rest */ + $$ = new_pat_hash(p, $1, $3); } - | tHD_LITERAL_DELIM heredoc_bodies + | p_kwrest { - $$ = list1(new_literal_delim(p)); + /* Brace-less kwrest only: in **rest */ + $$ = new_pat_hash(p, 0, $1); } ; -xstring : tXSTRING_BEG tXSTRING +/* Comma-separated pattern list (prefix) */ +p_args_head : p_as ',' { - $$ = $2; + $$ = list1($1); } - | tXSTRING_BEG string_rep tXSTRING + | p_args_head p_as ',' { - node *n = $2; - if (intn($3->cdr->cdr) > 0) { - n = push(n, $3); - } - else { - cons_free($3); - } - $$ = new_dxstr(p, n); + $$ = push($1, $2); } ; -regexp : tREGEXP_BEG tREGEXP +/* Comma-separated pattern list (suffix, no trailing comma) */ +p_args_post : p_as { - $$ = $2; + $$ = list1($1); } - | tREGEXP_BEG string_rep tREGEXP + | p_args_post ',' p_as { - $$ = new_dregx(p, $2, $3); + $$ = push($1, $3); } ; -heredoc : tHEREDOC_BEG +p_as : p_alt + | p_alt tASSOC tIDENTIFIER + { + $$ = new_pat_as(p, $1, $3); + } ; -heredoc_bodies : heredoc_body - | heredoc_bodies heredoc_body +p_alt : p_value + | p_alt '|' p_value + { + $$ = new_pat_alt(p, $1, $3); + } ; -heredoc_body : tHEREDOC_END +p_value : p_var + | numeric + { + $$ = new_pat_value(p, $1); + } + | symbol + { + $$ = new_pat_value(p, $1); + } + | string + { + $$ = new_pat_value(p, new_str(p, $1)); + } + | keyword_nil + { + $$ = new_pat_value(p, new_nil(p)); + } + | keyword_true + { + $$ = new_pat_value(p, new_true(p)); + } + | keyword_false + { + $$ = new_pat_value(p, new_false(p)); + } + | p_const + { + $$ = new_pat_value(p, $1); + } + | p_array + | p_hash + | '^' tIDENTIFIER + { + $$ = new_pat_pin(p, $2); + } + ; + +/* Array pattern: [a, b, *rest, c] */ +p_array : tLBRACK p_array_body ']' + { + $$ = $2; + } + | tLBRACK ']' + { + $$ = new_pat_array(p, 0, 0, 0); + } + ; + +/* Array pattern body - pre elements, optional rest, post elements */ +p_array_body : p_array_elems + { + /* Just pre elements, no rest */ + $$ = new_pat_array(p, $1, 0, 0); + } + | p_array_elems ',' p_rest + { + /* Pre elements + rest, no post */ + $$ = new_pat_array(p, $1, $3, 0); + } + | p_array_elems ',' p_rest ',' p_array_elems + { + /* Pre + rest + post */ + $$ = new_pat_array(p, $1, $3, $5); + } + | p_rest + { + /* Just rest, no pre or post */ + $$ = new_pat_array(p, 0, $1, 0); + } + | p_rest ',' p_array_elems + { + /* Rest + post, no pre */ + $$ = new_pat_array(p, 0, $1, $3); + } + | p_rest ',' p_array_elems ',' p_rest + { + /* Find pattern: [*pre, elems, *post] */ + $$ = new_pat_find(p, $1, $3, $5); + } + ; + +/* Non-rest array pattern elements - use p_as, not p_expr to avoid bracket-less recursion */ +p_array_elems : p_as + { + $$ = list1($1); + } + | p_array_elems ',' p_as + { + $$ = push($1, $3); + } + ; + +/* Rest pattern in array: *var, *_, or just * */ +p_rest : tSTAR tIDENTIFIER + { + $$ = new_pat_var(p, $2); + } + | tSTAR + { + /* Anonymous rest pattern */ + $$ = (node*)-1; + } + ; + +/* Constant path for pattern matching: Foo, Foo::Bar, ::Foo */ +p_const : tCONSTANT + { + $$ = new_const(p, $1); + } + | p_const tCOLON2 tCONSTANT + { + $$ = new_colon2(p, $1, $3); + } + | tCOLON3 tCONSTANT + { + $$ = new_colon3(p, $2); + } + ; + +/* Hash pattern: {a:, b: x, **rest} */ +p_hash : tLBRACE p_hash_body '}' + { + $$ = $2; + } + | tLBRACE '}' + { + $$ = new_pat_hash(p, 0, 0); + } + ; + +/* Hash pattern body - pairs and optional kwrest */ +p_hash_body : p_hash_elems + { + $$ = new_pat_hash(p, $1, 0); + } + | p_hash_elems ',' p_kwrest + { + $$ = new_pat_hash(p, $1, $3); + } + | p_kwrest + { + $$ = new_pat_hash(p, 0, $1); + } + ; + +/* Hash pattern element list */ +p_hash_elems : p_hash_elem + { + $$ = list1($1); + } + | p_hash_elems ',' p_hash_elem + { + $$ = push($1, $3); + } + ; + +/* Hash pattern element: key: pattern or key: (shorthand) */ +/* Use p_as, not p_expr to avoid brace-less recursion inside hash patterns */ +/* Note: CRuby only supports label syntax (foo:), not hashrocket (:foo =>) */ +p_hash_elem : tIDENTIFIER tLABEL_TAG p_as + { + /* {key: pattern} */ + $$ = cons(new_sym(p, $1), $3); + } + | tIDENTIFIER tLABEL_TAG + { + /* {key:} shorthand - binds to variable with same name */ + $$ = cons(new_sym(p, $1), new_pat_var(p, $1)); + } + ; + +/* Keyword rest pattern: **var, **nil, or ** */ +p_kwrest : tDSTAR tIDENTIFIER + { + $$ = new_pat_var(p, $2); + } + | tDSTAR keyword_nil + { + /* **nil - exact match, no extra keys allowed */ + $$ = (node*)-1; + } + | tDSTAR + { + /* ** - anonymous rest, discards extra keys */ + $$ = (node*)-2; + } + ; + +p_var : tIDENTIFIER + { + $$ = new_pat_var(p, $1); + } + ; + +opt_rescue : keyword_rescue exc_list exc_var then + compstmt + opt_rescue + { + $$ = list1(list3($2, $3, $5)); + if ($6) $$ = append($$, $6); + } + | none + ; + +exc_list : arg + { + $$ = list1($1); + } + | mrhs + | none + ; + +exc_var : tASSOC lhs + { + $$ = $2; + } + | none + ; + +opt_ensure : keyword_ensure compstmt + { + $$ = $2; + } + | none + ; + +literal : numeric + | symbol + | words + | symbols + ; + +string : string_fragment + | string string_fragment + { + $$ = append($1, $2); + } + ; + +string_fragment : tCHAR + { + /* tCHAR is (len . str), wrap as cons list */ + $$ = list1($1); + } + | tSTRING + { + /* tSTRING is (len . str), wrap as cons list */ + $$ = list1($1); + } + | tSTRING_BEG tSTRING + { + /* $2 is (len . str), wrap as cons list */ + $$ = list1($2); + } + | tSTRING_BEG string_rep tSTRING + { + $$ = push($2, $3); + } + ; + +string_rep : string_interp + | string_rep string_interp + { + $$ = append($1, $2); + } + ; + +string_interp : tSTRING_MID + { + /* $1 is already in (len . str) format */ + $$ = list1($1); + } + | tSTRING_PART + { + $$ = push_strterm(p); + } + compstmt + '}' + { + pop_strterm(p,$2); + /* $1 is already in (len . str) format, create (-1 . node) for expression */ + node *expr_elem = cons(int_to_node(-1), $3); + $$ = list2($1, expr_elem); + } + | tLITERAL_DELIM + { + $$ = list1(new_literal_delim(p)); + } + | tHD_LITERAL_DELIM heredoc_bodies + { + $$ = list1(new_literal_delim(p)); + } + ; + +xstring : tXSTRING_BEG tXSTRING + { + $$ = cons($2, (node*)NULL); + } + | tXSTRING_BEG string_rep tXSTRING + { + $$ = push($2, $3); + } + ; + +regexp : tREGEXP_BEG tREGEXP + { + node *data = $2; /* ((len . pattern) . (flags . encoding)) */ + const char *flags = (const char*)data->cdr->car; + const char *encoding = (const char*)data->cdr->cdr; + /* Use data->car directly as pattern_list: (len . pattern) */ + node *pattern_list = cons(data->car, (node*)NULL); + $$ = new_regx(p, pattern_list, flags, encoding); + } + | tREGEXP_BEG string_rep tREGEXP + { + node *data = $3; /* ((len . pattern) . (flags . encoding)) */ + const char *flags = (const char*)data->cdr->car; + const char *encoding = (const char*)data->cdr->cdr; + /* Append the pattern from $3->car to the string list $2 */ + node *complete_list = push($2, data->car); + $$ = new_regx(p, complete_list, flags, encoding); + } + ; + +heredoc : tHEREDOC_BEG + ; + +heredoc_bodies : heredoc_body + | heredoc_bodies heredoc_body + ; + +heredoc_body : tHEREDOC_END { parser_heredoc_info *info = parsing_heredoc_info(p); - info->doc = push(info->doc, new_str(p, "", 0)); + info->doc = push(info->doc, new_str_empty(p)); heredoc_end(p); } | heredoc_string_rep tHEREDOC_END @@ -3451,7 +4340,9 @@ heredoc_string_interp : tHD_STRING_MID { pop_strterm(p, $2); parser_heredoc_info *info = parsing_heredoc_info(p); - info->doc = push(push(info->doc, $1), $3); + /* $1 is already in (len . str) format, create (-1 . node) for expression */ + node *expr_elem = cons(int_to_node(-1), $3); + info->doc = push(push(info->doc, $1), expr_elem); } ; @@ -3462,38 +4353,37 @@ words : tWORDS_BEG tSTRING | tWORDS_BEG string_rep tSTRING { node *n = $2; - if (intn($3->cdr->cdr) > 0) { - n = push(n, $3); - } - else { - cons_free($3); - } + n = push(n, $3); $$ = new_words(p, n); } ; - symbol : basic_symbol { - p->lstate = EXPR_ENDARG; $$ = new_sym(p, $1); } | tSYMBEG tSTRING_BEG string_rep tSTRING { node *n = $3; p->lstate = EXPR_ENDARG; - if (intn($4->cdr->cdr) > 0) { + if (node_to_int($4->car) > 0) { n = push(n, $4); } else { cons_free($4); } - $$ = new_dsym(p, new_dstr(p, n)); + $$ = new_dsym(p, n); + } + | tSYMBEG tNUMPARAM + { + mrb_sym sym = intern_numparam($2); + $$ = new_sym(p, sym); } ; basic_symbol : tSYMBEG sym { + p->lstate = EXPR_END; $$ = $2; } ; @@ -3519,9 +4409,7 @@ symbols : tSYMBOLS_BEG tSTRING | tSYMBOLS_BEG string_rep tSTRING { node *n = $2; - if (intn($3->cdr->cdr) > 0) { - n = push(n, $3); - } + n = push(n, $3); $$ = new_symbols(p, n); } ; @@ -3566,7 +4454,7 @@ var_lhs : variable } | tNUMPARAM { - yyerror(p, "can't assign to numbered parameter"); + yyerror(&@1, p, "can't assign to numbered parameter"); } ; @@ -3574,6 +4462,10 @@ var_ref : variable { $$ = var_reference(p, $1); } + | tNUMPARAM + { + $$ = new_nvar(p, $1); + } | keyword_nil { $$ = new_nil(p); @@ -3596,7 +4488,7 @@ var_ref : variable if (!fn) { fn = "(null)"; } - $$ = new_str(p, fn, strlen(fn)); + $$ = new_str(p, cons(cons(int_to_node(strlen(fn)), (node*)fn), (node*)NULL)); } | keyword__LINE__ { @@ -3607,7 +4499,7 @@ var_ref : variable } | keyword__ENCODING__ { - $$ = new_fcall(p, MRB_SYM_2(p->mrb, __ENCODING__), 0); + $$ = new_fcall(p, MRB_SYM(__ENCODING__), 0); } ; @@ -3673,11 +4565,15 @@ f_arglist : f_arglist_paren f_label : tIDENTIFIER tLABEL_TAG { + $$ = $1; local_nest(p); + p->lstate = EXPR_MID; /* make newlines significant after label */ } | tNUMPARAM tLABEL_TAG { + $$ = intern_numparam($1); local_nest(p); + p->lstate = EXPR_MID; /* make newlines significant after label */ } ; @@ -3733,11 +4629,11 @@ kwrest_mark : tPOW f_kwrest : kwrest_mark tIDENTIFIER { - $$ = new_kw_rest_args(p, $2); + $$ = $2; } | kwrest_mark { - $$ = new_kw_rest_args(p, 0); + $$ = intern_op(pow); } ; @@ -3763,6 +4659,10 @@ opt_args_tail : ',' args_tail { $$ = $2; } + | ',' + { + $$ = new_args_tail(p, 0, 0, 0); + } | /* none */ { $$ = new_args_tail(p, 0, 0, 0); @@ -3834,27 +4734,27 @@ f_args : f_arg ',' f_optarg ',' f_rest_arg opt_args_tail f_bad_arg : tCONSTANT { - yyerror(p, "formal argument cannot be a constant"); + yyerror(&@1, p, "formal argument cannot be a constant"); $$ = 0; } | tIVAR { - yyerror(p, "formal argument cannot be an instance variable"); + yyerror(&@1, p, "formal argument cannot be an instance variable"); $$ = 0; } | tGVAR { - yyerror(p, "formal argument cannot be a global variable"); + yyerror(&@1, p, "formal argument cannot be a global variable"); $$ = 0; } | tCVAR { - yyerror(p, "formal argument cannot be a class variable"); + yyerror(&@1, p, "formal argument cannot be a class variable"); $$ = 0; } | tNUMPARAM { - yyerror(p, "formal argument cannot be a numbered parameter"); + yyerror(&@1, p, "formal argument cannot be a numbered parameter"); $$ = 0; } ; @@ -3872,7 +4772,7 @@ f_norm_arg : f_bad_arg f_arg_item : f_norm_arg { - $$ = new_arg(p, $1); + $$ = new_lvar(p, $1); } | tLPAREN { @@ -3880,7 +4780,7 @@ f_arg_item : f_norm_arg } f_margs rparen { - $$ = new_masgn_param(p, $3, p->locals->car); + $$ = new_marg(p, $3); local_resume(p, $2); local_add_f(p, 0); } @@ -3907,7 +4807,7 @@ f_opt_asgn : tIDENTIFIER '=' f_opt : f_opt_asgn arg { void_expr_error(p, $2); - $$ = cons(nsym($1), cons($2, locals_node(p))); + $$ = cons(sym_to_node($1), cons($2, locals_node(p))); local_unnest(p); } ; @@ -3915,7 +4815,7 @@ f_opt : f_opt_asgn arg f_block_opt : f_opt_asgn primary_value { void_expr_error(p, $2); - $$ = cons(nsym($1), cons($2, locals_node(p))); + $$ = cons(sym_to_node($1), cons($2, locals_node(p))); local_unnest(p); } ; @@ -3964,6 +4864,10 @@ f_block_arg : blkarg_mark tIDENTIFIER { $$ = $2; } + | blkarg_mark keyword_nil + { + $$ = MRB_SYM(nil); + } | blkarg_mark { $$ = intern_op(and); @@ -3974,6 +4878,10 @@ opt_f_block_arg : ',' f_block_arg { $$ = $2; } + | ',' + { + $$ = 0; + } | none { $$ = 0; @@ -3982,30 +4890,13 @@ opt_f_block_arg : ',' f_block_arg singleton : var_ref { + prohibit_literals(p, $1); $$ = $1; if (!$$) $$ = new_nil(p); } | '(' {p->lstate = EXPR_BEG;} expr rparen { - if ($3 == 0) { - yyerror(p, "can't define singleton method for ()."); - } - else { - switch (typen($3->car)) { - case NODE_STR: - case NODE_DSTR: - case NODE_XSTR: - case NODE_DXSTR: - case NODE_DREGX: - case NODE_MATCH: - case NODE_FLOAT: - case NODE_ARRAY: - case NODE_HEREDOC: - yyerror(p, "can't define singleton method for literals"); - default: - break; - } - } + prohibit_literals(p, $3); $$ = $3; } ; @@ -4020,7 +4911,6 @@ assoc_list : none assocs : assoc { $$ = list1($1); - NODE_LINENO($$, $1); } | assocs comma assoc { @@ -4056,11 +4946,17 @@ assoc : arg tASSOC arg | string_fragment tLABEL_TAG arg { void_expr_error(p, $3); - if (typen($1->car) == NODE_DSTR) { + if ($1->cdr) { + /* Multiple fragments - create dynamic symbol */ + $$ = cons(new_dsym(p, $1), $3); + } + else if (node_to_int($1->car->car) < 0) { + /* Single fragment but it's an expression (-1 . node) - create dynamic symbol */ $$ = cons(new_dsym(p, $1), $3); } else { - $$ = cons(new_sym(p, new_strsym(p, $1)), $3); + /* Single string fragment - create simple symbol */ + $$ = cons(new_sym(p, new_strsym(p, $1->car)), $3); } } | tDSTAR arg @@ -4152,7 +5048,7 @@ none : /* none */ #define pylval (*((YYSTYPE*)(p->ylval))) static void -yyerror(parser_state *p, const char *s) +yyerror(void *lp, parser_state *p, const char *s) { char* c; size_t n; @@ -4187,7 +5083,7 @@ yyerror_c(parser_state *p, const char *msg, char c) strncpy(buf, msg, sizeof(buf) - 2); buf[sizeof(buf) - 2] = '\0'; strncat(buf, &c, 1); - yyerror(p, buf); + yyerror(NULL, p, buf); } static void @@ -4235,52 +5131,72 @@ backref_error(parser_state *p, node *n) { int c; - c = intn(n->car); + c = node_to_int(n->car); if (c == NODE_NTH_REF) { - yyerror_c(p, "can't set variable $", (char)intn(n->cdr)+'0'); + yyerror_c(p, "can't set variable $", (char)node_to_int(n->cdr)+'0'); } else if (c == NODE_BACK_REF) { - yyerror_c(p, "can't set variable $", (char)intn(n->cdr)); + yyerror_c(p, "can't set variable $", (char)node_to_int(n->cdr)); } else { - mrb_bug(p->mrb, "Internal error in backref_error() : n=>car == %d", c); + yyerror(NULL, p, "Internal error in backref_error()"); } } static void void_expr_error(parser_state *p, node *n) { - int c; - if (n == NULL) return; - c = intn(n->car); - switch (c) { - case NODE_BREAK: - case NODE_RETURN: - case NODE_NEXT: - case NODE_REDO: - case NODE_RETRY: - yyerror(p, "void value expression"); - break; - case NODE_AND: - case NODE_OR: - if (n->cdr) { - void_expr_error(p, n->cdr->car); - void_expr_error(p, n->cdr->cdr); - } - break; - case NODE_BEGIN: - if (n->cdr) { - while (n->cdr) { - n = n->cdr; + + /* Check if this is a variable-sized node first */ + struct mrb_ast_var_header *header = (struct mrb_ast_var_header*)n; + if (header) { + /* Handle variable-sized nodes */ + switch ((enum node_type)header->node_type) { + case NODE_BREAK: + case NODE_RETURN: + case NODE_NEXT: + case NODE_REDO: + case NODE_RETRY: + yyerror(NULL, p, "void value expression"); + return; + case NODE_AND: + case NODE_OR: + { + struct mrb_ast_and_node *and_n = (struct mrb_ast_and_node*)n; + void_expr_error(p, (node*)and_n->left); + void_expr_error(p, (node*)and_n->right); + } + return; + case NODE_STMTS: + { + struct mrb_ast_stmts_node *stmts = (struct mrb_ast_stmts_node*)n; + node *last = stmts->stmts; + if (last) { + /* Find the last statement in the cons list */ + while (last->cdr) { + last = last->cdr; + } + void_expr_error(p, last->car); + } + } + return; + case NODE_BEGIN: + { + struct mrb_ast_begin_node *begin_n = (struct mrb_ast_begin_node*)n; + if (begin_n->body) { + void_expr_error(p, (node*)begin_n->body); + } } - void_expr_error(p, n->car); + return; + default: + /* Other variable-sized nodes are OK */ + return; } - break; - default: - break; } + + /* Should not reach here - all nodes should be variable-sized now */ } static void pushback(parser_state *p, int c); @@ -4314,7 +5230,7 @@ nextc(parser_state *p) if (p->pb) { node *tmp; - c = intn(p->pb->car); + c = node_to_int(p->pb->car); tmp = p->pb; p->pb = p->pb->cdr; cons_free(tmp); @@ -4350,7 +5266,7 @@ pushback(parser_state *p, int c) if (c >= 0) { p->column--; } - p->pb = cons(nint(c), p->pb); + p->pb = cons(int_to_node(c), p->pb); } static void @@ -4375,7 +5291,7 @@ peekc_n(parser_state *p, int n) c0 = nextc(p); if (c0 == -1) return c0; /* do not skip partial EOF */ if (c0 >= 0) --p->column; - list = push(list, nint(c0)); + list = push(list, int_to_node(c0)); } while(n--); if (p->pb) { p->pb = append(list, p->pb); @@ -4442,19 +5358,18 @@ skips(parser_state *p, const char *s) } return TRUE; } - else{ + else { s--; } } return FALSE; } - static int newtok(parser_state *p) { if (p->tokbuf != p->buf) { - mrb_free(p->mrb, p->tokbuf); + mrbc_free(p->tokbuf); p->tokbuf = p->buf; p->tsiz = MRB_PARSER_TOKBUF_SIZE; } @@ -4475,30 +5390,8 @@ tokadd(parser_state *p, int32_t c) len = 1; } else { - /* Unicode character */ - c = -c; - if (c < 0x80) { - utf8[0] = (char)c; - len = 1; - } - else if (c < 0x800) { - utf8[0] = (char)(0xC0 | (c >> 6)); - utf8[1] = (char)(0x80 | (c & 0x3F)); - len = 2; - } - else if (c < 0x10000) { - utf8[0] = (char)(0xE0 | (c >> 12) ); - utf8[1] = (char)(0x80 | ((c >> 6) & 0x3F)); - utf8[2] = (char)(0x80 | ( c & 0x3F)); - len = 3; - } - else { - utf8[0] = (char)(0xF0 | (c >> 18) ); - utf8[1] = (char)(0x80 | ((c >> 12) & 0x3F)); - utf8[2] = (char)(0x80 | ((c >> 6) & 0x3F)); - utf8[3] = (char)(0x80 | ( c & 0x3F)); - len = 4; - } + /* Unicode character (negative c indicates codepoint) */ + len = (int)mrb_utf8_to_buf(utf8, (uint32_t)(-c)); } if (p->tidx+len >= p->tsiz) { if (p->tsiz >= MRB_PARSER_TOKBUF_MAX) { @@ -4507,11 +5400,11 @@ tokadd(parser_state *p, int32_t c) } p->tsiz *= 2; if (p->tokbuf == p->buf) { - p->tokbuf = (char*)mrb_malloc(p->mrb, p->tsiz); + p->tokbuf = (char*)mrbc_malloc(p->tsiz); memcpy(p->tokbuf, p->buf, MRB_PARSER_TOKBUF_SIZE); } else { - p->tokbuf = (char*)mrb_realloc(p->mrb, p->tokbuf, p->tsiz); + p->tokbuf = (char*)mrbc_realloc(p->tokbuf, p->tsiz); } } for (i = 0; i < len; i++) { @@ -4530,7 +5423,7 @@ tokfix(parser_state *p) { if (p->tidx >= MRB_PARSER_TOKBUF_MAX) { p->tidx = MRB_PARSER_TOKBUF_MAX-1; - yyerror(p, "string too long (truncated)"); + yyerror(NULL, p, "string too long (truncated)"); } p->tokbuf[p->tidx] = '\0'; } @@ -4551,7 +5444,7 @@ toklen(parser_state *p) #define IS_END() (p->lstate == EXPR_END || p->lstate == EXPR_ENDARG || p->lstate == EXPR_ENDFN) #define IS_BEG() (p->lstate == EXPR_BEG || p->lstate == EXPR_MID || p->lstate == EXPR_VALUE || p->lstate == EXPR_CLASS) #define IS_SPCARG(c) (IS_ARG() && space_seen && !ISSPACE(c)) -#define IS_LABEL_POSSIBLE() ((p->lstate == EXPR_BEG && !cmd_state) || IS_ARG()) +#define IS_LABEL_POSSIBLE() ((p->lstate == EXPR_BEG && !cmd_state) || IS_ARG() || p->lstate == EXPR_VALUE) #define IS_LABEL_SUFFIX(n) (peek_n(p, ':',(n)) && !peek_n(p, ':', (n)+1)) static int32_t @@ -4601,7 +5494,7 @@ read_escape_unicode(parser_state *p, int limit) buf[0] = nextc(p); if (buf[0] < 0) { eof: - yyerror(p, "invalid escape character syntax"); + yyerror(NULL, p, "invalid escape character syntax"); return -1; } if (ISXDIGIT(buf[0])) { @@ -4620,7 +5513,7 @@ read_escape_unicode(parser_state *p, int limit) } hex = scan_hex(p, buf, i, &i); if (i == 0 || hex > 0x10FFFF || (hex & 0xFFFFF800) == 0xD800) { - yyerror(p, "invalid Unicode code point"); + yyerror(NULL, p, "invalid Unicode code point"); return -1; } return hex; @@ -4690,7 +5583,7 @@ read_escape(parser_state *p) } } if (i == 0) { - yyerror(p, "invalid hex escape"); + yyerror(NULL, p, "invalid hex escape"); return -1; } return scan_hex(p, buf, i, &i); @@ -4718,7 +5611,7 @@ read_escape(parser_state *p) case 'M': if ((c = nextc(p)) != '-') { - yyerror(p, "Invalid escape character syntax"); + yyerror(NULL, p, "Invalid escape character syntax"); pushback(p, c); return '\0'; } @@ -4732,7 +5625,7 @@ read_escape(parser_state *p) case 'C': if ((c = nextc(p)) != '-') { - yyerror(p, "Invalid escape character syntax"); + yyerror(NULL, p, "Invalid escape character syntax"); pushback(p, c); return '\0'; } @@ -4748,7 +5641,7 @@ read_escape(parser_state *p) eof: case -1: case -2: /* end of a file */ - yyerror(p, "Invalid escape character syntax"); + yyerror(NULL, p, "Invalid escape character syntax"); return '\0'; default: @@ -4791,8 +5684,8 @@ heredoc_remove_indent(parser_state *p, parser_heredoc_info *hinfo) while (indented) { n = indented->car; pair = n->car; - str = (char*)pair->car; - len = (size_t)pair->cdr; + len = (size_t)pair->car; + str = (char*)pair->cdr; escaped = n->cdr->car; nspaces = n->cdr->cdr; if (escaped) { @@ -4815,13 +5708,14 @@ heredoc_remove_indent(parser_state *p, parser_heredoc_info *hinfo) } if (newlen < len) newstr[newlen] = '\0'; - pair->car = (node*)newstr; - pair->cdr = (node*)newlen; - } else { + pair->car = (node*)newlen; + pair->cdr = (node*)newstr; + } + else { spaces = (size_t)nspaces->car; heredoc_count_indent(hinfo, str, len, spaces, &offset); - pair->car = (node*)(str + offset); - pair->cdr = (node*)(len - offset); + pair->car = (node*)(len - offset); + pair->cdr = (node*)(str + offset); } indented = indented->cdr; } @@ -4876,8 +5770,8 @@ parse_string(parser_state *p) int len = toklen(p); if (hinfo->allow_indent) { while (ISSPACE(*s) && len > 0) { - ++s; - --len; + s++; + len--; } } if (hinfo->term_len > 0 && len-1 == hinfo->term_len && strncmp(s, hinfo->term, len-1) == 0) { @@ -4891,20 +5785,20 @@ parse_string(parser_state *p) const char s2[] = "\" anywhere before EOF"; if (sizeof(s1)+sizeof(s2)+strlen(hinfo->term)+1 >= sizeof(buf)) { - yyerror(p, "can't find heredoc delimiter anywhere before EOF"); - } else { + yyerror(NULL, p, "can't find heredoc delimiter anywhere before EOF"); + } + else { strcpy(buf, s1); strcat(buf, hinfo->term); strcat(buf, s2); - yyerror(p, buf); + yyerror(NULL, p, buf); } return 0; } - node *nd = new_str(p, tok(p), toklen(p)); - pylval.nd = nd; + pylval.nd = new_str_tok(p); if (unindent && head) { - nspaces = push(nspaces, nint(spaces)); - heredoc_push_indented(p, hinfo, nd->cdr, escaped, nspaces, empty && line_head); + nspaces = push(nspaces, int_to_node(spaces)); + heredoc_push_indented(p, hinfo, pylval.nd, escaped, nspaces, empty && line_head); } return tHD_STRING_MID; } @@ -4912,12 +5806,12 @@ parse_string(parser_state *p) if (c == '\t') spaces += 8; else if (ISSPACE(c)) - ++spaces; + spaces++; else empty = FALSE; } if (c < 0) { - yyerror(p, "unterminated string meets end of file"); + yyerror(NULL, p, "unterminated string meets end of file"); return 0; } else if (c == beg) { @@ -4938,8 +5832,8 @@ parse_string(parser_state *p) p->lineno++; p->column = 0; if (unindent) { - nspaces = push(nspaces, nint(spaces)); - escaped = push(escaped, nint(pos)); + nspaces = push(nspaces, int_to_node(spaces)); + escaped = push(escaped, int_to_node(pos)); pos--; empty = TRUE; spaces = 0; @@ -4993,12 +5887,11 @@ parse_string(parser_state *p) tokfix(p); p->lstate = EXPR_BEG; p->cmd_start = TRUE; - node *nd = new_str(p, tok(p), toklen(p)); - pylval.nd = nd; + pylval.nd = new_str_tok(p); if (hinfo) { if (unindent && head) { - nspaces = push(nspaces, nint(spaces)); - heredoc_push_indented(p, hinfo, nd->cdr, escaped, nspaces, FALSE); + nspaces = push(nspaces, int_to_node(spaces)); + heredoc_push_indented(p, hinfo, pylval.nd, escaped, nspaces, FALSE); } hinfo->line_head = FALSE; return tHD_STRING_PART; @@ -5028,7 +5921,7 @@ parse_string(parser_state *p) else { pushback(p, c); tokfix(p); - pylval.nd = new_str(p, tok(p), toklen(p)); + pylval.nd = new_str_tok(p); return tSTRING_MID; } } @@ -5040,18 +5933,19 @@ parse_string(parser_state *p) } tokfix(p); - p->lstate = EXPR_ENDARG; + p->lstate = EXPR_END; end_strterm(p); if (type & STR_FUNC_XQUOTE) { - pylval.nd = new_xstr(p, tok(p), toklen(p)); + pylval.nd = new_str_tok(p); return tXSTRING; } if (type & STR_FUNC_REGEXP) { int f = 0; int re_opt; - char *s = strndup(tok(p), toklen(p)); + int pattern_len = toklen(p); + char *s = strndup(tok(p), pattern_len); char flags[3]; char *flag = flags; char enc = '\0'; @@ -5081,7 +5975,7 @@ parse_string(parser_state *p) } strcat(msg, " - "); strncat(msg, tok(p), sizeof(msg) - strlen(msg) - 1); - yyerror(p, msg); + yyerror(NULL, p, msg); } if (f != 0) { if (f & 1) *flag++ = 'i'; @@ -5102,11 +5996,11 @@ parse_string(parser_state *p) else { encp = NULL; } - pylval.nd = new_regx(p, s, dup, encp); + pylval.nd = cons(cons(int_to_node(pattern_len), (node*)s), cons((node*)dup, (node*)encp)); return tREGEXP; } - pylval.nd = new_str(p, tok(p), toklen(p)); + pylval.nd = new_str_tok(p); return tSTRING; } @@ -5120,7 +6014,7 @@ number_literal_suffix(parser_state *p) int mask = NUM_SUFFIX_R|NUM_SUFFIX_I; while ((c = nextc(p)) != -1) { - list = push(list, nint(c)); + list = push(list, int_to_node(c)); if ((mask & NUM_SUFFIX_I) && c == 'i') { result |= (mask & NUM_SUFFIX_I); @@ -5186,7 +6080,7 @@ heredoc_identifier(parser_state *p) tokadd(p, c); } if (c < 0) { - yyerror(p, "unterminated here document identifier"); + yyerror(NULL, p, "unterminated here document identifier"); return 0; } } @@ -5207,8 +6101,7 @@ heredoc_identifier(parser_state *p) pushback(p, c); } tokfix(p); - newnode = new_heredoc(p); - info = (parser_heredoc_info*)newnode->cdr; + newnode = new_heredoc(p, &info); info->term = strndup(tok(p), toklen(p)); info->term_len = toklen(p); if (! quote) @@ -5246,6 +6139,11 @@ parser_yylex(parser_state *p) enum mrb_lex_state_enum last_state; int token_column; + /* Early termination if too many errors - prevents DoS from malformed input */ + if (p->nerr > 10) { + return 0; /* EOF */ + } + if (p->lex_strterm) { if (is_strterm_type(p, STR_FUNC_HEREDOC)) { if (p->parsing_heredoc != NULL) @@ -5415,7 +6313,7 @@ parser_yylex(parser_state *p) if (c < 0 || ISSPACE(c)) { do { if (!skips(p, end)) { - yyerror(p, "embedded document meets end of file"); + yyerror(NULL, p, "embedded document meets end of file"); return 0; } c = nextc(p); @@ -5540,7 +6438,7 @@ parser_yylex(parser_state *p) } c = nextc(p); if (c < 0) { - yyerror(p, "incomplete character syntax"); + yyerror(NULL, p, "incomplete character syntax"); return 0; } if (ISSPACE(c)) { @@ -5575,7 +6473,7 @@ parser_yylex(parser_state *p) strcpy(buf, "invalid character syntax; use ?\\"); strncat(buf, cc, 2); - yyerror(p, buf); + yyerror(NULL, p, buf); } } ternary: @@ -5600,8 +6498,8 @@ parser_yylex(parser_state *p) tokadd(p, c); } tokfix(p); - pylval.nd = new_str(p, tok(p), toklen(p)); - p->lstate = EXPR_ENDARG; + pylval.nd = new_str_tok(p); + p->lstate = EXPR_END; return tCHAR; case '&': @@ -5741,7 +6639,7 @@ parser_yylex(parser_state *p) pushback(p, c); p->lstate = EXPR_BEG; if (c >= 0 && ISDIGIT(c)) { - yyerror(p, "no . floating literal anymore; put 0 before dot"); + yyerror(NULL, p, "no . floating literal anymore; put 0 before dot"); } p->lstate = EXPR_DOT; return '.'; @@ -5755,14 +6653,17 @@ parser_yylex(parser_state *p) int suffix = 0; is_float = seen_point = seen_e = nondigit = 0; - p->lstate = EXPR_ENDARG; + p->lstate = EXPR_END; newtok(p); - if (c == '-' || c == '+') { + if (c == '-') { tokadd(p, c); c = nextc(p); } + else if (c == '+') { + c = nextc(p); + } if (c == '0') { -#define no_digits() do {yyerror(p,"numeric literal without digits"); return 0;} while (0) +#define no_digits() do {yyerror(NULL, p,"numeric literal without digits"); return 0;} while (0) int start = toklen(p); c = nextc(p); if (c == 'x' || c == 'X') { @@ -5881,7 +6782,7 @@ parser_yylex(parser_state *p) } if (c > '7' && c <= '9') { invalid_octal: - yyerror(p, "Invalid octal digit"); + yyerror(NULL, p, "Invalid octal digit"); } else if (c == '.' || c == 'e' || c == 'E') { tokadd(p, '0'); @@ -5967,17 +6868,10 @@ parser_yylex(parser_state *p) return tINTEGER; #else double d; - char *endp; - errno = 0; - d = mrb_float_read(tok(p), &endp); - if (d == 0 && endp == tok(p)) { + if (!mrb_read_float(tok(p), NULL, &d)) { yywarning_s(p, "corrupted float value", tok(p)); } - else if (errno == ERANGE) { - yywarning_s(p, "float out of range", tok(p)); - errno = 0; - } suffix = number_literal_suffix(p); if (seen_e && (suffix & NUM_SUFFIX_R)) { pushback(p, 'r'); @@ -6017,7 +6911,8 @@ parser_yylex(parser_state *p) } if (!space_seen && IS_END()) { pushback(p, c); - p->lstate = EXPR_BEG; + /* In pattern matching context, use EXPR_ARG so newlines are significant */ + p->lstate = p->in_kwarg ? EXPR_ARG : EXPR_BEG; return tLABEL_TAG; } if (IS_END() || ISSPACE(c) || c == '#') { @@ -6174,12 +7069,12 @@ parser_yylex(parser_state *p) else { term = nextc(p); if (ISALNUM(term)) { - yyerror(p, "unknown type of %string"); + yyerror(NULL, p, "unknown type of %string"); return 0; } } if (c < 0 || term < 0) { - yyerror(p, "unterminated quoted string meets end of file"); + yyerror(NULL, p, "unterminated quoted string meets end of file"); return 0; } paren = term; @@ -6227,7 +7122,7 @@ parser_yylex(parser_state *p) return tSYMBOLS_BEG; default: - yyerror(p, "unknown type of %string"); + yyerror(NULL, p, "unknown type of %string"); return 0; } } @@ -6253,7 +7148,7 @@ parser_yylex(parser_state *p) token_column = newtok(p); c = nextc(p); if (c < 0) { - yyerror(p, "incomplete global variable syntax"); + yyerror(NULL, p, "incomplete global variable syntax"); return 0; } switch (c) { @@ -6322,8 +7217,8 @@ parser_yylex(parser_state *p) if (last_state == EXPR_FNAME) goto gvar; tokfix(p); { - mrb_int n = mrb_int_read(tok(p), NULL, NULL); - if (n > INT32_MAX) { + mrb_int n; + if (!mrb_read_int(tok(p), NULL, NULL, &n)) { yywarning(p, "capture group index too big; always nil"); return keyword_nil; } @@ -6352,10 +7247,10 @@ parser_yylex(parser_state *p) } if (c < 0) { if (p->tidx == 1) { - yyerror(p, "incomplete instance variable syntax"); + yyerror(NULL, p, "incomplete instance variable syntax"); } else { - yyerror(p, "incomplete class variable syntax"); + yyerror(NULL, p, "incomplete class variable syntax"); } return 0; } @@ -6388,7 +7283,7 @@ parser_yylex(parser_state *p) buf[sizeof(s)-1] = hexdigits[(c & 0xf0) >> 4]; buf[sizeof(s)] = hexdigits[(c & 0x0f)]; buf[sizeof(s)+1] = 0; - yyerror(p, buf); + yyerror(NULL, p, buf); goto retry; } @@ -6435,31 +7330,13 @@ parser_yylex(parser_state *p) break; case '_': - if (p->lstate != EXPR_FNAME && toklen(p) == 2 && ISDIGIT(tok(p)[1]) && p->nvars) { + if (toklen(p) == 2 && ISDIGIT(tok(p)[1]) && p->nvars) { int n = tok(p)[1] - '0'; int nvar; if (n > 0) { - node *nvars = p->nvars->cdr; - - while (nvars) { - nvar = intn(nvars->car); - if (nvar == -2) break; /* top of the scope */ - if (nvar > 0) { - yywarning(p, "numbered parameter used in outer block"); - break; - } - nvars->car = nint(-1); - nvars = nvars->cdr; - } - nvar = intn(p->nvars->car); + nvar = node_to_int(p->nvars->car); if (nvar != -2) { /* numbered parameters never appear on toplevel */ - if (nvar == -1) { - yywarning(p, "numbered parameter used in inner block"); - } - else { - p->nvars->car = nint(nvar > n ? nvar : n); - } pylval.num = n; p->lstate = EXPR_END; return tNUMPARAM; @@ -6537,6 +7414,10 @@ parser_yylex(parser_state *p) return keyword_do_block; return keyword_do; } + if (kw->id[0] == keyword_in) { + /* Set in_kwarg for pattern matching context */ + p->in_kwarg++; + } if (state == EXPR_BEG || state == EXPR_VALUE || state == EXPR_CLASS) return kw->id[0]; else { @@ -6575,14 +7456,14 @@ parser_yylex(parser_state *p) } static int -yylex(void *lval, parser_state *p) +yylex(void *lval, void *lp, parser_state *p) { p->ylval = lval; return parser_yylex(p); } static void -parser_init_cxt(parser_state *p, mrbc_context *cxt) +parser_init_cxt(parser_state *p, mrb_ccontext *cxt) { if (!cxt) return; if (cxt->filename) mrb_parser_set_filename(p, cxt->filename); @@ -6598,6 +7479,7 @@ parser_init_cxt(parser_state *p, mrbc_context *cxt) p->capture_errors = cxt->capture_errors; p->no_optimize = cxt->no_optimize; p->no_ext_ops = cxt->no_ext_ops; + p->no_return_value = cxt->no_return_value; p->upper = cxt->upper; if (cxt->partial_hook) { p->cxt = cxt; @@ -6605,31 +7487,33 @@ parser_init_cxt(parser_state *p, mrbc_context *cxt) } static void -parser_update_cxt(parser_state *p, mrbc_context *cxt) +parser_update_cxt(parser_state *p, mrb_ccontext *cxt) { node *n, *n0; int i = 0; if (!cxt) return; if (!p->tree) return; - if (intn(p->tree->car) != NODE_SCOPE) return; - n0 = n = p->tree->cdr->car; + if (!node_type_p(p->tree, NODE_SCOPE)) return; + + /* Extract locals from variable-sized NODE_SCOPE */ + struct mrb_ast_scope_node *scope = scope_node(p->tree); + n0 = n = scope->locals; while (n) { i++; n = n->cdr; } - cxt->syms = (mrb_sym*)mrb_realloc(p->mrb, cxt->syms, i*sizeof(mrb_sym)); + cxt->syms = (mrb_sym*)mrbc_realloc(cxt->syms, i*sizeof(mrb_sym)); cxt->slen = i; for (i=0, n=n0; n; i++,n=n->cdr) { - cxt->syms[i] = sym(n->car); + cxt->syms[i] = node_to_sym(n->car); } } -void mrb_codedump_all(mrb_state*, struct RProc*); -void mrb_parser_dump(mrb_state *mrb, node *tree, int offset); +static void dump_node(mrb_state *mrb, node *tree, int offset); MRB_API void -mrb_parser_parse(parser_state *p, mrbc_context *c) +mrb_parser_parse(parser_state *p, mrb_ccontext *c) { struct mrb_jmpbuf buf1; struct mrb_jmpbuf *prev = p->mrb->jmp; @@ -6652,13 +7536,13 @@ mrb_parser_parse(parser_state *p, mrbc_context *c) } parser_update_cxt(p, c); if (c && c->dump_result) { - mrb_parser_dump(p->mrb, p->tree, 0); + dump_node(p->mrb, p->tree, 0); } } MRB_CATCH(p->mrb->jmp) { p->nerr++; if (p->mrb->exc == NULL) { - yyerror(p, "memory allocation error"); + yyerror(NULL, p, "memory allocation error"); p->nerr++; p->tree = 0; } @@ -6670,13 +7554,13 @@ mrb_parser_parse(parser_state *p, mrbc_context *c) MRB_API parser_state* mrb_parser_new(mrb_state *mrb) { - mrb_pool *pool; + mempool *pool; parser_state *p; static const parser_state parser_state_zero = { 0 }; - pool = mrb_pool_open(mrb); + pool = mempool_open(); if (!pool) return NULL; - p = (parser_state*)mrb_pool_alloc(pool, sizeof(parser_state)); + p = (parser_state*)mempool_alloc(pool, sizeof(parser_state)); if (!p) return NULL; *p = parser_state_zero; @@ -6712,35 +7596,39 @@ mrb_parser_new(mrb_state *mrb) MRB_API void mrb_parser_free(parser_state *p) { if (p->tokbuf != p->buf) { - mrb_free(p->mrb, p->tokbuf); + mrbc_free(p->tokbuf); } - mrb_pool_close(p->pool); + mempool_close(p->pool); } -MRB_API mrbc_context* -mrbc_context_new(mrb_state *mrb) +MRB_API mrb_ccontext* +mrb_ccontext_new(mrb_state *mrb) { - return (mrbc_context*)mrb_calloc(mrb, 1, sizeof(mrbc_context)); + static const mrb_ccontext cc_zero = { 0 }; + mrb_ccontext *cc = (mrb_ccontext*)mrbc_malloc(sizeof(mrb_ccontext)); + *cc = cc_zero; + return cc; } MRB_API void -mrbc_context_free(mrb_state *mrb, mrbc_context *cxt) +mrb_ccontext_free(mrb_state *mrb, mrb_ccontext *cxt) { - mrb_free(mrb, cxt->filename); - mrb_free(mrb, cxt->syms); - mrb_free(mrb, cxt); + mrbc_free(cxt->filename); + mrbc_free(cxt->syms); + mrbc_free(cxt); } MRB_API const char* -mrbc_filename(mrb_state *mrb, mrbc_context *c, const char *s) +mrb_ccontext_filename(mrb_state *mrb, mrb_ccontext *c, const char *s) { if (s) { size_t len = strlen(s); - char *p = (char*)mrb_malloc(mrb, len + 1); + char *p = (char*)mrbc_malloc(len + 1); + if (p == NULL) return NULL; memcpy(p, s, len + 1); if (c->filename) { - mrb_free(mrb, c->filename); + mrbc_free(c->filename); } c->filename = p; } @@ -6748,20 +7636,21 @@ mrbc_filename(mrb_state *mrb, mrbc_context *c, const char *s) } MRB_API void -mrbc_partial_hook(mrb_state *mrb, mrbc_context *c, int (*func)(struct mrb_parser_state*), void *data) +mrb_ccontext_partial_hook(mrb_ccontext *c, int (*func)(struct mrb_parser_state*), void *data) { c->partial_hook = func; c->partial_data = data; } MRB_API void -mrbc_cleanup_local_variables(mrb_state *mrb, mrbc_context *c) +mrb_ccontext_cleanup_local_variables(mrb_ccontext *c) { if (c->syms) { - mrb_free(mrb, c->syms); + mrbc_free(c->syms); c->syms = NULL; c->slen = 0; } + c->keep_lv = FALSE; } MRB_API void @@ -6773,9 +7662,13 @@ mrb_parser_set_filename(struct mrb_parser_state *p, const char *f) sym = mrb_intern_cstr(p->mrb, f); p->filename_sym = sym; + /* Save current lineno so that AST nodes produced from a bison lookahead + across the file boundary (in partial_hook) can recover the correct + line in init_var_header instead of recording lineno=0. */ + p->prev_file_lineno = p->lineno; p->lineno = (p->filename_table_length > 0)? 0 : 1; - for (i = 0; i < p->filename_table_length; ++i) { + for (i = 0; i < p->filename_table_length; i++) { if (p->filename_table[i] == sym) { p->current_filename_index = i; return; @@ -6783,7 +7676,7 @@ mrb_parser_set_filename(struct mrb_parser_state *p, const char *f) } if (p->filename_table_length == UINT16_MAX) { - yyerror(p, "too many files to compile"); + yyerror(NULL, p, "too many files to compile"); return; } p->current_filename_index = p->filename_table_length++; @@ -6806,7 +7699,7 @@ mrb_parser_get_filename(struct mrb_parser_state* p, uint16_t idx) { #ifndef MRB_NO_STDIO static struct mrb_parser_state * -mrb_parse_file_continue(mrb_state *mrb, FILE *f, const void *prebuf, size_t prebufsize, mrbc_context *c) +mrb_parse_file_continue(mrb_state *mrb, FILE *f, const void *prebuf, size_t prebufsize, mrb_ccontext *c) { parser_state *p; @@ -6826,14 +7719,14 @@ mrb_parse_file_continue(mrb_state *mrb, FILE *f, const void *prebuf, size_t preb } MRB_API parser_state* -mrb_parse_file(mrb_state *mrb, FILE *f, mrbc_context *c) +mrb_parse_file(mrb_state *mrb, FILE *f, mrb_ccontext *c) { return mrb_parse_file_continue(mrb, f, NULL, 0, c); } #endif MRB_API parser_state* -mrb_parse_nstring(mrb_state *mrb, const char *s, size_t len, mrbc_context *c) +mrb_parse_nstring(mrb_state *mrb, const char *s, size_t len, mrb_ccontext *c) { parser_state *p; @@ -6847,13 +7740,13 @@ mrb_parse_nstring(mrb_state *mrb, const char *s, size_t len, mrbc_context *c) } MRB_API parser_state* -mrb_parse_string(mrb_state *mrb, const char *s, mrbc_context *c) +mrb_parse_string(mrb_state *mrb, const char *s, mrb_ccontext *c) { return mrb_parse_nstring(mrb, s, strlen(s), c); } MRB_API mrb_value -mrb_load_exec(mrb_state *mrb, struct mrb_parser_state *p, mrbc_context *c) +mrb_load_exec(mrb_state *mrb, struct mrb_parser_state *p, mrb_ccontext *c) { struct RClass *target = mrb->object_class; struct RProc *proc; @@ -6916,7 +7809,7 @@ mrb_load_exec(mrb_state *mrb, struct mrb_parser_state *p, mrbc_context *c) #ifndef MRB_NO_STDIO MRB_API mrb_value -mrb_load_file_cxt(mrb_state *mrb, FILE *f, mrbc_context *c) +mrb_load_file_cxt(mrb_state *mrb, FILE *f, mrb_ccontext *c) { return mrb_load_exec(mrb, mrb_parse_file(mrb, f, c), c); } @@ -6936,7 +7829,7 @@ mrb_load_file(mrb_state *mrb, FILE *f) * - `NUL` is included in the first 64 bytes of the file */ MRB_API mrb_value -mrb_load_detect_file_cxt(mrb_state *mrb, FILE *fp, mrbc_context *c) +mrb_load_detect_file_cxt(mrb_state *mrb, FILE *fp, mrb_ccontext *c) { union { char b[DETECT_SIZE]; @@ -6955,15 +7848,10 @@ mrb_load_detect_file_cxt(mrb_state *mrb, FILE *fp, mrbc_context *c) return mrb_load_exec(mrb, mrb_parse_file_continue(mrb, fp, leading.b, bufsize, c), c); } else { - mrb_int binsize; - uint8_t *bin; - mrb_value bin_obj = mrb_nil_value(); /* temporary string object */ - mrb_value result; - - binsize = bin_to_uint32(leading.h.binary_size); - bin_obj = mrb_str_new(mrb, NULL, binsize); - bin = (uint8_t*)RSTRING_PTR(bin_obj); - if ((size_t)binsize > bufsize) { + mrb_int binsize = bin_to_uint32(leading.h.binary_size); + mrb_value bin_obj = mrb_str_new(mrb, NULL, binsize); + uint8_t *bin = (uint8_t*)RSTRING_PTR(bin_obj); + if ((size_t)binsize > bufsize) { memcpy(bin, leading.b, bufsize); if (fread(bin + bufsize, binsize - bufsize, 1, fp) == 0) { binsize = bufsize; @@ -6971,7 +7859,7 @@ mrb_load_detect_file_cxt(mrb_state *mrb, FILE *fp, mrbc_context *c) } } - result = mrb_load_irep_buf_cxt(mrb, bin, binsize, c); + mrb_value result = mrb_load_irep_buf_cxt(mrb, bin, binsize, c); if (mrb_string_p(bin_obj)) mrb_str_resize(mrb, bin_obj, 0); return result; } @@ -6979,7 +7867,7 @@ mrb_load_detect_file_cxt(mrb_state *mrb, FILE *fp, mrbc_context *c) #endif MRB_API mrb_value -mrb_load_nstring_cxt(mrb_state *mrb, const char *s, size_t len, mrbc_context *c) +mrb_load_nstring_cxt(mrb_state *mrb, const char *s, size_t len, mrb_ccontext *c) { return mrb_load_exec(mrb, mrb_parse_nstring(mrb, s, len, c), c); } @@ -6991,7 +7879,7 @@ mrb_load_nstring(mrb_state *mrb, const char *s, size_t len) } MRB_API mrb_value -mrb_load_string_cxt(mrb_state *mrb, const char *s, mrbc_context *c) +mrb_load_string_cxt(mrb_state *mrb, const char *s, mrb_ccontext *c) { return mrb_load_nstring_cxt(mrb, s, strlen(s), c); } @@ -7005,9 +7893,9 @@ mrb_load_string(mrb_state *mrb, const char *s) #ifndef MRB_NO_STDIO static void -dump_prefix(node *tree, int offset) +dump_prefix(int offset, uint16_t lineno) { - printf("%05d ", tree->lineno); + printf("%05d ", lineno); while (offset--) { putc(' ', stdout); putc(' ', stdout); @@ -7018,56 +7906,60 @@ static void dump_recur(mrb_state *mrb, node *tree, int offset) { while (tree) { - mrb_parser_dump(mrb, tree->car, offset); + dump_node(mrb, tree->car, offset); tree = tree->cdr; } } static void -dump_args(mrb_state *mrb, node *n, int offset) +dump_locals(mrb_state *mrb, node *tree, int offset, uint16_t lineno) { - if (n->car) { - dump_prefix(n, offset+1); - printf("mandatory args:\n"); - dump_recur(mrb, n->car, offset+2); - } - n = n->cdr; - if (n->car) { - dump_prefix(n, offset+1); - printf("optional args:\n"); - { - node *n2 = n->car; + if (!tree || (!tree->car && !tree->cdr)) return; - while (n2) { - dump_prefix(n2, offset+2); - printf("%s=\n", mrb_sym_name(mrb, sym(n2->car->car))); - mrb_parser_dump(mrb, n2->car->cdr, offset+3); - n2 = n2->cdr; + dump_prefix(offset, lineno); + printf("locals:\n"); + dump_prefix(offset+1, lineno); + while (tree) { + if (tree->car) { + mrb_sym sym = node_to_sym(tree->car); + if (sym != 0) { + const char *name = mrb_sym_name(mrb, sym); + if (name && strlen(name) > 0 && name[0] != '!' && name[0] != '@' && name[0] != '$') { + printf(" %s", mrb_sym_dump(mrb, sym)); + } + else { + printf(" (invalid symbol: %s)", name ? name : "(null)"); + } + } + else { + printf(" (anonymous)"); } } + tree = tree->cdr; } - n = n->cdr; - if (n->car) { - mrb_sym rest = sym(n->car); + printf("\n"); +} - dump_prefix(n, offset+1); - if (rest == MRB_OPSYM(mul)) - printf("rest=*\n"); - else - printf("rest=*%s\n", mrb_sym_name(mrb, rest)); +static void +dump_cpath(mrb_state *mrb, node *tree, int offset, uint16_t lineno) +{ + dump_prefix(offset, lineno); + printf("cpath: "); + if (!tree) { + printf("(null)\n"); } - n = n->cdr; - if (n->car) { - dump_prefix(n, offset+1); - printf("post mandatory args:\n"); - dump_recur(mrb, n->car, offset+2); + else if (node_to_int(tree->car) == 0) { + printf("(null)\n"); } - - n = n->cdr; - if (n) { - mrb_assert(intn(n->car) == NODE_ARGS_TAIL); - mrb_parser_dump(mrb, n, offset); + else if (node_to_int(tree->car) == 1) { + printf("Object\n"); + } + else { + printf("\n"); + dump_node(mrb, tree->car, offset+1); } + dump_prefix(offset, lineno); + printf("name: %s\n", mrb_sym_dump(mrb, node_to_sym(tree->cdr))); } /* @@ -7080,591 +7972,750 @@ static const char* str_dump(mrb_state *mrb, const char *str, int len) { int ai = mrb_gc_arena_save(mrb); - mrb_value s; -# if INT_MAX > MRB_INT_MAX / 4 - /* check maximum length with "\xNN" character */ - if (len > MRB_INT_MAX / 4) { - len = MRB_INT_MAX / 4; - } -# endif - s = mrb_str_new(mrb, str, (mrb_int)len); + mrb_value s = mrb_str_new(mrb, str, (mrb_int)len); s = mrb_str_dump(mrb, s); mrb_gc_arena_restore(mrb, ai); return RSTRING_PTR(s); } + +static void +dump_str(mrb_state *mrb, node *n, int offset, uint16_t lineno) +{ + while (n) { + dump_prefix(offset, lineno); + int len = node_to_int(n->car->car); + if (len >= 0) { + printf("str: %s\n", str_dump(mrb, (char*)n->car->cdr, len)); + } + else { + printf("interpolation:\n"); + dump_node(mrb, n->car->cdr, offset+1); + } + n = n->cdr; + } +} + +static void +dump_args(mrb_state *mrb, struct mrb_ast_args *args, int offset, uint16_t lineno) +{ + if (args->mandatory_args) { + dump_prefix(offset, lineno); + printf("mandatory args:\n"); + dump_recur(mrb, args->mandatory_args, offset+1); + } + if (args->optional_args) { + dump_prefix(offset, lineno); + printf("optional args:\n"); + { + node *n = args->optional_args; + while (n) { + dump_prefix(offset+1, lineno); + printf("%s=\n", mrb_sym_name(mrb, node_to_sym(n->car->car))); + dump_node(mrb, n->car->cdr, offset+2); + n = n->cdr; + } + } + } + if (args->rest_arg) { + mrb_sym rest = args->rest_arg; + + dump_prefix(offset, lineno); + if (rest == MRB_OPSYM(mul)) + printf("rest=*\n"); + else + printf("rest=*%s\n", mrb_sym_name(mrb, rest)); + } + if (args->post_mandatory_args) { + dump_prefix(offset, lineno); + printf("post mandatory args:\n"); + dump_recur(mrb, args->post_mandatory_args, offset+1); + } + if (args->keyword_args) { + dump_prefix(offset, lineno); + printf("keyword args:\n"); + { + node *n = args->keyword_args; + while (n) { + dump_prefix(offset+1, lineno); + printf("%s:\n", mrb_sym_name(mrb, node_to_sym(n->car->car))); + dump_node(mrb, n->car->cdr, offset+2); + n = n->cdr; + } + } + } + if (args->kwrest_arg) { + mrb_sym rest = args->kwrest_arg; + + dump_prefix(offset, lineno); + if (rest == MRB_OPSYM(pow)) + printf("kwrest=**\n"); + else + printf("kwrest=**%s\n", mrb_sym_name(mrb, rest)); + } + if (args->block_arg) { + mrb_sym blk = args->block_arg; + + dump_prefix(offset, lineno); + if (blk == MRB_OPSYM(and)) + printf("blk=&\n"); + else if (blk == MRB_SYM(nil)) + printf("blk=&nil\n"); + else + printf("blk=&%s\n", mrb_sym_name(mrb, blk)); + } +} + +static void +dump_callargs(mrb_state *mrb, node *n, int offset, uint16_t lineno) +{ + if (!n) return; + + struct mrb_ast_callargs *args = (struct mrb_ast_callargs*)n; + if (args->regular_args) { + dump_prefix(offset+1, lineno); + printf("args:\n"); + dump_recur(mrb, args->regular_args, offset+2); + } + if (args->keyword_args) { + dump_prefix(offset+1, lineno); + printf("kw_args:\n"); + node *kw = args->keyword_args; + while (kw) { + dump_prefix(offset+2, lineno); + printf("key:\n"); + if (node_to_sym(kw->car->car) == MRB_OPSYM(pow)) { + dump_prefix(offset+3, lineno); + printf("**:\n"); + } + else { + dump_node(mrb, kw->car->car, offset+3); + } + dump_prefix(offset+2, lineno); + printf("value:\n"); + dump_node(mrb, kw->car->cdr, offset+3); + kw = kw->cdr; + } + } + if (args->block_arg) { + dump_prefix(offset+1, lineno); + printf("block:\n"); + dump_node(mrb, args->block_arg, offset+2); + } +} + #endif void -mrb_parser_dump(mrb_state *mrb, node *tree, int offset) +dump_node(mrb_state *mrb, node *tree, int offset) { #ifndef MRB_NO_STDIO - int nodetype; + enum node_type nodetype; + uint16_t lineno = 0; if (!tree) return; - again: - dump_prefix(tree, offset); - nodetype = intn(tree->car); - tree = tree->cdr; - switch (nodetype) { - case NODE_BEGIN: - printf("NODE_BEGIN:\n"); - dump_recur(mrb, tree, offset+1); - break; - case NODE_RESCUE: - printf("NODE_RESCUE:\n"); - if (tree->car) { - dump_prefix(tree, offset+1); - printf("body:\n"); - mrb_parser_dump(mrb, tree->car, offset+2); - } - tree = tree->cdr; - if (tree->car) { - node *n2 = tree->car; + /* Extract line number from variable-sized node header */ + if (node_type(tree) != NODE_LAST) { + lineno = ((struct mrb_ast_var_header*)tree)->lineno; + } - dump_prefix(n2, offset+1); - printf("rescue:\n"); - while (n2) { - node *n3 = n2->car; - if (n3->car) { - dump_prefix(n2, offset+2); - printf("handle classes:\n"); - dump_recur(mrb, n3->car, offset+3); - } - if (n3->cdr->car) { - dump_prefix(n3, offset+2); - printf("exc_var:\n"); - mrb_parser_dump(mrb, n3->cdr->car, offset+3); - } - if (n3->cdr->cdr->car) { - dump_prefix(n3, offset+2); - printf("rescue body:\n"); - mrb_parser_dump(mrb, n3->cdr->cdr->car, offset+3); - } - n2 = n2->cdr; - } + dump_prefix(offset, lineno); + + /* All nodes are now variable-sized nodes with headers */ + nodetype = node_type(tree); + + switch (nodetype) { + /* Variable-sized node cases */ + case NODE_SCOPE: + printf("NODE_SCOPE:\n"); + if (scope_node(tree)->locals) { + dump_locals(mrb, scope_node(tree)->locals, offset+1, lineno); } - tree = tree->cdr; - if (tree->car) { - dump_prefix(tree, offset+1); - printf("else:\n"); - mrb_parser_dump(mrb, tree->car, offset+2); + if (scope_node(tree)->body) { + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, scope_node(tree)->body, offset+2); } break; - case NODE_ENSURE: - printf("NODE_ENSURE:\n"); - dump_prefix(tree, offset+1); - printf("body:\n"); - mrb_parser_dump(mrb, tree->car, offset+2); - dump_prefix(tree, offset+1); - printf("ensure:\n"); - mrb_parser_dump(mrb, tree->cdr->cdr, offset+2); + case NODE_INT: + printf("NODE_INT: %d\n", int_node(tree)->value); break; - case NODE_LAMBDA: - printf("NODE_LAMBDA:\n"); - dump_prefix(tree, offset); - goto block; + case NODE_BIGINT: + printf("NODE_BIGINT: %s (base %d)\n", bigint_node(tree)->string, bigint_node(tree)->base); + break; - case NODE_BLOCK: - block: - printf("NODE_BLOCK:\n"); - tree = tree->cdr; - if (tree->car) { - dump_args(mrb, tree->car, offset+1); - } - dump_prefix(tree, offset+1); - printf("body:\n"); - mrb_parser_dump(mrb, tree->cdr->car, offset+2); + case NODE_FLOAT: + printf("NODE_FLOAT: %s\n", float_node(tree)->value); break; - case NODE_IF: - printf("NODE_IF:\n"); - dump_prefix(tree, offset+1); - printf("cond:\n"); - mrb_parser_dump(mrb, tree->car, offset+2); - dump_prefix(tree, offset+1); - printf("then:\n"); - mrb_parser_dump(mrb, tree->cdr->car, offset+2); - if (tree->cdr->cdr->car) { - dump_prefix(tree, offset+1); - printf("else:\n"); - mrb_parser_dump(mrb, tree->cdr->cdr->car, offset+2); - } + case NODE_STR: + printf("NODE_STR:\n"); + dump_str(mrb, str_node(tree)->list, offset+1, lineno); break; - case NODE_AND: - printf("NODE_AND:\n"); - mrb_parser_dump(mrb, tree->car, offset+1); - mrb_parser_dump(mrb, tree->cdr, offset+1); + case NODE_XSTR: + printf("NODE_XSTR:\n"); + dump_str(mrb, xstr_node(tree)->list, offset+1, lineno); break; - case NODE_OR: - printf("NODE_OR:\n"); - mrb_parser_dump(mrb, tree->car, offset+1); - mrb_parser_dump(mrb, tree->cdr, offset+1); + case NODE_SYM: + printf("NODE_SYM: %s\n", mrb_sym_dump(mrb, sym_node(tree)->symbol)); break; - case NODE_CASE: - printf("NODE_CASE:\n"); - if (tree->car) { - mrb_parser_dump(mrb, tree->car, offset+1); - } - tree = tree->cdr; - while (tree) { - dump_prefix(tree, offset+1); - printf("case:\n"); - dump_recur(mrb, tree->car->car, offset+2); - dump_prefix(tree, offset+1); - printf("body:\n"); - mrb_parser_dump(mrb, tree->car->cdr, offset+2); - tree = tree->cdr; - } + case NODE_DSYM: + printf("NODE_DSYM:\n"); + dump_str(mrb, str_node(tree)->list, offset+1, lineno); break; - case NODE_WHILE: - printf("NODE_WHILE:\n"); - dump_prefix(tree, offset+1); - printf("cond:\n"); - mrb_parser_dump(mrb, tree->car, offset+2); - dump_prefix(tree, offset+1); - printf("body:\n"); - mrb_parser_dump(mrb, tree->cdr, offset+2); + case NODE_LVAR: + printf("NODE_LVAR: %s\n", mrb_sym_dump(mrb, var_node(tree)->symbol)); break; - case NODE_UNTIL: - printf("NODE_UNTIL:\n"); - dump_prefix(tree, offset+1); - printf("cond:\n"); - mrb_parser_dump(mrb, tree->car, offset+2); - dump_prefix(tree, offset+1); - printf("body:\n"); - mrb_parser_dump(mrb, tree->cdr, offset+2); + case NODE_GVAR: + printf("NODE_GVAR: %s\n", mrb_sym_dump(mrb, var_node(tree)->symbol)); break; - case NODE_FOR: - printf("NODE_FOR:\n"); - dump_prefix(tree, offset+1); - printf("var:\n"); - { - node *n2 = tree->car; - - if (n2->car) { - dump_prefix(n2, offset+2); - printf("pre:\n"); - dump_recur(mrb, n2->car, offset+3); - } - n2 = n2->cdr; - if (n2) { - if (n2->car) { - dump_prefix(n2, offset+2); - printf("rest:\n"); - mrb_parser_dump(mrb, n2->car, offset+3); - } - n2 = n2->cdr; - if (n2) { - if (n2->car) { - dump_prefix(n2, offset+2); - printf("post:\n"); - dump_recur(mrb, n2->car, offset+3); - } - } - } - } - tree = tree->cdr; - dump_prefix(tree, offset+1); - printf("in:\n"); - mrb_parser_dump(mrb, tree->car, offset+2); - tree = tree->cdr; - dump_prefix(tree, offset+1); - printf("do:\n"); - mrb_parser_dump(mrb, tree->car, offset+2); + case NODE_IVAR: + printf("NODE_IVAR: %s\n", mrb_sym_dump(mrb, var_node(tree)->symbol)); break; - case NODE_SCOPE: - printf("NODE_SCOPE:\n"); - { - node *n2 = tree->car; - mrb_bool first_lval = TRUE; - - if (n2 && (n2->car || n2->cdr)) { - dump_prefix(n2, offset+1); - printf("local variables:\n"); - dump_prefix(n2, offset+2); - while (n2) { - if (n2->car) { - if (!first_lval) printf(", "); - printf("%s", mrb_sym_name(mrb, sym(n2->car))); - first_lval = FALSE; - } - n2 = n2->cdr; - } - printf("\n"); - } - } - tree = tree->cdr; - offset++; - goto again; + case NODE_CVAR: + printf("NODE_CVAR: %s\n", mrb_sym_dump(mrb, var_node(tree)->symbol)); + break; + + case NODE_NVAR: + printf("NODE_NVAR: %d\n", nvar_node(tree)->num); + break; + + case NODE_CONST: + printf("NODE_CONST: %s\n", mrb_sym_dump(mrb, var_node(tree)->symbol)); + break; - case NODE_FCALL: case NODE_CALL: - case NODE_SCALL: - switch (nodetype) { - case NODE_FCALL: - printf("NODE_FCALL:\n"); break; - case NODE_CALL: - printf("NODE_CALL(.):\n"); break; - case NODE_SCALL: - printf("NODE_SCALL(&.):\n"); break; - default: - break; + printf("NODE_CALL: %s\n", mrb_sym_dump(mrb, call_node(tree)->method_name)); + if (call_node(tree)->receiver) { + dump_prefix(offset+1, lineno); + printf("receiver:\n"); + dump_node(mrb, call_node(tree)->receiver, offset+2); } - mrb_parser_dump(mrb, tree->car, offset+1); - dump_prefix(tree, offset+1); - printf("method='%s' (%d)\n", - mrb_sym_dump(mrb, sym(tree->cdr->car)), - intn(tree->cdr->car)); - tree = tree->cdr->cdr->car; - if (tree) { - dump_prefix(tree, offset+1); - printf("args:\n"); - dump_recur(mrb, tree->car, offset+2); - if (tree->cdr) { - if (tree->cdr->car) { - dump_prefix(tree, offset+1); - printf("kwargs:\n"); - mrb_parser_dump(mrb, tree->cdr->car, offset+2); - } - if (tree->cdr->cdr) { - dump_prefix(tree, offset+1); - printf("block:\n"); - mrb_parser_dump(mrb, tree->cdr->cdr, offset+2); - } - } + if (call_node(tree)->args) { + dump_callargs(mrb, call_node(tree)->args, offset, lineno); } break; - case NODE_DOT2: - printf("NODE_DOT2:\n"); - mrb_parser_dump(mrb, tree->car, offset+1); - mrb_parser_dump(mrb, tree->cdr, offset+1); + case NODE_ARRAY: + printf("NODE_ARRAY:\n"); + if (array_node(tree)->elements) { + dump_recur(mrb, array_node(tree)->elements, offset+1); + } break; - case NODE_DOT3: - printf("NODE_DOT3:\n"); - mrb_parser_dump(mrb, tree->car, offset+1); - mrb_parser_dump(mrb, tree->cdr, offset+1); + case NODE_TRUE: + printf("NODE_TRUE\n"); break; - case NODE_COLON2: - printf("NODE_COLON2:\n"); - mrb_parser_dump(mrb, tree->car, offset+1); - dump_prefix(tree, offset+1); - printf("::%s\n", mrb_sym_name(mrb, sym(tree->cdr))); + case NODE_FALSE: + printf("NODE_FALSE\n"); break; - case NODE_COLON3: - printf("NODE_COLON3: ::%s\n", mrb_sym_name(mrb, sym(tree))); + case NODE_NIL: + printf("NODE_NIL\n"); break; - case NODE_ARRAY: - printf("NODE_ARRAY:\n"); - dump_recur(mrb, tree, offset+1); + case NODE_SELF: + printf("NODE_SELF\n"); break; - case NODE_HASH: - printf("NODE_HASH:\n"); - while (tree) { - dump_prefix(tree, offset+1); - printf("key:\n"); - mrb_parser_dump(mrb, tree->car->car, offset+2); - dump_prefix(tree, offset+1); - printf("value:\n"); - mrb_parser_dump(mrb, tree->car->cdr, offset+2); - tree = tree->cdr; + case NODE_IF: + printf("NODE_IF:\n"); + if (if_node(tree)->condition) { + dump_prefix(offset+1, lineno); + printf("cond:\n"); + dump_node(mrb, if_node(tree)->condition, offset+2); + } + if (if_node(tree)->then_body) { + dump_prefix(offset+1, lineno); + printf("then:\n"); + dump_node(mrb, if_node(tree)->then_body, offset+2); + } + if (if_node(tree)->else_body) { + dump_prefix(offset+1, lineno); + printf("else:\n"); + dump_node(mrb, if_node(tree)->else_body, offset+2); } break; - case NODE_KW_HASH: - printf("NODE_KW_HASH:\n"); - while (tree) { - dump_prefix(tree, offset+1); - printf("key:\n"); - mrb_parser_dump(mrb, tree->car->car, offset+2); - dump_prefix(tree, offset+1); - printf("value:\n"); - mrb_parser_dump(mrb, tree->car->cdr, offset+2); - tree = tree->cdr; + case NODE_DEF: + printf("NODE_DEF: %s\n", mrb_sym_dump(mrb, def_node(tree)->name)); + if (def_node(tree)->args) { + dump_args(mrb, sdef_node(tree)->args, offset+1, lineno); + } + if (def_node(tree)->locals) { + dump_locals(mrb, def_node(tree)->locals, offset+1, lineno); + } + if (def_node(tree)->body) { + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, def_node(tree)->body, offset+2); } - break; - - case NODE_SPLAT: - printf("NODE_SPLAT:\n"); - mrb_parser_dump(mrb, tree, offset+1); break; case NODE_ASGN: printf("NODE_ASGN:\n"); - dump_prefix(tree, offset+1); - printf("lhs:\n"); - mrb_parser_dump(mrb, tree->car, offset+2); - dump_prefix(tree, offset+1); - printf("rhs:\n"); - mrb_parser_dump(mrb, tree->cdr, offset+2); + if (asgn_node(tree)->lhs) { + dump_prefix(offset+1, lineno); + printf("lhs:\n"); + dump_node(mrb, asgn_node(tree)->lhs, offset+2); + } + if (asgn_node(tree)->rhs) { + dump_prefix(offset+1, lineno); + printf("rhs:\n"); + dump_node(mrb, asgn_node(tree)->rhs, offset+2); + } break; case NODE_MASGN: - printf("NODE_MASGN:\n"); - dump_prefix(tree, offset+1); - printf("mlhs:\n"); - { - node *n2 = tree->car; - - if (n2->car) { - dump_prefix(tree, offset+2); - printf("pre:\n"); - dump_recur(mrb, n2->car, offset+3); - } - n2 = n2->cdr; - if (n2) { - if (n2->car) { - dump_prefix(n2, offset+2); - printf("rest:\n"); - if (n2->car == nint(-1)) { - dump_prefix(n2, offset+2); - printf("(empty)\n"); - } - else { - mrb_parser_dump(mrb, n2->car, offset+3); - } - } - n2 = n2->cdr; - if (n2 && n2->car) { - dump_prefix(n2, offset+2); - printf("post:\n"); - dump_recur(mrb, n2->car, offset+3); - } + case NODE_MARG: + printf("%s:\n", node_type(tree) == NODE_MASGN ? "NODE_MASGN" : "NODE_MARG"); + /* Handle pre-splat variables */ + if (masgn_node(tree)->pre) { + dump_prefix(offset+1, lineno); + printf("pre:\n"); + dump_recur(mrb, masgn_node(tree)->pre, offset+2); + } + /* Handle splat variable (can be -1 sentinel for anonymous splat) */ + if (masgn_node(tree)->rest) { + if ((intptr_t)masgn_node(tree)->rest == -1) { + dump_prefix(offset+1, lineno); + printf("rest: *\n"); } - } - dump_prefix(tree, offset+1); - printf("rhs:\n"); - mrb_parser_dump(mrb, tree->cdr, offset+2); - break; - - case NODE_OP_ASGN: - printf("NODE_OP_ASGN:\n"); - dump_prefix(tree, offset+1); - printf("lhs:\n"); - mrb_parser_dump(mrb, tree->car, offset+2); - tree = tree->cdr; - dump_prefix(tree, offset+1); - printf("op='%s' (%d)\n", mrb_sym_name(mrb, sym(tree->car)), intn(tree->car)); - tree = tree->cdr; - mrb_parser_dump(mrb, tree->car, offset+1); - break; - - case NODE_SUPER: - printf("NODE_SUPER:\n"); - if (tree) { - dump_prefix(tree, offset+1); - printf("args:\n"); - dump_recur(mrb, tree->car, offset+2); - if (tree->cdr) { - dump_prefix(tree, offset+1); - printf("block:\n"); - mrb_parser_dump(mrb, tree->cdr, offset+2); + else { + dump_prefix(offset+1, lineno); + printf("rest:\n"); + dump_node(mrb, masgn_node(tree)->rest, offset+2); } } - break; - - case NODE_ZSUPER: - printf("NODE_ZSUPER:\n"); - if (tree) { - dump_prefix(tree, offset+1); - printf("args:\n"); - dump_recur(mrb, tree->car, offset+2); - if (tree->cdr) { - dump_prefix(tree, offset+1); - printf("block:\n"); - mrb_parser_dump(mrb, tree->cdr, offset+2); - } + /* Handle post-splat variables */ + if (masgn_node(tree)->post) { + dump_prefix(offset+1, lineno); + printf("post:\n"); + dump_recur(mrb, masgn_node(tree)->post, offset+2); + } + if (masgn_node(tree)->rhs) { + dump_prefix(offset+1, lineno); + printf("rhs:\n"); + dump_node(mrb, masgn_node(tree)->rhs, offset+2); } break; case NODE_RETURN: printf("NODE_RETURN:\n"); - mrb_parser_dump(mrb, tree, offset+1); - break; - - case NODE_YIELD: - printf("NODE_YIELD:\n"); - dump_recur(mrb, tree, offset+1); + if (return_node(tree)->args) { + dump_node(mrb, return_node(tree)->args, offset); + } break; case NODE_BREAK: printf("NODE_BREAK:\n"); - mrb_parser_dump(mrb, tree, offset+1); + if (break_node(tree)->value) { + dump_prefix(offset+1, lineno); + printf("value:\n"); + dump_node(mrb, break_node(tree)->value, offset+2); + } break; case NODE_NEXT: printf("NODE_NEXT:\n"); - mrb_parser_dump(mrb, tree, offset+1); - break; - - case NODE_REDO: - printf("NODE_REDO\n"); + if (next_node(tree)->value) { + dump_prefix(offset+1, lineno); + printf("value:\n"); + dump_node(mrb, next_node(tree)->value, offset+2); + } break; - case NODE_RETRY: - printf("NODE_RETRY\n"); + case NODE_NEGATE: + printf("NODE_NEGATE:\n"); + if (negate_node(tree)->operand) { + dump_prefix(offset+1, lineno); + printf("operand:\n"); + dump_node(mrb, negate_node(tree)->operand, offset+2); + } break; - case NODE_LVAR: - printf("NODE_LVAR %s\n", mrb_sym_name(mrb, sym(tree))); + case NODE_STMTS: + printf("NODE_STMTS:\n"); + if (stmts_node(tree)->stmts) { + dump_recur(mrb, stmts_node(tree)->stmts, offset+1); + } break; - case NODE_GVAR: - printf("NODE_GVAR %s\n", mrb_sym_name(mrb, sym(tree))); + case NODE_BEGIN: + printf("NODE_BEGIN:\n"); + if (begin_node(tree)->body) { + dump_node(mrb, begin_node(tree)->body, offset+1); + } break; - case NODE_IVAR: - printf("NODE_IVAR %s\n", mrb_sym_name(mrb, sym(tree))); + case NODE_RESCUE: + printf("NODE_RESCUE:\n"); + if (rescue_node(tree)->body) { + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, rescue_node(tree)->body, offset+2); + } + if (rescue_node(tree)->rescue_clauses) { + node *n2 = rescue_node(tree)->rescue_clauses; + dump_prefix(offset+1, lineno); + printf("rescue:\n"); + while (n2) { + node *n3 = n2->car; + if (n3->car) { + dump_prefix(offset+2, lineno); + printf("handle classes:\n"); + dump_recur(mrb, n3->car, offset+3); + } + if (n3->cdr->car) { + dump_prefix(offset+2, lineno); + printf("exc_var:\n"); + dump_node(mrb, n3->cdr->car, offset+3); + } + if (n3->cdr->cdr->car) { + dump_prefix(offset+2, lineno); + printf("rescue body:\n"); + dump_node(mrb, n3->cdr->cdr->car, offset+3); + } + n2 = n2->cdr; + } + } + if (rescue_node(tree)->else_clause) { + dump_prefix(offset+1, lineno); + printf("else:\n"); + dump_node(mrb, rescue_node(tree)->else_clause, offset+2); + } break; - case NODE_CVAR: - printf("NODE_CVAR %s\n", mrb_sym_name(mrb, sym(tree))); + case NODE_ENSURE: + printf("NODE_ENSURE:\n"); + if (ensure_node(tree)->body) { + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, ensure_node(tree)->body, offset+2); + } + if (ensure_node(tree)->ensure_clause) { + dump_prefix(offset+1, lineno); + printf("ensure:\n"); + dump_node(mrb, ensure_node(tree)->ensure_clause, offset+2); + } break; - case NODE_NVAR: - printf("NODE_NVAR %d\n", intn(tree)); - break; + case NODE_LAMBDA: + printf("NODE_LAMBDA:\n"); + goto block; - case NODE_CONST: - printf("NODE_CONST %s\n", mrb_sym_name(mrb, sym(tree))); + case NODE_BLOCK: + printf("NODE_BLOCK:\n"); + block: + if (block_node(tree)->locals) { + dump_locals(mrb, block_node(tree)->locals, offset+1, lineno); + } + if (block_node(tree)->args) { + dump_args(mrb, block_node(tree)->args, offset+1, lineno); + } + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, block_node(tree)->body, offset+2); break; - case NODE_MATCH: - printf("NODE_MATCH:\n"); - dump_prefix(tree, offset + 1); - printf("lhs:\n"); - mrb_parser_dump(mrb, tree->car, offset + 2); - dump_prefix(tree, offset + 1); - printf("rhs:\n"); - mrb_parser_dump(mrb, tree->cdr, offset + 2); + case NODE_AND: + printf("NODE_AND:\n"); + dump_node(mrb, and_node(tree)->left, offset+1); + dump_node(mrb, and_node(tree)->right, offset+1); break; - case NODE_BACK_REF: - printf("NODE_BACK_REF: $%c\n", intn(tree)); + case NODE_OR: + printf("NODE_OR:\n"); + dump_node(mrb, or_node(tree)->left, offset+1); + dump_node(mrb, or_node(tree)->right, offset+1); break; - case NODE_NTH_REF: - printf("NODE_NTH_REF: $%d\n", intn(tree)); + case NODE_CASE: + printf("NODE_CASE:\n"); + if (case_node(tree)->value) { + dump_prefix(offset+1, lineno); + printf("value:\n"); + dump_node(mrb, case_node(tree)->value, offset+2); + } + if (case_node(tree)->body) { + node *when_node = case_node(tree)->body; + while (when_node) { + dump_prefix(offset+1, lineno); + printf("when:\n"); + node *when_clause = when_node->car; + if (when_clause && when_clause->car) { + dump_prefix(offset+2, lineno); + printf("cond:\n"); + dump_recur(mrb, when_clause->car, offset+3); + } + if (when_clause && when_clause->cdr) { + dump_prefix(offset+2, lineno); + printf("body:\n"); + dump_node(mrb, when_clause->cdr, offset+3); + } + when_node = when_node->cdr; + } + } break; - case NODE_ARG: - printf("NODE_ARG %s\n", mrb_sym_name(mrb, sym(tree))); + case NODE_WHILE: + printf("NODE_WHILE:\n"); + goto dump_loop_node; + case NODE_UNTIL: + printf("NODE_UNTIL:\n"); + goto dump_loop_node; + case NODE_WHILE_MOD: + printf("NODE_WHILE_MOD:\n"); + goto dump_loop_node; + case NODE_UNTIL_MOD: + printf("NODE_UNTIL_MOD:\n"); + + dump_loop_node: + dump_prefix(offset+1, lineno); + printf("cond:\n"); + dump_node(mrb, while_node(tree)->condition, offset+2); + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, while_node(tree)->body, offset+2); break; - case NODE_BLOCK_ARG: - printf("NODE_BLOCK_ARG:\n"); - mrb_parser_dump(mrb, tree, offset+1); + case NODE_FOR: + printf("NODE_FOR:\n"); + if (for_node(tree)->var) { + dump_prefix(offset+1, lineno); + printf("var:\n"); + /* FOR_NODE_VAR structure: + * var_list->car: cons-list of pre-splat variables + * var_list->cdr->car: splat varnode (not a cons-list) + * var_list->cdr->cdr->car: cons-list of post-splat variables */ + node *var_list = for_node(tree)->var; + if (var_list) { + dump_recur(mrb, var_list->car, offset+2); + if (var_list && var_list->cdr) { + /* Second element is a varnode, not a cons-list */ + dump_prefix(offset+1, lineno); + printf("splat var:\n"); + dump_node(mrb, var_list->cdr->car, offset+2); + if (var_list->cdr->cdr) { + /* Third element is a cons-list of post-splat variables */ + dump_prefix(offset+1, lineno); + printf("post var:\n"); + dump_recur(mrb, var_list->cdr->cdr->car, offset+2); + } + } + } + } + if (for_node(tree)->iterable) { + dump_prefix(offset+1, lineno); + printf("iterable:\n"); + dump_node(mrb, for_node(tree)->iterable, offset+2); + } + if (for_node(tree)->body) { + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, for_node(tree)->body, offset+2); + } break; - case NODE_INT: - printf("NODE_INT %s base %d\n", (char*)tree->car, intn(tree->cdr->car)); + case NODE_DOT2: + printf("NODE_DOT2:\n"); + { + if (dot2_node(tree)->left) { + dump_prefix(offset+1, lineno); + printf("left:\n"); + dump_node(mrb, dot2_node(tree)->left, offset+2); + } + if (dot2_node(tree)->right) { + dump_prefix(offset+1, lineno); + printf("right:\n"); + dump_node(mrb, dot2_node(tree)->right, offset+2); + } + } break; - case NODE_FLOAT: - printf("NODE_FLOAT %s\n", (char*)tree); + case NODE_DOT3: + printf("NODE_DOT3:\n"); + { + if (dot3_node(tree)->left) { + dump_prefix(offset+1, lineno); + printf("left:\n"); + dump_node(mrb, dot3_node(tree)->left, offset+2); + } + if (dot3_node(tree)->right) { + dump_prefix(offset+1, lineno); + printf("right:\n"); + dump_node(mrb, dot3_node(tree)->right, offset+2); + } + } break; - case NODE_NEGATE: - printf("NODE_NEGATE:\n"); - mrb_parser_dump(mrb, tree, offset+1); + case NODE_COLON2: + printf("NODE_COLON2:\n"); + if (colon2_node(tree)->base) { + dump_prefix(offset+1, lineno); + printf("base:\n"); + dump_node(mrb, colon2_node(tree)->base, offset+2); + } + dump_prefix(offset+1, lineno); + printf("name: %s\n", mrb_sym_name(mrb, colon2_node(tree)->name)); break; - case NODE_STR: - printf("NODE_STR %s len %d\n", str_dump(mrb, (char*)tree->car, intn(tree->cdr)), intn(tree->cdr)); + case NODE_COLON3: + printf("NODE_COLON3: ::%s\n", mrb_sym_name(mrb, colon3_node(tree)->name)); break; - case NODE_DSTR: - printf("NODE_DSTR:\n"); - dump_recur(mrb, tree, offset+1); + case NODE_HASH: + printf("NODE_HASH:\n"); + { + node *pairs = hash_node(tree)->pairs; + while (pairs) { + dump_prefix(offset+1, lineno); + printf("key:\n"); + if (node_to_sym(pairs->car->car) == MRB_OPSYM(pow)) { + dump_prefix(offset+2, lineno); + printf("**\n"); + } + else { + dump_node(mrb, pairs->car->car, offset+2); + } + dump_prefix(offset+1, lineno); + printf("value:\n"); + dump_node(mrb, pairs->car->cdr, offset+2); + pairs = pairs->cdr; + } + } break; - case NODE_XSTR: - printf("NODE_XSTR %s len %d\n", str_dump(mrb, (char*)tree->car, intn(tree->cdr)), intn(tree->cdr)); + case NODE_SPLAT: + printf("NODE_SPLAT:\n"); + dump_node(mrb, splat_node(tree)->value, offset+1); break; - case NODE_DXSTR: - printf("NODE_DXSTR:\n"); - dump_recur(mrb, tree, offset+1); + case NODE_OP_ASGN: + printf("NODE_OP_ASGN:\n"); + dump_prefix(offset+1, lineno); + printf("lhs:\n"); + dump_node(mrb, op_asgn_node(tree)->lhs, offset+2); + dump_prefix(offset+1, lineno); + printf("op='%s' (%d)\n", mrb_sym_name(mrb, op_asgn_node(tree)->op), (int)op_asgn_node(tree)->op); + dump_node(mrb, op_asgn_node(tree)->rhs, offset+1); break; - case NODE_REGX: - printf("NODE_REGX /%s/%s\n", (char*)tree->car, (char*)tree->cdr); + case NODE_SUPER: + printf("NODE_SUPER:\n"); + if (super_node(tree)->args) { + dump_callargs(mrb, super_node(tree)->args, offset, lineno); + } break; - case NODE_DREGX: - printf("NODE_DREGX:\n"); - dump_recur(mrb, tree->car, offset+1); - dump_prefix(tree, offset); - printf("tail: %s\n", (char*)tree->cdr->cdr->car); - if (tree->cdr->cdr->cdr->car) { - dump_prefix(tree, offset); - printf("opt: %s\n", (char*)tree->cdr->cdr->cdr->car); - } - if (tree->cdr->cdr->cdr->cdr) { - dump_prefix(tree, offset); - printf("enc: %s\n", (char*)tree->cdr->cdr->cdr->cdr); + case NODE_ZSUPER: + printf("NODE_ZSUPER:\n"); + if (super_node(tree)->args) { + dump_callargs(mrb, super_node(tree)->args, offset, lineno); } break; - case NODE_SYM: - printf("NODE_SYM :%s (%d)\n", mrb_sym_dump(mrb, sym(tree)), - intn(tree)); + case NODE_YIELD: + printf("NODE_YIELD:\n"); + if (yield_node(tree)->args) { + dump_callargs(mrb, yield_node(tree)->args, offset, lineno); + } break; - case NODE_DSYM: - printf("NODE_DSYM:\n"); - mrb_parser_dump(mrb, tree, offset+1); + case NODE_REDO: + printf("NODE_REDO\n"); break; - case NODE_WORDS: - printf("NODE_WORDS:\n"); - dump_recur(mrb, tree, offset+1); + case NODE_RETRY: + printf("NODE_RETRY\n"); break; - case NODE_SYMBOLS: - printf("NODE_SYMBOLS:\n"); - dump_recur(mrb, tree, offset+1); + case NODE_BACK_REF: + printf("NODE_BACK_REF: $%c\n", node_to_int(tree)); break; - case NODE_LITERAL_DELIM: - printf("NODE_LITERAL_DELIM\n"); + case NODE_NTH_REF: + printf("NODE_NTH_REF: $%d\n", node_to_int(tree)); break; - case NODE_SELF: - printf("NODE_SELF\n"); + case NODE_BLOCK_ARG: + printf("NODE_BLOCK_ARG:\n"); + dump_node(mrb, block_arg_node(tree)->value, offset+1); break; - case NODE_NIL: - printf("NODE_NIL\n"); + case NODE_REGX: + printf("NODE_REGX:\n"); + if (regx_node(tree)->list) { + dump_str(mrb, regx_node(tree)->list, offset+1, lineno); + } + if (regx_node(tree)->flags) { + dump_prefix(offset+1, lineno); + printf("flags: %s\n", regx_node(tree)->flags); + } + if (regx_node(tree)->encoding) { + dump_prefix(offset+1, lineno); + printf("encoding: %s\n", regx_node(tree)->encoding); + } break; - case NODE_TRUE: - printf("NODE_TRUE\n"); + case NODE_WORDS: + printf("NODE_WORDS:\n"); + if (words_node(tree)->args) { + node *list = words_node(tree)->args; + while (list && list->car) { + node *item = list->car; + if (item->car == 0 && item->cdr == 0) { + /* Skip separator (0 . 0) */ + } + else if (item->car && item->cdr) { + /* String item: (len . str) */ + dump_prefix(offset+1, lineno); + int len = node_to_int(item->car); + if (len >= 0 && len < 1000 && item->cdr) { + printf("word: \"%.*s\"\n", len, (char*)item->cdr); + } + } + list = list->cdr; + } + } break; - case NODE_FALSE: - printf("NODE_FALSE\n"); + case NODE_SYMBOLS: + printf("NODE_SYMBOLS:\n"); + if (symbols_node(tree)->args) { + node *list = symbols_node(tree)->args; + while (list && list->car) { + node *item = list->car; + if (item->car == 0 && item->cdr == 0) { + /* Skip separator (0 . 0) */ + } + else if (item->car && item->cdr) { + /* String item: (len . str) */ + dump_prefix(offset+1, lineno); + int len = node_to_int(item->car); + if (len >= 0 && len < 1000 && item->cdr) { + printf("symbol: \"%.*s\"\n", len, (char*)item->cdr); + } + } + list = list->cdr; + } + } break; case NODE_ALIAS: printf("NODE_ALIAS %s %s:\n", - mrb_sym_dump(mrb, sym(tree->car)), - mrb_sym_dump(mrb, sym(tree->cdr))); + mrb_sym_dump(mrb, node_to_sym(tree->car)), + mrb_sym_dump(mrb, node_to_sym(tree->cdr))); break; case NODE_UNDEF: @@ -7672,7 +8723,7 @@ mrb_parser_dump(mrb_state *mrb, node *tree, int offset) { node *t = tree; while (t) { - printf(" %s", mrb_sym_dump(mrb, sym(t->car))); + printf(" %s", mrb_sym_dump(mrb, node_to_sym(t->car))); t = t->cdr; } } @@ -7681,153 +8732,238 @@ mrb_parser_dump(mrb_state *mrb, node *tree, int offset) case NODE_CLASS: printf("NODE_CLASS:\n"); - if (tree->car->car == nint(0)) { - dump_prefix(tree, offset+1); - printf(":%s\n", mrb_sym_name(mrb, sym(tree->car->cdr))); - } - else if (tree->car->car == nint(1)) { - dump_prefix(tree, offset+1); - printf("::%s\n", mrb_sym_name(mrb, sym(tree->car->cdr))); - } - else { - mrb_parser_dump(mrb, tree->car->car, offset+1); - dump_prefix(tree, offset+1); - printf("::%s\n", mrb_sym_name(mrb, sym(tree->car->cdr))); + if (class_node(tree)->name) { + dump_cpath(mrb, module_node(tree)->name, offset+1, lineno); } - if (tree->cdr->car) { - dump_prefix(tree, offset+1); + if (class_node(tree)->superclass) { + dump_prefix(offset+1, lineno); printf("super:\n"); - mrb_parser_dump(mrb, tree->cdr->car, offset+2); + dump_node(mrb, class_node(tree)->superclass, offset+2); + } + if (class_node(tree)->body) { + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, class_node(tree)->body->cdr, offset+2); } - dump_prefix(tree, offset+1); - printf("body:\n"); - mrb_parser_dump(mrb, tree->cdr->cdr->car->cdr, offset+2); break; case NODE_MODULE: printf("NODE_MODULE:\n"); - if (tree->car->car == nint(0)) { - dump_prefix(tree, offset+1); - printf(":%s\n", mrb_sym_name(mrb, sym(tree->car->cdr))); - } - else if (tree->car->car == nint(1)) { - dump_prefix(tree, offset+1); - printf("::%s\n", mrb_sym_name(mrb, sym(tree->car->cdr))); + if (module_node(tree)->name) { + dump_cpath(mrb, module_node(tree)->name, offset+1, lineno); } - else { - mrb_parser_dump(mrb, tree->car->car, offset+1); - dump_prefix(tree, offset+1); - printf("::%s\n", mrb_sym_name(mrb, sym(tree->car->cdr))); + if (module_node(tree)->body) { + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, module_node(tree)->body->cdr, offset+2); } - dump_prefix(tree, offset+1); - printf("body:\n"); - mrb_parser_dump(mrb, tree->cdr->car->cdr, offset+2); break; case NODE_SCLASS: printf("NODE_SCLASS:\n"); - mrb_parser_dump(mrb, tree->car, offset+1); - dump_prefix(tree, offset+1); - printf("body:\n"); - mrb_parser_dump(mrb, tree->cdr->car->cdr, offset+2); - break; - - case NODE_DEF: - printf("NODE_DEF:\n"); - dump_prefix(tree, offset+1); - printf("%s\n", mrb_sym_dump(mrb, sym(tree->car))); - tree = tree->cdr; - { - node *n2 = tree->car; - mrb_bool first_lval = TRUE; - - if (n2 && (n2->car || n2->cdr)) { - dump_prefix(n2, offset+1); - printf("local variables:\n"); - dump_prefix(n2, offset+2); - while (n2) { - if (n2->car) { - if (!first_lval) printf(", "); - printf("%s", mrb_sym_name(mrb, sym(n2->car))); - first_lval = FALSE; - } - n2 = n2->cdr; - } - printf("\n"); - } + if (sclass_node(tree)->obj) { + dump_prefix(offset+1, lineno); + printf("obj:\n"); + dump_node(mrb, sclass_node(tree)->obj, offset+2); } - tree = tree->cdr; - if (tree->car) { - dump_args(mrb, tree->car, offset); + if (sclass_node(tree)->body) { + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, sclass_node(tree)->body->cdr, offset+2); } - mrb_parser_dump(mrb, tree->cdr->car, offset+1); break; case NODE_SDEF: - printf("NODE_SDEF:\n"); - mrb_parser_dump(mrb, tree->car, offset+1); - tree = tree->cdr; - dump_prefix(tree, offset+1); - printf(":%s\n", mrb_sym_dump(mrb, sym(tree->car))); - tree = tree->cdr->cdr; - if (tree->car) { - dump_args(mrb, tree->car, offset+1); + printf("NODE_SDEF: %s\n", mrb_sym_dump(mrb, def_node(tree)->name)); + if (sdef_node(tree)->obj) { + dump_prefix(offset+1, lineno); + printf("recv:\n"); + dump_node(mrb, sdef_node(tree)->obj, offset+2); + } + if (sdef_node(tree)->args) { + dump_args(mrb, sdef_node(tree)->args, offset+1, lineno); + } + if (sdef_node(tree)->locals) { + dump_locals(mrb, sdef_node(tree)->locals, offset+1, lineno); + } + if (sdef_node(tree)->body) { + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, sdef_node(tree)->body, offset+2); } - tree = tree->cdr; - mrb_parser_dump(mrb, tree->car, offset+1); break; case NODE_POSTEXE: printf("NODE_POSTEXE:\n"); - mrb_parser_dump(mrb, tree, offset+1); + dump_node(mrb, tree, offset+1); break; case NODE_HEREDOC: - printf("NODE_HEREDOC (<<%s):\n", ((parser_heredoc_info*)tree)->term); - dump_recur(mrb, ((parser_heredoc_info*)tree)->doc, offset+1); + printf("NODE_HEREDOC:\n"); + if (heredoc_node(tree)->info.term) { + dump_prefix(offset+1, lineno); + printf("terminator: \"%s\"\n", heredoc_node(tree)->info.term); + } + if (heredoc_node(tree)->info.doc) { + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_str(mrb, heredoc_node(tree)->info.doc, offset+2, lineno); + } + if (heredoc_node(tree)->info.allow_indent) { + dump_prefix(offset+1, lineno); + printf("allow_indent: true\n"); + } + if (heredoc_node(tree)->info.remove_indent) { + dump_prefix(offset+1, lineno); + printf("remove_indent: true\n"); + } break; - case NODE_ARGS_TAIL: - printf("NODE_ARGS_TAIL:\n"); - { - node *kws = tree->car; - - while (kws) { - mrb_parser_dump(mrb, kws->car, offset+1); - kws = kws->cdr; + case NODE_CASE_MATCH: + printf("NODE_CASE_MATCH:\n"); + if (case_match_node(tree)->value) { + dump_prefix(offset+1, lineno); + printf("value:\n"); + dump_node(mrb, case_match_node(tree)->value, offset+2); + } + if (case_match_node(tree)->in_clauses) { + node *in_clause = case_match_node(tree)->in_clauses; + while (in_clause) { + dump_node(mrb, in_clause->car, offset+1); + in_clause = in_clause->cdr; } } - tree = tree->cdr; - if (tree->car) { - mrb_assert(intn(tree->car->car) == NODE_KW_REST_ARGS); - mrb_parser_dump(mrb, tree->car, offset+1); + break; + + case NODE_IN: + printf("NODE_IN:\n"); + if (in_node(tree)->pattern) { + dump_prefix(offset+1, lineno); + printf("pattern:\n"); + dump_node(mrb, in_node(tree)->pattern, offset+2); } - tree = tree->cdr; - if (tree->car) { - dump_prefix(tree, offset+1); - printf("block='%s'\n", mrb_sym_name(mrb, sym(tree->car))); + if (in_node(tree)->guard) { + dump_prefix(offset+1, lineno); + printf("guard (%s):\n", in_node(tree)->guard_is_unless ? "unless" : "if"); + dump_node(mrb, in_node(tree)->guard, offset+2); + } + if (in_node(tree)->body) { + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, in_node(tree)->body, offset+2); } break; - case NODE_KW_ARG: - printf("NODE_KW_ARG %s:\n", mrb_sym_name(mrb, sym(tree->car))); - mrb_parser_dump(mrb, tree->cdr->car, offset + 1); + case NODE_PAT_VALUE: + printf("NODE_PAT_VALUE:\n"); + if (pat_value_node(tree)->value) { + dump_node(mrb, pat_value_node(tree)->value, offset+1); + } break; - case NODE_KW_REST_ARGS: - if (tree) - printf("NODE_KW_REST_ARGS %s\n", mrb_sym_name(mrb, sym(tree))); - else - printf("NODE_KW_REST_ARGS\n"); + case NODE_PAT_VAR: + if (pat_var_node(tree)->name) { + printf("NODE_PAT_VAR: %s\n", mrb_sym_dump(mrb, pat_var_node(tree)->name)); + } + else { + printf("NODE_PAT_VAR: _ (wildcard)\n"); + } + break; + + case NODE_PAT_PIN: + printf("NODE_PAT_PIN: ^%s\n", mrb_sym_dump(mrb, pat_pin_node(tree)->name)); + break; + + case NODE_PAT_AS: + printf("NODE_PAT_AS: => %s\n", mrb_sym_dump(mrb, pat_as_node(tree)->name)); + if (pat_as_node(tree)->pattern) { + dump_prefix(offset+1, lineno); + printf("pattern:\n"); + dump_node(mrb, pat_as_node(tree)->pattern, offset+2); + } + break; + + case NODE_PAT_ALT: + printf("NODE_PAT_ALT:\n"); + if (pat_alt_node(tree)->left) { + dump_prefix(offset+1, lineno); + printf("left:\n"); + dump_node(mrb, pat_alt_node(tree)->left, offset+2); + } + if (pat_alt_node(tree)->right) { + dump_prefix(offset+1, lineno); + printf("right:\n"); + dump_node(mrb, pat_alt_node(tree)->right, offset+2); + } + break; + + case NODE_PAT_ARRAY: + printf("NODE_PAT_ARRAY:\n"); + if (pat_array_node(tree)->pre) { + dump_prefix(offset+1, lineno); + printf("pre:\n"); + dump_recur(mrb, pat_array_node(tree)->pre, offset+2); + } + if (pat_array_node(tree)->rest) { + dump_prefix(offset+1, lineno); + if (pat_array_node(tree)->rest == (node*)-1) { + printf("rest: * (anonymous)\n"); + } + else { + printf("rest:\n"); + dump_node(mrb, pat_array_node(tree)->rest, offset+2); + } + } + if (pat_array_node(tree)->post) { + dump_prefix(offset+1, lineno); + printf("post:\n"); + dump_recur(mrb, pat_array_node(tree)->post, offset+2); + } + break; + + case NODE_PAT_HASH: + printf("NODE_PAT_HASH:\n"); + if (pat_hash_node(tree)->pairs) { + dump_prefix(offset+1, lineno); + printf("pairs:\n"); + dump_recur(mrb, pat_hash_node(tree)->pairs, offset+2); + } + if (pat_hash_node(tree)->rest) { + dump_prefix(offset+1, lineno); + if (pat_hash_node(tree)->rest == (node*)-1) { + printf("rest: **nil\n"); + } + else { + printf("rest:\n"); + dump_node(mrb, pat_hash_node(tree)->rest, offset+2); + } + } + break; + + case NODE_MATCH_PAT: + printf("NODE_MATCH_PAT%s:\n", match_pat_node(tree)->raise_on_fail ? " (=>)" : " (in)"); + dump_prefix(offset+1, lineno); + printf("value:\n"); + dump_node(mrb, match_pat_node(tree)->value, offset+2); + dump_prefix(offset+1, lineno); + printf("pattern:\n"); + dump_node(mrb, match_pat_node(tree)->pattern, offset+2); break; default: - printf("node type: %d (0x%x)\n", nodetype, (unsigned)nodetype); + /* Fallback: unknown node type - skip like codegen.c does */ + printf("unknown node type %d (0x%x)\n", nodetype, (unsigned)nodetype); break; } #endif } +void +mrb_parser_dump(mrb_state *mrb, node *tree, int offset) +{ + dump_node(mrb, tree, offset); +} + typedef mrb_bool mrb_parser_foreach_top_variable_func(mrb_state *mrb, mrb_sym sym, void *user); void mrb_parser_foreach_top_variable(mrb_state *mrb, struct mrb_parser_state *p, mrb_parser_foreach_top_variable_func *func, void *user); @@ -7835,11 +8971,15 @@ void mrb_parser_foreach_top_variable(mrb_state *mrb, struct mrb_parser_state *p, mrb_parser_foreach_top_variable_func *func, void *user) { const mrb_ast_node *n = p->tree; - if ((intptr_t)n->car == NODE_SCOPE) { - n = n->cdr->car; + if (node_type_p((node*)n, NODE_SCOPE)) { + /* Extract locals from variable-sized NODE_SCOPE */ + struct mrb_ast_scope_node *scope = scope_node(n); + n = scope->locals; for (; n; n = n->cdr) { - mrb_sym sym = sym(n->car); - if (sym && !func(mrb, sym, user)) break; + mrb_sym sym = node_to_sym(n->car); + if (sym != 0) { + if (!func(mrb, sym, user)) break; + } } } } diff --git a/mrbgems/mruby-compiler/core/y.tab.c b/mrbgems/mruby-compiler/core/y.tab.c index dc9996ff46..e847aff084 100644 --- a/mrbgems/mruby-compiler/core/y.tab.c +++ b/mrbgems/mruby-compiler/core/y.tab.c @@ -1,8 +1,8 @@ -/* A Bison parser, made by GNU Bison 3.5.1. */ +/* A Bison parser, made by Lrama 0.7.0. */ /* Bison implementation for Yacc-like parsers in C - Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2020 Free Software Foundation, + Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2021 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -16,7 +16,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* As a special exception, you may create a larger work that contains part or all of the Bison parser skeleton and distribute that work @@ -34,6 +34,10 @@ /* C LALR(1) parser skeleton written by Richard Stallman, by simplifying the original so-called "semantic" parser. */ +/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual, + especially those whose name start with YY_ or yy_. They are + private implementation details that can be changed or removed. */ + /* All symbols defined below should begin with yy or YY, to avoid infringing on user name space. This should be done even for local variables, as they might otherwise be expanded by user macros. @@ -41,14 +45,11 @@ define necessary library symbols; they are noted "INFRINGES ON USER NAME SPACE" below. */ -/* Undocumented macros, especially those whose name start with YY_, - are private implementation details. Do not rely on them. */ - -/* Identify Bison output. */ -#define YYBISON 1 +/* Identify Bison output, and Bison version. */ +#define YYBISON 30802 -/* Bison version. */ -#define YYBISON_VERSION "3.5.1" +/* Bison version string. */ +#define YYBISON_VERSION "3.8.2" /* Skeleton name. */ #define YYSKELETON_NAME "yacc.c" @@ -63,8 +64,6 @@ #define YYPULL 1 - - /* First part of user prologue. */ #line 7 "mrbgems/mruby-compiler/core/parse.y" @@ -75,7 +74,8 @@ #define YYSTACK_USE_ALLOCA 1 #include -#include +#include +#include #include #include #include @@ -85,22 +85,34 @@ #include #include #include -#include #include "node.h" #define YYLEX_PARAM p +#define mrbc_malloc(s) mrb_basic_alloc_func(NULL,(s)) +#define mrbc_realloc(p,s) mrb_basic_alloc_func((p),(s)) +#define mrbc_free(p) mrb_basic_alloc_func((p),0) + typedef mrb_ast_node node; typedef struct mrb_parser_state parser_state; typedef struct mrb_parser_heredoc_info parser_heredoc_info; static int yyparse(parser_state *p); -static int yylex(void *lval, parser_state *p); -static void yyerror(parser_state *p, const char *s); +static int yylex(void *lval, void *lp, parser_state *p); +static void yyerror(void *lp, parser_state *p, const char *s); static void yywarning(parser_state *p, const char *s); static void backref_error(parser_state *p, node *n); static void void_expr_error(parser_state *p, node *n); static void tokadd(parser_state *p, int32_t c); +static const char* tok(parser_state *p); +static int toklen(parser_state *p); + +/* Forward declarations for variable-sized simple node functions */ + +/* Forward declarations for variable-sized advanced node functions */ + +/* Helper function to check node type for both traditional and variable-sized nodes */ +static mrb_bool node_type_p(node *n, enum node_type type); #define identchar(c) (ISALNUM(c) || (c) == '_' || !ISASCII(c)) @@ -121,19 +133,7 @@ typedef unsigned int stack_type; #define CMDARG_LEXPOP() BITSTACK_LEXPOP(p->cmdarg_stack) #define CMDARG_P() BITSTACK_SET_P(p->cmdarg_stack) -#define SET_LINENO(c,n) ((c)->lineno = (n)) -#define NODE_LINENO(c,n) do {\ - if (n) {\ - (c)->filename_index = (n)->filename_index;\ - (c)->lineno = (n)->lineno;\ - }\ -} while (0) - -#define sym(x) ((mrb_sym)(intptr_t)(x)) -#define nsym(x) ((node*)(intptr_t)(x)) -#define nint(x) ((node*)(intptr_t)(x)) -#define intn(x) ((int)(intptr_t)(x)) -#define typen(x) ((enum node_type)(intptr_t)(x)) +#define SET_LINENO(c,n) (((struct mrb_ast_var_header*)(c))->lineno = (n)) #define NUM_SUFFIX_R (1<<0) #define NUM_SUFFIX_I (1<<1) @@ -152,7 +152,7 @@ intern_gen(parser_state *p, const char *s, size_t len) } #define intern(s,len) intern_gen(p,(s),(len)) -#define intern_op(op) MRB_OPSYM_2(p->mrb, op) +#define intern_op(op) MRB_OPSYM(op) static mrb_sym intern_numparam_gen(parser_state *p, int num) @@ -174,7 +174,7 @@ cons_free_gen(parser_state *p, node *cons) static void* parser_palloc(parser_state *p, size_t size) { - void *m = mrb_pool_alloc(p->pool, size); + void *m = mempool_alloc(p->pool, size); if (!m) { MRB_THROW(p->mrb->jmp); @@ -187,27 +187,54 @@ parser_palloc(parser_state *p, size_t size) static node* cons_gen(parser_state *p, node *car, node *cdr) { - node *c; + struct mrb_ast_node *c; + /* Try to reuse from free list first - only for 16-byte nodes */ if (p->cells) { - c = p->cells; + c = (struct mrb_ast_node*)p->cells; p->cells = p->cells->cdr; } else { - c = (node*)parser_palloc(p, sizeof(mrb_ast_node)); + c = (struct mrb_ast_node*)parser_palloc(p, sizeof(struct mrb_ast_node)); } - c->car = car; c->cdr = cdr; - c->lineno = p->lineno; - c->filename_index = p->current_filename_index; - /* beginning of next partial file; need to point the previous file */ + /* Don't initialize location fields for structure nodes - saves CPU */ + return (node*)c; +} + +/* Head-only location optimization: separate functions for head vs structure nodes */ +#define cons(a,b) cons_gen(p,(a),(b)) /* Structure nodes - no location */ +/* Initialize variable node header */ +static void +init_var_header(struct mrb_ast_var_header *header, parser_state *p, enum node_type type) +{ + header->lineno = p->lineno; + header->filename_index = p->current_filename_index; + header->node_type = (uint8_t)type; + + /* Handle file boundary edge case: this node is reduced from a token that + was buffered by bison lookahead before partial_hook switched the file, + so attribute it to the previous file at its last known lineno rather + than the new file at lineno=0. */ if (p->lineno == 0 && p->current_filename_index > 0) { - c->filename_index-- ; + header->filename_index--; + header->lineno = p->prev_file_lineno; } - return c; } -#define cons(a,b) cons_gen(p,(a),(b)) + +/* Combined allocate + init header helper */ +static inline void* +new_node(parser_state *p, size_t size, enum node_type type) +{ + void *n = parser_palloc(p, size); + init_var_header((struct mrb_ast_var_header*)n, p, type); + return n; +} + +/* Type-safe macro wrapper for node allocation */ +#define NEW_NODE(type_name, node_type) \ + (struct mrb_ast_##type_name##_node*)new_node(p, sizeof(struct mrb_ast_##type_name##_node), node_type) static node* list1_gen(parser_state *p, node *a) @@ -219,38 +246,17 @@ list1_gen(parser_state *p, node *a) static node* list2_gen(parser_state *p, node *a, node *b) { - return cons(a, cons(b,0)); + return cons(a, cons(b, 0)); } #define list2(a,b) list2_gen(p, (a),(b)) static node* list3_gen(parser_state *p, node *a, node *b, node *c) { - return cons(a, cons(b, cons(c,0))); + return cons(a, cons(b, cons(c, 0))); } #define list3(a,b,c) list3_gen(p, (a),(b),(c)) -static node* -list4_gen(parser_state *p, node *a, node *b, node *c, node *d) -{ - return cons(a, cons(b, cons(c, cons(d, 0)))); -} -#define list4(a,b,c,d) list4_gen(p, (a),(b),(c),(d)) - -static node* -list5_gen(parser_state *p, node *a, node *b, node *c, node *d, node *e) -{ - return cons(a, cons(b, cons(c, cons(d, cons(e, 0))))); -} -#define list5(a,b,c,d,e) list5_gen(p, (a),(b),(c),(d),(e)) - -static node* -list6_gen(parser_state *p, node *a, node *b, node *c, node *d, node *e, node *f) -{ - return cons(a, cons(b, cons(c, cons(d, cons(e, cons(f, 0)))))); -} -#define list6(a,b,c,d,e,f) list6_gen(p, (a),(b),(c),(d),(e),(f)) - static node* append_gen(parser_state *p, node *a, node *b) { @@ -347,7 +353,7 @@ local_var_p(parser_state *p, mrb_sym sym) while (l) { node *n = l->car; while (n) { - if (sym(n->car) == sym) return TRUE; + if (node_to_sym(n->car) == sym) return TRUE; n = n->cdr; } l = l->cdr; @@ -376,17 +382,17 @@ local_add_f(parser_state *p, mrb_sym sym) if (p->locals) { node *n = p->locals->car; while (n) { - if (sym(n->car) == sym) { + if (node_to_sym(n->car) == sym) { mrb_int len; const char* name = mrb_sym_name_len(p->mrb, sym, &len); if (len > 0 && name[0] != '_') { - yyerror(p, "duplicated argument name"); + yyerror(NULL, p, "duplicated argument name"); return; } } n = n->cdr; } - p->locals->car = push(p->locals->car, nsym(sym)); + p->locals->car = push(p->locals->car, sym_to_node(sym)); } } @@ -398,12 +404,8 @@ local_add(parser_state *p, mrb_sym sym) } } -static void -local_add_blk(parser_state *p, mrb_sym blk) -{ - /* allocate register for block */ - local_add_f(p, blk ? blk : 0); -} +/* allocate register for block */ +#define local_add_blk(p) local_add_f(p, 0) static void local_add_kw(parser_state *p, mrb_sym kwd) @@ -418,16 +420,47 @@ locals_node(parser_state *p) return p->locals ? p->locals->car : NULL; } +/* Helper function to check node type for both traditional and variable-sized nodes */ +static mrb_bool +node_type_p(node *n, enum node_type type) +{ + if (!n) return FALSE; + + /* Check if this is a variable-sized node */ + struct mrb_ast_var_header *header = (struct mrb_ast_var_header*)n; + return ((enum node_type)header->node_type == type); +} + +/* Helper functions for variable-sized node detection */ +static enum node_type +node_type(node *n) +{ + if (!n) return (enum node_type)0; + + /* Try to interpret as variable-sized node */ + struct mrb_ast_var_header *header = (struct mrb_ast_var_header*)n; + enum node_type type = (enum node_type)header->node_type; + + /* Validate that the node type is within valid range for variable-sized nodes */ + if (type >= NODE_SCOPE && type < NODE_LAST) { + return type; + } + + /* If node type is invalid, this is likely a cons-list node */ + /* Return a special sentinel value to indicate cons-list fallback */ + return NODE_LAST; /* Use NODE_LAST as sentinel for cons-list nodes */ +} + static void nvars_nest(parser_state *p) { - p->nvars = cons(nint(0), p->nvars); + p->nvars = cons(int_to_node(0), p->nvars); } static void nvars_block(parser_state *p) { - p->nvars = cons(nint(-2), p->nvars); + p->nvars = cons(int_to_node(-2), p->nvars); } static void @@ -436,30 +469,56 @@ nvars_unnest(parser_state *p) p->nvars = p->nvars->cdr; } -/* (:scope (vars..) (prog...)) */ +/* struct: scope_node(locals, body) */ static node* new_scope(parser_state *p, node *body) { - return cons((node*)NODE_SCOPE, cons(locals_node(p), body)); + struct mrb_ast_scope_node *scope_node = NEW_NODE(scope, NODE_SCOPE); + scope_node->locals = locals_node(p); + scope_node->body = body; + return (node*)scope_node; +} + +/* struct: stmts_node(stmts) - uses cons list */ +static node* +new_stmts(parser_state *p, node *body) +{ + struct mrb_ast_stmts_node *n = NEW_NODE(stmts, NODE_STMTS); + n->stmts = body ? list1(body) : 0; /* Wrap single statement in cons-list */ + + return (node*)n; +} + +/* Helper: push statement to stmts node */ +static node* +stmts_push(parser_state *p, node *stmts, node *stmt) +{ + struct mrb_ast_stmts_node *n = stmts_node(stmts); + n->stmts = push(n->stmts, stmt); + return stmts; } -/* (:begin prog...) */ +/* struct: begin_node(body) */ static node* new_begin(parser_state *p, node *body) { - if (body) { - return list2((node*)NODE_BEGIN, body); - } - return cons((node*)NODE_BEGIN, 0); + struct mrb_ast_begin_node *begin_node = NEW_NODE(begin, NODE_BEGIN); + begin_node->body = body; + return (node*)begin_node; } #define newline_node(n) (n) -/* (:rescue body rescue else) */ +/* struct: rescue_node(body, rescue_clauses, else_clause) */ static node* new_rescue(parser_state *p, node *body, node *resq, node *els) { - return list4((node*)NODE_RESCUE, body, resq, els); + struct mrb_ast_rescue_node *n = NEW_NODE(rescue, NODE_RESCUE); + n->body = body; + n->rescue_clauses = resq; + n->else_clause = els; + + return (node*)n; } static node* @@ -468,435 +527,695 @@ new_mod_rescue(parser_state *p, node *body, node *resq) return new_rescue(p, body, list1(list3(0, 0, resq)), 0); } -/* (:ensure body ensure) */ +/* struct: ensure_node(body, ensure_clause) */ static node* new_ensure(parser_state *p, node *a, node *b) { - return cons((node*)NODE_ENSURE, cons(a, cons(0, b))); + struct mrb_ast_ensure_node *ensure_node = NEW_NODE(ensure, NODE_ENSURE); + ensure_node->body = a; + ensure_node->ensure_clause = b; + return (node*)ensure_node; } -/* (:nil) */ +/* struct: nil_node() */ static node* new_nil(parser_state *p) { - return list1((node*)NODE_NIL); + struct mrb_ast_nil_node *n = NEW_NODE(nil, NODE_NIL); + + return (node*)n; } -/* (:true) */ +/* struct: true_node() */ static node* new_true(parser_state *p) { - return list1((node*)NODE_TRUE); + struct mrb_ast_true_node *n = NEW_NODE(true, NODE_TRUE); + + return (node*)n; } -/* (:false) */ +/* struct: false_node() */ static node* new_false(parser_state *p) { - return list1((node*)NODE_FALSE); + struct mrb_ast_false_node *n = NEW_NODE(false, NODE_FALSE); + + return (node*)n; } -/* (:alias new old) */ +/* struct: alias_node(new_name, old_name) */ static node* new_alias(parser_state *p, mrb_sym a, mrb_sym b) { - return cons((node*)NODE_ALIAS, cons(nsym(a), nsym(b))); + struct mrb_ast_alias_node *alias_node = NEW_NODE(alias, NODE_ALIAS); + alias_node->new_name = a; + alias_node->old_name = b; + return (node*)alias_node; } -/* (:if cond then else) */ +/* struct: if_node(cond, then_body, else_body) */ static node* -new_if(parser_state *p, node *a, node *b, node *c) +new_if(parser_state *p, node *condition, node *then_body, node *else_body) { - void_expr_error(p, a); - return list4((node*)NODE_IF, a, b, c); + void_expr_error(p, condition); + + struct mrb_ast_if_node *n = NEW_NODE(if, NODE_IF); + n->condition = condition; + n->then_body = then_body; + n->else_body = else_body; + + return (node*)n; } -/* (:unless cond then else) */ +/* struct: while_node(cond, body) */ static node* -new_unless(parser_state *p, node *a, node *b, node *c) +new_while(parser_state *p, node *condition, node *body) { - void_expr_error(p, a); - return list4((node*)NODE_IF, a, c, b); + void_expr_error(p, condition); + + struct mrb_ast_while_node *n = NEW_NODE(while, NODE_WHILE); + n->condition = condition; + n->body = body; + + return (node*)n; } -/* (:while cond body) */ +/* struct: until_node(cond, body) */ static node* -new_while(parser_state *p, node *a, node *b) +new_until(parser_state *p, node *condition, node *body) { - void_expr_error(p, a); - return cons((node*)NODE_WHILE, cons(a, b)); + void_expr_error(p, condition); + + struct mrb_ast_until_node *n = NEW_NODE(until, NODE_UNTIL); + n->condition = condition; + n->body = body; + + return (node*)n; } -/* (:until cond body) */ +/* struct: while_node(cond, body) */ static node* -new_until(parser_state *p, node *a, node *b) +new_while_mod(parser_state *p, node *condition, node *body) { - void_expr_error(p, a); - return cons((node*)NODE_UNTIL, cons(a, b)); + node *while_node = new_while(p, condition, body); + struct mrb_ast_while_node *n = (struct mrb_ast_while_node*)while_node; + n->header.node_type = NODE_WHILE_MOD; + return while_node; +} + +/* struct: until_node(cond, body) */ +static node* +new_until_mod(parser_state *p, node *a, node *b) +{ + node *until_node = new_until(p, a, b); + struct mrb_ast_until_node *n = (struct mrb_ast_until_node*)until_node; + n->header.node_type = NODE_UNTIL_MOD; + return until_node; } -/* (:for var obj body) */ + +/* struct: for_node(var, obj, body) */ static node* new_for(parser_state *p, node *v, node *o, node *b) { void_expr_error(p, o); - return list4((node*)NODE_FOR, v, o, b); + + struct mrb_ast_for_node *n = NEW_NODE(for, NODE_FOR); + n->var = v; + n->iterable = o; + n->body = b; + + return (node*)n; } -/* (:case a ((when ...) body) ((when...) body)) */ +/* struct: case_node(expr, when_clauses) - uses cons list */ static node* new_case(parser_state *p, node *a, node *b) { - node *n = list2((node*)NODE_CASE, a); - node *n2 = n; - void_expr_error(p, a); - while (n2->cdr) { - n2 = n2->cdr; + + struct mrb_ast_case_node *n = NEW_NODE(case, NODE_CASE); + n->value = a; + n->body = b; + + return (node*)n; +} + +/* Pattern matching case/in expression */ +static node* +new_case_match(parser_state *p, node *val, node *in_clauses) +{ + void_expr_error(p, val); + + struct mrb_ast_case_match_node *n = NEW_NODE(case_match, NODE_CASE_MATCH); + n->value = val; + n->in_clauses = in_clauses; + + return (node*)n; +} + +/* Create value pattern node */ +static node* +new_pat_value(parser_state *p, node *val) +{ + struct mrb_ast_pat_value_node *n = NEW_NODE(pat_value, NODE_PAT_VALUE); + n->value = val; + return (node*)n; +} + +/* Create variable pattern node */ +static node* +new_pat_var(parser_state *p, mrb_sym name) +{ + struct mrb_ast_pat_var_node *n = NEW_NODE(pat_var, NODE_PAT_VAR); + n->name = name; + /* Register as local variable if not wildcard */ + if (name) { + local_add(p, name); } - n2->cdr = b; - return n; + return (node*)n; +} + +/* Create pin pattern node (^var) */ +static node* +new_pat_pin(parser_state *p, mrb_sym name) +{ + struct mrb_ast_pat_pin_node *n = NEW_NODE(pat_pin, NODE_PAT_PIN); + n->name = name; + /* Pin operator references existing variable, does not create new binding */ + return (node*)n; +} + +/* Create as pattern node (pattern => var) */ +static node* +new_pat_as(parser_state *p, node *pattern, mrb_sym name) +{ + struct mrb_ast_pat_as_node *n = NEW_NODE(pat_as, NODE_PAT_AS); + n->pattern = pattern; + n->name = name; + local_add(p, name); + return (node*)n; +} + +/* Create alternative pattern node (pat1 | pat2) */ +static node* +new_pat_alt(parser_state *p, node *left, node *right) +{ + struct mrb_ast_pat_alt_node *n = NEW_NODE(pat_alt, NODE_PAT_ALT); + n->left = left; + n->right = right; + return (node*)n; +} + +/* Create array pattern node [a, b, *rest, c] */ +static node* +new_pat_array(parser_state *p, node *pre, node *rest, node *post) +{ + struct mrb_ast_pat_array_node *n = NEW_NODE(pat_array, NODE_PAT_ARRAY); + n->pre = pre; + n->rest = rest; + n->post = post; + return (node*)n; +} + +/* Create find pattern node [*pre, elems, *post] */ +static node* +new_pat_find(parser_state *p, node *pre, node *elems, node *post) +{ + struct mrb_ast_pat_find_node *n = NEW_NODE(pat_find, NODE_PAT_FIND); + n->pre = pre; + n->elems = elems; + n->post = post; + return (node*)n; +} + +/* Create hash pattern node {a:, b: x, **rest} */ +static node* +new_pat_hash(parser_state *p, node *pairs, node *rest) +{ + struct mrb_ast_pat_hash_node *n = NEW_NODE(pat_hash, NODE_PAT_HASH); + n->pairs = pairs; + n->rest = rest; + return (node*)n; +} + +/* Create one-line pattern matching node (expr in pattern / expr => pattern) */ +static node* +new_match_pat(parser_state *p, node *value, node *pattern, mrb_bool raise_on_fail) +{ + struct mrb_ast_match_pat_node *n = NEW_NODE(match_pat, NODE_MATCH_PAT); + n->value = value; + n->pattern = pattern; + n->raise_on_fail = raise_on_fail; + return (node*)n; +} + +/* Create in-clause node for case/in */ +static node* +new_in(parser_state *p, node *pattern, node *guard, node *body, mrb_bool guard_is_unless) +{ + struct mrb_ast_in_node *n = NEW_NODE(in, NODE_IN); + n->pattern = pattern; + n->guard = guard; + n->body = body; + n->guard_is_unless = guard_is_unless; + return (node*)n; } -/* (:postexe a) */ +/* struct: postexe_node(body) */ static node* new_postexe(parser_state *p, node *a) { - return cons((node*)NODE_POSTEXE, a); + struct mrb_ast_postexe_node *postexe_node = NEW_NODE(postexe, NODE_POSTEXE); + postexe_node->body = a; + return (node*)postexe_node; } -/* (:self) */ +/* struct: self_node() */ static node* new_self(parser_state *p) { - return list1((node*)NODE_SELF); + struct mrb_ast_self_node *n = NEW_NODE(self, NODE_SELF); + + return (node*)n; } -/* (:call a b c) */ +/* struct: call_node(receiver, method, args) */ static node* -new_call(parser_state *p, node *a, mrb_sym b, node *c, int pass) +new_call(parser_state *p, node *receiver, mrb_sym method, node *args, int pass) { - node *n = list4(nint(pass?NODE_CALL:NODE_SCALL), a, nsym(b), c); - void_expr_error(p, a); - NODE_LINENO(n, a); - return n; + /* Calculate size needed (fixed size now) */ struct mrb_ast_call_node *n = NEW_NODE(call, NODE_CALL); + n->receiver = receiver; + n->method_name = method; + n->safe_call = (pass == 0); /* pass == 0 means safe call (&.) */ + + /* Store args pointer directly - no need to unpack and repack */ + n->args = args; + + void_expr_error(p, receiver); + return (node*)n; } -/* (:fcall self mid args) */ +/* struct: fcall_node(method, args) */ static node* new_fcall(parser_state *p, mrb_sym b, node *c) { - node *n = list4((node*)NODE_FCALL, 0, nsym(b), c); - NODE_LINENO(n, c); - return n; + return new_call(p, NULL, b, c, '.'); } /* (a b . c) */ static node* new_callargs(parser_state *p, node *a, node *b, node *c) { - return cons(a, cons(b, c)); + /* Allocate struct mrb_ast_callargs (fixed size, like new_args) */ + struct mrb_ast_callargs *callargs = (struct mrb_ast_callargs*)parser_palloc(p, sizeof(struct mrb_ast_callargs)); + + /* Initialize members directly */ + callargs->regular_args = a; /* Cons list of regular arguments (preserves splat compatibility) */ + callargs->keyword_args = b; /* Keyword arguments hash node */ + callargs->block_arg = c; /* Block argument node */ + + /* Return direct cast to node (like new_args) */ + return (node*)callargs; } -/* (:super . c) */ +/* struct: super_node(args) */ static node* new_super(parser_state *p, node *c) { - return cons((node*)NODE_SUPER, c); + struct mrb_ast_super_node *n = NEW_NODE(super, NODE_SUPER); + n->args = c; + + return (node*)n; } -/* (:zsuper) */ +/* struct: zsuper_node() */ static node* new_zsuper(parser_state *p) { - return cons((node*)NODE_ZSUPER, 0); + struct mrb_ast_super_node *n = NEW_NODE(super, NODE_ZSUPER); + n->args = NULL; /* zsuper initially has no args, but may be added by call_with_block */ + return (node*)n; } -/* (:yield . c) */ +/* struct: yield_node(args) */ static node* new_yield(parser_state *p, node *c) { + /* Handle callargs structure - direct casting like new_args() */ if (c) { - if (c->cdr) { - if (c->cdr->cdr) { - yyerror(p, "both block arg and actual block given"); - } - if (c->cdr->car) { - return cons((node*)NODE_YIELD, push(c->car, c->cdr->car)); - } + struct mrb_ast_callargs *callargs = (struct mrb_ast_callargs*)c; + if (callargs->block_arg) { + yyerror(NULL, p, "both block arg and actual block given"); } - return cons((node*)NODE_YIELD, c->car); - } - return cons((node*)NODE_YIELD, 0); + } struct mrb_ast_yield_node *n = NEW_NODE(yield, NODE_YIELD); + n->args = c; + + return (node*)n; } -/* (:return . c) */ +/* struct: return_node(value) */ static node* new_return(parser_state *p, node *c) { - return cons((node*)NODE_RETURN, c); + struct mrb_ast_return_node *n = NEW_NODE(return, NODE_RETURN); + n->args = c; + + return (node*)n; } -/* (:break . c) */ +/* struct: break_node(value) */ static node* new_break(parser_state *p, node *c) { - return cons((node*)NODE_BREAK, c); + struct mrb_ast_break_node *n = NEW_NODE(break, NODE_BREAK); + n->value = c; + return (node*)n; } -/* (:next . c) */ +/* struct: next_node(value) */ static node* new_next(parser_state *p, node *c) { - return cons((node*)NODE_NEXT, c); + struct mrb_ast_next_node *n = NEW_NODE(next, NODE_NEXT); + n->value = c; + return (node*)n; } -/* (:redo) */ +/* struct: redo_node() */ static node* new_redo(parser_state *p) { - return list1((node*)NODE_REDO); + struct mrb_ast_redo_node *n = NEW_NODE(redo, NODE_REDO); + return (node*)n; } -/* (:retry) */ +/* struct: retry_node() */ static node* new_retry(parser_state *p) { - return list1((node*)NODE_RETRY); + struct mrb_ast_retry_node *n = NEW_NODE(retry, NODE_RETRY); + return (node*)n; } -/* (:dot2 a b) */ +/* struct: dot2_node(beg, end) */ static node* new_dot2(parser_state *p, node *a, node *b) { - return cons((node*)NODE_DOT2, cons(a, b)); + struct mrb_ast_dot2_node *n = NEW_NODE(dot2, NODE_DOT2); + n->left = a; + n->right = b; + + return (node*)n; } -/* (:dot3 a b) */ +/* struct: dot3_node(beg, end) */ static node* new_dot3(parser_state *p, node *a, node *b) { - return cons((node*)NODE_DOT3, cons(a, b)); + struct mrb_ast_dot3_node *n = NEW_NODE(dot3, NODE_DOT3); + n->left = a; + n->right = b; + + return (node*)n; } -/* (:colon2 b c) */ +/* struct: colon2_node(base, name) */ static node* new_colon2(parser_state *p, node *b, mrb_sym c) { void_expr_error(p, b); - return cons((node*)NODE_COLON2, cons(b, nsym(c))); + + struct mrb_ast_colon2_node *colon2_node = NEW_NODE(colon2, NODE_COLON2); + colon2_node->base = b; + colon2_node->name = c; + return (node*)colon2_node; } -/* (:colon3 . c) */ +/* struct: colon3_node(name) */ static node* new_colon3(parser_state *p, mrb_sym c) { - return cons((node*)NODE_COLON3, nsym(c)); + struct mrb_ast_colon3_node *colon3_node = NEW_NODE(colon3, NODE_COLON3); + colon3_node->name = c; + return (node*)colon3_node; } -/* (:and a b) */ +/* struct: and_node(left, right) */ static node* new_and(parser_state *p, node *a, node *b) { void_expr_error(p, a); - return cons((node*)NODE_AND, cons(a, b)); + + struct mrb_ast_and_node *n = NEW_NODE(and, NODE_AND); + n->left = a; + n->right = b; + + return (node*)n; } -/* (:or a b) */ +/* struct: or_node(left, right) */ static node* new_or(parser_state *p, node *a, node *b) { void_expr_error(p, a); - return cons((node*)NODE_OR, cons(a, b)); + + struct mrb_ast_or_node *n = NEW_NODE(or, NODE_OR); + n->left = a; + n->right = b; + + return (node*)n; } -/* (:array a...) */ +/* struct: array_node(elements) - uses cons list */ static node* new_array(parser_state *p, node *a) { - return cons((node*)NODE_ARRAY, a); + struct mrb_ast_array_node *n = NEW_NODE(array, NODE_ARRAY); + n->elements = a; + + return (node*)n; } -/* (:splat . a) */ +/* struct: splat_node(value) */ static node* new_splat(parser_state *p, node *a) { void_expr_error(p, a); - return cons((node*)NODE_SPLAT, a); + + struct mrb_ast_splat_node *splat_node = NEW_NODE(splat, NODE_SPLAT); + splat_node->value = a; + return (node*)splat_node; } -/* (:hash (k . v) (k . v)...) */ +/* struct: hash_node(pairs) - uses cons list */ static node* new_hash(parser_state *p, node *a) { - return cons((node*)NODE_HASH, a); -} + struct mrb_ast_hash_node *n = NEW_NODE(hash, NODE_HASH); + n->pairs = a; -/* (:kw_hash (k . v) (k . v)...) */ -static node* -new_kw_hash(parser_state *p, node *a) -{ - return cons((node*)NODE_KW_HASH, a); + return (node*)n; } /* (:sym . a) */ +/* Symbol node creation - supports both variable and legacy modes */ static node* new_sym(parser_state *p, mrb_sym sym) { - return cons((node*)NODE_SYM, nsym(sym)); -} - -static mrb_sym -new_strsym(parser_state *p, node* str) -{ - const char *s = (const char*)str->cdr->car; - size_t len = (size_t)str->cdr->cdr; + struct mrb_ast_sym_node *n = NEW_NODE(sym, NODE_SYM); + n->symbol = sym; - return mrb_intern(p->mrb, s, len); + return (node*)n; } -/* (:lvar . a) */ static node* -new_lvar(parser_state *p, mrb_sym sym) +new_xvar(parser_state *p, mrb_sym sym, enum node_type type) { - return cons((node*)NODE_LVAR, nsym(sym)); -} + struct mrb_ast_var_node *n = NEW_NODE(var, type); + n->symbol = sym; -/* (:gvar . a) */ -static node* -new_gvar(parser_state *p, mrb_sym sym) -{ - return cons((node*)NODE_GVAR, nsym(sym)); + return (node*)n; } -/* (:ivar . a) */ -static node* -new_ivar(parser_state *p, mrb_sym sym) -{ - return cons((node*)NODE_IVAR, nsym(sym)); -} +#define new_lvar(p, sym) new_xvar(p, sym, NODE_LVAR) +#define new_ivar(p, sym) new_xvar(p, sym, NODE_IVAR) +#define new_gvar(p, sym) new_xvar(p, sym, NODE_GVAR) +#define new_cvar(p, sym) new_xvar(p, sym, NODE_CVAR) -/* (:cvar . a) */ -static node* -new_cvar(parser_state *p, mrb_sym sym) +static mrb_sym +new_strsym(parser_state *p, node* str) { - return cons((node*)NODE_CVAR, nsym(sym)); + size_t len = (size_t)str->car; + const char *s = (const char*)str->cdr; + + return mrb_intern(p->mrb, s, len); } /* (:nvar . a) */ static node* new_nvar(parser_state *p, int num) { - return cons((node*)NODE_NVAR, nint(num)); + int nvar; + node *nvars = p->nvars->cdr; + while (nvars) { + nvar = node_to_int(nvars->car); + if (nvar == -2) break; /* top of the scope */ + if (nvar > 0) { + yyerror(NULL, p, "numbered parameter used in outer block"); + break; + } + nvars->car = int_to_node(-1); + nvars = nvars->cdr; + } + nvar = node_to_int(p->nvars->car); + if (nvar == -1) { + yyerror(NULL, p, "numbered parameter used in inner block"); + } + else { + p->nvars->car = int_to_node(nvar > num ? nvar : num); + } + struct mrb_ast_nvar_node *n = NEW_NODE(nvar, NODE_NVAR); + n->num = num; + return (node*)n; } -/* (:const . a) */ +/* struct: const_node(name) */ static node* new_const(parser_state *p, mrb_sym sym) { - return cons((node*)NODE_CONST, nsym(sym)); + struct mrb_ast_const_node *n = NEW_NODE(const, NODE_CONST); + n->symbol = sym; + + return (node*)n; } -/* (:undef a...) */ +/* struct: undef_node(syms) - uses cons list */ static node* -new_undef(parser_state *p, mrb_sym sym) +new_undef(parser_state *p, node *syms) { - return list2((node*)NODE_UNDEF, nsym(sym)); + struct mrb_ast_undef_node *undef_node = NEW_NODE(undef, NODE_UNDEF); + undef_node->syms = syms; + return (node*)undef_node; } -/* (:class class super body) */ +/* struct: class_node(path, super, body) */ static node* new_class(parser_state *p, node *c, node *s, node *b) { void_expr_error(p, s); - return list4((node*)NODE_CLASS, c, s, cons(locals_node(p), b)); + + struct mrb_ast_class_node *n = NEW_NODE(class, NODE_CLASS); + n->name = c; + n->superclass = s; + n->body = cons(locals_node(p), b); + + return (node*)n; } -/* (:sclass obj body) */ +/* struct: sclass_node(obj, body) */ static node* new_sclass(parser_state *p, node *o, node *b) { void_expr_error(p, o); - return list3((node*)NODE_SCLASS, o, cons(locals_node(p), b)); + + struct mrb_ast_sclass_node *n = NEW_NODE(sclass, NODE_SCLASS); + n->obj = o; + n->body = cons(locals_node(p), b); + + return (node*)n; } -/* (:module module body) */ +/* struct: module_node(path, body) */ static node* new_module(parser_state *p, node *m, node *b) { - return list3((node*)NODE_MODULE, m, cons(locals_node(p), b)); + struct mrb_ast_module_node *n = NEW_NODE(module, NODE_MODULE); + n->name = m; + n->body = cons(locals_node(p), b); + + return (node*)n; } -/* (:def m lv (arg . body)) */ +/* struct: def_node(name, args, body) */ static node* -new_def(parser_state *p, mrb_sym m, node *a, node *b) +new_def(parser_state *p, mrb_sym name) { - return list5((node*)NODE_DEF, nsym(m), 0, a, b); + struct mrb_ast_def_node *n = NEW_NODE(def, NODE_DEF); + n->name = name; + n->args = (struct mrb_ast_args *)int_to_node(p->cmdarg_stack); + n->locals = local_switch(p); + n->body = NULL; + + return (node*)n; } static void defn_setup(parser_state *p, node *d, node *a, node *b) { - node *n = d->cdr->cdr; + struct mrb_ast_def_node *n = def_node(d); + node *locals = n->locals; - n->car = locals_node(p); - p->cmdarg_stack = intn(n->cdr->car); - n->cdr->car = a; - local_resume(p, n->cdr->cdr->car); - n->cdr->cdr->car = b; + n->locals = locals_node(p); + p->cmdarg_stack = node_to_int(n->args); + n->args = (struct mrb_ast_args *)a; + n->body = b; + local_resume(p, locals); } -/* (:sdef obj m lv (arg . body)) */ +/* struct: sdef_node(obj, name, args, body) */ static node* -new_sdef(parser_state *p, node *o, mrb_sym m, node *a, node *b) +new_sdef(parser_state *p, node *o, mrb_sym name) { void_expr_error(p, o); - return list6((node*)NODE_SDEF, o, nsym(m), 0, a, b); -} - -static void -defs_setup(parser_state *p, node *d, node *a, node *b) -{ - node *n = d->cdr->cdr->cdr; - - n->car = locals_node(p); - p->cmdarg_stack = intn(n->cdr->car); - n->cdr->car = a; - local_resume(p, n->cdr->cdr->car); - n->cdr->cdr->car = b; -} -/* (:arg . sym) */ -static node* -new_arg(parser_state *p, mrb_sym sym) -{ - return cons((node*)NODE_ARG, nsym(sym)); + struct mrb_ast_sdef_node *sdef_node = NEW_NODE(sdef, NODE_SDEF); + sdef_node->obj = o; + sdef_node->name = name; + sdef_node->args = (struct mrb_ast_args *)int_to_node(p->cmdarg_stack); + sdef_node->locals = local_switch(p); + sdef_node->body = NULL; + return (node*)sdef_node; } static void local_add_margs(parser_state *p, node *n) { while (n) { - if (typen(n->car->car) == NODE_MASGN) { - node *t = n->car->cdr->cdr; + if (node_type(n->car) == NODE_MARG) { + struct mrb_ast_masgn_node *masgn_n = (struct mrb_ast_masgn_node*)n->car; + node *rhs = masgn_n->rhs; + + /* For parameter destructuring, rhs contains the locals */ + if (rhs) { + node *t = rhs; + while (t) { + local_add_f(p, node_to_sym(t->car)); + t = t->cdr; + } + /* Clear cons list RHS immediately after use */ + masgn_n->rhs = NULL; + } - n->car->cdr->cdr = NULL; - while (t) { - local_add_f(p, sym(t->car)); - t = t->cdr; + /* Process nested destructuring in lhs components */ + if (masgn_n->pre) { + local_add_margs(p, masgn_n->pre); + } + if (masgn_n->post) { + local_add_margs(p, masgn_n->post); } - local_add_margs(p, n->car->cdr->car->car); - local_add_margs(p, n->car->cdr->car->cdr->cdr->car); } n = n->cdr; } } + static void local_add_lv(parser_state *p, node *lv) { while (lv) { - local_add_f(p, sym(lv->car)); + local_add_f(p, node_to_sym(lv->car)); lv = lv->cdr; } } @@ -910,65 +1229,91 @@ local_add_lv(parser_state *p, node *lv) static node* new_args(parser_state *p, node *m, node *opt, mrb_sym rest, node *m2, node *tail) { - node *n; - local_add_margs(p, m); local_add_margs(p, m2); - n = cons(m2, tail); - n = cons(nsym(rest), n); - n = cons(opt, n); + + /* Save original optional arguments before processing */ + node *orig_opt = opt; + + /* Process optional arguments (keep original side effects) */ while (opt) { /* opt: (sym . (opt . lv)) -> (sym . opt) */ local_add_lv(p, opt->car->cdr->cdr); opt->car->cdr = opt->car->cdr->car; opt = opt->cdr; } - return cons(m, n); + + /* Allocate struct mrb_ast_args (no hdr) */ + struct mrb_ast_args *args = (struct mrb_ast_args*)parser_palloc(p, sizeof(struct mrb_ast_args)); + + /* Initialize members */ + args->mandatory_args = m; + args->optional_args = orig_opt; + args->rest_arg = rest; + args->post_mandatory_args = m2; + + /* Deconstruct tail cons list: (kws . (kwrest . blk)) */ + if (tail) { + args->keyword_args = (node*)tail->car; /* kws */ + args->kwrest_arg = (mrb_sym)(intptr_t)tail->cdr->car; /* kwrest */ + args->block_arg = (mrb_sym)(intptr_t)tail->cdr->cdr; /* blk */ + cons_free(tail->cdr); + cons_free(tail); + } + else { + args->keyword_args = NULL; + args->kwrest_arg = 0; + args->block_arg = 0; + } + + return (node*)args; } -/* (:args_tail keywords rest_keywords_sym block_sym) */ +/* struct: args_tail_node(kwargs, kwrest, block) */ static node* -new_args_tail(parser_state *p, node *kws, node *kwrest, mrb_sym blk) +new_args_tail(parser_state *p, node *kws, mrb_sym kwrest, mrb_sym blk) { node *k; if (kws || kwrest) { - local_add_kw(p, (kwrest && kwrest->cdr)? sym(kwrest->cdr) : 0); + local_add_kw(p, kwrest); } - local_add_blk(p, blk); + local_add_blk(p); + if (blk && blk != MRB_SYM(nil)) local_add_f(p, blk); /* allocate register for keywords arguments */ /* order is for Proc#parameters */ for (k = kws; k; k = k->cdr) { - if (!k->car->cdr->cdr->car) { /* allocate required keywords */ - local_add_f(p, sym(k->car->cdr->car)); + if (!k->car->cdr) { /* allocate required keywords - simplified structure: (key . NULL) */ + local_add_f(p, node_to_sym(k->car->car)); } } for (k = kws; k; k = k->cdr) { - if (k->car->cdr->cdr->car) { /* allocate keywords with default */ - local_add_lv(p, k->car->cdr->cdr->car->cdr); - k->car->cdr->cdr->car = k->car->cdr->cdr->car->car; - local_add_f(p, sym(k->car->cdr->car)); + if (k->car->cdr) { /* allocate keywords with default - simplified structure: (key . value) */ + local_add_lv(p, k->car->cdr->cdr); /* value->cdr for default args */ + k->car->cdr = k->car->cdr->car; /* value->car for default args */ + local_add_f(p, node_to_sym(k->car->car)); } } - return list4((node*)NODE_ARGS_TAIL, kws, kwrest, nsym(blk)); + /* Return cons list: (keyword . (kwrest . blk)) */ + return cons(kws, cons(sym_to_node(kwrest), sym_to_node(blk))); } -/* (:kw_arg kw_sym def_arg) */ +/* (kw_sym . def_arg) - simplified from NODE_KW_ARG wrapper */ static node* new_kw_arg(parser_state *p, mrb_sym kw, node *def_arg) { mrb_assert(kw); - return list3((node*)NODE_KW_ARG, nsym(kw), def_arg); + return cons(sym_to_node(kw), def_arg); } /* (:kw_rest_args . a) */ static node* new_kw_rest_args(parser_state *p, mrb_sym sym) { - return cons((node*)NODE_KW_REST_ARGS, nsym(sym)); + return sym_to_node(intern_op(pow)); /* Use ** symbol as direct marker */ } static node* @@ -978,27 +1323,30 @@ new_args_dots(parser_state *p, node *m) mrb_sym k = intern_op(pow); mrb_sym b = intern_op(and); local_add_f(p, r); - return new_args(p, m, 0, r, 0, - new_args_tail(p, 0, new_kw_rest_args(p, k), b)); + return new_args(p, m, 0, r, 0, new_args_tail(p, NULL, k, b)); } -/* (:block_arg . a) */ +/* struct: block_arg_node(value) */ static node* new_block_arg(parser_state *p, node *a) { - return cons((node*)NODE_BLOCK_ARG, a); + struct mrb_ast_block_arg_node *block_arg_node = NEW_NODE(block_arg, NODE_BLOCK_ARG); + block_arg_node->value = a; + return (node*)block_arg_node; } static node* setup_numparams(parser_state *p, node *a) { - int nvars = intn(p->nvars->car); + int nvars = node_to_int(p->nvars->car); if (nvars > 0) { int i; mrb_sym sym; - // m || opt || rest || tail - if (a && (a->car || (a->cdr && a->cdr->car) || (a->cdr->cdr && a->cdr->cdr->car) || (a->cdr->cdr->cdr->cdr && a->cdr->cdr->cdr->cdr->car))) { - yyerror(p, "ordinary parameter is defined"); + // Check if any arguments are already defined + struct mrb_ast_args *args = (struct mrb_ast_args *)a; + if (a && (args->mandatory_args || args->optional_args || args->rest_arg || + args->post_mandatory_args || args->keyword_args || args->kwrest_arg)) { + yyerror(NULL, p, "ordinary parameter is defined"); } else if (p->locals) { /* p->locals should not be NULL unless error happens before the point */ @@ -1010,8 +1358,8 @@ setup_numparams(parser_state *p, node *a) buf[1] = i+'0'; buf[2] = '\0'; sym = intern_cstr(buf); - args = cons(new_arg(p, sym), args); - p->locals->car = cons(nsym(sym), p->locals->car); + args = cons(new_lvar(p, sym), args); + p->locals->car = cons(sym_to_node(sym), p->locals->car); } a = new_args(p, args, 0, 0, 0, 0); } @@ -1019,244 +1367,332 @@ setup_numparams(parser_state *p, node *a) return a; } -/* (:block arg body) */ +/* struct: block_node(args, body) */ static node* new_block(parser_state *p, node *a, node *b) { - a = setup_numparams(p, a); - return list4((node*)NODE_BLOCK, locals_node(p), a, b); + a = setup_numparams(p, a); struct mrb_ast_block_node *n = NEW_NODE(block, NODE_BLOCK); + n->locals = locals_node(p); + n->args = (struct mrb_ast_args *)a; + n->body = b; + + return (node*)n; } -/* (:lambda arg body) */ +/* struct: lambda_node(args, body) */ static node* new_lambda(parser_state *p, node *a, node *b) { - return list4((node*)NODE_LAMBDA, locals_node(p), a, b); + a = setup_numparams(p, a); struct mrb_ast_lambda_node *lambda_node = NEW_NODE(lambda, NODE_LAMBDA); + lambda_node->locals = locals_node(p); + lambda_node->args = (struct mrb_ast_args *)a; + lambda_node->body = b; + return (node*)lambda_node; } -/* (:asgn lhs rhs) */ +/* struct: asgn_node(lhs, rhs) */ static node* new_asgn(parser_state *p, node *a, node *b) { void_expr_error(p, b); - return cons((node*)NODE_ASGN, cons(a, b)); + + struct mrb_ast_asgn_node *n = NEW_NODE(asgn, NODE_ASGN); + n->lhs = a; + n->rhs = b; + + return (node*)n; +} + +/* Helper function to create MASGN/MARG nodes */ +static node* +new_masgn_helper(parser_state *p, node *a, node *b, enum node_type node_type) +{ + struct mrb_ast_masgn_node *n = NEW_NODE(masgn, node_type); + + /* Extract pre, rest, post from cons list structure (a b c) */ + if (a) { + n->pre = a->car; /* Pre-splat variables */ + if (a->cdr) { + n->rest = a->cdr->car; /* Splat variable (or -1 for anonymous) */ + if (a->cdr->cdr) { + n->post = a->cdr->cdr->car; /* Post-splat variables */ + cons_free(a->cdr->cdr); + } + else { + n->post = NULL; + } + cons_free(a->cdr); + } + else { + n->rest = NULL; + n->post = NULL; + } + cons_free(a); + } + else { + n->pre = NULL; + n->rest = NULL; + n->post = NULL; + } + n->rhs = b; + + return (node*)n; } -/* (:masgn mlhs=(pre rest post) mrhs) */ +/* struct: masgn_node(lhs, rhs) */ static node* new_masgn(parser_state *p, node *a, node *b) { void_expr_error(p, b); - return cons((node*)NODE_MASGN, cons(a, b)); + return new_masgn_helper(p, a, b, NODE_MASGN); } -/* (:masgn mlhs mrhs) no check */ +/* (:marg mlhs mrhs) no check - for parameter destructuring */ static node* -new_masgn_param(parser_state *p, node *a, node *b) +new_marg(parser_state *p, node *a) { - return cons((node*)NODE_MASGN, cons(a, b)); + return new_masgn_helper(p, a, p->locals->car, NODE_MARG); } -/* (:asgn lhs rhs) */ +/* struct: op_asgn_node(lhs, op, rhs) */ static node* new_op_asgn(parser_state *p, node *a, mrb_sym op, node *b) { void_expr_error(p, b); - return list4((node*)NODE_OP_ASGN, a, nsym(op), b); + + struct mrb_ast_op_asgn_node *n = NEW_NODE(op_asgn, NODE_OP_ASGN); + n->lhs = a; + n->op = op; + n->rhs = b; + return (node*)n; } static node* -new_imaginary(parser_state *p, node *imaginary) +new_int_n(parser_state *p, int32_t val) { - return new_call(p, new_const(p, MRB_SYM_2(p->mrb, Kernel)), MRB_SYM_2(p->mrb, Complex), - new_callargs(p, list2(list3((node*)NODE_INT, (node*)strdup("0"), nint(10)), imaginary), 0, 0), '.'); + struct mrb_ast_int_node *n = NEW_NODE(int, NODE_INT); + n->value = val; + + return (node*)n; } static node* -new_rational(parser_state *p, node *rational) +new_imaginary(parser_state *p, node *imaginary) { - return new_call(p, new_const(p, MRB_SYM_2(p->mrb, Kernel)), MRB_SYM_2(p->mrb, Rational), new_callargs(p, list1(rational), 0, 0), '.'); + return new_fcall(p, MRB_SYM(Complex), + new_callargs(p, list2(new_int_n(p, 0), imaginary), 0, 0)); } -/* (:int . i) */ static node* -new_int(parser_state *p, const char *s, int base, int suffix) +new_rational(parser_state *p, node *rational) { - node* result = list3((node*)NODE_INT, (node*)strdup(s), nint(base)); - if (suffix & NUM_SUFFIX_R) { - result = new_rational(p, result); - } - if (suffix & NUM_SUFFIX_I) { - result = new_imaginary(p, result); - } - return result; + return new_fcall(p, MRB_SYM(Rational), new_callargs(p, list1(rational), 0, 0)); } -#ifndef MRB_NO_FLOAT -/* (:float . i) */ -static node* -new_float(parser_state *p, const char *s, int suffix) +/* Read integer into int32_t with overflow detection */ +static mrb_bool +read_int32(const char *p, int base, int32_t *result) { - node* result = cons((node*)NODE_FLOAT, (node*)strdup(s)); - if (suffix & NUM_SUFFIX_R) { - result = new_rational(p, result); + const char *e = p + strlen(p); + int32_t value = 0; + mrb_bool neg = FALSE; + + if (base < 2 || base > 16) { + return FALSE; } - if (suffix & NUM_SUFFIX_I) { - result = new_imaginary(p, result); + + if (*p == '+') { + p++; + } + else if (*p == '-') { + neg = TRUE; + p++; } - return result; -} -#endif -/* (:str . (s . len)) */ -static node* -new_str(parser_state *p, const char *s, size_t len) -{ - return cons((node*)NODE_STR, cons((node*)strndup(s, len), nint(len))); -} + while (p < e) { + int n; + char c = *p; -/* (:dstr . a) */ -static node* -new_dstr(parser_state *p, node *a) -{ - return cons((node*)NODE_DSTR, a); -} + /* Skip underscores */ + if (c == '_') { + p++; + continue; + } -static int -string_node_p(node *n) -{ - return (int)(typen(n->car) == NODE_STR); -} + /* Parse digit */ + if (c >= '0' && c <= '9') { + n = c - '0'; + } + else if (c >= 'a' && c <= 'f') { + n = c - 'a' + 10; + } + else if (c >= 'A' && c <= 'F') { + n = c - 'A' + 10; + } + else { + /* Invalid character */ + return FALSE; + } -static node* -composite_string_node(parser_state *p, node *a, node *b) -{ - size_t newlen = (size_t)a->cdr + (size_t)b->cdr; - char *str = (char*)mrb_pool_realloc(p->pool, a->car, (size_t)a->cdr + 1, newlen + 1); - memcpy(str + (size_t)a->cdr, b->car, (size_t)b->cdr); - str[newlen] = '\0'; - a->car = (node*)str; - a->cdr = (node*)newlen; - cons_free(b); - return a; -} + if (n >= base) { + /* Digit not valid for this base */ + return FALSE; + } -static node* -concat_string(parser_state *p, node *a, node *b) -{ - if (string_node_p(a)) { - if (string_node_p(b)) { - /* a == NODE_STR && b == NODE_STR */ - composite_string_node(p, a->cdr, b->cdr); - cons_free(b); - return a; + /* Check for multiplication overflow */ + if (value > INT32_MAX / base) { + return FALSE; } - else { - /* a == NODE_STR && b == NODE_DSTR */ - if (string_node_p(b->cdr->car)) { - /* a == NODE_STR && b->[NODE_STR, ...] */ - composite_string_node(p, a->cdr, b->cdr->car->cdr); - cons_free(b->cdr->car); - b->cdr->car = a; - return b; + value *= base; + + /* Check for addition overflow */ + if (value > INT32_MAX - n) { + /* Special case: -INT32_MIN is valid */ + if (neg && value == (INT32_MAX - n + 1) && p + 1 == e) { + *result = INT32_MIN; + return TRUE; } + return FALSE; } + + value += n; + p++; + } + + *result = neg ? -value : value; + return TRUE; +} + +static node* +new_int(parser_state *p, const char *s, int base, int suffix) +{ + int32_t val; + node* result; + + /* Try to parse as int32_t first */ + if (read_int32(s, base, &val)) { + result = new_int_n(p, val); } else { - node *c; /* last node of a */ - for (c = a; c->cdr != NULL; c = c->cdr) ; + /* Big integer - create NODE_BIGINT */ + struct mrb_ast_bigint_node *n = NEW_NODE(bigint, NODE_BIGINT); + n->string = strdup(s); + n->base = base; - if (string_node_p(b)) { - /* a == NODE_DSTR && b == NODE_STR */ - if (string_node_p(c->car)) { - /* a->[..., NODE_STR] && b == NODE_STR */ - composite_string_node(p, c->car->cdr, b->cdr); - cons_free(b); - return a; - } + result = (node*)n; + } - push(a, b); - return a; - } - else { - /* a == NODE_DSTR && b == NODE_DSTR */ - if (string_node_p(c->car) && string_node_p(b->cdr->car)) { - /* a->[..., NODE_STR] && b->[NODE_STR, ...] */ - node *d = b->cdr; - cons_free(b); - composite_string_node(p, c->car->cdr, d->car->cdr); - cons_free(d->car); - c->cdr = d->cdr; - cons_free(d); - return a; - } - else { - c->cdr = b->cdr; - cons_free(b); - return a; - } - } + /* Handle suffix modifiers */ + if (suffix & NUM_SUFFIX_R) { + result = new_rational(p, result); + } + if (suffix & NUM_SUFFIX_I) { + result = new_imaginary(p, result); } - return new_dstr(p, list2(a, b)); + return result; } -/* (:str . (s . len)) */ +#ifndef MRB_NO_FLOAT +/* struct: float_node(value) */ static node* -new_xstr(parser_state *p, const char *s, int len) +new_float(parser_state *p, const char *s, int suffix) { - return cons((node*)NODE_XSTR, cons((node*)strndup(s, len), nint(len))); + struct mrb_ast_float_node *n = NEW_NODE(float, NODE_FLOAT); + n->value = strdup(s); + + node* result = (node*)n; + + if (suffix & NUM_SUFFIX_R) { + result = new_rational(p, result); + } + if (suffix & NUM_SUFFIX_I) { + result = new_imaginary(p, result); + } + return result; } +#endif -/* (:xstr . a) */ +/* Create string node from cons list */ +/* struct: str_node(str) */ static node* -new_dxstr(parser_state *p, node *a) +new_str(parser_state *p, node *a) { - return cons((node*)NODE_DXSTR, a); + struct mrb_ast_str_node *n = NEW_NODE(str, NODE_STR); + n->list = a; + + return (node*)n; } -/* (:dsym . a) */ +/* struct: xstr_node(str) */ static node* -new_dsym(parser_state *p, node *a) +new_xstr(parser_state *p, node *a) { - return cons((node*)NODE_DSYM, a); + struct mrb_ast_xstr_node *n = NEW_NODE(xstr, NODE_XSTR); + n->list = a; + return (node*)n; } -/* (:regx . (s . (opt . enc))) */ +/* struct: dsym_node(parts) - uses cons list */ static node* -new_regx(parser_state *p, const char *p1, const char* p2, const char* p3) +new_dsym(parser_state *p, node *a) { - return cons((node*)NODE_REGX, cons((node*)p1, cons((node*)p2, (node*)p3))); + struct mrb_ast_str_node *n = NEW_NODE(str, NODE_DSYM); + n->list = a; + return (node*)n; } -/* (:dregx . (a . b)) */ +/* struct: regx_node(pattern, flags, encoding) */ static node* -new_dregx(parser_state *p, node *a, node *b) +new_regx(parser_state *p, node *list, const char *flags, const char *encoding) { - return cons((node*)NODE_DREGX, cons(a, b)); + struct mrb_ast_regx_node *n = NEW_NODE(regx, NODE_REGX); + n->list = list; + n->flags = flags; + n->encoding = encoding; + return (node*)n; } -/* (:backref . n) */ +/* struct: back_ref_node(n) */ static node* new_back_ref(parser_state *p, int n) { - return cons((node*)NODE_BACK_REF, nint(n)); + struct mrb_ast_back_ref_node *backref_node = NEW_NODE(back_ref, NODE_BACK_REF); + backref_node->type = n; + return (node*)backref_node; } -/* (:nthref . n) */ +/* struct: nth_ref_node(n) */ static node* new_nth_ref(parser_state *p, int n) { - return cons((node*)NODE_NTH_REF, nint(n)); + struct mrb_ast_nth_ref_node *nthref_node = NEW_NODE(nth_ref, NODE_NTH_REF); + nthref_node->nth = n; + return (node*)nthref_node; } -/* (:heredoc . a) */ +/* struct: heredoc_node(str) */ static node* -new_heredoc(parser_state *p) +new_heredoc(parser_state *p, struct mrb_parser_heredoc_info **infop) { - parser_heredoc_info *inf = (parser_heredoc_info*)parser_palloc(p, sizeof(parser_heredoc_info)); - return cons((node*)NODE_HEREDOC, (node*)inf); + struct mrb_ast_heredoc_node *n = NEW_NODE(heredoc, NODE_HEREDOC); + + /* Initialize embedded heredoc info struct */ + n->info.allow_indent = FALSE; + n->info.remove_indent = FALSE; + n->info.line_head = FALSE; + n->info.indent = 0; + n->info.indented = NULL; + n->info.type = str_not_parsing; // Will be set by heredoc processing + n->info.term = NULL; // Will be set by heredoc processing + n->info.term_len = 0; + n->info.doc = NULL; + + /* Return pointer to embedded info if requested */ + *infop = &n->info; + + return (node*)n; } static void @@ -1267,21 +1703,46 @@ new_bv(parser_state *p, mrb_sym id) static node* new_literal_delim(parser_state *p) { - return cons((node*)NODE_LITERAL_DELIM, 0); + return cons((node*)0, (node*)0); +} + +/* Helper for creating string representation cons (length . string_ptr) */ +static node* +new_str_rep(parser_state *p, const char *str, int len) +{ + return cons(int_to_node(len), (node*)strndup(str, len)); +} + +/* Helper for creating string representation from current token */ +static node* +new_str_tok(parser_state *p) +{ + return new_str_rep(p, tok(p), toklen(p)); +} + +/* Helper for creating empty string representation */ +static node* +new_str_empty(parser_state *p) +{ + return new_str_rep(p, "", 0); } /* (:words . a) */ static node* new_words(parser_state *p, node *a) { - return cons((node*)NODE_WORDS, a); + struct mrb_ast_words_node *words_node = NEW_NODE(words, NODE_WORDS); + words_node->args = a; + return (node*)words_node; } /* (:symbols . a) */ static node* new_symbols(parser_state *p, node *a) { - return cons((node*)NODE_SYMBOLS, a); + struct mrb_ast_symbols_node *symbols_node = NEW_NODE(symbols, NODE_SYMBOLS); + symbols_node->args = a; + return (node*)symbols_node; } /* xxx ----------------------------- */ @@ -1305,17 +1766,20 @@ static void args_with_block(parser_state *p, node *a, node *b) { if (b) { - if (a->cdr && a->cdr->cdr) { - yyerror(p, "both block arg and actual block given"); + /* Handle callargs structure - direct casting like new_args() */ + struct mrb_ast_callargs *callargs = (struct mrb_ast_callargs*)a; + if (callargs->block_arg) { + yyerror(NULL, p, "both block arg and actual block given"); } - a->cdr->cdr = b; + callargs->block_arg = b; } } static void endless_method_name(parser_state *p, node *defn) { - mrb_sym sym = sym(defn->cdr->car); + struct mrb_ast_def_node *def = (struct mrb_ast_def_node*)defn; + mrb_sym sym = def->name; mrb_int len; const char *name = mrb_sym_name_len(p->mrb, sym, &len); @@ -1323,30 +1787,87 @@ endless_method_name(parser_state *p, node *defn) for (int i=0; icar)) { + enum node_type var_type = (enum node_type)header->node_type; + switch (var_type) { case NODE_SUPER: case NODE_ZSUPER: - if (!a->cdr) a->cdr = new_callargs(p, 0, 0, b); - else args_with_block(p, a->cdr, b); + /* For variable-sized super/zsuper nodes, update the args field directly */ + { + struct mrb_ast_super_node *super_n = super_node(a); + if (!super_n->args) { + super_n->args = new_callargs(p, 0, 0, b); + } + else { + args_with_block(p, super_n->args, b); + } + } + break; + case NODE_YIELD: + /* Variable-sized yield nodes should generate an error when given a block */ + yyerror(NULL, p, "block given to yield"); + break; + case NODE_RETURN: + /* Variable-sized return nodes - recursively call with args */ + { + struct mrb_ast_return_node *return_n = return_node(a); + if (return_n->args != NULL) { + call_with_block(p, return_n->args, b); + } + } + break; + case NODE_BREAK: + /* Variable-sized break nodes - recursively call with value */ + { + struct mrb_ast_break_node *break_n = (struct mrb_ast_break_node*)a; + if (break_n->value != NULL) { + call_with_block(p, break_n->value, b); + } + } + break; + case NODE_NEXT: + /* Variable-sized next nodes - recursively call with value */ + { + struct mrb_ast_next_node *next_n = (struct mrb_ast_next_node*)a; + if (next_n->value != NULL) { + call_with_block(p, next_n->value, b); + } + } break; case NODE_CALL: - case NODE_FCALL: - case NODE_SCALL: - /* (NODE_CALL recv mid (args kw . blk)) */ - n = a->cdr->cdr->cdr; /* (args kw . blk) */ - if (!n->car) n->car = new_callargs(p, 0, 0, b); - else args_with_block(p, n->car, b); + /* Variable-sized call nodes - add block to existing args */ + { + struct mrb_ast_call_node *call = call_node(a); + + if (call->args && callargs_node(call->args)->block_arg) { + yyerror(NULL, p, "both block arg and actual block given"); + return; + } + + /* Use existing args and add block */ + if (call->args) { + /* Modify existing callargs structure to add block */ + args_with_block(p, call->args, b); + } + else { + /* Create new callargs with just the block */ + call->args = new_callargs(p, NULL, NULL, b); + } + } break; default: + /* For other variable-sized nodes, do nothing */ break; } } @@ -1354,7 +1875,9 @@ call_with_block(parser_state *p, node *a, node *b) static node* new_negate(parser_state *p, node *n) { - return cons((node*)NODE_NEGATE, n); + struct mrb_ast_negate_node *negate_node = NEW_NODE(negate, NODE_NEGATE); + negate_node->operand = n; + return (node*)negate_node; } static node* @@ -1366,36 +1889,46 @@ cond(node *n) static node* ret_args(parser_state *p, node *n) { - if (n->cdr->cdr) { - yyerror(p, "block argument should not be given"); + /* Handle callargs structure - direct casting like new_args() */ + struct mrb_ast_callargs *callargs = (struct mrb_ast_callargs*)n; + if (callargs->block_arg) { + yyerror(NULL, p, "block argument should not be given"); return NULL; } - if (!n->car) return NULL; - if (!n->car->cdr) return n->car->car; - return new_array(p, n->car); + if (!callargs->regular_args) return NULL; + if (!callargs->regular_args->cdr) return callargs->regular_args->car; + return new_array(p, callargs->regular_args); } static void assignable(parser_state *p, node *lhs) { - if (intn(lhs->car) == NODE_LVAR) { - local_add(p, sym(lhs->cdr)); + switch (node_type(lhs)) { + case NODE_LVAR: + local_add(p, var_node(lhs)->symbol); + break; + case NODE_CONST: + if (p->in_def) + yyerror(NULL, p, "dynamic constant assignment"); + break; + default: + /* Other node types don't need special handling in assignable */ + break; } } static node* var_reference(parser_state *p, node *lhs) { - node *n; - - if (intn(lhs->car) == NODE_LVAR) { - if (!local_var_p(p, sym(lhs->cdr))) { - n = new_fcall(p, sym(lhs->cdr), 0); - cons_free(lhs); + /* Check if this is a variable-sized node */ + if (node_type_p(lhs, NODE_LVAR)) { + mrb_sym sym = var_node(lhs)->symbol; + if (!local_var_p(p, sym)) { + node *n = new_fcall(p, sym, 0); + /* Don't free variable-sized nodes - they're managed by the parser allocator */ return n; } } - return lhs; } @@ -1403,18 +1936,16 @@ static node* label_reference(parser_state *p, mrb_sym sym) { const char *name = mrb_sym_name(p->mrb, sym); - node *n; if (local_var_p(p, sym)) { - n = new_lvar(p, sym); + return new_lvar(p, sym); } else if (ISUPPER(name[0])) { - n = new_const(p, sym); + return new_const(p, sym); } else { - n = new_fcall(p, sym, 0); + return new_fcall(p, sym, 0); } - return n; } typedef enum mrb_string_type string_type; @@ -1467,9 +1998,13 @@ static parser_heredoc_info * parsing_heredoc_info(parser_state *p) { node *nd = p->parsing_heredoc; - if (nd == NULL) - return NULL; + if (nd == NULL) return NULL; /* mrb_assert(nd->car->car == NODE_HEREDOC); */ + if (node_type(nd->car) == NODE_HEREDOC) { + /* Variable-sized heredoc node - return address of embedded info struct */ + struct mrb_ast_heredoc_node *heredoc = (struct mrb_ast_heredoc_node*)nd->car; + return &heredoc->info; + } return (parser_heredoc_info*)nd->car->cdr; } @@ -1500,10 +2035,33 @@ heredoc_end(parser_state *p) } #define is_strterm_type(p,str_func) ((p)->lex_strterm->type & (str_func)) +static void +prohibit_literals(parser_state *p, node *n) +{ + if (n == 0) { + yyerror(NULL, p, "can't define singleton method for ()."); + } + else { + enum node_type nt = node_type(n); + switch (nt) { + case NODE_INT: + case NODE_STR: + case NODE_XSTR: + case NODE_REGX: + case NODE_FLOAT: + case NODE_ARRAY: + case NODE_HEREDOC: + yyerror(NULL, p, "can't define singleton method for literals"); + default: + break; + } + } +} + /* xxx ----------------------------- */ -#line 1507 "mrbgems/mruby-compiler/core/y.tab.c" +#line 2065 "mrbgems/mruby-compiler/core/y.tab.c" # ifndef YY_CAST # ifdef __cplusplus @@ -1526,156 +2084,155 @@ heredoc_end(parser_state *p) # endif # endif -/* Enabling verbose error messages. */ -#ifdef YYERROR_VERBOSE -# undef YYERROR_VERBOSE -# define YYERROR_VERBOSE 1 -#else -# define YYERROR_VERBOSE 1 -#endif - - +/* Use api.header.include to #include this header + instead of duplicating it here. */ /* Debug traces. */ #ifndef YYDEBUG # define YYDEBUG 0 #endif -#if YYDEBUG +#if YYDEBUG && !defined(yydebug) extern int yydebug; #endif -/* Token type. */ + +/* Token kinds. */ #ifndef YYTOKENTYPE # define YYTOKENTYPE enum yytokentype { - keyword_class = 258, - keyword_module = 259, - keyword_def = 260, - keyword_begin = 261, - keyword_if = 262, - keyword_unless = 263, - keyword_while = 264, - keyword_until = 265, - keyword_for = 266, - keyword_undef = 267, - keyword_rescue = 268, - keyword_ensure = 269, - keyword_end = 270, - keyword_then = 271, - keyword_elsif = 272, - keyword_else = 273, - keyword_case = 274, - keyword_when = 275, - keyword_break = 276, - keyword_next = 277, - keyword_redo = 278, - keyword_retry = 279, - keyword_in = 280, - keyword_do = 281, - keyword_do_cond = 282, - keyword_do_block = 283, - keyword_do_LAMBDA = 284, - keyword_return = 285, - keyword_yield = 286, - keyword_super = 287, - keyword_self = 288, - keyword_nil = 289, - keyword_true = 290, - keyword_false = 291, - keyword_and = 292, - keyword_or = 293, - keyword_not = 294, - modifier_if = 295, - modifier_unless = 296, - modifier_while = 297, - modifier_until = 298, - modifier_rescue = 299, - keyword_alias = 300, - keyword_BEGIN = 301, - keyword_END = 302, - keyword__LINE__ = 303, - keyword__FILE__ = 304, - keyword__ENCODING__ = 305, - tIDENTIFIER = 306, - tFID = 307, - tGVAR = 308, - tIVAR = 309, - tCONSTANT = 310, - tCVAR = 311, - tLABEL_TAG = 312, - tINTEGER = 313, - tFLOAT = 314, - tCHAR = 315, - tXSTRING = 316, - tREGEXP = 317, - tSTRING = 318, - tSTRING_PART = 319, - tSTRING_MID = 320, - tNTH_REF = 321, - tBACK_REF = 322, - tREGEXP_END = 323, - tNUMPARAM = 324, - tUPLUS = 325, - tUMINUS = 326, - tCMP = 327, - tEQ = 328, - tEQQ = 329, - tNEQ = 330, - tGEQ = 331, - tLEQ = 332, - tANDOP = 333, - tOROP = 334, - tMATCH = 335, - tNMATCH = 336, - tDOT2 = 337, - tDOT3 = 338, - tBDOT2 = 339, - tBDOT3 = 340, - tAREF = 341, - tASET = 342, - tLSHFT = 343, - tRSHFT = 344, - tCOLON2 = 345, - tCOLON3 = 346, - tOP_ASGN = 347, - tASSOC = 348, - tLPAREN = 349, - tLPAREN_ARG = 350, - tRPAREN = 351, - tLBRACK = 352, - tLBRACE = 353, - tLBRACE_ARG = 354, - tSTAR = 355, - tPOW = 356, - tDSTAR = 357, - tAMPER = 358, - tLAMBDA = 359, - tANDDOT = 360, - tSYMBEG = 361, - tSTRING_BEG = 362, - tXSTRING_BEG = 363, - tSTRING_DVAR = 364, - tREGEXP_BEG = 365, - tWORDS_BEG = 366, - tSYMBOLS_BEG = 367, - tLAMBEG = 368, - tHEREDOC_BEG = 369, - tHEREDOC_END = 370, - tLITERAL_DELIM = 371, - tHD_LITERAL_DELIM = 372, - tHD_STRING_PART = 373, - tHD_STRING_MID = 374, - tLOWEST = 375, - tUMINUS_NUM = 376, - tLAST_TOKEN = 377 + YYEMPTY = -2, + YYEOF = 0, /* "end of file" */ + YYerror = 256, /* error */ + YYUNDEF = 257, /* "invalid token" */ + keyword_class = 258, /* "'class'" */ + keyword_module = 259, /* "'module'" */ + keyword_def = 260, /* "'def'" */ + keyword_begin = 261, /* "'begin'" */ + keyword_if = 262, /* "'if'" */ + keyword_unless = 263, /* "'unless'" */ + keyword_while = 264, /* "'while'" */ + keyword_until = 265, /* "'until'" */ + keyword_for = 266, /* "'for'" */ + keyword_undef = 267, /* "'undef'" */ + keyword_rescue = 268, /* "'rescue'" */ + keyword_ensure = 269, /* "'ensure'" */ + keyword_end = 270, /* "'end'" */ + keyword_then = 271, /* "'then'" */ + keyword_elsif = 272, /* "'elsif'" */ + keyword_else = 273, /* "'else'" */ + keyword_case = 274, /* "'case'" */ + keyword_when = 275, /* "'when'" */ + keyword_break = 276, /* "'break'" */ + keyword_next = 277, /* "'next'" */ + keyword_redo = 278, /* "'redo'" */ + keyword_retry = 279, /* "'retry'" */ + keyword_in = 280, /* "'in'" */ + keyword_do = 281, /* "'do'" */ + keyword_do_cond = 282, /* "'do' for condition" */ + keyword_do_block = 283, /* "'do' for block" */ + keyword_do_LAMBDA = 284, /* "'do' for lambda" */ + keyword_return = 285, /* "'return'" */ + keyword_yield = 286, /* "'yield'" */ + keyword_super = 287, /* "'super'" */ + keyword_self = 288, /* "'self'" */ + keyword_nil = 289, /* "'nil'" */ + keyword_true = 290, /* "'true'" */ + keyword_false = 291, /* "'false'" */ + keyword_and = 292, /* "'and'" */ + keyword_or = 293, /* "'or'" */ + keyword_not = 294, /* "'not'" */ + modifier_if = 295, /* "'if' modifier" */ + modifier_unless = 296, /* "'unless' modifier" */ + modifier_while = 297, /* "'while' modifier" */ + modifier_until = 298, /* "'until' modifier" */ + modifier_rescue = 299, /* "'rescue' modifier" */ + keyword_alias = 300, /* "'alias'" */ + keyword_BEGIN = 301, /* "'BEGIN'" */ + keyword_END = 302, /* "'END'" */ + keyword__LINE__ = 303, /* "'__LINE__'" */ + keyword__FILE__ = 304, /* "'__FILE__'" */ + keyword__ENCODING__ = 305, /* "'__ENCODING__'" */ + tIDENTIFIER = 306, /* "local variable or method" */ + tFID = 307, /* "method" */ + tGVAR = 308, /* "global variable" */ + tIVAR = 309, /* "instance variable" */ + tCONSTANT = 310, /* "constant" */ + tCVAR = 311, /* "class variable" */ + tLABEL_TAG = 312, /* "label" */ + tINTEGER = 313, /* "integer literal" */ + tFLOAT = 314, /* "float literal" */ + tCHAR = 315, /* "character literal" */ + tXSTRING = 316, /* tXSTRING */ + tREGEXP = 317, /* tREGEXP */ + tSTRING = 318, /* tSTRING */ + tSTRING_PART = 319, /* tSTRING_PART */ + tSTRING_MID = 320, /* tSTRING_MID */ + tNTH_REF = 321, /* tNTH_REF */ + tBACK_REF = 322, /* tBACK_REF */ + tREGEXP_END = 323, /* tREGEXP_END */ + tNUMPARAM = 324, /* "numbered parameter" */ + tUPLUS = 325, /* "unary plus" */ + tUMINUS = 326, /* "unary minus" */ + tCMP = 327, /* "<=>" */ + tEQ = 328, /* "==" */ + tEQQ = 329, /* "===" */ + tNEQ = 330, /* "!=" */ + tGEQ = 331, /* ">=" */ + tLEQ = 332, /* "<=" */ + tANDOP = 333, /* "&&" */ + tOROP = 334, /* "||" */ + tMATCH = 335, /* "=~" */ + tNMATCH = 336, /* "!~" */ + tDOT2 = 337, /* ".." */ + tDOT3 = 338, /* "..." */ + tBDOT2 = 339, /* tBDOT2 */ + tBDOT3 = 340, /* tBDOT3 */ + tAREF = 341, /* tAREF */ + tASET = 342, /* tASET */ + tLSHFT = 343, /* "<<" */ + tRSHFT = 344, /* ">>" */ + tCOLON2 = 345, /* "::" */ + tCOLON3 = 346, /* tCOLON3 */ + tOP_ASGN = 347, /* tOP_ASGN */ + tASSOC = 348, /* "=>" */ + tLPAREN = 349, /* tLPAREN */ + tLPAREN_ARG = 350, /* "(" */ + tRPAREN = 351, /* ")" */ + tLBRACK = 352, /* "[" */ + tLBRACE = 353, /* tLBRACE */ + tLBRACE_ARG = 354, /* "{" */ + tSTAR = 355, /* "*" */ + tPOW = 356, /* tPOW */ + tDSTAR = 357, /* "**" */ + tAMPER = 358, /* "&" */ + tLAMBDA = 359, /* "->" */ + tANDDOT = 360, /* "&." */ + tSYMBEG = 361, /* "symbol" */ + tSTRING_BEG = 362, /* "string literal" */ + tXSTRING_BEG = 363, /* tXSTRING_BEG */ + tSTRING_DVAR = 364, /* tSTRING_DVAR */ + tREGEXP_BEG = 365, /* tREGEXP_BEG */ + tWORDS_BEG = 366, /* tWORDS_BEG */ + tSYMBOLS_BEG = 367, /* tSYMBOLS_BEG */ + tLAMBEG = 368, /* tLAMBEG */ + tHEREDOC_BEG = 369, /* "here document" */ + tHEREDOC_END = 370, /* tHEREDOC_END */ + tLITERAL_DELIM = 371, /* tLITERAL_DELIM */ + tHD_LITERAL_DELIM = 372, /* tHD_LITERAL_DELIM */ + tHD_STRING_PART = 373, /* tHD_STRING_PART */ + tHD_STRING_MID = 374, /* tHD_STRING_MID */ + tLOWEST = 375, /* tLOWEST */ + tUMINUS_NUM = 376, /* tUMINUS_NUM */ + tLAST_TOKEN = 377 /* tLAST_TOKEN */ }; + typedef enum yytokentype yytoken_kind_t; #endif /* Value type. */ #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED union YYSTYPE { -#line 1449 "mrbgems/mruby-compiler/core/parse.y" +#line 2008 "mrbgems/mruby-compiler/core/parse.y" node *nd; mrb_sym id; @@ -1683,7 +2240,7 @@ union YYSTYPE stack_type stack; const struct vtable *vars; -#line 1687 "mrbgems/mruby-compiler/core/y.tab.c" +#line 2244 "mrbgems/mruby-compiler/core/y.tab.c" }; typedef union YYSTYPE YYSTYPE; @@ -1691,6 +2248,21 @@ typedef union YYSTYPE YYSTYPE; # define YYSTYPE_IS_DECLARED 1 #endif +/* Location type. */ +#if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED +typedef struct YYLTYPE YYLTYPE; +struct YYLTYPE +{ + int first_line; + int first_column; + int last_line; + int last_column; +}; +# define YYLTYPE_IS_DECLARED 1 +# define YYLTYPE_IS_TRIVIAL 1 +#endif + + int yyparse (parser_state *p); @@ -1698,6 +2270,363 @@ int yyparse (parser_state *p); +/* Symbol kind. */ +enum yysymbol_kind_t +{ + YYSYMBOL_YYEMPTY = -2, + YYSYMBOL_YYEOF = 0, /* "end of file" */ + YYSYMBOL_YYerror = 1, /* error */ + YYSYMBOL_YYUNDEF = 2, /* "invalid token" */ + YYSYMBOL_keyword_class = 3, /* "'class'" */ + YYSYMBOL_keyword_module = 4, /* "'module'" */ + YYSYMBOL_keyword_def = 5, /* "'def'" */ + YYSYMBOL_keyword_begin = 6, /* "'begin'" */ + YYSYMBOL_keyword_if = 7, /* "'if'" */ + YYSYMBOL_keyword_unless = 8, /* "'unless'" */ + YYSYMBOL_keyword_while = 9, /* "'while'" */ + YYSYMBOL_keyword_until = 10, /* "'until'" */ + YYSYMBOL_keyword_for = 11, /* "'for'" */ + YYSYMBOL_keyword_undef = 12, /* "'undef'" */ + YYSYMBOL_keyword_rescue = 13, /* "'rescue'" */ + YYSYMBOL_keyword_ensure = 14, /* "'ensure'" */ + YYSYMBOL_keyword_end = 15, /* "'end'" */ + YYSYMBOL_keyword_then = 16, /* "'then'" */ + YYSYMBOL_keyword_elsif = 17, /* "'elsif'" */ + YYSYMBOL_keyword_else = 18, /* "'else'" */ + YYSYMBOL_keyword_case = 19, /* "'case'" */ + YYSYMBOL_keyword_when = 20, /* "'when'" */ + YYSYMBOL_keyword_break = 21, /* "'break'" */ + YYSYMBOL_keyword_next = 22, /* "'next'" */ + YYSYMBOL_keyword_redo = 23, /* "'redo'" */ + YYSYMBOL_keyword_retry = 24, /* "'retry'" */ + YYSYMBOL_keyword_in = 25, /* "'in'" */ + YYSYMBOL_keyword_do = 26, /* "'do'" */ + YYSYMBOL_keyword_do_cond = 27, /* "'do' for condition" */ + YYSYMBOL_keyword_do_block = 28, /* "'do' for block" */ + YYSYMBOL_keyword_do_LAMBDA = 29, /* "'do' for lambda" */ + YYSYMBOL_keyword_return = 30, /* "'return'" */ + YYSYMBOL_keyword_yield = 31, /* "'yield'" */ + YYSYMBOL_keyword_super = 32, /* "'super'" */ + YYSYMBOL_keyword_self = 33, /* "'self'" */ + YYSYMBOL_keyword_nil = 34, /* "'nil'" */ + YYSYMBOL_keyword_true = 35, /* "'true'" */ + YYSYMBOL_keyword_false = 36, /* "'false'" */ + YYSYMBOL_keyword_and = 37, /* "'and'" */ + YYSYMBOL_keyword_or = 38, /* "'or'" */ + YYSYMBOL_keyword_not = 39, /* "'not'" */ + YYSYMBOL_modifier_if = 40, /* "'if' modifier" */ + YYSYMBOL_modifier_unless = 41, /* "'unless' modifier" */ + YYSYMBOL_modifier_while = 42, /* "'while' modifier" */ + YYSYMBOL_modifier_until = 43, /* "'until' modifier" */ + YYSYMBOL_modifier_rescue = 44, /* "'rescue' modifier" */ + YYSYMBOL_keyword_alias = 45, /* "'alias'" */ + YYSYMBOL_keyword_BEGIN = 46, /* "'BEGIN'" */ + YYSYMBOL_keyword_END = 47, /* "'END'" */ + YYSYMBOL_keyword__LINE__ = 48, /* "'__LINE__'" */ + YYSYMBOL_keyword__FILE__ = 49, /* "'__FILE__'" */ + YYSYMBOL_keyword__ENCODING__ = 50, /* "'__ENCODING__'" */ + YYSYMBOL_tIDENTIFIER = 51, /* "local variable or method" */ + YYSYMBOL_tFID = 52, /* "method" */ + YYSYMBOL_tGVAR = 53, /* "global variable" */ + YYSYMBOL_tIVAR = 54, /* "instance variable" */ + YYSYMBOL_tCONSTANT = 55, /* "constant" */ + YYSYMBOL_tCVAR = 56, /* "class variable" */ + YYSYMBOL_tLABEL_TAG = 57, /* "label" */ + YYSYMBOL_tINTEGER = 58, /* "integer literal" */ + YYSYMBOL_tFLOAT = 59, /* "float literal" */ + YYSYMBOL_tCHAR = 60, /* "character literal" */ + YYSYMBOL_tXSTRING = 61, /* tXSTRING */ + YYSYMBOL_tREGEXP = 62, /* tREGEXP */ + YYSYMBOL_tSTRING = 63, /* tSTRING */ + YYSYMBOL_tSTRING_PART = 64, /* tSTRING_PART */ + YYSYMBOL_tSTRING_MID = 65, /* tSTRING_MID */ + YYSYMBOL_tNTH_REF = 66, /* tNTH_REF */ + YYSYMBOL_tBACK_REF = 67, /* tBACK_REF */ + YYSYMBOL_tREGEXP_END = 68, /* tREGEXP_END */ + YYSYMBOL_tNUMPARAM = 69, /* "numbered parameter" */ + YYSYMBOL_tUPLUS = 70, /* "unary plus" */ + YYSYMBOL_tUMINUS = 71, /* "unary minus" */ + YYSYMBOL_tCMP = 72, /* "<=>" */ + YYSYMBOL_tEQ = 73, /* "==" */ + YYSYMBOL_tEQQ = 74, /* "===" */ + YYSYMBOL_tNEQ = 75, /* "!=" */ + YYSYMBOL_tGEQ = 76, /* ">=" */ + YYSYMBOL_tLEQ = 77, /* "<=" */ + YYSYMBOL_tANDOP = 78, /* "&&" */ + YYSYMBOL_tOROP = 79, /* "||" */ + YYSYMBOL_tMATCH = 80, /* "=~" */ + YYSYMBOL_tNMATCH = 81, /* "!~" */ + YYSYMBOL_tDOT2 = 82, /* ".." */ + YYSYMBOL_tDOT3 = 83, /* "..." */ + YYSYMBOL_tBDOT2 = 84, /* tBDOT2 */ + YYSYMBOL_tBDOT3 = 85, /* tBDOT3 */ + YYSYMBOL_tAREF = 86, /* tAREF */ + YYSYMBOL_tASET = 87, /* tASET */ + YYSYMBOL_tLSHFT = 88, /* "<<" */ + YYSYMBOL_tRSHFT = 89, /* ">>" */ + YYSYMBOL_tCOLON2 = 90, /* "::" */ + YYSYMBOL_tCOLON3 = 91, /* tCOLON3 */ + YYSYMBOL_tOP_ASGN = 92, /* tOP_ASGN */ + YYSYMBOL_tASSOC = 93, /* "=>" */ + YYSYMBOL_tLPAREN = 94, /* tLPAREN */ + YYSYMBOL_tLPAREN_ARG = 95, /* "(" */ + YYSYMBOL_tRPAREN = 96, /* ")" */ + YYSYMBOL_tLBRACK = 97, /* "[" */ + YYSYMBOL_tLBRACE = 98, /* tLBRACE */ + YYSYMBOL_tLBRACE_ARG = 99, /* "{" */ + YYSYMBOL_tSTAR = 100, /* "*" */ + YYSYMBOL_tPOW = 101, /* tPOW */ + YYSYMBOL_tDSTAR = 102, /* "**" */ + YYSYMBOL_tAMPER = 103, /* "&" */ + YYSYMBOL_tLAMBDA = 104, /* "->" */ + YYSYMBOL_tANDDOT = 105, /* "&." */ + YYSYMBOL_tSYMBEG = 106, /* "symbol" */ + YYSYMBOL_tSTRING_BEG = 107, /* "string literal" */ + YYSYMBOL_tXSTRING_BEG = 108, /* tXSTRING_BEG */ + YYSYMBOL_tSTRING_DVAR = 109, /* tSTRING_DVAR */ + YYSYMBOL_tREGEXP_BEG = 110, /* tREGEXP_BEG */ + YYSYMBOL_tWORDS_BEG = 111, /* tWORDS_BEG */ + YYSYMBOL_tSYMBOLS_BEG = 112, /* tSYMBOLS_BEG */ + YYSYMBOL_tLAMBEG = 113, /* tLAMBEG */ + YYSYMBOL_tHEREDOC_BEG = 114, /* "here document" */ + YYSYMBOL_tHEREDOC_END = 115, /* tHEREDOC_END */ + YYSYMBOL_tLITERAL_DELIM = 116, /* tLITERAL_DELIM */ + YYSYMBOL_tHD_LITERAL_DELIM = 117, /* tHD_LITERAL_DELIM */ + YYSYMBOL_tHD_STRING_PART = 118, /* tHD_STRING_PART */ + YYSYMBOL_tHD_STRING_MID = 119, /* tHD_STRING_MID */ + YYSYMBOL_tLOWEST = 120, /* tLOWEST */ + YYSYMBOL_121_ = 121, /* '=' */ + YYSYMBOL_122_ = 122, /* '?' */ + YYSYMBOL_123_ = 123, /* ':' */ + YYSYMBOL_124_ = 124, /* '>' */ + YYSYMBOL_125_ = 125, /* '<' */ + YYSYMBOL_126_ = 126, /* '|' */ + YYSYMBOL_127_ = 127, /* '^' */ + YYSYMBOL_128_ = 128, /* '&' */ + YYSYMBOL_129_ = 129, /* '+' */ + YYSYMBOL_130_ = 130, /* '-' */ + YYSYMBOL_131_ = 131, /* '*' */ + YYSYMBOL_132_ = 132, /* '/' */ + YYSYMBOL_133_ = 133, /* '%' */ + YYSYMBOL_tUMINUS_NUM = 134, /* tUMINUS_NUM */ + YYSYMBOL_135_ = 135, /* '!' */ + YYSYMBOL_136_ = 136, /* '~' */ + YYSYMBOL_tLAST_TOKEN = 137, /* tLAST_TOKEN */ + YYSYMBOL_138_ = 138, /* '{' */ + YYSYMBOL_139_ = 139, /* '}' */ + YYSYMBOL_140_ = 140, /* '[' */ + YYSYMBOL_141_ = 141, /* ']' */ + YYSYMBOL_142_ = 142, /* ',' */ + YYSYMBOL_143_ = 143, /* '`' */ + YYSYMBOL_144_ = 144, /* '(' */ + YYSYMBOL_145_ = 145, /* ')' */ + YYSYMBOL_146_ = 146, /* ';' */ + YYSYMBOL_147_ = 147, /* '.' */ + YYSYMBOL_148_n_ = 148, /* '\n' */ + YYSYMBOL_YYACCEPT = 149, /* $accept */ + YYSYMBOL_150_1 = 150, /* $@1 */ + YYSYMBOL_program = 151, /* program */ + YYSYMBOL_top_compstmt = 152, /* top_compstmt */ + YYSYMBOL_top_stmts = 153, /* top_stmts */ + YYSYMBOL_top_stmt = 154, /* top_stmt */ + YYSYMBOL_155_2 = 155, /* @2 */ + YYSYMBOL_bodystmt = 156, /* bodystmt */ + YYSYMBOL_compstmt = 157, /* compstmt */ + YYSYMBOL_stmts = 158, /* stmts */ + YYSYMBOL_159_3 = 159, /* $@3 */ + YYSYMBOL_stmt = 160, /* stmt */ + YYSYMBOL_command_asgn = 161, /* command_asgn */ + YYSYMBOL_command_rhs = 162, /* command_rhs */ + YYSYMBOL_expr = 163, /* expr */ + YYSYMBOL_164_4 = 164, /* $@4 */ + YYSYMBOL_165_5 = 165, /* $@5 */ + YYSYMBOL_defn_head = 166, /* defn_head */ + YYSYMBOL_167_6 = 167, /* $@6 */ + YYSYMBOL_defs_head = 168, /* defs_head */ + YYSYMBOL_expr_value = 169, /* expr_value */ + YYSYMBOL_command_call = 170, /* command_call */ + YYSYMBOL_block_command = 171, /* block_command */ + YYSYMBOL_172_7 = 172, /* $@7 */ + YYSYMBOL_cmd_brace_block = 173, /* cmd_brace_block */ + YYSYMBOL_command = 174, /* command */ + YYSYMBOL_mlhs = 175, /* mlhs */ + YYSYMBOL_mlhs_inner = 176, /* mlhs_inner */ + YYSYMBOL_mlhs_basic = 177, /* mlhs_basic */ + YYSYMBOL_mlhs_item = 178, /* mlhs_item */ + YYSYMBOL_mlhs_list = 179, /* mlhs_list */ + YYSYMBOL_mlhs_post = 180, /* mlhs_post */ + YYSYMBOL_mlhs_node = 181, /* mlhs_node */ + YYSYMBOL_lhs = 182, /* lhs */ + YYSYMBOL_cname = 183, /* cname */ + YYSYMBOL_cpath = 184, /* cpath */ + YYSYMBOL_fname = 185, /* fname */ + YYSYMBOL_fsym = 186, /* fsym */ + YYSYMBOL_undef_list = 187, /* undef_list */ + YYSYMBOL_188_8 = 188, /* $@8 */ + YYSYMBOL_op = 189, /* op */ + YYSYMBOL_reswords = 190, /* reswords */ + YYSYMBOL_arg = 191, /* arg */ + YYSYMBOL_aref_args = 192, /* aref_args */ + YYSYMBOL_arg_rhs = 193, /* arg_rhs */ + YYSYMBOL_paren_args = 194, /* paren_args */ + YYSYMBOL_opt_paren_args = 195, /* opt_paren_args */ + YYSYMBOL_opt_call_args = 196, /* opt_call_args */ + YYSYMBOL_call_args = 197, /* call_args */ + YYSYMBOL_198_9 = 198, /* @9 */ + YYSYMBOL_command_args = 199, /* command_args */ + YYSYMBOL_block_arg = 200, /* block_arg */ + YYSYMBOL_opt_block_arg = 201, /* opt_block_arg */ + YYSYMBOL_comma = 202, /* comma */ + YYSYMBOL_args = 203, /* args */ + YYSYMBOL_mrhs = 204, /* mrhs */ + YYSYMBOL_primary = 205, /* primary */ + YYSYMBOL_206_10 = 206, /* @10 */ + YYSYMBOL_207_11 = 207, /* @11 */ + YYSYMBOL_208_12 = 208, /* $@12 */ + YYSYMBOL_209_13 = 209, /* $@13 */ + YYSYMBOL_210_14 = 210, /* @14 */ + YYSYMBOL_211_15 = 211, /* @15 */ + YYSYMBOL_212_16 = 212, /* $@16 */ + YYSYMBOL_213_17 = 213, /* $@17 */ + YYSYMBOL_214_18 = 214, /* $@18 */ + YYSYMBOL_215_19 = 215, /* $@19 */ + YYSYMBOL_216_20 = 216, /* $@20 */ + YYSYMBOL_217_21 = 217, /* $@21 */ + YYSYMBOL_218_22 = 218, /* @22 */ + YYSYMBOL_219_23 = 219, /* @23 */ + YYSYMBOL_220_24 = 220, /* @24 */ + YYSYMBOL_221_25 = 221, /* @25 */ + YYSYMBOL_primary_value = 222, /* primary_value */ + YYSYMBOL_then = 223, /* then */ + YYSYMBOL_do = 224, /* do */ + YYSYMBOL_if_tail = 225, /* if_tail */ + YYSYMBOL_opt_else = 226, /* opt_else */ + YYSYMBOL_for_var = 227, /* for_var */ + YYSYMBOL_f_margs = 228, /* f_margs */ + YYSYMBOL_229_26 = 229, /* $@26 */ + YYSYMBOL_block_args_tail = 230, /* block_args_tail */ + YYSYMBOL_opt_block_args_tail = 231, /* opt_block_args_tail */ + YYSYMBOL_block_param = 232, /* block_param */ + YYSYMBOL_opt_block_param = 233, /* opt_block_param */ + YYSYMBOL_234_27 = 234, /* $@27 */ + YYSYMBOL_block_param_def = 235, /* block_param_def */ + YYSYMBOL_opt_bv_decl = 236, /* opt_bv_decl */ + YYSYMBOL_bv_decls = 237, /* bv_decls */ + YYSYMBOL_bvar = 238, /* bvar */ + YYSYMBOL_f_larglist = 239, /* f_larglist */ + YYSYMBOL_lambda_body = 240, /* lambda_body */ + YYSYMBOL_241_28 = 241, /* @28 */ + YYSYMBOL_do_block = 242, /* do_block */ + YYSYMBOL_block_call = 243, /* block_call */ + YYSYMBOL_method_call = 244, /* method_call */ + YYSYMBOL_245_29 = 245, /* @29 */ + YYSYMBOL_brace_block = 246, /* brace_block */ + YYSYMBOL_247_30 = 247, /* @30 */ + YYSYMBOL_case_body = 248, /* case_body */ + YYSYMBOL_cases = 249, /* cases */ + YYSYMBOL_in_clauses = 250, /* in_clauses */ + YYSYMBOL_251_31 = 251, /* $@31 */ + YYSYMBOL_252_32 = 252, /* $@32 */ + YYSYMBOL_253_33 = 253, /* $@33 */ + YYSYMBOL_p_expr = 254, /* p_expr */ + YYSYMBOL_p_args_head = 255, /* p_args_head */ + YYSYMBOL_p_args_post = 256, /* p_args_post */ + YYSYMBOL_p_as = 257, /* p_as */ + YYSYMBOL_p_alt = 258, /* p_alt */ + YYSYMBOL_p_value = 259, /* p_value */ + YYSYMBOL_p_array = 260, /* p_array */ + YYSYMBOL_p_array_body = 261, /* p_array_body */ + YYSYMBOL_p_array_elems = 262, /* p_array_elems */ + YYSYMBOL_p_rest = 263, /* p_rest */ + YYSYMBOL_p_const = 264, /* p_const */ + YYSYMBOL_p_hash = 265, /* p_hash */ + YYSYMBOL_p_hash_body = 266, /* p_hash_body */ + YYSYMBOL_p_hash_elems = 267, /* p_hash_elems */ + YYSYMBOL_p_hash_elem = 268, /* p_hash_elem */ + YYSYMBOL_p_kwrest = 269, /* p_kwrest */ + YYSYMBOL_p_var = 270, /* p_var */ + YYSYMBOL_opt_rescue = 271, /* opt_rescue */ + YYSYMBOL_exc_list = 272, /* exc_list */ + YYSYMBOL_exc_var = 273, /* exc_var */ + YYSYMBOL_opt_ensure = 274, /* opt_ensure */ + YYSYMBOL_literal = 275, /* literal */ + YYSYMBOL_string = 276, /* string */ + YYSYMBOL_string_fragment = 277, /* string_fragment */ + YYSYMBOL_string_rep = 278, /* string_rep */ + YYSYMBOL_string_interp = 279, /* string_interp */ + YYSYMBOL_280_34 = 280, /* @34 */ + YYSYMBOL_xstring = 281, /* xstring */ + YYSYMBOL_regexp = 282, /* regexp */ + YYSYMBOL_heredoc = 283, /* heredoc */ + YYSYMBOL_heredoc_bodies = 284, /* heredoc_bodies */ + YYSYMBOL_heredoc_body = 285, /* heredoc_body */ + YYSYMBOL_heredoc_string_rep = 286, /* heredoc_string_rep */ + YYSYMBOL_heredoc_string_interp = 287, /* heredoc_string_interp */ + YYSYMBOL_288_35 = 288, /* @35 */ + YYSYMBOL_words = 289, /* words */ + YYSYMBOL_symbol = 290, /* symbol */ + YYSYMBOL_basic_symbol = 291, /* basic_symbol */ + YYSYMBOL_sym = 292, /* sym */ + YYSYMBOL_symbols = 293, /* symbols */ + YYSYMBOL_numeric = 294, /* numeric */ + YYSYMBOL_variable = 295, /* variable */ + YYSYMBOL_var_lhs = 296, /* var_lhs */ + YYSYMBOL_var_ref = 297, /* var_ref */ + YYSYMBOL_backref = 298, /* backref */ + YYSYMBOL_superclass = 299, /* superclass */ + YYSYMBOL_300_36 = 300, /* $@36 */ + YYSYMBOL_f_opt_arglist_paren = 301, /* f_opt_arglist_paren */ + YYSYMBOL_f_arglist_paren = 302, /* f_arglist_paren */ + YYSYMBOL_f_arglist = 303, /* f_arglist */ + YYSYMBOL_f_label = 304, /* f_label */ + YYSYMBOL_f_kw = 305, /* f_kw */ + YYSYMBOL_f_block_kw = 306, /* f_block_kw */ + YYSYMBOL_f_block_kwarg = 307, /* f_block_kwarg */ + YYSYMBOL_f_kwarg = 308, /* f_kwarg */ + YYSYMBOL_kwrest_mark = 309, /* kwrest_mark */ + YYSYMBOL_f_kwrest = 310, /* f_kwrest */ + YYSYMBOL_args_tail = 311, /* args_tail */ + YYSYMBOL_opt_args_tail = 312, /* opt_args_tail */ + YYSYMBOL_f_args = 313, /* f_args */ + YYSYMBOL_f_bad_arg = 314, /* f_bad_arg */ + YYSYMBOL_f_norm_arg = 315, /* f_norm_arg */ + YYSYMBOL_f_arg_item = 316, /* f_arg_item */ + YYSYMBOL_317_37 = 317, /* @37 */ + YYSYMBOL_f_arg = 318, /* f_arg */ + YYSYMBOL_f_opt_asgn = 319, /* f_opt_asgn */ + YYSYMBOL_f_opt = 320, /* f_opt */ + YYSYMBOL_f_block_opt = 321, /* f_block_opt */ + YYSYMBOL_f_block_optarg = 322, /* f_block_optarg */ + YYSYMBOL_f_optarg = 323, /* f_optarg */ + YYSYMBOL_restarg_mark = 324, /* restarg_mark */ + YYSYMBOL_f_rest_arg = 325, /* f_rest_arg */ + YYSYMBOL_blkarg_mark = 326, /* blkarg_mark */ + YYSYMBOL_f_block_arg = 327, /* f_block_arg */ + YYSYMBOL_opt_f_block_arg = 328, /* opt_f_block_arg */ + YYSYMBOL_singleton = 329, /* singleton */ + YYSYMBOL_330_38 = 330, /* $@38 */ + YYSYMBOL_assoc_list = 331, /* assoc_list */ + YYSYMBOL_assocs = 332, /* assocs */ + YYSYMBOL_assoc = 333, /* assoc */ + YYSYMBOL_operation = 334, /* operation */ + YYSYMBOL_operation2 = 335, /* operation2 */ + YYSYMBOL_operation3 = 336, /* operation3 */ + YYSYMBOL_dot_or_colon = 337, /* dot_or_colon */ + YYSYMBOL_call_op = 338, /* call_op */ + YYSYMBOL_call_op2 = 339, /* call_op2 */ + YYSYMBOL_opt_terms = 340, /* opt_terms */ + YYSYMBOL_opt_nl = 341, /* opt_nl */ + YYSYMBOL_rparen = 342, /* rparen */ + YYSYMBOL_trailer = 343, /* trailer */ + YYSYMBOL_term = 344, /* term */ + YYSYMBOL_nl = 345, /* nl */ + YYSYMBOL_terms = 346, /* terms */ + YYSYMBOL_none = 347 /* none */ +}; +typedef enum yysymbol_kind_t yysymbol_kind_t; + + + #ifdef short # undef short @@ -1736,6 +2665,18 @@ typedef int_least16_t yytype_int16; typedef short yytype_int16; #endif +/* Work around bug in HP-UX 11.23, which defines these macros + incorrectly for preprocessor constants. This workaround can likely + be removed in 2023, as HPE has promised support for HP-UX 11.23 + (aka HP-UX 11i v2) only through the end of 2022; see Table 2 of + . */ +#ifdef __hpux +# undef UINT_LEAST8_MAX +# undef UINT_LEAST16_MAX +# define UINT_LEAST8_MAX 255 +# define UINT_LEAST16_MAX 65535 +#endif + #if defined __UINT_LEAST8_MAX__ && __UINT_LEAST8_MAX__ <= __INT_MAX__ typedef __UINT_LEAST8_TYPE__ yytype_uint8; #elif (!defined __UINT_LEAST8_MAX__ && defined YY_STDINT_H \ @@ -1795,6 +2736,7 @@ typedef int yytype_uint16; #define YYSIZEOF(X) YY_CAST (YYPTRDIFF_T, sizeof (X)) + /* Stored state numbers (used for stacks). */ typedef yytype_int16 yy_state_t; @@ -1813,6 +2755,7 @@ typedef int yy_state_fast_t; # endif #endif + #ifndef YY_ATTRIBUTE_PURE # if defined __GNUC__ && 2 < __GNUC__ + (96 <= __GNUC_MINOR__) # define YY_ATTRIBUTE_PURE __attribute__ ((__pure__)) @@ -1831,17 +2774,23 @@ typedef int yy_state_fast_t; /* Suppress unused-variable warnings by "using" E. */ #if ! defined lint || defined __GNUC__ -# define YYUSE(E) ((void) (E)) +# define YY_USE(E) ((void) (E)) #else -# define YYUSE(E) /* empty */ +# define YY_USE(E) /* empty */ #endif -#if defined __GNUC__ && ! defined __ICC && 407 <= __GNUC__ * 100 + __GNUC_MINOR__ /* Suppress an incorrect diagnostic about yylval being uninitialized. */ -# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \ +#if defined __GNUC__ && ! defined __ICC && 406 <= __GNUC__ * 100 + __GNUC_MINOR__ +# if __GNUC__ * 100 + __GNUC_MINOR__ < 407 +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"") +# else +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \ _Pragma ("GCC diagnostic push") \ _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"") \ _Pragma ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +# endif # define YY_IGNORE_MAYBE_UNINITIALIZED_END \ _Pragma ("GCC diagnostic pop") #else @@ -1870,7 +2819,7 @@ typedef int yy_state_fast_t; #define YY_ASSERT(E) ((void) (0 && (E))) -#if ! defined yyoverflow || YYERROR_VERBOSE +#if 1 /* The parser invokes alloca or malloc; define the necessary symbols. */ @@ -1935,18 +2884,19 @@ void free (void *); /* INFRINGES ON USER NAME SPACE */ # endif # endif # endif -#endif /* ! defined yyoverflow || YYERROR_VERBOSE */ - +#endif /* 1 */ #if (! defined yyoverflow \ && (! defined __cplusplus \ - || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) + || (defined YYLTYPE_IS_TRIVIAL && YYLTYPE_IS_TRIVIAL \ + && defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) /* A type that is properly aligned for any stack member. */ union yyalloc { yy_state_t yyss_alloc; YYSTYPE yyvs_alloc; + YYLTYPE yyls_alloc; }; /* The size of the maximum gap between one aligned stack and the next. */ @@ -1955,8 +2905,9 @@ union yyalloc /* The size of an array large to enough to hold all stacks, each with N elements. */ # define YYSTACK_BYTES(N) \ - ((N) * (YYSIZEOF (yy_state_t) + YYSIZEOF (YYSTYPE)) \ - + YYSTACK_GAP_MAXIMUM) + ((N) * (YYSIZEOF (yy_state_t) + YYSIZEOF (YYSTYPE) \ + + YYSIZEOF (YYLTYPE)) \ + + 2 * YYSTACK_GAP_MAXIMUM) # define YYCOPY_NEEDED 1 @@ -1999,27 +2950,29 @@ union yyalloc #endif /* !YYCOPY_NEEDED */ /* YYFINAL -- State number of the termination state. */ -#define YYFINAL 3 +#define YYFINAL 106 /* YYLAST -- Last index in YYTABLE. */ -#define YYLAST 13092 +#define YYLAST 13819 /* YYNTOKENS -- Number of terminals. */ #define YYNTOKENS 149 /* YYNNTS -- Number of nonterminals. */ -#define YYNNTS 176 +#define YYNNTS 199 /* YYNRULES -- Number of rules. */ -#define YYNRULES 618 +#define YYNRULES 692 /* YYNSTATES -- Number of states. */ -#define YYNSTATES 1084 +#define YYNSTATES 1204 -#define YYUNDEFTOK 2 +/* YYMAXUTOK -- Last valid token kind. */ #define YYMAXUTOK 377 /* YYTRANSLATE(TOKEN-NUM) -- Symbol number corresponding to TOKEN-NUM as returned by yylex, with out-of-bounds checking. */ -#define YYTRANSLATE(YYX) \ - (0 <= (YYX) && (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) +#define YYTRANSLATE(YYX) \ + (0 <= (YYX) && (YYX) <= YYMAXUTOK \ + ? YY_CAST (yysymbol_kind_t, yytranslate[YYX]) \ + : YYSYMBOL_YYUNDEF) /* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM as returned by yylex. */ @@ -2066,1625 +3019,1343 @@ static const yytype_uint8 yytranslate[] = }; #if YYDEBUG - /* YYRLINE[YYN] -- Source line where rule number YYN was defined. */ +/* YYRLINE[YYN] -- Source line where rule number YYN was defined. */ static const yytype_int16 yyrline[] = { - 0, 1620, 1620, 1620, 1631, 1637, 1641, 1646, 1650, 1656, - 1658, 1657, 1671, 1698, 1704, 1708, 1713, 1717, 1723, 1723, - 1727, 1731, 1735, 1739, 1743, 1747, 1751, 1756, 1757, 1761, - 1765, 1769, 1773, 1779, 1782, 1786, 1790, 1794, 1798, 1802, - 1807, 1811, 1820, 1829, 1838, 1847, 1854, 1855, 1859, 1863, - 1864, 1868, 1872, 1876, 1880, 1884, 1894, 1893, 1908, 1917, - 1918, 1921, 1922, 1929, 1928, 1943, 1947, 1952, 1956, 1961, - 1965, 1970, 1974, 1978, 1982, 1986, 1992, 1996, 2002, 2003, - 2009, 2013, 2017, 2021, 2025, 2029, 2033, 2037, 2041, 2045, - 2051, 2052, 2058, 2062, 2068, 2072, 2078, 2082, 2086, 2090, - 2094, 2098, 2104, 2110, 2117, 2121, 2125, 2129, 2133, 2137, - 2143, 2149, 2154, 2160, 2164, 2167, 2171, 2175, 2182, 2183, - 2184, 2185, 2190, 2197, 2198, 2201, 2205, 2205, 2211, 2212, - 2213, 2214, 2215, 2216, 2217, 2218, 2219, 2220, 2221, 2222, - 2223, 2224, 2225, 2226, 2227, 2228, 2229, 2230, 2231, 2232, - 2233, 2234, 2235, 2236, 2237, 2238, 2239, 2240, 2243, 2243, - 2243, 2244, 2244, 2245, 2245, 2245, 2246, 2246, 2246, 2246, - 2247, 2247, 2247, 2248, 2248, 2248, 2249, 2249, 2249, 2249, - 2250, 2250, 2250, 2250, 2251, 2251, 2251, 2251, 2252, 2252, - 2252, 2252, 2253, 2253, 2253, 2253, 2254, 2254, 2257, 2261, - 2265, 2269, 2273, 2277, 2281, 2286, 2291, 2296, 2300, 2304, - 2308, 2312, 2316, 2320, 2324, 2328, 2332, 2336, 2340, 2344, - 2348, 2352, 2356, 2360, 2364, 2368, 2372, 2376, 2380, 2384, - 2388, 2392, 2396, 2400, 2404, 2408, 2412, 2416, 2420, 2424, - 2428, 2432, 2436, 2440, 2444, 2453, 2462, 2471, 2480, 2486, - 2487, 2492, 2496, 2503, 2507, 2514, 2518, 2527, 2544, 2545, - 2548, 2549, 2550, 2555, 2560, 2567, 2573, 2578, 2583, 2588, - 2595, 2595, 2606, 2610, 2616, 2620, 2626, 2629, 2635, 2639, - 2644, 2649, 2655, 2660, 2664, 2670, 2671, 2672, 2673, 2674, - 2675, 2676, 2677, 2681, 2686, 2685, 2697, 2701, 2696, 2706, - 2706, 2710, 2714, 2718, 2722, 2727, 2732, 2736, 2740, 2744, - 2748, 2752, 2753, 2759, 2766, 2758, 2779, 2787, 2795, 2795, - 2795, 2802, 2802, 2802, 2809, 2815, 2820, 2822, 2819, 2831, - 2829, 2847, 2852, 2845, 2869, 2867, 2883, 2893, 2904, 2908, - 2912, 2916, 2922, 2929, 2930, 2931, 2934, 2935, 2938, 2939, - 2947, 2948, 2954, 2958, 2961, 2965, 2969, 2973, 2978, 2982, - 2986, 2990, 2996, 2995, 3005, 3009, 3013, 3017, 3023, 3028, - 3033, 3037, 3041, 3045, 3049, 3053, 3057, 3061, 3065, 3069, - 3073, 3077, 3081, 3085, 3089, 3095, 3100, 3107, 3107, 3111, - 3116, 3123, 3127, 3133, 3134, 3137, 3142, 3145, 3149, 3155, - 3159, 3166, 3165, 3180, 3190, 3194, 3199, 3206, 3210, 3214, - 3218, 3222, 3226, 3230, 3234, 3238, 3245, 3244, 3259, 3258, - 3274, 3282, 3291, 3294, 3301, 3304, 3308, 3309, 3312, 3316, - 3319, 3323, 3326, 3327, 3328, 3329, 3332, 3333, 3339, 3340, - 3341, 3345, 3358, 3359, 3365, 3370, 3369, 3379, 3383, 3389, - 3393, 3406, 3410, 3416, 3419, 3420, 3423, 3429, 3435, 3436, - 3439, 3446, 3445, 3458, 3462, 3476, 3481, 3495, 3501, 3502, - 3503, 3504, 3505, 3509, 3515, 3519, 3529, 3530, 3531, 3535, - 3541, 3545, 3549, 3553, 3557, 3563, 3567, 3573, 3577, 3581, - 3585, 3589, 3593, 3601, 3608, 3614, 3615, 3619, 3623, 3622, - 3639, 3640, 3643, 3649, 3653, 3659, 3660, 3664, 3668, 3674, - 3678, 3684, 3690, 3697, 3703, 3710, 3714, 3720, 3724, 3730, - 3731, 3734, 3738, 3744, 3748, 3752, 3756, 3762, 3767, 3772, - 3776, 3780, 3784, 3788, 3792, 3796, 3800, 3804, 3808, 3812, - 3816, 3820, 3824, 3829, 3835, 3840, 3845, 3850, 3855, 3862, - 3866, 3873, 3878, 3877, 3889, 3893, 3899, 3907, 3915, 3923, - 3927, 3933, 3937, 3943, 3944, 3947, 3952, 3959, 3960, 3963, - 3967, 3973, 3977, 3983, 3988, 3988, 4013, 4014, 4020, 4025, - 4031, 4037, 4042, 4046, 4051, 4056, 4066, 4071, 4077, 4078, - 4079, 4082, 4083, 4084, 4085, 4088, 4089, 4090, 4093, 4094, - 4097, 4101, 4107, 4108, 4114, 4115, 4118, 4119, 4122, 4125, - 4126, 4127, 4130, 4131, 4134, 4139, 4142, 4143, 4147 + 0, 2182, 2182, 2182, 2192, 2198, 2202, 2206, 2210, 2216, + 2218, 2217, 2231, 2257, 2263, 2267, 2271, 2275, 2281, 2281, + 2285, 2289, 2293, 2297, 2306, 2315, 2319, 2324, 2325, 2329, + 2333, 2337, 2341, 2344, 2348, 2352, 2356, 2360, 2364, 2369, + 2373, 2382, 2391, 2400, 2409, 2416, 2417, 2421, 2424, 2425, + 2429, 2433, 2437, 2441, 2441, 2447, 2447, 2453, 2456, 2466, + 2465, 2480, 2489, 2490, 2493, 2494, 2501, 2500, 2515, 2519, + 2524, 2528, 2533, 2537, 2542, 2546, 2550, 2554, 2558, 2564, + 2568, 2574, 2575, 2581, 2585, 2589, 2593, 2597, 2601, 2605, + 2609, 2613, 2617, 2623, 2624, 2630, 2634, 2640, 2644, 2650, + 2654, 2658, 2662, 2666, 2670, 2676, 2682, 2689, 2693, 2697, + 2701, 2705, 2709, 2715, 2721, 2726, 2732, 2736, 2739, 2743, + 2747, 2754, 2755, 2756, 2757, 2762, 2769, 2770, 2773, 2777, + 2777, 2783, 2784, 2785, 2786, 2787, 2788, 2789, 2790, 2791, + 2792, 2793, 2794, 2795, 2796, 2797, 2798, 2799, 2800, 2801, + 2802, 2803, 2804, 2805, 2806, 2807, 2808, 2809, 2810, 2811, + 2812, 2815, 2815, 2815, 2816, 2816, 2817, 2817, 2817, 2818, + 2818, 2818, 2818, 2819, 2819, 2819, 2820, 2820, 2820, 2821, + 2821, 2821, 2821, 2822, 2822, 2822, 2822, 2823, 2823, 2823, + 2823, 2824, 2824, 2824, 2824, 2825, 2825, 2825, 2825, 2826, + 2826, 2829, 2833, 2837, 2841, 2845, 2849, 2853, 2858, 2863, + 2868, 2872, 2876, 2880, 2884, 2888, 2892, 2896, 2900, 2904, + 2908, 2912, 2916, 2920, 2924, 2928, 2932, 2936, 2940, 2944, + 2948, 2952, 2956, 2960, 2964, 2968, 2972, 2976, 2980, 2984, + 2988, 2992, 2996, 3000, 3004, 3008, 3012, 3016, 3025, 3034, + 3043, 3052, 3058, 3059, 3063, 3067, 3073, 3077, 3084, 3088, + 3097, 3114, 3115, 3118, 3119, 3120, 3124, 3128, 3134, 3139, + 3143, 3147, 3151, 3157, 3157, 3168, 3172, 3178, 3182, 3188, + 3191, 3196, 3200, 3204, 3209, 3213, 3219, 3224, 3228, 3234, + 3235, 3239, 3243, 3244, 3245, 3246, 3247, 3252, 3251, 3263, + 3267, 3262, 3272, 3272, 3276, 3280, 3284, 3288, 3292, 3296, + 3300, 3304, 3308, 3312, 3316, 3317, 3323, 3330, 3322, 3343, + 3351, 3359, 3359, 3359, 3366, 3366, 3366, 3373, 3379, 3383, + 3392, 3401, 3411, 3413, 3410, 3422, 3420, 3438, 3443, 3436, + 3460, 3458, 3474, 3484, 3495, 3499, 3503, 3507, 3513, 3520, + 3521, 3522, 3525, 3526, 3529, 3530, 3538, 3539, 3545, 3549, + 3552, 3556, 3560, 3564, 3569, 3573, 3577, 3581, 3587, 3586, + 3596, 3600, 3604, 3608, 3614, 3619, 3624, 3628, 3632, 3636, + 3640, 3644, 3648, 3652, 3656, 3660, 3664, 3668, 3672, 3676, + 3680, 3686, 3691, 3698, 3698, 3702, 3707, 3713, 3717, 3723, + 3724, 3727, 3732, 3735, 3739, 3745, 3749, 3756, 3755, 3772, + 3777, 3781, 3786, 3793, 3797, 3801, 3805, 3809, 3813, 3817, + 3821, 3825, 3832, 3831, 3846, 3845, 3861, 3869, 3878, 3883, + 3887, 3887, 3892, 3892, 3897, 3897, 3907, 3908, 3912, 3916, + 3920, 3924, 3928, 3933, 3938, 3946, 3950, 3957, 3961, 3967, + 3968, 3974, 3975, 3981, 3982, 3986, 3990, 3994, 3998, 4002, + 4006, 4010, 4011, 4012, 4019, 4023, 4030, 4035, 4040, 4045, + 4050, 4055, 4063, 4067, 4074, 4078, 4086, 4090, 4094, 4101, + 4105, 4112, 4116, 4120, 4127, 4131, 4140, 4145, 4153, 4157, + 4162, 4169, 4175, 4182, 4185, 4189, 4190, 4193, 4197, 4200, + 4204, 4207, 4208, 4209, 4210, 4213, 4214, 4220, 4225, 4230, + 4235, 4241, 4242, 4248, 4254, 4253, 4265, 4269, 4275, 4279, + 4285, 4294, 4305, 4308, 4309, 4312, 4318, 4324, 4325, 4328, + 4335, 4334, 4349, 4353, 4361, 4365, 4377, 4384, 4391, 4392, + 4393, 4394, 4395, 4399, 4405, 4409, 4417, 4418, 4419, 4423, + 4429, 4433, 4437, 4441, 4445, 4451, 4455, 4461, 4465, 4469, + 4473, 4477, 4481, 4485, 4493, 4500, 4506, 4507, 4511, 4515, + 4514, 4531, 4532, 4535, 4541, 4545, 4551, 4552, 4556, 4560, + 4566, 4572, 4580, 4586, 4593, 4599, 4606, 4610, 4616, 4620, + 4626, 4627, 4630, 4634, 4640, 4644, 4648, 4652, 4658, 4662, + 4667, 4672, 4676, 4680, 4684, 4688, 4692, 4696, 4700, 4704, + 4708, 4712, 4716, 4720, 4724, 4729, 4735, 4740, 4745, 4750, + 4755, 4762, 4766, 4773, 4778, 4777, 4789, 4793, 4799, 4807, + 4815, 4823, 4827, 4833, 4837, 4843, 4844, 4847, 4852, 4859, + 4860, 4863, 4867, 4871, 4877, 4881, 4885, 4891, 4897, 4897, + 4904, 4905, 4911, 4915, 4921, 4927, 4932, 4936, 4941, 4946, + 4962, 4967, 4973, 4974, 4975, 4978, 4979, 4980, 4981, 4984, + 4985, 4986, 4989, 4990, 4993, 4997, 5003, 5004, 5010, 5011, + 5014, 5015, 5018, 5021, 5022, 5023, 5026, 5027, 5030, 5035, + 5038, 5039, 5043 }; #endif -#if YYDEBUG || YYERROR_VERBOSE || 1 +/** Accessing symbol of state STATE. */ +#define YY_ACCESSING_SYMBOL(State) YY_CAST (yysymbol_kind_t, yystos[State]) + +#if 1 +/* The user-facing name of the symbol whose (internal) number is + YYSYMBOL. No bounds checking. */ +static const char *yysymbol_name (yysymbol_kind_t yysymbol) YY_ATTRIBUTE_UNUSED; + /* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. First, the terminals, then, starting at YYNTOKENS, nonterminals. */ static const char *const yytname[] = { - "$end", "error", "$undefined", "keyword_class", "keyword_module", - "keyword_def", "keyword_begin", "keyword_if", "keyword_unless", - "keyword_while", "keyword_until", "keyword_for", "keyword_undef", - "keyword_rescue", "keyword_ensure", "keyword_end", "keyword_then", - "keyword_elsif", "keyword_else", "keyword_case", "keyword_when", - "keyword_break", "keyword_next", "keyword_redo", "keyword_retry", - "keyword_in", "keyword_do", "keyword_do_cond", "keyword_do_block", - "keyword_do_LAMBDA", "keyword_return", "keyword_yield", "keyword_super", - "keyword_self", "keyword_nil", "keyword_true", "keyword_false", - "keyword_and", "keyword_or", "keyword_not", "modifier_if", - "modifier_unless", "modifier_while", "modifier_until", "modifier_rescue", - "keyword_alias", "keyword_BEGIN", "keyword_END", "keyword__LINE__", - "keyword__FILE__", "keyword__ENCODING__", "\"local variable or method\"", - "\"method\"", "\"global variable\"", "\"instance variable\"", - "\"constant\"", "\"class variable\"", "\"label\"", "\"integer literal\"", - "\"float literal\"", "\"character literal\"", "tXSTRING", "tREGEXP", - "tSTRING", "tSTRING_PART", "tSTRING_MID", "tNTH_REF", "tBACK_REF", - "tREGEXP_END", "\"numbered parameter\"", "\"unary plus\"", - "\"unary minus\"", "\"<=>\"", "\"==\"", "\"===\"", "\"!=\"", "\">=\"", - "\"<=\"", "\"&&\"", "\"||\"", "\"=~\"", "\"!~\"", "\"..\"", "\"...\"", - "tBDOT2", "tBDOT3", "tAREF", "tASET", "\"<<\"", "\">>\"", "\"::\"", - "tCOLON3", "tOP_ASGN", "\"=>\"", "tLPAREN", "\"(\"", "\")\"", "\"[\"", - "tLBRACE", "\"{\"", "\"*\"", "tPOW", "\"**\"", "\"&\"", "\"->\"", - "\"&.\"", "\"symbol\"", "\"string literal\"", "tXSTRING_BEG", - "tSTRING_DVAR", "tREGEXP_BEG", "tWORDS_BEG", "tSYMBOLS_BEG", "tLAMBEG", - "\"here document\"", "tHEREDOC_END", "tLITERAL_DELIM", - "tHD_LITERAL_DELIM", "tHD_STRING_PART", "tHD_STRING_MID", "tLOWEST", - "'='", "'?'", "':'", "'>'", "'<'", "'|'", "'^'", "'&'", "'+'", "'-'", - "'*'", "'/'", "'%'", "tUMINUS_NUM", "'!'", "'~'", "tLAST_TOKEN", "'{'", - "'}'", "'['", "']'", "','", "'`'", "'('", "')'", "';'", "'.'", "'\\n'", - "$accept", "program", "$@1", "top_compstmt", "top_stmts", "top_stmt", - "@2", "bodystmt", "compstmt", "stmts", "stmt", "$@3", "command_asgn", - "command_rhs", "expr", "defn_head", "defs_head", "$@4", "expr_value", - "command_call", "block_command", "cmd_brace_block", "$@5", "command", - "mlhs", "mlhs_inner", "mlhs_basic", "mlhs_item", "mlhs_list", - "mlhs_post", "mlhs_node", "lhs", "cname", "cpath", "fname", "fsym", - "undef_list", "$@6", "op", "reswords", "arg", "aref_args", "arg_rhs", - "paren_args", "opt_paren_args", "opt_call_args", "call_args", - "command_args", "@7", "block_arg", "opt_block_arg", "comma", "args", - "mrhs", "primary", "@8", "@9", "$@10", "$@11", "@12", "@13", "$@14", - "$@15", "$@16", "$@17", "$@18", "$@19", "@20", "@21", "@22", "@23", - "primary_value", "then", "do", "if_tail", "opt_else", "for_var", - "f_margs", "$@24", "block_args_tail", "opt_block_args_tail", - "block_param", "opt_block_param", "block_param_def", "$@25", - "opt_bv_decl", "bv_decls", "bvar", "f_larglist", "lambda_body", - "do_block", "$@26", "block_call", "method_call", "brace_block", "@27", - "@28", "case_body", "cases", "opt_rescue", "exc_list", "exc_var", - "opt_ensure", "literal", "string", "string_fragment", "string_rep", - "string_interp", "@29", "xstring", "regexp", "heredoc", "heredoc_bodies", - "heredoc_body", "heredoc_string_rep", "heredoc_string_interp", "@30", - "words", "symbol", "basic_symbol", "sym", "symbols", "numeric", - "variable", "var_lhs", "var_ref", "backref", "superclass", "$@31", - "f_opt_arglist_paren", "f_arglist_paren", "f_arglist", "f_label", "f_kw", - "f_block_kw", "f_block_kwarg", "f_kwarg", "kwrest_mark", "f_kwrest", - "args_tail", "opt_args_tail", "f_args", "f_bad_arg", "f_norm_arg", - "f_arg_item", "@32", "f_arg", "f_opt_asgn", "f_opt", "f_block_opt", - "f_block_optarg", "f_optarg", "restarg_mark", "f_rest_arg", - "blkarg_mark", "f_block_arg", "opt_f_block_arg", "singleton", "$@33", - "assoc_list", "assocs", "assoc", "operation", "operation2", "operation3", - "dot_or_colon", "call_op", "call_op2", "opt_terms", "opt_nl", "rparen", - "trailer", "term", "nl", "terms", "none", YY_NULLPTR + "\"end of file\"", "error", "\"invalid token\"", "\"'class'\"", + "\"'module'\"", "\"'def'\"", "\"'begin'\"", "\"'if'\"", "\"'unless'\"", + "\"'while'\"", "\"'until'\"", "\"'for'\"", "\"'undef'\"", "\"'rescue'\"", + "\"'ensure'\"", "\"'end'\"", "\"'then'\"", "\"'elsif'\"", "\"'else'\"", + "\"'case'\"", "\"'when'\"", "\"'break'\"", "\"'next'\"", "\"'redo'\"", + "\"'retry'\"", "\"'in'\"", "\"'do'\"", "\"'do' for condition\"", + "\"'do' for block\"", "\"'do' for lambda\"", "\"'return'\"", + "\"'yield'\"", "\"'super'\"", "\"'self'\"", "\"'nil'\"", "\"'true'\"", + "\"'false'\"", "\"'and'\"", "\"'or'\"", "\"'not'\"", "\"'if' modifier\"", + "\"'unless' modifier\"", "\"'while' modifier\"", "\"'until' modifier\"", + "\"'rescue' modifier\"", "\"'alias'\"", "\"'BEGIN'\"", "\"'END'\"", + "\"'__LINE__'\"", "\"'__FILE__'\"", "\"'__ENCODING__'\"", + "\"local variable or method\"", "\"method\"", "\"global variable\"", + "\"instance variable\"", "\"constant\"", "\"class variable\"", + "\"label\"", "\"integer literal\"", "\"float literal\"", + "\"character literal\"", "tXSTRING", "tREGEXP", "tSTRING", + "tSTRING_PART", "tSTRING_MID", "tNTH_REF", "tBACK_REF", "tREGEXP_END", + "\"numbered parameter\"", "\"unary plus\"", "\"unary minus\"", "\"<=>\"", + "\"==\"", "\"===\"", "\"!=\"", "\">=\"", "\"<=\"", "\"&&\"", "\"||\"", + "\"=~\"", "\"!~\"", "\"..\"", "\"...\"", "tBDOT2", "tBDOT3", "tAREF", + "tASET", "\"<<\"", "\">>\"", "\"::\"", "tCOLON3", "tOP_ASGN", "\"=>\"", + "tLPAREN", "\"(\"", "\")\"", "\"[\"", "tLBRACE", "\"{\"", "\"*\"", + "tPOW", "\"**\"", "\"&\"", "\"->\"", "\"&.\"", "\"symbol\"", + "\"string literal\"", "tXSTRING_BEG", "tSTRING_DVAR", "tREGEXP_BEG", + "tWORDS_BEG", "tSYMBOLS_BEG", "tLAMBEG", "\"here document\"", + "tHEREDOC_END", "tLITERAL_DELIM", "tHD_LITERAL_DELIM", "tHD_STRING_PART", + "tHD_STRING_MID", "tLOWEST", "'='", "'?'", "':'", "'>'", "'<'", "'|'", + "'^'", "'&'", "'+'", "'-'", "'*'", "'/'", "'%'", "tUMINUS_NUM", "'!'", + "'~'", "tLAST_TOKEN", "'{'", "'}'", "'['", "']'", "','", "'`'", "'('", + "')'", "';'", "'.'", "'\\n'", "$accept", "$@1", "program", + "top_compstmt", "top_stmts", "top_stmt", "@2", "bodystmt", "compstmt", + "stmts", "$@3", "stmt", "command_asgn", "command_rhs", "expr", "$@4", + "$@5", "defn_head", "$@6", "defs_head", "expr_value", "command_call", + "block_command", "$@7", "cmd_brace_block", "command", "mlhs", + "mlhs_inner", "mlhs_basic", "mlhs_item", "mlhs_list", "mlhs_post", + "mlhs_node", "lhs", "cname", "cpath", "fname", "fsym", "undef_list", + "$@8", "op", "reswords", "arg", "aref_args", "arg_rhs", "paren_args", + "opt_paren_args", "opt_call_args", "call_args", "@9", "command_args", + "block_arg", "opt_block_arg", "comma", "args", "mrhs", "primary", "@10", + "@11", "$@12", "$@13", "@14", "@15", "$@16", "$@17", "$@18", "$@19", + "$@20", "$@21", "@22", "@23", "@24", "@25", "primary_value", "then", + "do", "if_tail", "opt_else", "for_var", "f_margs", "$@26", + "block_args_tail", "opt_block_args_tail", "block_param", + "opt_block_param", "$@27", "block_param_def", "opt_bv_decl", "bv_decls", + "bvar", "f_larglist", "lambda_body", "@28", "do_block", "block_call", + "method_call", "@29", "brace_block", "@30", "case_body", "cases", + "in_clauses", "$@31", "$@32", "$@33", "p_expr", "p_args_head", + "p_args_post", "p_as", "p_alt", "p_value", "p_array", "p_array_body", + "p_array_elems", "p_rest", "p_const", "p_hash", "p_hash_body", + "p_hash_elems", "p_hash_elem", "p_kwrest", "p_var", "opt_rescue", + "exc_list", "exc_var", "opt_ensure", "literal", "string", + "string_fragment", "string_rep", "string_interp", "@34", "xstring", + "regexp", "heredoc", "heredoc_bodies", "heredoc_body", + "heredoc_string_rep", "heredoc_string_interp", "@35", "words", "symbol", + "basic_symbol", "sym", "symbols", "numeric", "variable", "var_lhs", + "var_ref", "backref", "superclass", "$@36", "f_opt_arglist_paren", + "f_arglist_paren", "f_arglist", "f_label", "f_kw", "f_block_kw", + "f_block_kwarg", "f_kwarg", "kwrest_mark", "f_kwrest", "args_tail", + "opt_args_tail", "f_args", "f_bad_arg", "f_norm_arg", "f_arg_item", + "@37", "f_arg", "f_opt_asgn", "f_opt", "f_block_opt", "f_block_optarg", + "f_optarg", "restarg_mark", "f_rest_arg", "blkarg_mark", "f_block_arg", + "opt_f_block_arg", "singleton", "$@38", "assoc_list", "assocs", "assoc", + "operation", "operation2", "operation3", "dot_or_colon", "call_op", + "call_op2", "opt_terms", "opt_nl", "rparen", "trailer", "term", "nl", + "terms", "none", YY_NULLPTR }; -#endif -# ifdef YYPRINT -/* YYTOKNUM[NUM] -- (External) token number corresponding to the - (internal) symbol number NUM (which must be that of a token). */ -static const yytype_int16 yytoknum[] = -{ - 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, - 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, - 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, - 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, - 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, - 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, - 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, - 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, - 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, - 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, - 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, - 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, - 375, 61, 63, 58, 62, 60, 124, 94, 38, 43, - 45, 42, 47, 37, 376, 33, 126, 377, 123, 125, - 91, 93, 44, 96, 40, 41, 59, 46, 10 -}; -# endif +static const char * +yysymbol_name (yysymbol_kind_t yysymbol) +{ + return yytname[yysymbol]; +} +#endif -#define YYPACT_NINF (-868) +#define YYPACT_NINF (-981) #define yypact_value_is_default(Yyn) \ ((Yyn) == YYPACT_NINF) -#define YYTABLE_NINF (-619) +#define YYTABLE_NINF (-693) #define yytable_value_is_error(Yyn) \ ((Yyn) == YYTABLE_NINF) - /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing - STATE-NUM. */ +/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing + STATE-NUM. */ static const yytype_int16 yypact[] = { - -868, 115, 3515, -868, 8231, 10355, 10697, 6539, -868, 10001, - 10001, -868, -868, 10469, 7721, 6155, 8467, 8467, -868, -868, - 8467, 4172, 3764, -868, -868, -868, -868, 18, 7721, -868, - 38, -868, -868, -868, 6681, 3628, -868, -868, 6823, -868, - -868, -868, -868, -868, -868, -868, 45, 10119, 10119, 10119, - 10119, 217, 5414, 792, 8939, 9293, 8003, -868, 7439, 1134, - 894, 67, 1139, 1203, -868, 94, 10237, 10119, -868, 780, - -868, 1204, -868, 529, 1741, 1741, -868, -868, 271, 173, - -868, 184, 10583, -868, 272, 12773, 579, 668, 228, 75, - -868, 381, -868, -868, -868, -868, -868, -868, -868, -868, - -868, 48, 278, -868, 300, 129, -868, -868, -868, -868, - -868, 243, 243, 18, 110, 646, -868, 10001, 360, 5533, - 517, 1837, 1837, -868, 279, -868, 756, -868, -868, 129, - -868, -868, -868, -868, -868, -868, -868, -868, -868, -868, - -868, -868, -868, -868, -868, -868, -868, -868, -868, -868, - -868, -868, -868, -868, -868, -868, -868, -868, 33, 89, - 108, 109, -868, -868, -868, -868, -868, -868, 117, 154, - 192, 199, -868, 208, -868, -868, -868, -868, -868, -868, - -868, -868, -868, -868, -868, -868, -868, -868, -868, -868, - -868, -868, -868, -868, -868, -868, -868, -868, -868, -868, - -868, -868, -868, -868, -868, -868, -868, -868, -868, 290, - 4592, 354, 529, 1741, 1741, 77, 305, 12897, 801, 159, - 347, 193, 77, 10001, 10001, 866, 397, -868, -868, 890, - 433, 78, 85, -868, -868, -868, -868, -868, -868, -868, - -868, -868, 7580, -868, -868, 320, -868, -868, -868, -868, - -868, -868, 780, -868, 944, -868, 444, -868, -868, 780, - 3900, 124, 10119, 10119, 10119, 10119, -868, 12835, -868, -868, - 324, 428, 324, -868, -868, -868, 8585, -868, -868, -868, - 8467, -868, -868, -868, 6155, 6393, -868, 367, 5652, -868, - 974, 391, 12959, 12959, 355, 8349, 5414, 378, 780, 1204, - 780, 416, -868, 8349, 780, 408, 714, 714, -868, 12835, - 423, 714, -868, 500, 10811, 429, 988, 999, 1035, 1982, - -868, -868, -868, -868, 1241, -868, -868, -868, -868, -868, - -868, 981, 1248, -868, -868, 1126, -868, 1051, -868, 1252, - -868, 1265, 480, 482, -868, -868, -868, -868, 5917, 10001, - 10001, 10001, 10001, 8349, 10001, 10001, 93, -868, -868, -868, - -868, 536, 780, -868, -868, -868, -868, -868, -868, -868, - 2113, 475, 479, 4592, 10119, -868, 463, 563, 476, -868, - 780, -868, -868, -868, 483, 10119, -868, 487, 576, 489, - 581, -868, -868, 521, 4592, -868, -868, 9411, -868, 5414, - 8117, 503, 9411, 10119, 10119, 10119, 10119, 10119, 10119, 10119, - 10119, 10119, 10119, 10119, 10119, 10119, 10119, 592, 10119, 10119, - 10119, 10119, 10119, 10119, 10119, 10119, 10119, 10119, 10119, 10119, - 3310, -868, 8467, -868, 11089, -868, -868, 12293, -868, -868, - -868, -868, 10237, 10237, -868, 550, -868, 529, -868, 1036, - -868, -868, -868, -868, -868, -868, 11175, 8467, 11261, 4592, - 10001, -868, -868, -868, 636, 642, 210, 538, 540, -868, - 4738, 659, 10119, 11347, 8467, 11433, 10119, 10119, 5030, 620, - 620, 134, 11519, 8467, 11605, -868, 616, -868, 5652, 444, - -868, -868, 9529, 665, -868, 10119, 10119, 12897, 12897, 12897, - 10119, -868, -868, 8703, -868, 10119, -868, 9057, 6274, 546, - 780, 324, 324, -868, -868, 313, 547, -868, -868, -868, - 7721, 5149, 557, 11347, 11433, 10119, 1204, 780, -868, -868, - 6036, 555, 1204, -868, -868, 9175, -868, 780, 9293, -868, - -868, -868, 1036, 184, 10811, -868, 10811, 11691, 8467, 11777, - 2314, -868, -868, 561, -868, 1290, 5652, 981, -868, -868, - -868, -868, -868, -868, -868, 10119, 10119, -868, -868, -868, - -868, -868, -868, -868, -868, -868, -868, -868, -868, 1428, - 780, 780, 564, 10237, 699, 12897, 266, -868, -868, -868, - 263, -868, -868, 2562, -868, 12897, 2314, -868, -868, 2042, - -868, -868, 10237, 701, 65, 10119, -868, 12489, 324, -868, - 780, 10811, 566, -868, -868, -868, 674, 591, 2410, -868, - -868, 1037, 220, 3100, 3100, 3100, 3100, 1377, 1377, 3375, - 2808, 3100, 3100, 12959, 12959, 1279, 1279, -868, 391, 12897, - 1377, 1377, 1483, 1483, 1288, 383, 383, 391, 391, 391, - 2930, 7179, 4308, 7297, -868, 243, -868, 584, 324, 452, - -868, 516, -868, -868, 4036, -868, -868, 1908, 65, 65, - -868, 3027, -868, -868, -868, -868, -868, 780, 10001, 4592, - 697, 530, -868, 243, 587, 243, 707, 313, 7862, -868, - 9647, 715, -868, 10119, 10119, 572, -868, 6941, 7060, 596, - 223, 285, 715, -868, -868, -868, -868, 116, 121, 603, - 135, 136, 10001, 7721, 609, 734, 12897, 206, -868, 12897, - 12897, 12897, 281, 10119, 12835, -868, 324, 12897, -868, -868, - -868, -868, 8821, 9057, -868, -868, -868, 611, -868, -868, - 211, 1204, 780, 714, 503, -868, 697, 530, 613, 860, - 904, -868, 36, 2314, -868, 618, -868, 391, 391, -868, - -868, 812, 780, 619, -868, -868, 2628, 711, 12365, -868, - 706, 536, -868, 476, -868, 780, -868, -868, 623, 627, - 628, -868, 630, 706, 628, 731, 12427, -868, -868, 2314, - 4592, -868, -868, 12560, 9765, -868, -868, 10811, 8349, 10237, - 10119, 11863, 8467, 11949, 413, 10237, 10237, -868, 550, 560, - 8703, 10237, 10237, -868, 550, 75, 271, 4592, 5652, 65, - -868, 780, 766, -868, -868, -868, -868, 12489, -868, 689, - -868, 5295, 771, -868, 10001, 774, -868, 10119, 10119, 333, - 10119, 10119, 776, 5798, 5798, 148, 620, -868, -868, -868, - 9883, 4884, 12897, -868, 6274, 324, -868, -868, -868, 88, - 647, 990, 4592, 5652, -868, -868, -868, 653, -868, 1554, - 780, 10119, 10119, -868, -868, 2314, -868, 2042, -868, 2042, - -868, 2042, -868, -868, 10119, 10119, -868, -868, -868, 10925, - -868, 655, 476, 657, 10925, -868, 661, 667, -868, 786, - 10119, 12631, -868, -868, 12897, 3179, 4444, 670, 406, 426, - 10119, 10119, -868, -868, -868, -868, -868, 10237, -868, -868, - -868, -868, -868, -868, -868, 803, 677, 5652, 4592, -868, - -868, 11039, 77, -868, -868, 5798, -868, -868, 77, -868, - 10119, -868, 808, 809, -868, 12897, 219, -868, 9057, -868, - 1629, 813, 682, 1445, 1445, 1019, -868, 12897, 12897, 628, - 694, 628, 628, 12897, 12897, 705, 712, 788, 1054, 266, - -868, -868, 1801, -868, 1054, 2314, -868, 2042, -868, -868, - 12702, 432, 12897, 12897, -868, -868, -868, -868, 718, 830, - 795, -868, 1083, 999, 1035, 4592, -868, 4738, -868, -868, - 5798, -868, -868, -868, -868, 722, -868, -868, -868, -868, - 727, 727, 1445, 733, -868, 2042, -868, -868, -868, -868, - -868, -868, 12035, -868, 476, 266, -868, -868, 736, 746, - 750, -868, 751, 750, -868, -868, 1036, 12121, 8467, 12207, - 642, 572, 879, 1629, 281, 1445, 727, 1445, 628, 757, - 761, -868, 2314, -868, 2042, -868, 2042, -868, 2042, -868, - -868, 697, 530, 772, 201, 467, -868, -868, -868, -868, - 727, -868, 750, 781, 750, 750, 88, -868, 2042, -868, - -868, -868, 750, -868 + -981, 3710, 87, 8854, 10978, 11320, 7162, -981, 10624, 10624, + -981, -981, 11092, 8344, 6778, 9090, 9090, -981, -981, 9090, + 4231, 3370, -981, -981, -981, -981, -46, 8344, -981, 23, + -981, -981, -981, 7304, 3823, -981, -981, 7446, -981, -981, + -981, -981, -981, -981, -981, 232, 10742, 10742, 10742, 10742, + 111, 5891, 6010, 9562, 9916, 8626, -981, 8062, 1248, 1022, + 365, 1325, 1340, -981, 259, 10860, 10742, -981, 985, -981, + 834, -981, 507, 2969, 2969, -981, -981, 85, 76, -981, + -3, 11206, -981, 78, 3465, 631, 667, 143, 45, -981, + 209, -981, -981, -981, -981, -981, -981, -981, -981, -981, + 214, 109, -981, 279, 66, -981, -981, -981, -981, -981, + -981, 88, 88, -46, 498, 748, -981, 10624, 389, 6129, + 447, 2225, 2225, -981, 114, -981, 701, -981, -981, 66, + -981, -981, -981, -981, -981, -981, -981, -981, -981, -981, + -981, -981, -981, -981, -981, -981, -981, -981, -981, -981, + -981, -981, -981, -981, -981, -981, -981, -981, 77, 82, + 91, 130, -981, -981, -981, -981, -981, -981, 155, 156, + 165, 229, -981, 233, -981, -981, -981, -981, -981, -981, + -981, -981, -981, -981, -981, -981, -981, -981, -981, -981, + -981, -981, -981, -981, -981, -981, -981, -981, -981, -981, + -981, -981, -981, -981, -981, -981, -981, -981, -981, 248, + 4923, 202, 507, 2969, 2969, 179, 210, 823, 244, 158, + 263, 179, 10624, 10624, 855, 287, -981, -981, 898, 309, + 58, 89, -981, -981, -981, -981, -981, -981, -981, -981, + -981, 8203, -981, -981, 237, -981, -981, -981, -981, -981, + -981, 985, -981, 549, -981, 384, -981, -981, 985, 3959, + 106, 10742, 10742, 10742, 10742, -981, 13544, -981, -981, 318, + 407, 318, -981, -981, -981, 9208, -981, -981, 9090, -981, + -981, -981, -981, 6778, 7016, -981, 334, 6248, -981, 929, + 379, 13668, 13668, 291, 8972, 5891, 341, 1458, 834, 985, + 402, -981, 6129, 985, 359, 1229, 1229, -981, 13544, 400, + 1229, -981, 491, 11434, 408, 934, 950, 952, 2307, -981, + -981, -981, -981, -981, 1353, -981, -981, -981, -981, -981, + -981, 576, 1398, -981, -981, 1181, -981, 892, -981, 1401, + -981, 1416, 452, 458, -981, -981, -981, -981, 6659, 10624, + 10624, 10624, 10624, 8972, 10624, 10624, 54, -981, -981, -981, + -981, 515, 985, -981, -981, -981, -981, -981, -981, -981, + 2406, 462, 470, 4923, 10742, -981, 455, 553, 465, -981, + 985, -981, -981, -981, 480, 10742, -981, 492, 586, 504, + 51, -981, -981, 479, 4923, -981, -981, 10034, -981, 5891, + 8740, 511, 10034, -981, 10742, 10742, 10742, 10742, 10742, 10742, + 10742, 10742, 10742, 10742, 10742, 10742, 10742, 10742, -981, 10742, + 10742, 10742, 10742, 10742, 10742, 10742, 10742, 10742, 10742, 10742, + 10742, 11712, -981, 9090, -981, 11798, -981, -981, 13002, -981, + -981, -981, -981, 10860, 10860, -981, 541, -981, 507, -981, + 1063, -981, -981, -981, -981, -981, -981, 11884, 9090, 11970, + 4923, 10624, -981, -981, -981, 650, 649, 286, 556, 562, + -981, 5069, 673, 10742, 12056, 9090, 12142, 10742, 10742, 5507, + 327, 327, 105, 12228, 9090, 12314, -981, 638, -981, 6248, + 381, -981, -981, 10152, 697, -981, 10742, 10742, 13606, 13606, + 13606, 10742, -981, -981, 9326, -981, 10742, -981, 9680, 6897, + 571, 985, 318, 318, -981, -981, 943, 587, -981, -981, + -981, 8344, 5626, 598, 12056, 12142, 10742, 834, 985, -981, + -981, 6540, 600, -981, -981, -981, 9798, -981, 985, 9916, + -981, -981, -981, 1063, -3, 11434, -981, 11434, 12400, 9090, + 12486, 2459, -981, -981, 602, -981, 1447, 6248, 576, -981, + -981, -981, -981, -981, -981, -981, 10742, 10742, -981, -981, + -981, -981, -981, -981, -981, -981, -981, -981, -981, -981, + 1453, 985, 985, 604, 10860, 737, 13606, 578, -981, -981, + -981, 311, -981, -981, 2734, -981, 13606, 2459, -981, -981, + 1251, -981, -981, -981, 10860, 754, 95, 10742, -981, 13260, + 318, -981, 985, 11434, 628, -981, -981, -981, 729, 654, + 1535, -981, -981, 1076, 290, 2835, 3556, 3556, 3556, 3556, + 1656, 1656, 13686, 3121, 3556, 3556, 13668, 13668, 337, 337, + 2835, 379, 13606, 1656, 1656, 1702, 1702, 1587, 542, 542, + 379, 379, 379, 4367, 7802, 4639, 7920, -981, 88, -981, + 636, 318, 310, -981, 342, -981, -981, 4095, -981, -981, + 2566, 95, 95, -981, 13074, -981, -981, -981, -981, -981, + 985, 10624, 4923, 759, 161, -981, 88, 640, 88, 767, + 943, 8485, -981, 10270, 765, -981, 10742, 10742, 597, -981, + 7564, 7683, 645, 469, 474, 765, -981, -981, -981, -981, + 63, 84, 646, 123, 129, 10624, 8344, 655, 2835, 773, + 13606, 195, -981, 13606, 13606, 13606, 857, 10742, 13544, -981, + 318, 13606, -981, -981, -981, -981, 9444, 9680, -981, -981, + -981, 656, -981, -981, 190, 834, 985, 1229, 511, -981, + 759, 161, 648, 764, 811, -981, 122, 2459, -981, 657, + -981, 379, 379, -981, -981, 336, 985, 658, -981, -981, + 2754, 753, 13136, -981, 751, 515, -981, 465, -981, 985, + -981, -981, 668, 671, 678, -981, 689, 751, 678, 780, + 13198, -981, -981, 2459, 4923, -981, -981, 13331, 10388, -981, + -981, 11434, 8972, 10860, 10742, 12572, 9090, 12658, -981, -981, + -981, 752, -981, 777, 2044, 113, 782, 191, 783, -981, + 2438, 700, 194, -981, -981, 705, 761, -981, 710, -981, + -981, -981, 209, -981, -981, -981, 1139, 10860, 10860, -981, + 541, 468, 9326, 10860, 10860, -981, 541, 45, 85, 4923, + 6248, 95, -981, 985, 840, -981, -981, -981, -981, 13260, + -981, 769, -981, 5772, 844, -981, 10624, 850, -981, 10742, + 10742, 488, 10742, 10742, 853, 6394, 6394, 154, 327, -981, + -981, 536, -981, 10506, 5215, 13606, -981, 6897, 318, -981, + -981, -981, 615, 727, 1002, 4923, 6248, -981, -981, -981, + 728, -981, 1600, 985, 10742, 10742, -981, -981, 2459, -981, + 1251, -981, 1251, -981, 1251, -981, -981, 10742, 10742, -981, + -981, -981, 11548, -981, 747, 465, 749, 11548, -981, 756, + 758, -981, 879, 10742, 13402, -981, -981, 13606, 4503, 4775, + 755, 514, 529, 2921, -981, -981, -981, -981, 771, 768, + 775, 752, -981, 776, 778, -981, -981, -981, -981, -981, + 789, 790, -981, 842, 2921, 2921, 852, 135, 10742, 10742, + -981, -981, -981, -981, -981, 10860, -981, -981, -981, -981, + -981, -981, -981, 903, 786, 6248, 4923, -981, -981, 11662, + 179, -981, -981, 6394, -981, -981, 179, -981, 10742, -981, + 919, 921, -981, 10624, 10624, 5361, 13606, 81, -981, 9680, + -981, 1341, 922, 799, 1642, 1642, 977, -981, 13606, 13606, + 678, 798, 678, 678, 13606, 13606, 815, 826, 907, 1097, + 578, -981, -981, 2165, -981, 1097, 2459, -981, 1251, -981, + -981, 13473, 564, -981, -981, 2438, 2921, -981, 135, -981, + 2921, -981, -981, 824, -981, -981, -981, -981, 13606, 13606, + -981, -981, -981, -981, 829, 956, 910, -981, 1144, 950, + 952, 4923, -981, 5069, -981, -981, 6394, 179, 179, 430, + -981, -981, -981, -981, 831, -981, -981, -981, -981, 847, + 847, 1642, 854, -981, 1251, -981, -981, -981, -981, -981, + -981, 12744, -981, 465, 578, -981, -981, 858, 868, 869, + -981, 872, 869, -981, 876, 884, -981, 824, 2921, -981, + -981, 1063, 12830, 9090, 12916, 649, 597, 1005, 5361, 5361, + 2835, -981, 1014, 1341, 857, 1642, 847, 1642, 678, 848, + 897, -981, 2459, -981, 1251, -981, 1251, -981, 1251, -981, + -981, 2921, 2438, -981, 759, 161, 915, 711, 725, -981, + -981, -981, 430, 430, 682, -981, -981, 847, -981, 869, + 918, 869, 869, 923, -981, 615, 1051, 1052, 179, 1031, + 1039, -981, 1251, -981, -981, -981, 2921, -981, -981, 5361, + 10624, 10624, 869, 430, 179, 179, -981, -981, 5361, 5361, + 430, 430, -981, -981 }; - /* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM. - Performed when YYTABLE does not specify something else to do. Zero - means the default is an error. */ +/* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM. + Performed when YYTABLE does not specify something else to do. Zero + means the default is an error. */ static const yytype_int16 yydefact[] = { - 2, 0, 0, 1, 0, 0, 0, 0, 294, 0, - 0, 318, 321, 0, 0, 604, 338, 339, 340, 341, - 306, 270, 270, 489, 488, 490, 491, 606, 0, 10, - 0, 493, 492, 494, 480, 590, 482, 481, 484, 483, - 476, 477, 438, 439, 495, 496, 292, 0, 0, 0, - 0, 0, 0, 296, 618, 618, 88, 313, 0, 0, - 0, 0, 0, 0, 453, 0, 0, 0, 3, 604, - 6, 9, 27, 33, 543, 543, 49, 60, 59, 0, - 76, 0, 80, 90, 0, 54, 248, 0, 61, 311, - 285, 286, 436, 287, 288, 289, 434, 433, 465, 435, - 432, 487, 0, 290, 291, 270, 5, 8, 338, 339, - 306, 618, 414, 0, 113, 114, 292, 0, 0, 0, - 0, 543, 543, 116, 497, 342, 0, 487, 291, 0, - 334, 168, 178, 169, 165, 194, 195, 196, 197, 176, - 191, 184, 174, 173, 189, 172, 171, 167, 192, 166, - 179, 183, 185, 177, 170, 186, 193, 188, 187, 180, - 190, 175, 164, 182, 181, 163, 161, 162, 158, 159, - 160, 118, 120, 119, 153, 154, 131, 132, 133, 140, - 137, 139, 134, 135, 155, 156, 141, 142, 146, 149, - 150, 136, 138, 128, 129, 130, 143, 144, 145, 147, - 148, 151, 152, 157, 574, 55, 121, 122, 573, 0, - 0, 0, 58, 543, 543, 0, 0, 54, 0, 487, - 0, 291, 0, 0, 0, 112, 0, 353, 352, 0, - 0, 487, 291, 187, 180, 190, 175, 158, 159, 160, - 118, 119, 0, 123, 125, 20, 124, 456, 461, 460, - 612, 614, 604, 615, 0, 458, 0, 616, 613, 605, - 588, 292, 278, 587, 273, 0, 265, 277, 74, 269, - 618, 436, 618, 578, 75, 73, 618, 259, 307, 72, - 0, 258, 413, 71, 604, 0, 18, 0, 0, 221, - 0, 222, 209, 212, 303, 0, 0, 0, 604, 15, - 604, 78, 14, 0, 604, 0, 609, 609, 249, 0, - 0, 609, 576, 0, 0, 86, 0, 96, 103, 543, - 470, 469, 471, 472, 0, 468, 467, 440, 445, 444, - 447, 0, 0, 442, 449, 0, 451, 0, 463, 0, - 474, 0, 478, 479, 53, 236, 237, 4, 605, 0, - 0, 0, 0, 0, 0, 0, 550, 546, 545, 544, - 547, 548, 0, 552, 564, 519, 520, 568, 567, 563, - 543, 0, 505, 0, 512, 517, 618, 522, 618, 542, - 0, 549, 551, 554, 528, 0, 561, 528, 566, 528, - 570, 526, 501, 0, 0, 401, 403, 0, 92, 0, - 84, 81, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 208, 211, 0, 0, 0, 0, 0, + 2, 0, 0, 0, 0, 0, 0, 297, 0, 0, + 321, 324, 0, 0, 678, 344, 345, 346, 347, 309, + 273, 273, 560, 559, 561, 562, 680, 0, 10, 0, + 564, 563, 565, 550, 664, 552, 551, 554, 553, 546, + 547, 507, 508, 566, 567, 558, 0, 0, 0, 0, + 0, 0, 0, 692, 692, 91, 316, 0, 0, 0, + 0, 0, 0, 522, 0, 0, 0, 3, 678, 6, + 9, 27, 32, 615, 615, 48, 63, 62, 0, 79, + 0, 83, 93, 0, 57, 251, 0, 64, 314, 289, + 290, 505, 291, 292, 293, 503, 502, 534, 504, 501, + 557, 0, 294, 295, 273, 5, 1, 8, 344, 345, + 309, 692, 420, 0, 116, 117, 558, 0, 0, 0, + 0, 615, 615, 119, 568, 348, 0, 557, 295, 0, + 340, 171, 181, 172, 168, 197, 198, 199, 200, 179, + 194, 187, 177, 176, 192, 175, 174, 170, 195, 169, + 182, 186, 188, 180, 173, 189, 196, 191, 190, 183, + 193, 178, 167, 185, 184, 166, 164, 165, 161, 162, + 163, 121, 123, 122, 156, 157, 134, 135, 136, 143, + 140, 142, 137, 138, 158, 159, 144, 145, 149, 152, + 153, 139, 141, 131, 132, 133, 146, 147, 148, 150, + 151, 154, 155, 160, 648, 58, 124, 125, 647, 0, + 0, 0, 61, 615, 615, 0, 0, 0, 557, 0, + 295, 0, 0, 0, 115, 0, 359, 358, 0, 0, + 557, 295, 190, 183, 193, 178, 161, 162, 163, 121, + 122, 0, 126, 128, 20, 127, 525, 530, 529, 686, + 688, 678, 689, 0, 527, 0, 690, 687, 679, 662, + 558, 281, 661, 276, 0, 268, 280, 77, 272, 692, + 505, 692, 652, 78, 76, 692, 262, 310, 0, 75, + 261, 419, 74, 678, 0, 18, 0, 0, 224, 0, + 225, 212, 215, 306, 0, 0, 0, 0, 15, 678, + 81, 14, 0, 678, 0, 683, 683, 252, 0, 0, + 683, 650, 0, 0, 89, 0, 99, 106, 615, 540, + 539, 541, 542, 536, 0, 538, 537, 509, 514, 513, + 516, 0, 0, 511, 518, 0, 520, 0, 532, 0, + 544, 0, 548, 549, 52, 239, 240, 4, 679, 0, + 0, 0, 0, 0, 0, 0, 622, 618, 617, 616, + 619, 620, 0, 624, 636, 590, 591, 640, 639, 635, + 615, 0, 576, 0, 583, 588, 692, 593, 692, 614, + 0, 621, 623, 626, 600, 0, 633, 600, 638, 600, + 643, 597, 572, 0, 0, 407, 409, 0, 95, 0, + 87, 84, 0, 55, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 211, 214, 0, 0, 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 601, 618, 600, 0, 603, 602, 0, 418, 416, - 312, 437, 0, 0, 407, 65, 310, 331, 113, 114, - 115, 478, 479, 505, 498, 329, 0, 618, 0, 0, - 0, 599, 598, 56, 0, 618, 303, 0, 0, 344, - 0, 343, 0, 0, 618, 0, 0, 0, 0, 0, - 0, 303, 0, 618, 0, 326, 0, 126, 0, 0, - 457, 459, 0, 0, 617, 582, 583, 279, 586, 272, - 0, 606, 266, 0, 275, 0, 267, 0, 604, 0, - 604, 618, 618, 260, 271, 604, 0, 309, 52, 607, - 0, 0, 0, 0, 0, 0, 17, 604, 301, 13, - 605, 77, 297, 300, 304, 611, 250, 610, 611, 252, - 305, 577, 102, 94, 0, 89, 0, 0, 618, 0, - 543, 314, 398, 528, 473, 0, 0, 448, 454, 441, - 443, 450, 452, 464, 475, 0, 0, 7, 21, 22, - 23, 24, 25, 50, 51, 509, 556, 510, 508, 0, - 604, 604, 528, 0, 0, 511, 0, 524, 572, 521, - 0, 525, 506, 0, 535, 557, 0, 538, 565, 0, - 540, 569, 0, 0, 618, 278, 28, 30, 0, 31, - 604, 0, 82, 93, 48, 34, 46, 0, 253, 198, - 29, 0, 291, 226, 231, 232, 233, 228, 230, 240, - 241, 234, 235, 207, 210, 238, 239, 32, 218, 606, - 227, 229, 223, 224, 225, 213, 214, 215, 216, 217, - 591, 596, 592, 597, 412, 270, 410, 0, 618, 591, - 593, 592, 594, 411, 270, 591, 592, 270, 618, 618, - 35, 253, 199, 45, 206, 63, 66, 0, 0, 0, - 113, 114, 117, 0, 0, 618, 0, 604, 0, 295, - 618, 618, 424, 0, 0, 618, 345, 595, 302, 0, - 591, 592, 618, 347, 319, 346, 322, 595, 302, 0, - 591, 592, 0, 0, 0, 0, 277, 0, 325, 581, - 584, 580, 276, 0, 280, 274, 618, 585, 579, 257, - 255, 261, 262, 264, 308, 608, 19, 0, 26, 205, - 79, 16, 604, 609, 95, 87, 99, 101, 0, 98, - 100, 606, 0, 0, 466, 0, 455, 219, 220, 550, - 548, 361, 604, 354, 504, 502, 0, 41, 244, 336, - 0, 0, 518, 618, 571, 0, 527, 555, 528, 528, - 528, 562, 528, 550, 528, 43, 246, 337, 389, 387, - 0, 386, 385, 284, 0, 91, 85, 0, 0, 0, - 0, 0, 618, 0, 0, 0, 0, 409, 69, 415, - 262, 0, 0, 408, 67, 404, 62, 0, 0, 618, - 332, 0, 0, 415, 335, 575, 57, 425, 426, 618, - 427, 0, 618, 350, 0, 0, 348, 0, 0, 415, - 0, 0, 0, 0, 0, 415, 0, 127, 462, 324, - 0, 0, 281, 268, 604, 618, 11, 298, 251, 97, - 0, 391, 0, 0, 315, 446, 362, 359, 553, 0, - 604, 0, 0, 523, 507, 0, 531, 0, 533, 0, - 539, 0, 536, 541, 0, 0, 384, 606, 606, 514, - 515, 618, 618, 369, 0, 559, 369, 369, 367, 0, - 0, 282, 83, 47, 254, 591, 592, 0, 591, 592, - 0, 0, 40, 203, 39, 204, 70, 0, 37, 201, - 38, 202, 68, 405, 406, 0, 0, 0, 0, 499, - 330, 0, 0, 429, 351, 0, 12, 431, 0, 316, - 0, 317, 0, 0, 327, 280, 618, 256, 263, 397, - 0, 0, 0, 0, 0, 357, 503, 42, 245, 528, - 528, 528, 528, 44, 247, 0, 0, 0, 513, 0, - 365, 366, 369, 377, 558, 0, 380, 0, 382, 402, - 283, 415, 243, 242, 36, 200, 419, 417, 0, 0, - 0, 428, 0, 104, 111, 0, 430, 0, 320, 323, - 0, 421, 422, 420, 395, 606, 393, 396, 400, 399, - 363, 360, 0, 355, 532, 0, 529, 534, 537, 390, - 388, 303, 0, 516, 618, 0, 368, 375, 369, 369, - 369, 560, 369, 369, 64, 333, 110, 0, 618, 0, - 618, 618, 0, 0, 392, 0, 358, 0, 528, 595, - 302, 364, 0, 372, 0, 374, 0, 381, 0, 378, - 383, 107, 109, 0, 591, 592, 423, 349, 328, 394, - 356, 530, 369, 369, 369, 369, 105, 373, 0, 370, - 376, 379, 369, 371 + 0, 0, 675, 692, 674, 0, 677, 676, 0, 424, + 422, 315, 506, 0, 0, 413, 68, 313, 337, 116, + 117, 118, 548, 549, 576, 569, 335, 0, 692, 0, + 0, 0, 673, 672, 59, 0, 692, 306, 0, 0, + 350, 0, 349, 0, 0, 692, 0, 0, 0, 0, + 0, 0, 306, 0, 692, 0, 332, 0, 129, 0, + 0, 526, 528, 0, 0, 691, 656, 657, 282, 660, + 275, 0, 680, 269, 0, 278, 0, 270, 0, 678, + 0, 678, 692, 692, 263, 274, 678, 0, 312, 51, + 681, 0, 0, 0, 0, 0, 0, 17, 678, 304, + 13, 0, 80, 300, 303, 307, 685, 253, 684, 685, + 255, 308, 651, 105, 97, 0, 92, 0, 0, 692, + 0, 615, 317, 404, 600, 543, 0, 0, 517, 523, + 510, 512, 519, 521, 533, 545, 0, 0, 7, 21, + 22, 23, 24, 25, 49, 50, 580, 628, 581, 579, + 0, 678, 678, 600, 0, 0, 582, 645, 595, 646, + 592, 645, 596, 577, 599, 607, 629, 599, 610, 637, + 599, 612, 642, 641, 0, 0, 692, 281, 28, 30, + 0, 31, 678, 0, 85, 96, 47, 33, 45, 0, + 256, 201, 29, 0, 295, 0, 229, 234, 235, 236, + 231, 233, 243, 244, 237, 238, 210, 213, 241, 242, + 0, 221, 680, 230, 232, 226, 227, 228, 216, 217, + 218, 219, 220, 665, 670, 666, 671, 418, 273, 416, + 0, 692, 665, 667, 666, 668, 417, 273, 665, 666, + 273, 692, 692, 34, 256, 202, 44, 209, 66, 69, + 0, 0, 0, 116, 117, 120, 0, 0, 692, 0, + 678, 0, 298, 692, 692, 493, 0, 0, 692, 351, + 669, 305, 0, 665, 666, 692, 353, 322, 352, 325, + 669, 305, 0, 665, 666, 0, 0, 0, 0, 0, + 280, 0, 328, 655, 658, 654, 279, 284, 283, 277, + 692, 659, 653, 260, 258, 264, 265, 267, 311, 682, + 19, 0, 26, 208, 82, 16, 678, 683, 98, 90, + 102, 104, 0, 101, 103, 680, 0, 599, 535, 0, + 524, 222, 223, 622, 620, 367, 678, 360, 575, 573, + 599, 40, 247, 342, 0, 0, 589, 692, 644, 0, + 598, 627, 600, 600, 600, 634, 600, 622, 600, 42, + 249, 343, 395, 393, 0, 392, 391, 288, 0, 94, + 88, 0, 0, 0, 0, 0, 692, 0, 457, 458, + 459, 491, 476, 0, 0, 0, 475, 490, 0, 56, + 0, 436, 449, 451, 461, 440, 460, 462, 442, 484, + 444, 453, 456, 455, 454, 54, 0, 0, 0, 415, + 72, 421, 265, 0, 0, 414, 70, 410, 65, 0, + 0, 692, 338, 0, 0, 421, 341, 649, 60, 494, + 495, 692, 496, 0, 692, 356, 0, 0, 354, 0, + 0, 421, 0, 0, 0, 0, 0, 421, 0, 130, + 531, 0, 327, 0, 0, 285, 271, 678, 692, 11, + 301, 254, 100, 0, 397, 0, 0, 318, 515, 368, + 365, 625, 0, 678, 0, 0, 594, 578, 599, 603, + 599, 605, 599, 611, 599, 608, 613, 0, 0, 390, + 680, 680, 585, 586, 692, 692, 375, 0, 631, 375, + 375, 373, 0, 284, 286, 86, 46, 257, 665, 666, + 0, 665, 666, 487, 478, 491, 465, 472, 0, 466, + 469, 0, 480, 0, 481, 483, 474, 489, 488, 463, + 437, 438, 445, 0, 0, 0, 0, 0, 0, 0, + 39, 206, 38, 207, 73, 0, 36, 204, 37, 205, + 71, 411, 412, 0, 0, 0, 0, 570, 336, 0, + 0, 498, 357, 0, 12, 500, 0, 319, 0, 320, + 0, 0, 333, 0, 0, 0, 283, 692, 259, 266, + 403, 0, 0, 0, 0, 0, 363, 574, 41, 248, + 600, 600, 600, 600, 43, 250, 0, 0, 0, 584, + 645, 371, 372, 375, 383, 630, 0, 386, 0, 388, + 408, 287, 421, 486, 464, 0, 0, 479, 0, 446, + 0, 450, 452, 441, 447, 477, 485, 443, 246, 245, + 35, 203, 425, 423, 0, 0, 0, 497, 0, 107, + 114, 0, 499, 0, 323, 326, 0, 0, 0, 692, + 427, 428, 426, 401, 680, 399, 402, 406, 405, 369, + 366, 0, 361, 604, 599, 601, 606, 609, 396, 394, + 306, 0, 587, 692, 0, 374, 381, 375, 375, 375, + 632, 375, 375, 473, 467, 470, 482, 439, 0, 67, + 339, 113, 0, 692, 0, 692, 692, 0, 0, 0, + 0, 429, 0, 0, 398, 0, 364, 0, 600, 669, + 305, 370, 0, 378, 0, 380, 0, 387, 0, 384, + 389, 0, 0, 448, 110, 112, 0, 665, 666, 492, + 355, 334, 692, 692, 430, 329, 400, 362, 602, 375, + 375, 375, 375, 468, 471, 108, 0, 0, 0, 0, + 0, 379, 0, 376, 382, 385, 0, 330, 331, 0, + 0, 0, 375, 692, 0, 0, 377, 431, 0, 0, + 692, 692, 433, 435 }; - /* YYPGOTO[NTERM-NUM]. */ +/* YYPGOTO[NTERM-NUM]. */ static const yytype_int16 yypgoto[] = { - -868, -868, -868, 411, -868, 25, -868, -282, 693, -868, - 42, -868, -254, -213, 768, 1343, 1513, -868, 84, -59, - -868, -429, -868, -14, 916, -190, 4, -33, -271, -487, - -11, 1993, -84, 936, 29, -19, -868, -868, 19, -868, - 867, -868, -392, 46, -461, -327, 118, -7, -868, -446, - -233, -184, 15, -360, 57, -868, -868, -868, -868, -868, - -868, -868, -868, -868, -868, -868, -868, -868, -868, -868, - -868, 8, -211, -460, -94, -610, -868, -868, -868, 163, - 501, -868, -572, -868, -868, -276, -868, -90, -868, -868, - 145, -868, -868, -868, -81, -868, -868, -451, -868, -78, - -868, -868, -868, -868, -868, 147, 58, -167, -868, -868, - -868, -868, -868, -248, -868, 710, -868, -868, -868, 2, - -868, -868, -868, 2347, 2558, 960, 1777, -868, -868, -27, - 502, 20, -9, 396, 16, -868, -868, -868, 181, 485, - 249, -244, -839, -672, -556, -868, 180, -723, -541, -867, - 14, -513, -868, -388, -868, 675, -351, -868, -868, -868, - 62, -436, 624, -330, -868, -868, -47, -868, 7, -22, - 806, -253, 394, -284, -65, -2 + -981, -981, -981, 563, -981, 14, -981, -267, 357, -981, + -981, 65, -318, -327, -5, -981, -981, 878, -981, 1448, + 18, -45, -981, -981, -387, 29, 1072, -190, 4, -38, + -298, -461, -19, 1858, -97, 1100, 20, -20, -981, -981, + 12, -981, 2730, -981, 651, 73, -247, -384, 116, -981, + -9, -454, -269, 17, 118, -365, 26, -981, -981, -981, + -981, -981, -981, -981, -981, -981, -981, -981, -981, -981, + -981, -981, -981, 141, -151, -468, -17, -590, -981, -981, + -981, 313, 275, -981, -637, -981, -981, -172, -981, -26, + -981, -981, -981, 262, -981, -981, -981, -80, -981, -479, + -981, -569, -981, -981, -981, -599, -981, 61, 250, -981, + 148, -981, -981, -888, -791, -981, -981, -981, 299, -889, + -748, -981, -10, -981, -981, -981, -981, 766, 276, 62, + -159, -981, -981, -981, -981, -981, -285, -981, 870, -981, + -981, 1046, 1, -981, -981, 1154, 1875, 2479, 1129, 1627, + -981, -981, -23, 644, 16, 146, 550, 110, -981, -981, + -981, -69, 67, -219, -242, -946, -723, -554, -981, 1045, + -769, -575, -980, 108, -514, -981, -534, -981, 230, -368, + -981, -981, -981, 43, -475, 699, -356, -981, -981, -81, + -981, 75, -25, 647, -249, 1060, -268, -21, -1 }; - /* YYDEFGOTO[NTERM-NUM]. */ +/* YYDEFGOTO[NTERM-NUM]. */ static const yytype_int16 yydefgoto[] = { - -1, 1, 2, 68, 69, 70, 287, 464, 465, 298, - 299, 520, 72, 615, 73, 213, 214, 688, 215, 76, - 77, 676, 819, 78, 79, 300, 80, 81, 82, 545, - 83, 216, 123, 124, 243, 244, 245, 713, 653, 207, - 85, 305, 619, 654, 278, 509, 510, 279, 280, 269, - 502, 538, 658, 609, 86, 210, 303, 742, 304, 319, - 752, 223, 843, 224, 844, 712, 1000, 679, 677, 928, - 459, 290, 470, 704, 835, 836, 230, 762, 953, 1026, - 973, 887, 790, 791, 888, 860, 1005, 1006, 551, 864, - 396, 604, 88, 89, 446, 669, 668, 493, 1003, 691, - 829, 932, 936, 90, 91, 92, 332, 333, 556, 93, - 94, 95, 557, 253, 254, 255, 488, 96, 97, 98, - 326, 99, 100, 219, 220, 103, 221, 455, 678, 371, - 372, 373, 374, 375, 890, 891, 376, 377, 378, 776, - 594, 380, 381, 382, 383, 579, 384, 385, 386, 895, - 896, 387, 388, 389, 390, 391, 587, 209, 460, 310, - 512, 273, 129, 683, 656, 463, 458, 437, 516, 861, - 517, 536, 257, 258, 259, 302 + 0, 1, 2, 67, 68, 69, 286, 465, 466, 297, + 521, 298, 71, 617, 72, 640, 625, 213, 691, 214, + 215, 75, 76, 851, 679, 77, 78, 299, 79, 80, + 81, 546, 82, 216, 123, 124, 242, 243, 244, 716, + 656, 207, 84, 304, 621, 657, 277, 510, 511, 278, + 279, 268, 503, 539, 661, 611, 85, 210, 302, 746, + 303, 318, 756, 222, 875, 223, 876, 715, 1076, 682, + 680, 986, 460, 289, 471, 707, 867, 1131, 229, 766, + 1014, 1105, 1034, 920, 794, 921, 795, 893, 1084, 1085, + 552, 897, 606, 396, 87, 88, 672, 447, 671, 494, + 1082, 1132, 1178, 1179, 1180, 819, 820, 1053, 821, 822, + 823, 824, 948, 949, 825, 826, 827, 953, 828, 829, + 830, 831, 694, 861, 990, 994, 89, 90, 91, 332, + 333, 557, 92, 93, 94, 558, 252, 253, 254, 489, + 95, 96, 97, 326, 98, 99, 218, 219, 102, 220, + 456, 681, 371, 372, 373, 374, 375, 923, 924, 376, + 377, 378, 780, 595, 380, 381, 382, 383, 580, 384, + 385, 386, 928, 929, 387, 388, 389, 390, 391, 588, + 209, 461, 309, 513, 272, 129, 686, 659, 464, 459, + 438, 517, 894, 518, 537, 256, 257, 258, 301 }; - /* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If - positive, shift that token. If negative, reduce the rule whose - number is the opposite. If YYTABLE_NINF, syntax error. */ +/* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If + positive, shift that token. If negative, reduce the rule whose + number is the opposite. If YYTABLE_NINF, syntax error. */ static const yytype_int16 yytable[] = { - 106, 519, 266, 266, 348, 285, 266, 344, 440, 286, - 87, 478, 87, 126, 126, 283, 246, 218, 218, 281, - 706, 229, 256, 218, 218, 218, 206, 591, 218, 107, - 246, 270, 270, 206, 450, 270, 205, 777, 715, 506, - 434, 436, 620, 544, 71, 315, 71, 206, 393, 401, - 672, 674, 308, 312, 539, 781, 301, 725, 541, 745, - 87, 725, 125, 125, 316, 862, 894, 277, 282, 306, - 125, 728, 392, 392, 218, 552, 347, 206, 272, 272, - 778, 832, 272, 558, 672, 674, 503, 325, 507, 867, - 316, 584, 842, 469, 222, 394, 817, 818, 445, 252, - 655, 438, 728, -104, 664, 657, 527, 667, 1031, 281, - -111, 1007, 603, 125, -415, 3, 307, 311, 335, 337, - 339, 341, 535, -489, 796, 218, 581, 87, 685, 336, - 684, 328, 329, 739, 268, 274, -588, -486, 275, 125, - -485, -107, 394, 655, 788, 664, -109, 699, 614, 863, - 575, 444, 342, 343, 685, 438, 709, 277, 282, -110, - -106, -108, 284, 271, 271, 560, -112, 271, 560, -104, - 560, 475, 560, -105, 560, 444, 288, 686, -415, -488, - -489, 496, 484, 330, 331, 1031, 467, 468, 614, 614, - -96, 789, 247, -415, 685, 248, 249, 777, -490, -491, - -480, 271, 271, 813, 1007, 779, 815, -493, 782, 610, - 777, 392, 392, 439, 576, -480, -486, -106, 87, 685, - -96, 748, 469, 250, 813, 251, -415, -103, -415, 670, - 673, 218, 218, 530, 394, -415, -488, 831, 441, 492, - 778, 537, 537, 524, -492, -112, 537, 927, -588, 894, - -480, -485, 894, 778, -588, -490, -491, -480, -99, 489, - -591, 206, 266, -101, -493, -592, 266, 439, 504, 549, - 504, 325, 294, 276, 513, 544, -102, -98, -100, 506, - -104, 543, -494, 1013, 218, 477, 725, 725, 218, -480, - -97, 511, 218, 218, 397, 270, 87, 728, -484, 395, - 301, -492, 525, 87, 87, 529, 751, 479, 480, 756, - 902, 87, 443, 777, -111, 840, -106, 770, 435, -106, - -106, 247, 316, 777, 248, 249, 398, 732, 733, 894, - 828, -110, -77, 431, 781, 771, 1001, 526, 606, -494, - 544, -111, 272, 616, -106, 532, -480, -106, 501, -106, - 354, 355, 250, -91, 251, -484, 87, 218, 218, 218, - 218, 87, 218, 218, 725, 779, 367, 365, 366, 367, - 442, 125, 682, 567, 588, 433, 588, 841, 779, 916, - 461, 87, 555, 616, 616, 922, 944, 276, 560, 612, - 71, 368, 443, 402, 368, 572, 247, 822, 514, 248, - 249, 897, 87, 301, 454, 218, -108, 87, 316, 466, - 621, 448, 608, 913, 915, 449, 777, 608, 266, 919, - 921, -111, 873, 271, 794, 940, 472, 271, 247, 251, - 513, 248, 249, 568, 569, 570, 571, 462, 519, 476, - 218, 42, -103, 266, 43, 913, 915, 525, 919, 921, - 621, 621, 481, 662, -105, 513, 662, 125, 485, 250, - 266, 251, 487, 692, 492, 218, 501, 87, 218, 266, - 910, 655, 513, 664, 810, 907, -110, 662, 87, 722, - 663, 513, 218, -108, 418, 505, 87, 960, 59, 777, - 858, 218, 418, 853, 662, 1002, 87, -102, 811, 553, - 777, 736, 725, 662, 663, 521, 851, 717, 899, 504, - 504, 744, 728, 543, 427, 428, 429, 731, 812, 106, - 519, 663, 246, 528, 917, 985, 544, -106, 247, 87, - 663, 248, 249, 850, 266, 925, 911, -76, 87, 206, - 970, 971, 507, 662, 811, 614, 513, -108, 985, 534, - 582, 614, 316, -105, 316, 542, 218, 614, 614, 379, - 379, 251, 540, 71, 87, 726, 354, 355, 662, 767, - 663, 546, 741, -106, 803, 451, 452, 519, 543, 271, - 951, 565, -108, 566, 1029, -108, -108, 1032, 785, 834, - 831, 218, 912, 577, -98, 663, 583, 743, 918, 920, - -500, 125, 792, 125, 271, 586, 379, 379, 812, 471, - 218, 965, 966, -108, 589, -108, 471, 804, 590, 316, - -302, 271, 853, 453, 453, 593, 105, 598, 105, 596, - 271, 599, 601, 105, 105, -302, 597, -108, 600, 105, - 105, 105, 602, 637, 105, 613, 989, 703, 808, 675, - 271, 689, 917, 494, 271, 690, 504, 814, -100, 693, - 816, 694, 281, 614, 1073, 281, 792, 792, 125, -342, - -302, 948, -589, 1051, -592, 696, 105, -302, 537, 554, - 718, -105, 271, 281, -342, 271, 218, 87, 830, 833, - 105, 730, 735, 833, 847, 271, 738, -91, 379, 379, - 833, 807, -97, 753, 984, 608, 766, 206, 797, 685, - 277, 1063, 799, 277, 769, 246, 787, 826, 798, -342, - 218, 995, 824, -595, 504, 809, -342, 997, 823, 807, - 553, 277, 206, 831, 923, 247, -484, 839, 248, 249, - 616, 105, 494, 105, 845, 297, 616, 914, 848, 849, - 856, -484, 616, 616, 859, 871, 578, 865, 430, 763, - 519, 869, 821, 575, 543, 875, 250, 773, 251, 877, - 879, 588, 881, 431, 592, 884, 780, 212, 212, 784, - 889, 930, 931, 212, -589, 935, -484, -595, 266, 939, - -589, 941, 949, -484, 855, 954, 846, 969, 87, 972, - 513, 979, -595, 975, 379, 316, 87, 621, 432, 977, - 218, 981, 297, 621, 218, 433, 987, 792, 986, 621, - 621, 1009, 662, 998, 999, 87, 87, 933, 1008, 247, - 937, 1019, 248, 249, 105, -595, 1015, -595, 1020, 87, - 903, -591, 218, 1021, -595, 1035, 456, 105, 105, 663, - 1036, 87, 87, 504, 125, 379, 501, 1034, 616, 87, - 250, 431, 251, 759, 1043, 357, 358, 359, 360, 1045, - 87, 87, 855, 705, 705, 1047, 217, 217, 1052, 271, - 271, 760, 217, 267, 267, 447, -591, 267, 1054, 588, - 588, 473, 1056, 1058, 1068, 247, 457, 968, 248, 249, - 105, -591, 974, 433, 105, -592, 431, -299, 105, 105, - -299, -299, 105, 1076, 289, 291, 292, 293, 938, 105, - 105, 267, 309, 1078, 494, 621, 250, 105, 251, 227, - -592, 494, 737, 345, 346, 87, 87, -299, -299, 992, - -299, 474, 130, 87, 833, 1039, 125, 1067, 433, 271, - -591, 125, 886, 1069, 866, 334, -292, 271, 328, 329, - 889, 924, 1066, 889, 491, -591, 889, 208, 889, 893, - 892, -292, 105, 105, 105, 105, 105, 105, 105, 105, - 482, 522, 772, 1044, 217, 1023, 1028, 0, 125, 297, - 0, 212, 212, 0, -592, 431, 0, 105, -591, 0, - -591, 0, 0, 87, -591, 87, -292, -591, 87, -592, - 330, 331, 0, -292, 0, 0, 889, 0, 105, 0, - 0, 105, 588, 105, 266, 0, 105, 876, 878, 880, - 483, 882, 0, 883, 0, 379, 513, 433, 692, 833, - 0, 0, -592, 889, -592, 889, 218, 889, -592, 889, - 0, -592, 515, 518, 0, 959, 105, 961, 662, 490, - 0, 962, 248, 249, 523, 0, 105, 105, 0, 889, - 759, 820, 357, 358, 359, 360, 0, 0, 547, 431, - 0, 105, 0, 105, 105, 663, 0, 0, 760, -487, - 217, 217, 297, 431, 105, 271, 247, 0, 105, 248, - 249, 0, 105, 0, -487, 247, 531, 105, 248, 249, - 533, 471, 105, 562, 474, 328, 329, 212, 212, 212, - 212, 433, 573, 574, 0, -291, -303, 801, 548, 497, - 498, 499, 345, 1010, 1011, 433, 950, 0, 251, -487, - -291, -303, 431, 267, 1022, 105, -487, 267, 0, 0, - 1024, 217, 217, 892, 105, 1030, 892, 1033, 892, 431, - 0, 1012, 0, 695, 0, 0, 0, 330, 331, 874, - 0, 702, 105, 1037, 0, -291, -303, 802, 0, 0, - 105, 714, -291, -303, 433, 271, 0, 561, 431, 0, - 328, 329, 1046, 0, 457, 1048, 0, 327, 328, 329, - 0, 433, 338, 328, 329, 0, 892, 105, 1014, 1016, - 1017, 1018, 0, 0, 0, 929, 217, 217, 217, 217, - 0, 217, 217, 1038, 0, 0, 105, 1070, 687, 0, - 433, 0, 1072, 892, 1074, 892, 0, 892, 1075, 892, - 705, 585, 330, 331, 349, 350, 351, 352, 353, 755, - 330, 331, 595, 0, 0, 330, 331, 0, 1082, 892, - 0, 774, 0, 0, 607, 774, 340, 328, 329, 618, - 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, - 633, 634, 635, 636, 0, 638, 639, 640, 641, 642, - 643, 644, 645, 646, 647, 648, 649, 1071, 0, 267, - 0, 0, 105, 105, 554, 328, 329, 0, 0, 671, - 671, 559, 328, 329, 729, 563, 328, 329, 0, 330, - 331, 734, 0, 0, 267, 0, 471, 217, 564, 328, - 329, 0, 471, 740, 0, 0, 105, 0, 0, 671, - 0, 267, 0, 671, 671, 74, 0, 74, 121, 121, - 267, 0, 0, 754, 328, 329, 121, 330, 331, 716, - 0, 0, 719, 720, 330, 331, 0, 721, 330, 331, - 724, 0, 727, 0, 309, 293, 415, 416, 0, 0, - 418, 330, 331, 0, 0, 0, 764, 765, 0, 418, - 0, 0, 671, 0, 0, 74, 0, 976, 978, 121, - 0, 0, 724, 0, 0, 309, 330, 331, 425, 426, - 427, 428, 429, 0, 105, 267, 795, 425, 426, 427, - 428, 429, 105, 105, 0, 121, 105, 0, 0, 105, - 105, 0, 757, 758, 0, 105, 105, 0, 0, 0, - 0, 105, 105, 0, 0, 0, 212, 0, 0, 0, - 768, 0, 0, 0, 0, 105, 0, 0, 105, 0, - 0, 0, 74, 0, 898, 415, 416, 105, 105, 786, - 0, 0, 793, 1027, 0, 105, 0, 0, 418, 759, - 212, 357, 358, 359, 360, 0, 105, 105, 0, 0, - 0, 0, 0, 825, 0, 0, 759, 760, 357, 358, - 359, 360, 0, 422, 423, 424, 425, 426, 427, 428, - 429, 926, 0, 0, 760, 75, 0, 75, 122, 122, - 0, 0, 363, 0, 934, 0, 122, 0, 761, 1053, - 1055, 1057, 0, 1059, 1060, 0, 942, 943, 0, 363, - 0, 105, 0, 0, 946, 217, 0, 0, 857, 0, - 0, 105, 105, 74, 0, 0, 952, 827, 0, 105, - 768, 786, 0, 0, 0, 75, 0, 0, 868, 122, - 0, 415, 416, 1077, 1079, 1080, 1081, 0, 0, 217, - 0, 0, 0, 1083, 418, 0, 0, 0, 0, 0, - 852, 0, 0, 0, 0, 122, 0, 0, 0, 724, - 309, 0, 212, 0, 0, 759, 0, 357, 358, 359, - 360, 424, 425, 426, 427, 428, 429, 0, 0, 105, - 988, 105, 0, 760, 105, 0, 0, 0, 996, 0, - 0, 74, 75, 0, 0, 0, 0, 0, 74, 74, - 0, 0, 0, 0, 774, 0, 74, 898, 363, 0, - 898, 0, 898, 0, 955, 0, 0, 121, 0, 0, - 947, 901, 105, 0, 0, 0, 671, 904, 0, 267, - 0, 0, 671, 671, 0, 0, 956, 724, 671, 671, - 1004, 0, 357, 358, 359, 360, 0, 0, 1040, 0, - 1041, 74, 0, 1042, 0, 0, 74, 0, 760, 0, - 898, 217, 0, 0, 671, 671, 0, 671, 671, 0, - 0, 0, 0, 0, 0, 0, 74, 945, 0, 0, - 0, 293, 0, 75, 0, 0, 0, 898, 0, 898, - 0, 898, 0, 898, 0, 0, 0, 74, 957, 958, - 0, 0, 74, 121, 0, 74, 0, 0, 0, 0, - 0, 963, 964, 898, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 980, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 982, 983, 104, - 0, 104, 128, 128, 671, 74, 74, 0, 0, 0, - 232, 0, 356, 0, 357, 358, 359, 360, 0, 0, - 0, 75, 74, 0, 0, 0, 0, 671, 75, 75, - 361, 0, 0, 74, 0, 309, 75, 0, 0, 0, - 0, 74, 0, 0, 362, 0, 0, 122, 0, 104, - 0, 74, 0, 318, 0, 363, 0, 0, 0, 0, - 0, 364, 365, 366, 367, 0, 0, 0, 0, 0, - 0, 0, 356, 0, 357, 358, 359, 360, 0, 318, - 0, 75, -618, 0, 74, 0, 75, 0, 0, 368, - 361, 0, 369, 74, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 370, 75, 121, 356, 121, - 357, 358, 359, 360, 0, 363, 104, 0, 0, 74, - 0, 364, 365, 366, 367, 267, 361, 75, -618, 0, - 0, 0, 75, 122, 0, 75, 0, 0, 0, 0, - 362, -618, -618, -618, -618, -618, -618, 0, -618, 368, - 0, 363, 369, 0, -618, -618, 0, 364, 365, 366, - 367, 0, 0, 1025, 0, -618, -618, 0, -618, -618, - -618, -618, -618, 0, 121, 75, 75, 0, 0, 0, - 0, 0, 0, 0, 0, 368, 0, 0, 369, 0, - 0, 0, 75, 0, 0, 0, 0, 0, 0, 0, - 0, 370, 0, 75, 0, 0, 0, 104, 0, 0, - 0, 75, 0, 0, 0, 84, 0, 84, -618, 0, - 0, 75, 0, 0, 0, 0, 228, 0, 0, 0, - 0, 0, 0, -618, 0, 0, 0, 0, 0, 0, - 0, 0, 74, -618, 0, 0, -618, -618, 0, 0, - 0, 0, 0, 356, 75, 357, 358, 359, 360, 0, - 0, 0, 0, 75, 0, 84, -618, -618, 0, 0, - 0, 361, 276, -618, -618, -618, -618, 122, 0, 122, - 0, 0, 0, 0, 0, 104, 0, 0, 0, 75, - 0, 0, 104, 104, 0, 0, 363, 0, 0, 0, - 104, 0, 364, 365, 366, 367, 0, 0, 0, 0, - 0, 318, 0, 783, 0, 357, 358, 359, 360, 0, + 105, 284, 507, 212, 212, 435, 437, 285, 441, 212, + 592, 719, 282, 709, 245, 545, 520, 107, 206, 280, + 344, 451, 785, 950, 927, 206, 205, 221, 245, 961, + 125, 125, 251, 732, 849, 850, 314, 622, 125, 206, + 781, 835, 900, 401, 265, 265, 559, 348, 265, 660, + 729, 393, 307, 311, 729, 300, 1110, 540, 271, 271, + 783, 542, 271, 786, 732, 1086, 70, 955, 70, 206, + 479, 439, 392, 392, 687, 658, 553, 325, 1056, 667, + 782, 125, 670, -107, 616, 602, 749, 106, -110, 255, + 394, 702, 439, 276, 281, 446, 306, 310, 283, 863, + 712, 493, 603, 688, 864, 528, 585, 125, 868, -112, + 280, 576, 448, 395, -114, 874, 673, 676, 658, 881, + 667, 335, 337, 339, 341, 616, 616, 605, 582, 688, + -113, 267, 273, 269, 269, 274, 476, 269, 394, 398, + 379, 379, 86, 347, 86, 126, 126, 485, -109, 217, + 217, 895, 800, 228, -111, 217, 217, 217, 1115, 1056, + 217, 287, 1110, 497, 951, 752, 293, -560, 598, 688, + 601, 305, -559, 561, 792, 577, 561, 445, 561, -108, + 561, -561, 561, 440, 276, 281, 951, 1086, 379, 379, + 468, 469, 86, 689, 688, 470, 315, 397, -556, 402, + -99, 443, 445, 781, 440, -102, 217, -665, 525, 612, + 275, 470, 392, 392, 985, 817, 781, 212, 212, 1057, + -562, 793, 315, 783, -560, 957, -104, -115, -666, -559, + 394, -106, 275, 436, 550, 896, 783, 817, -561, 455, + 480, 481, 958, 782, 507, -564, -563, -105, 432, 545, + 477, -305, 952, 206, 1114, -565, 782, 467, 217, 930, + 86, 325, 732, 1173, 927, -101, -305, 927, 505, 41, + 505, -103, 42, 760, 514, 544, 531, -562, 516, 519, + 379, 379, 729, 729, 538, 538, 504, 963, 508, 538, + 434, 270, 270, 1092, 246, 270, -100, 247, 248, 300, + 1116, -305, -564, -563, 265, -666, -555, 265, -305, 755, + 246, -80, -565, 247, 248, 545, 58, 342, 343, -550, + 964, 271, 536, -554, -556, 249, 490, 250, 860, 270, + 270, 473, -94, 785, 486, -107, -555, 502, 462, 125, + 935, 249, 482, 250, 212, 212, 212, 212, 781, 574, + 575, 86, 608, -115, 706, 478, -99, 618, 781, 527, + 685, 1174, 568, 217, 217, -107, 442, 569, 570, 571, + 572, 444, 530, 927, 1021, 589, -550, 589, 526, 488, + -554, 614, 444, 526, -114, 379, 556, 763, 729, 357, + 358, 359, 360, 512, 515, 463, 269, 561, 618, 618, + -114, 493, 843, 300, 493, 764, 718, -113, 296, 906, + 1002, -114, -113, 70, 367, 854, 217, 1080, 573, 217, + 845, -106, 940, 847, 217, 217, 125, 336, 86, 328, + 329, -109, 514, -105, 844, 86, 86, 379, 419, 368, + 449, 845, 246, 86, 450, 247, 248, 665, 863, 658, + 665, 667, -101, 974, 315, 1130, 690, 514, 520, 980, + 502, 886, 265, -111, 506, 695, 426, 427, 428, 429, + 430, 665, 522, 249, 514, 250, 296, 726, 899, 781, + 419, 330, 331, 514, -103, 616, 529, 265, 665, 86, + 217, 217, 217, 217, 86, 217, 217, 665, 891, 1108, + 535, 740, 1111, 545, 265, 452, 453, 748, 666, 544, + 970, 505, 505, 265, 86, 610, 976, 978, 777, 616, + 610, 105, 245, -79, -662, 616, 616, 932, 1081, 736, + 737, 1164, 666, 206, 732, 86, 868, 665, 217, 541, + 86, 315, 807, 623, 354, 355, 543, 730, 514, 666, + 547, 270, 470, 566, 270, 729, 1031, 1032, 666, 567, + 975, 872, 665, 909, 911, 913, 873, 915, 520, 916, + 884, 125, 578, 125, 217, 544, 1003, 1004, 265, 747, + 998, 781, 983, 584, 623, 623, 735, 70, -550, -108, + -109, -571, 781, 1176, 1177, -111, 745, 587, 666, 217, + 604, 86, 217, -550, 590, 796, 843, 591, 1170, -108, + -100, 721, 86, 771, 866, 863, 217, 836, 379, 886, + 86, 844, 594, 666, 1197, 217, 520, 798, 1012, 774, + 86, 1202, 1203, 789, 597, -109, -662, 599, -550, 125, + 678, -421, -662, 419, 523, -550, 600, 775, 1060, 840, + -111, 246, 296, 615, 247, 248, 975, 616, 846, 533, + 505, 848, 693, 86, 491, 692, 280, 247, 248, 280, + 796, 796, 86, 428, 429, 430, 212, 696, 842, 365, + 366, 367, 249, 697, 250, -108, 315, 280, 315, 699, + 217, 246, 862, 865, 247, 248, 879, 865, 86, 853, + 104, 555, 104, 206, 865, -421, 368, 104, 104, 270, + 212, 858, 722, 104, 104, 104, 734, 245, 104, 1065, + -421, -348, -432, -434, 925, 217, 538, -109, 206, 505, + 1005, 839, 739, 878, 270, 1141, -348, 742, 883, 1156, + 276, -111, -94, 276, 757, 217, 770, 508, 1026, 1027, + 104, 270, 773, -421, 315, -421, 296, 431, 618, 839, + 270, 276, -421, 544, 104, 454, 454, 981, 688, 791, + 801, -348, 432, 802, -663, 803, 589, 841, -348, 888, + 270, 855, 856, 863, 270, -669, 871, 877, 882, 892, + -665, 457, 618, 972, 880, 889, 898, 904, 618, 618, + 902, 1093, 1095, 1096, 1097, 514, 432, 433, 576, 943, + 908, 610, 270, 910, 434, 270, 104, 778, 104, 665, + 912, 778, 217, 86, 917, 270, -109, 125, 698, -109, + -109, 914, 944, 956, 959, 265, 705, -666, -554, 1071, + -111, 458, 962, -111, -111, 1073, 717, 965, 434, -669, + 796, 966, 967, -554, -665, 988, 217, -109, 993, -109, + 991, 212, 989, 995, -669, 997, 520, 936, 999, -665, + 1015, -111, 1010, -111, 349, 350, 351, 352, 353, 73, + 666, 73, 121, 121, 996, 888, -663, 505, -554, 1030, + 121, 1033, -663, 1051, 1040, -554, 1042, -669, 1036, -669, + 1038, -666, -665, -665, -665, 1009, -669, 1055, -665, 104, + 1045, -665, 1044, 474, 759, 1047, -666, 1046, 1062, 1168, + 1048, 104, 104, 589, 589, 1063, 1128, 1129, 432, 73, + 618, 1049, 1050, 121, 1074, 86, 1075, 1087, 1088, 922, + 1094, 1098, 315, 86, 623, -558, 532, 217, 125, -666, + 534, -666, 1099, 125, 563, -666, 328, 329, -666, 121, + -558, 1103, 1100, 475, 925, 1121, 1118, 925, 1119, 925, + 434, 1120, 246, 1133, 104, 247, 248, 104, 623, 217, + 354, 355, 104, 104, 623, 623, 104, 1124, 483, 1135, + 86, 86, -665, 104, 104, -558, 1137, 73, 212, 212, + 1142, 104, -558, 432, 86, 250, 865, 217, 330, 331, + 1144, 1146, 270, 270, 1148, 125, 86, 86, 1151, 524, + 1161, 1077, 1078, 931, 548, 86, 1152, 1189, 763, 1165, + 357, 358, 359, 360, 432, 925, 86, 86, 484, 432, + -557, -666, -295, 1198, 1199, 434, 764, 104, 104, 104, + 104, 104, 104, 104, 104, -557, 1175, -295, 246, 1134, + 1182, 247, 248, 1029, 947, 1186, 1187, 1188, 1035, 475, + 960, 1190, 104, 925, 549, 925, 434, 925, 865, 925, + 1191, 434, 270, 334, 226, 741, 328, 329, 73, 249, + -557, 250, -295, 104, 675, 677, 104, -557, 104, -295, + 246, 104, 589, 247, 248, 130, 919, 1166, 442, 1160, + 982, 1117, 1052, 925, 954, 1159, 623, 246, 270, 1091, + 247, 248, 514, 492, 695, 865, 86, 86, 675, 677, + 1068, 249, 104, 250, 86, 208, 665, 776, 330, 331, + 1102, 1107, 104, 104, 217, 217, 86, 0, 1011, 0, + 250, 0, 265, -306, 0, 0, 733, 104, 0, 104, + 104, 865, 865, 738, 0, 73, 805, 0, -306, 0, + 104, 0, 73, 73, 104, 744, 922, 743, 104, 922, + 73, 432, 922, 104, 922, 212, 212, 1101, 104, 0, + 0, 121, 865, 1043, 0, 0, 968, 666, 0, 865, + 865, 0, 432, -306, 1037, 1039, 0, 984, 1194, 1195, + -306, 0, 86, 0, 86, 1054, 806, 86, 0, 0, + 992, 104, 0, 434, 0, 0, 73, 0, 768, 769, + 104, 73, 1000, 1001, 1122, 0, 0, 458, 0, 0, + 0, 1007, 562, 0, 434, 328, 329, 0, 104, 432, + 922, 73, 0, 1013, 246, 0, 104, 247, 248, 799, + 778, 0, 969, 931, 217, 0, 931, 0, 931, 86, + 86, 0, 73, 0, 0, 472, 0, 73, 121, 0, + 73, 472, 0, 104, 1123, 270, 0, 250, 922, 0, + 922, 434, 922, 0, 922, 1113, 947, 330, 331, 0, + 1054, 0, 787, 104, 357, 358, 359, 360, 1106, 0, + 0, 327, 328, 329, 0, 0, 0, 0, 495, 0, + 361, 73, 73, 0, 0, 0, 0, 0, 922, 0, + 86, 217, 217, 0, 931, 0, 0, 857, 73, 86, + 86, 0, 1064, 0, 246, 363, 0, 247, 248, 73, + 1072, 0, 365, 366, 367, 0, 0, 73, 0, 0, + 0, 0, 1079, 554, 330, 331, 0, 73, 1153, 0, + 0, 502, 931, 0, 931, 249, 931, 250, 931, 368, + 104, 104, 1143, 1145, 1147, 0, 1149, 1150, 338, 328, + 329, 832, 1083, 890, 357, 358, 359, 360, 0, 270, + 73, 947, 1113, 340, 328, 329, 832, 0, 495, 73, + 764, 0, 931, 901, 104, 583, 555, 328, 329, 0, + 0, 0, 579, 121, 0, 121, 0, 0, 1125, 0, + 1126, 0, 0, 1127, 0, 73, 1113, 0, 0, 0, + 593, 330, 331, 0, 1181, 1183, 1184, 1185, 0, 74, + 0, 74, 122, 122, 0, 0, 330, 331, 0, 0, + 122, 560, 328, 329, 564, 328, 329, 1196, 0, 330, + 331, -678, -678, -678, 0, -678, -678, 0, -678, 565, + 328, 329, 0, -678, 832, 1162, 1163, 0, 971, 973, + 0, 121, 0, 104, 977, 979, 0, 0, 0, 74, + 0, 104, 104, 122, 763, 104, 357, 358, 359, 360, + 758, 328, 329, 0, 330, 331, 0, 330, 331, 0, + 971, 973, 764, 977, 979, 0, 0, 0, 0, 122, + 0, 0, 330, 331, 1008, 0, 104, 104, 0, 0, + 708, 708, 104, 104, 0, 0, 1193, 363, 104, 104, + 1017, 0, 0, 765, 0, 1200, 1201, 0, 0, 0, + 73, 0, 104, 330, 331, 104, 0, 74, 0, 0, + 0, 0, 0, 246, 104, 104, 247, 248, 0, 804, + 832, 0, 0, 104, 0, 0, 832, 0, 0, 0, + 0, 495, 0, 0, 104, 104, 554, -678, 495, 0, + 0, 0, 0, -678, 249, 0, 250, 404, 405, 406, + 407, 408, 409, 410, 411, 412, 413, 414, 415, 0, + 0, 0, 0, 416, 417, 767, 1061, 0, 103, 0, + 103, 128, 128, 0, 0, 0, 419, 0, 0, 231, + 0, 0, 784, 0, 0, 788, 0, 0, 0, 1061, + 0, 763, 0, 357, 358, 359, 360, 420, 74, 421, + 422, 423, 424, 425, 426, 427, 428, 429, 430, 764, + 0, 833, 73, 0, 104, 416, 417, -280, 103, 121, + 73, 73, 317, 0, 104, 104, 833, 0, 419, 0, + 0, 0, 104, 763, 363, 357, 358, 359, 360, 0, + 1016, 0, 104, 104, 104, 0, 0, 0, 317, 832, + 0, 764, 0, 0, 0, 73, 426, 427, 428, 429, + 430, 73, 73, 0, 0, 0, 0, 73, 73, 0, + 832, 832, 0, 0, 0, 74, 363, 0, 0, 0, + 852, 73, 74, 74, 416, 417, 103, 0, 0, 0, + 74, 0, 0, 73, 73, 0, 0, 419, 0, 0, + 0, 122, 73, 0, 833, 0, 0, 0, 0, 0, + 104, 0, 104, 73, 73, 104, 0, 0, 0, 834, + 0, 472, 423, 424, 425, 426, 427, 428, 429, 430, + 416, 417, 0, 0, 834, 0, 74, 0, 0, 0, + 121, 74, 0, 419, 0, 121, 0, 0, 0, 0, + 0, 832, 832, 0, 0, 0, 832, 0, 0, 0, + 0, 74, 104, 0, 0, 0, 0, 104, 104, 0, + 425, 426, 427, 428, 429, 430, 0, 103, 926, 907, + 0, 0, 74, 0, 0, 0, 0, 74, 122, 0, + 74, 0, 0, 73, 0, 0, 0, 0, 0, 83, + 833, 83, 0, 73, 73, 0, 833, 121, 0, 0, + 227, 73, 834, 0, 0, 0, 100, 0, 100, 127, + 127, 127, 0, 73, 832, 0, 0, 230, 104, 104, + 104, 74, 74, 0, 0, 0, 832, 104, 104, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 74, 83, + 0, 0, 0, 987, 103, 0, 0, 832, 832, 74, + 0, 103, 103, 0, 0, 0, 100, 74, 0, 103, + 316, 0, 0, 0, 0, 0, 0, 74, 708, 0, + 317, 472, 0, 0, 0, 0, 0, 0, 0, 73, + 0, 73, 832, 1020, 73, 1022, 316, 0, 0, 1023, + 0, 0, 0, 0, 0, 0, 0, 0, 834, 0, + 74, 0, 0, 0, 834, 103, 0, 83, 0, 74, + 103, 0, 0, 0, 0, 0, 0, 0, 0, 833, + 0, 0, 0, 122, 100, 122, 0, 0, 0, 0, + 103, 0, 0, 0, 0, 74, 73, 73, 0, 0, + 833, 833, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 103, 0, 0, 0, 0, 103, 317, 0, 624, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 368, 361, 84, 369, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 122, 104, 550, 0, 0, 0, - 104, 0, 0, 74, 0, 0, 363, 0, 0, 0, - 121, 74, 74, 365, 366, 367, 0, 0, 74, 0, - 104, 0, 0, 0, 74, 74, 0, 0, 0, 0, - 74, 74, 0, 0, 356, 0, 357, 358, 359, 360, - 368, 104, 0, 0, 74, 0, 104, 318, 0, 622, - 0, 0, 361, 0, 0, 0, 74, 74, 0, 0, - 0, 0, 75, 0, 74, 0, 0, 0, 580, 0, - 0, 0, 0, 84, 0, 74, 74, 363, 0, 0, - 0, 0, 0, 364, 365, 366, 367, 0, 0, 622, - 622, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 121, 0, 0, 0, 104, 121, 0, 0, - 0, 368, 0, 0, 369, 0, 0, 104, 0, 0, - 0, 0, 0, 0, 0, 104, 0, 0, 0, 0, - 74, 0, 0, 0, 0, 104, 0, 0, 0, 0, - 74, 74, 0, 0, 121, 0, 0, 0, 74, 0, - 0, 84, 0, 0, 0, 0, 0, 0, 84, 84, - 0, 0, 0, 0, 0, 0, 84, 0, 104, 0, - 0, 0, 0, 75, 0, 0, 0, 104, 0, 0, - 122, 75, 75, 0, 0, 0, 0, 0, 75, 0, - 0, 318, 0, 318, 75, 75, 0, 0, 0, 0, - 75, 75, 0, 104, 0, 0, 0, 0, 74, 0, - 74, 84, 0, 74, 75, 0, 84, 0, 0, 101, - 0, 101, 127, 127, 127, 0, 75, 75, 0, 0, - 231, 0, 0, 0, 75, 356, 84, 357, 358, 359, - 360, 0, 0, 0, 0, 75, 75, 0, 0, 0, - 0, 0, 0, 361, 0, 0, 0, 84, 318, 0, - 0, 0, 84, 0, 0, 617, 0, 0, 0, 101, - 0, 0, 122, 317, 0, 0, 0, 122, 363, 0, - 0, 0, 0, 0, 364, 365, 366, 367, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 317, - 75, 0, 0, 0, 0, 617, 617, 0, 0, 0, - 75, 75, 368, 0, 122, 369, 0, 0, 75, 0, - 0, 0, 84, 0, 800, 0, 104, 0, 0, 0, - 0, 0, 0, 84, 0, 0, 101, 0, 0, 0, - 0, 84, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 84, 403, 404, 405, 406, 407, 408, 409, 410, - 411, 412, 413, 414, 0, 0, 0, 0, 415, 416, - 0, 0, 0, 0, 0, 0, 0, 0, 75, 0, - 75, 418, 0, 75, 84, 0, 0, 0, 0, 0, - 0, 0, 0, 84, 0, 0, 0, 0, 0, 0, - 0, 0, 419, 0, 420, 421, 422, 423, 424, 425, - 426, 427, 428, 429, 0, 0, 0, 0, 0, 84, - 0, 0, -277, 0, 0, 0, 0, 101, 0, 0, - 102, 0, 102, 0, 0, 0, 0, 104, 0, 0, - 0, 0, 0, 0, 318, 104, 622, 0, 0, 0, - 0, 0, 622, 0, 0, 0, 0, 0, 622, 622, - 0, 0, 0, 0, 104, 104, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 104, 0, - 102, 0, 0, 356, 0, 357, 358, 359, 360, 0, - 104, 104, 0, 0, 0, 0, 0, 0, 104, 0, - 0, 361, 0, 0, 0, 101, 0, 0, 0, 104, - 104, 0, 101, 101, 0, 0, 0, 775, 0, 0, - 101, 0, 0, 0, 0, 0, 363, 0, 0, 0, - 0, 317, 364, 365, 366, 367, 128, 0, 0, 0, - 0, 128, 84, 0, 0, 0, 0, 102, 0, 356, - 0, 357, 358, 359, 360, 0, 0, 0, 0, 0, - 368, 0, 0, 369, 622, 101, 0, 361, 0, 0, - 101, 0, 0, 0, 104, 104, 0, 0, 994, 0, - 0, 0, 104, 870, 0, 0, 0, 0, 0, 0, - 101, 0, 363, 0, 0, 0, 0, 0, 364, 365, - 366, 367, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 101, 0, 0, 0, 0, 101, 317, 0, 0, - 0, 0, 0, 0, 0, 0, 368, 0, 0, 369, - 0, 0, 0, 0, 0, 0, 0, 0, 102, 0, - 0, 0, 104, 0, 104, 0, 0, 104, 0, 0, - 0, 0, 0, 84, 0, 0, 0, 0, 0, 0, - 0, 84, 617, 0, 0, 0, 0, 0, 617, 0, - 0, 0, 0, 0, 617, 617, 101, 0, 0, 0, - 84, 84, 0, 0, 0, 0, 0, 101, 0, 0, - 0, 0, 0, 0, 84, 101, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 101, 84, 84, 0, 0, - 0, 0, 0, 0, 84, 0, 102, 0, 0, 0, - 0, 0, 0, 102, 102, 84, 84, 0, 0, 0, - 0, 102, 0, 0, 0, 0, 0, 0, 101, 0, - 0, 0, 0, 0, 0, 0, 0, 101, 0, 0, - 403, 404, 405, 406, 407, 408, 409, 0, 411, 412, - 0, 317, 0, 317, 0, 0, 415, 416, 0, 0, - 0, 0, 0, 101, 0, 0, 102, 0, 0, 418, - 617, 102, 0, 0, 0, 0, 0, 0, 0, 0, - 84, 84, 0, 0, 991, 0, 0, 0, 84, 0, - -595, 102, 420, 421, 422, 423, 424, 425, 426, 427, - 428, 429, 0, -595, -595, -595, 0, -595, -595, 0, - -595, 0, 102, 0, 0, 0, -595, 102, 317, 0, - 102, 0, 0, 0, 0, 0, 0, -595, -595, 0, - -595, -595, -595, -595, -595, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 84, 0, - 84, 0, 0, 84, 0, 0, 0, 0, 0, 0, - 102, 102, -595, -595, -595, -595, -595, -595, -595, -595, - -595, -595, -595, -595, 0, 0, 0, 102, -595, -595, - -595, 0, 805, -595, 0, 0, 101, 0, 102, 0, - 0, -595, 0, 0, 0, -595, 102, 0, 0, 0, - 0, 0, 0, 0, 0, -595, 102, 0, -595, -595, - 0, -107, -595, 0, -595, -595, -595, -595, -595, -595, - -595, -595, -595, -595, 0, 0, 0, 0, -595, -595, - -595, 800, -99, 0, 0, -595, -595, -595, -595, 102, - 0, 0, 0, 0, 0, 0, 0, 0, 102, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 403, - 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, - 414, 0, 0, 0, 102, 415, 416, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 418, 0, - 0, 0, 0, 0, 0, 0, 0, 101, 0, 0, - 0, 0, 0, 0, 317, 101, 0, 0, 0, 419, - 0, 420, 421, 422, 423, 424, 425, 426, 427, 428, - 429, 0, 0, 0, 101, 101, 0, 0, 0, 0, - 0, 0, -619, -619, -619, -619, 407, 408, 101, -595, - -619, -619, 0, 0, 0, 0, 0, 0, 415, 416, - 101, 101, -595, -595, -595, 0, -595, -595, 101, -595, - 0, 418, 0, 0, 0, -595, 0, 0, 0, 101, - 101, 0, 0, 0, 0, 0, -595, -595, 0, -595, - -595, -595, -595, -595, 420, 421, 422, 423, 424, 425, - 426, 427, 428, 429, 0, 0, 127, 102, 0, 0, - 0, 127, 0, 0, 0, 0, 0, 0, 0, 0, - 0, -595, -595, -595, -595, -595, -595, -595, -595, -595, - -595, -595, -595, 0, 0, 0, 0, -595, -595, -595, - 0, 805, -595, 0, 101, 101, 0, 0, 993, 0, - -595, 0, 101, 0, -595, 0, 0, 0, 0, 0, - 0, 0, 0, 0, -595, 0, 0, -595, -595, 0, - -107, -595, 0, -595, -595, -595, -595, -595, -595, -595, - -595, -595, -595, 0, 0, 0, 0, -595, -595, -595, - 0, -595, 0, 0, -595, -595, -595, -595, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 101, 0, 101, 0, 0, 101, 102, 0, - 0, 0, 0, 0, 0, 0, 102, 102, 0, 0, - 0, 650, 651, 102, 0, 652, 0, 0, 0, 102, - 102, 0, 0, 0, 0, 102, 102, 0, 0, 0, - 174, 175, 176, 177, 178, 179, 180, 181, 0, 102, - 182, 183, 0, 0, 0, 0, 184, 185, 186, 187, - 0, 102, 102, 0, 0, 0, 0, 0, 0, 102, - 188, 189, 190, 0, 0, 0, 0, 0, 0, 0, - 102, 102, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 191, 192, 193, 194, 195, 196, - 197, 198, 199, 200, 0, 201, 202, 403, 404, 405, - 406, 407, 408, 203, 276, 411, 412, 0, 0, 0, - 0, 0, 0, 415, 416, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 102, 418, 0, 0, 0, - 0, 0, 0, 0, 0, 102, 102, 0, 0, 0, - 0, 0, 0, 102, 0, 0, 0, 0, 0, 420, - 421, 422, 423, 424, 425, 426, 427, 428, 429, 0, - 0, 0, 0, 0, 0, -618, 4, 0, 5, 6, - 7, 8, 9, 10, 11, 12, 13, 14, 0, 0, - 0, 0, 0, 0, 15, 0, 16, 17, 18, 19, - 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, - 25, 26, 0, 102, 27, 102, 0, 0, 102, 0, - 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, - 38, 39, 0, 40, 41, 42, 0, 0, 43, 0, - 0, 44, 45, 0, 46, 47, 48, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, - 50, 0, 0, 0, 0, 0, 51, 0, 0, 52, - 53, 0, 54, 55, 0, 56, 0, 0, 0, 57, - 0, 58, 59, 60, 0, 61, 62, 63, -293, 64, - -618, 0, 0, -618, -618, 0, 0, 0, 0, 0, - 0, -293, -293, -293, -293, -293, -293, 0, -293, 65, - 66, 67, 0, 0, 0, -293, -293, -293, 0, 0, - 0, -618, 0, -618, 0, -293, -293, 0, -293, -293, - -293, -293, -293, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, -293, 0, 0, 0, 0, + 472, 0, 0, 0, 0, 0, 472, 0, 0, 1089, + 1090, 122, 0, 0, 0, 0, 0, 73, 83, 0, + 624, 624, 0, 0, 0, 0, 73, 73, 808, 809, + 810, 1109, 0, 1112, 0, 100, 0, 103, 0, 0, + 0, 833, 833, 0, 0, 945, 833, 834, 103, 812, + 0, 0, 39, 40, 41, 0, 103, 42, 0, 0, + 0, 0, 0, 0, 0, 0, 103, 0, 834, 834, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -293, -293, -293, -293, -293, -293, -293, -293, -293, -293, - -293, -293, 0, 0, 0, 0, -293, -293, -293, 0, - 0, -293, 0, 0, 0, 0, 0, -293, 0, -293, - 0, 0, 0, -293, 0, 0, 0, 0, 0, 0, - 0, -293, 0, -293, 0, 0, -293, -293, 0, 0, - -293, -293, -293, -293, -293, -293, -293, -293, -293, -293, - -293, -293, 0, 0, -414, 0, 0, -293, -293, -293, - -293, 0, 0, -293, -293, -293, -293, -414, -414, -414, - -414, -414, -414, 0, -414, 0, 0, 0, 0, 0, - -414, -414, -414, 0, 0, 0, 0, 0, 0, 0, - 0, -414, -414, 0, -414, -414, -414, -414, -414, 0, + 74, 0, 0, 0, 0, 813, 1136, 472, 472, 1138, + 0, 814, 815, 0, 816, 83, 0, 0, 0, 103, + 57, 58, 83, 83, 0, 0, 0, 0, 103, 0, + 83, 0, 100, 0, 833, 0, 0, 0, 0, 100, + 100, 818, 317, 0, 317, 0, 833, 100, 120, 0, + 0, 0, 1167, 0, 103, 946, 0, 1169, 316, 1171, + 0, 0, 0, 1172, 0, 0, 0, 833, 833, 834, + 834, 0, 0, 0, 834, 0, 83, 0, 0, 0, + 0, 83, 0, 0, 0, 0, 356, 0, 357, 358, + 359, 360, 0, 100, 0, 0, 0, 1192, 100, 0, + 0, 83, 833, 0, 361, 0, 0, 0, 472, 0, + 317, 0, 74, 0, 0, 0, 0, 0, 100, 122, + 74, 74, 83, 0, 472, 472, 0, 83, 0, 363, + 619, 0, 0, 0, 0, 364, 365, 366, 367, 100, + 0, 0, 834, 0, 100, 316, 356, 0, 357, 358, + 359, 360, 0, 0, 834, 74, 0, 0, 0, 0, + 0, 74, 74, 368, 361, 0, 369, 74, 74, 0, + 0, 619, 619, 0, 0, 834, 834, 1104, 362, 103, + 0, 74, 0, 0, 0, 0, 0, 0, 83, 363, + 0, 0, 0, 74, 74, 364, 365, 366, 367, 83, + 0, 0, 74, 0, 0, 100, 0, 83, 0, 0, + 834, 0, 0, 74, 74, 0, 100, 83, 0, 0, + 0, 0, 0, 368, 100, 0, 369, 0, 356, 0, + 357, 358, 359, 360, 100, 0, 0, 0, 0, 370, + 122, 0, 0, 0, 0, 122, 361, 0, 0, 0, + 83, 0, 0, 0, 0, 0, 0, 0, 0, 83, + 0, 0, 0, 0, 0, 0, 0, 100, 0, 0, + 0, 363, 0, 0, 0, 0, 100, 364, 365, 366, + 367, 0, 0, 0, 0, 83, 0, 0, 0, 0, + 316, 103, 316, 74, 0, 0, 0, 0, 317, 103, + 624, 0, 100, 74, 74, 368, 0, 122, 369, 0, + 0, 74, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 551, 0, 74, 0, 0, 0, 356, 0, 357, + 358, 359, 360, 0, 624, 0, 0, 0, 0, 0, + 624, 624, 808, 809, 810, 361, 103, 103, 0, 0, + 101, 0, 101, 0, 0, 0, 0, 0, 316, 945, + 103, 581, 0, 812, 0, 0, 39, 40, 41, 0, + 363, 42, 103, 103, 0, 0, 364, 365, 366, 367, + 356, 103, 357, 358, 359, 360, 0, 0, 0, 74, + 0, 74, 103, 103, 74, 0, 0, 0, 361, 813, + 101, 0, 0, 0, 368, 814, 815, 369, 816, 0, + 83, 0, 0, 0, 57, 58, 0, 0, 0, 128, + 0, 0, 0, 363, 128, 0, 0, 100, 0, 364, + 365, 366, 367, 0, 0, 818, -692, 0, 0, 0, + 0, 0, 120, 0, 0, 0, 74, 74, 0, -692, + -692, -692, -692, -692, -692, 0, -692, 368, 0, 0, + 369, -692, -692, -692, 0, 0, 0, 0, 101, 0, + 0, 0, 624, -692, -692, 0, -692, -692, -692, -692, + -692, 0, 103, 103, 0, 0, 1070, 0, 0, 0, + 103, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 103, 0, 0, 0, 0, 74, 0, 0, + 0, 0, 0, 0, 0, 0, 74, 74, 0, 0, + 0, 0, 83, 0, 0, 0, -692, 0, 0, 0, + 83, 619, 0, 0, 0, 0, 0, 0, 0, 100, + 0, -692, 0, 0, 0, 0, 316, 100, 0, 0, + 0, -692, 0, 0, -692, -692, 0, 0, 0, 101, + 0, 0, 0, 0, 0, 619, 0, 0, 103, 0, + 103, 619, 619, 103, -692, -692, 0, 83, 83, 0, + 275, -692, -692, -692, -692, 0, 0, 0, 0, 0, + 0, 83, 0, 0, 100, 100, 0, 0, 0, 0, + 0, 0, 0, 83, 83, 0, 0, 0, 100, 0, + 0, 0, 83, 0, 0, 266, 266, 0, 0, 266, + 100, 100, 0, 83, 83, 103, 103, 0, 0, 100, + 0, 0, 0, 0, 0, 0, 101, 0, 0, 0, + 100, 100, 0, 101, 101, 0, 288, 290, 291, 292, + 0, 101, 0, 266, 308, 356, 0, 357, 358, 359, + 360, 0, 0, 0, 0, 345, 346, 127, 0, 0, + 0, 0, 127, 361, 0, 356, 0, 357, 358, 359, + 360, 0, 0, 0, 0, 0, 103, 0, 0, 779, + 0, 0, 0, 361, 0, 103, 103, 101, 363, 0, + 0, 0, 101, 619, 364, 365, 366, 367, 0, 903, + 0, 0, 0, 83, 83, 0, 0, 1067, 363, 0, + 0, 83, 101, 0, 364, 365, 366, 367, 0, 0, + 100, 100, 368, 83, 1069, 369, 0, 0, 100, 808, + 809, 810, 0, 101, 0, 0, 0, 0, 101, 0, + 100, 101, 368, 0, 0, 369, 811, 0, 0, 0, + 812, 0, 0, 39, 40, 41, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, -414, -414, -414, -414, - -414, -414, -414, -414, -414, -414, -414, -414, 0, 0, - 0, 0, -414, -414, -414, 0, 0, -414, 0, 0, - 0, 0, 0, -414, 0, -414, 0, 0, 0, -414, - 0, 0, 0, 0, 0, 0, 0, 0, 0, -414, - 0, 0, -414, -414, 0, 0, -414, 0, -414, -414, - -414, -414, -414, -414, -414, -414, -414, -414, 0, 0, - -480, 0, -414, -414, -414, -414, -414, 0, 276, -414, - -414, -414, -414, -480, -480, -480, -480, -480, -480, 0, - -480, 0, 0, 0, 0, 0, 0, -480, -480, 0, - 0, 0, 0, 0, 0, 0, 0, -480, -480, 0, - -480, -480, -480, -480, -480, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 495, 0, 0, + 0, 0, 101, 101, 0, 0, 813, 0, 0, 83, + 0, 83, 814, 815, 83, 816, 0, 817, 0, 101, + 0, 57, 58, 0, 0, 0, 100, 0, 100, 0, + 101, 100, 0, 0, 0, 808, 809, 810, 101, 0, + 0, 0, 818, 0, 0, 0, 0, 0, 101, 120, + 0, 0, 945, 0, 0, 0, 812, 0, 0, 39, + 40, 41, 0, 0, 42, 0, 83, 83, 0, 0, + 0, 498, 499, 500, 345, 0, 0, 0, 0, 0, + 0, 101, 0, 100, 100, 266, 0, 0, 266, 0, + 101, 0, 813, 0, 0, 0, 0, 0, 814, 815, + 356, 0, 357, 358, 359, 360, 0, 57, 58, 0, + 0, 0, 0, 0, 0, 0, 101, 0, 361, 0, + 0, 0, 0, 0, 0, 0, 0, 83, 818, 0, + 0, 0, 362, 0, 0, 120, 83, 83, 0, 0, + 0, 0, 0, 363, 100, 0, 0, 0, 0, 364, + 365, 366, 367, 100, 100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, -480, -480, -480, -480, -480, -480, -480, -480, - -480, -480, -480, -480, 0, 0, 0, 0, -480, -480, - -480, 0, -480, -480, 0, 0, 0, 0, 0, -480, - 0, -480, 0, 0, 0, -480, 0, 0, 0, 0, - 0, 0, 0, 0, 0, -480, 0, 0, -480, -480, - 0, -480, -480, 0, -480, -480, -480, -480, -480, -480, - -480, -480, -480, -480, 0, 0, -618, 0, 0, -480, - -480, -480, -480, 0, 0, -480, -480, -480, -480, -618, - -618, -618, -618, -618, -618, 0, -618, 0, 0, 0, - 0, 0, -618, -618, -618, 0, 0, 0, 0, 0, - 0, 0, 0, -618, -618, 0, -618, -618, -618, -618, - -618, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -692, 0, 0, 0, 0, 0, 0, 368, 0, 0, + 369, 0, 0, 0, 586, 0, 0, 0, 0, 0, + 0, 0, 0, 370, 0, 596, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 609, 0, 0, + 0, 0, 620, 0, 626, 627, 628, 629, 630, 631, + 632, 633, 634, 635, 636, 637, 638, 639, 0, 641, + 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, + 652, 101, 0, 266, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 674, 674, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 266, 0, + 0, 0, 0, 404, 405, 406, 407, 408, 409, 410, + 0, 412, 413, 674, 0, 266, 0, 674, 674, 416, + 417, 0, 0, 0, 266, 0, 0, 0, 0, 0, + 0, 0, 419, 720, 0, 0, 723, 724, 0, 0, + 0, 725, 0, 0, 728, 0, 731, 0, 308, 292, + 0, 0, 0, 0, 0, 421, 422, 423, 424, 425, + 426, 427, 428, 429, 430, 0, 674, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 728, 0, 0, 308, + 0, 0, 0, 101, 0, 0, 0, 0, 0, 266, + 0, 101, 101, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 761, 762, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, -618, -618, - -618, -618, -618, -618, -618, -618, -618, -618, -618, -618, - 0, 0, 0, 0, -618, -618, -618, 0, 0, -618, - 0, 0, 0, 0, 0, -618, 0, -618, 0, 0, - 0, -618, 0, 0, 0, 0, 0, 0, 0, 0, - 0, -618, 0, 0, -618, -618, 0, 0, -618, 0, - -618, -618, -618, -618, -618, -618, -618, -618, -618, -618, - 0, 0, -618, 0, -618, -618, -618, -618, -618, 0, - 276, -618, -618, -618, -618, -618, -618, -618, -618, -618, - -618, 0, -618, 0, 0, 0, 0, 0, 0, -618, - -618, 0, 0, 0, 0, 0, 0, 0, 0, -618, - -618, 0, -618, -618, -618, -618, -618, 0, 0, 0, + 0, 0, 0, 0, 772, 0, 101, 0, 0, 0, + 0, 0, 101, 101, 0, 0, 0, 0, 101, 101, + 0, 0, 0, 0, 790, 0, 0, 797, 0, 0, + 0, 0, 101, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 101, 101, 0, 0, 0, 0, + 0, 0, 0, 101, 0, 0, 0, 0, 0, 0, + -420, 0, 0, 0, 101, 101, 0, 0, 0, 0, + 0, 0, 0, -420, -420, -420, -420, -420, -420, 0, + -420, 0, 0, 0, 0, -420, -420, -420, -420, 0, + 0, 0, 0, 0, 0, 0, 0, -420, -420, 0, + -420, -420, -420, -420, -420, 0, 0, 0, 0, 0, + 0, 0, 0, 859, 0, 0, 772, 790, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, -420, -420, -420, -420, -420, -420, -420, -420, + -420, -420, -420, -420, 101, 0, 0, 885, -420, -420, + -420, 0, 0, -420, 101, 101, 728, 308, 0, -420, + 0, -420, 101, 0, 0, -420, 0, 0, 0, 0, + 0, 0, 0, 0, 101, -420, 0, 0, -420, -420, + 403, 0, -420, 0, -420, -420, -420, -420, -420, -420, + -420, -420, -420, -420, 0, 0, 0, 0, -420, -420, + -420, -420, -420, 0, 275, -420, -420, -420, -420, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 934, 0, + 0, 0, 0, 674, 937, 0, 266, 404, 405, 406, + 407, 408, 409, 410, 411, 412, 413, 414, 415, 0, + 101, 0, 101, 416, 417, 101, 0, 0, 418, 0, + 0, 0, 0, 0, 0, 0, 419, 674, 674, 0, + 0, 0, 728, 674, 674, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 420, 0, 421, + 422, 423, 424, 425, 426, 427, 428, 429, 430, 674, + 674, 0, 674, 674, 0, 0, 0, 101, 101, 0, + 0, 0, 0, 1006, 0, 0, 0, 292, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, -693, -693, + -693, -693, 408, 409, 1018, 1019, -693, -693, 0, 0, + 0, 0, 0, 0, 416, 417, 0, 1024, 1025, 0, + 0, 0, 0, 0, 0, 0, 0, 419, 0, 0, + 0, 0, 0, 1041, 0, 0, 0, 0, 101, 0, + 0, 0, 0, 0, 0, 0, 0, 101, 101, 0, + 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, + 0, 0, 0, 0, 0, 0, 0, 0, 1058, 1059, + 0, 0, 0, 0, 0, 674, 0, 0, 0, 0, + -692, 3, 0, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 0, 0, 0, 0, 0, 674, 14, + 0, 15, 16, 17, 18, 0, 0, 0, 0, 308, + 19, 20, 21, 22, 23, 24, 25, 0, 0, 26, + 0, 0, 0, 0, 0, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 0, 39, 40, + 41, 0, 0, 42, 0, 0, 43, 44, 0, 45, + 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 48, 49, 0, 0, 0, 0, + 0, 50, 0, 0, 51, 52, 0, 53, 54, 0, + 55, 0, 0, 0, 56, 0, 57, 58, 59, 0, + 60, 61, 62, -296, 63, -692, 0, 0, -692, -692, + 0, 0, 0, 0, 0, 0, -296, -296, -296, -296, + -296, -296, 0, -296, 64, 65, 66, 0, -296, 0, + -296, -296, -296, 266, 0, 0, -692, 0, -692, 0, + -296, -296, 0, -296, -296, -296, -296, -296, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, -618, -618, -618, -618, -618, -618, - -618, -618, -618, -618, -618, -618, 0, 0, 0, 0, - -618, -618, -618, 0, 0, -618, 0, 0, 0, 0, - 0, -618, 0, -618, 0, 0, 0, -618, 0, 0, - 0, 0, 0, 0, 0, 0, 0, -618, 0, 0, - -618, -618, 0, 0, -618, 0, -618, -618, -618, -618, - -618, -618, -618, -618, -618, -618, 0, 0, -302, 0, - 0, -618, -618, -618, -618, 0, 276, -618, -618, -618, - -618, -302, -302, -302, 0, -302, -302, 0, -302, 0, + -296, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, -296, -296, -296, -296, -296, + -296, -296, -296, -296, -296, -296, -296, 0, 0, 0, + 0, -296, -296, -296, 0, 0, -296, 0, 0, 0, + 0, 0, -296, 0, -296, 0, 0, 0, -296, 0, + 0, 0, 0, 0, 0, 0, -296, 0, -296, 0, + 0, -296, -296, 0, 0, -296, -296, -296, -296, -296, + -296, -296, -296, -296, -296, -296, -296, 0, 0, -550, + 0, 0, -296, -296, -296, -296, 0, 0, -296, -296, + -296, -296, -550, -550, -550, -550, -550, -550, 0, -550, + 0, 0, 0, 0, -550, 0, -550, -550, 0, 0, + 0, 0, 0, 0, 0, 0, -550, -550, 0, -550, + -550, -550, -550, -550, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 496, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, -302, -302, 0, -302, -302, - -302, -302, -302, 0, 0, 0, 0, 0, 0, 0, + 0, -550, -550, -550, -550, -550, -550, -550, -550, -550, + -550, -550, -550, 0, 0, 0, 0, -550, -550, -550, + 0, -550, -550, 0, 0, 0, 0, 0, -550, 0, + -550, 0, 0, 0, -550, 0, 0, 0, 0, 0, + 0, 0, 0, 0, -550, 0, 0, -550, -550, 0, + -550, -550, 0, -550, -550, -550, -550, -550, -550, -550, + -550, -550, -550, 0, 0, -692, 0, 0, -550, -550, + -550, -550, 0, 0, -550, -550, -550, -550, -692, -692, + -692, -692, -692, -692, 0, -692, 0, 0, 0, 0, + -692, -692, -692, -692, 0, 0, 0, 0, 0, 0, + 0, 0, -692, -692, 0, -692, -692, -692, -692, -692, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -302, -302, -302, -302, -302, -302, -302, -302, -302, -302, - -302, -302, 0, 0, 0, 0, -302, -302, -302, 0, - 806, -302, 0, 0, 0, 0, 0, 0, 0, -302, - 0, 0, 0, -302, 0, 0, 0, 0, 0, 0, - 0, 0, 0, -302, 0, 0, -302, -302, 0, -109, - -302, 0, -302, -302, -302, -302, -302, -302, -302, -302, - -302, -302, 0, 0, -302, 0, 0, -302, -302, 0, - -101, 0, 0, -302, -302, -302, -302, -302, -302, -302, - 0, -302, -302, 0, -302, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, -692, -692, -692, + -692, -692, -692, -692, -692, -692, -692, -692, -692, 0, + 0, 0, 0, -692, -692, -692, 0, 0, -692, 0, + 0, 0, 0, 0, -692, 0, -692, 0, 0, 0, + -692, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -692, 0, 0, -692, -692, 0, 0, -692, 0, -692, + -692, -692, -692, -692, -692, -692, -692, -692, -692, 0, + 0, -692, 0, -692, -692, -692, -692, -692, 0, 275, + -692, -692, -692, -692, -692, -692, -692, -692, -692, -692, + 0, -692, 0, 0, 0, 0, -692, 0, -692, -692, + 0, 0, 0, 0, 0, 0, 0, 0, -692, -692, + 0, -692, -692, -692, -692, -692, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, -302, -302, 0, -302, -302, -302, -302, -302, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, -692, -692, -692, -692, -692, -692, -692, + -692, -692, -692, -692, -692, 0, 0, 0, 0, -692, + -692, -692, 0, 0, -692, 0, 0, 0, 0, 0, + -692, 0, -692, 0, 0, 0, -692, 0, 0, 0, + 0, 0, 0, 0, 0, 0, -692, 0, 0, -692, + -692, 0, 0, -692, 0, -692, -692, -692, -692, -692, + -692, -692, -692, -692, -692, 0, 0, -669, 0, 0, + -692, -692, -692, -692, 0, 275, -692, -692, -692, -692, + -669, -669, -669, 0, -669, -669, 0, -669, 0, 0, + 0, 0, -669, -669, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, -669, -669, 0, -669, -669, -669, + -669, -669, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, -302, -302, -302, -302, - -302, -302, -302, -302, -302, -302, -302, -302, 0, 0, - 0, 0, -302, -302, -302, 0, 806, -302, 0, 0, - 0, 0, 0, 0, 0, -302, 0, 0, 0, -302, - 0, 0, 0, 0, 0, 0, 0, 0, 0, -302, - 0, 0, -302, -302, 0, -109, -302, 0, -302, -302, - -302, -302, -302, -302, -302, -302, -302, -302, 0, 0, - 0, 0, 0, -302, -302, 0, -302, 0, 0, -302, - -302, -302, -302, 295, 0, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, -618, -618, -618, 0, 0, - -618, 15, 0, 16, 17, 18, 19, 0, 0, 0, - 0, 0, 20, 21, 22, 23, 24, 25, 26, 0, - 0, 27, 0, 0, 0, 0, 0, 28, 0, 30, - 31, 32, 33, 34, 35, 36, 37, 38, 39, 0, - 40, 41, 42, 0, 0, 43, 0, 0, 44, 45, - 0, 46, 47, 48, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 49, 50, 0, 0, - 0, 0, 0, 51, 0, 0, 52, 53, 0, 54, - 55, 0, 56, 0, 0, 0, 57, 0, 58, 59, - 60, 0, 61, 62, 63, 0, 64, -618, 0, 0, - -618, -618, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 65, 66, 67, 0, - 0, 0, 0, 0, 0, 0, 0, 0, -618, 295, - -618, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 0, 0, -618, 0, -618, -618, 15, 0, 16, - 17, 18, 19, 0, 0, 0, 0, 0, 20, 21, - 22, 23, 24, 25, 26, 0, 0, 27, 0, 0, - 0, 0, 0, 28, 0, 30, 31, 32, 33, 34, - 35, 36, 37, 38, 39, 0, 40, 41, 42, 0, - 0, 43, 0, 0, 44, 45, 0, 46, 47, 48, + 0, 0, 0, 0, 0, 0, 0, 0, 0, -669, + -669, -669, -669, -669, -669, -669, -669, -669, -669, -669, + -669, 0, 0, 0, 0, -669, -669, -669, 0, 837, + -669, 0, 0, 0, 0, 0, 0, 0, -669, 0, + 0, 0, -669, 0, 0, 0, 0, 0, 0, 0, + 0, 0, -669, 0, 0, -669, -669, 0, -110, -669, + 0, -669, -669, -669, -669, -669, -669, -669, -669, -669, + -669, 0, 0, -669, 0, -669, -669, -669, 0, -102, + 0, 0, -669, -669, -669, -669, -669, -669, -669, 0, + -669, -669, 0, -669, 0, 0, 0, 0, -669, -669, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 49, 50, 0, 0, 0, 0, 0, 51, - 0, 0, 52, 53, 0, 54, 55, 0, 56, 0, - 0, 0, 57, 0, 58, 59, 60, 0, 61, 62, - 63, 0, 64, -618, 0, 0, -618, -618, 0, 0, + -669, -669, 0, -669, -669, -669, -669, -669, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 65, 66, 67, 0, 0, 0, 0, 0, - 0, 0, 0, 0, -618, 295, -618, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 0, 0, -618, - 0, 0, -618, 15, -618, 16, 17, 18, 19, 0, - 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, - 26, 0, 0, 27, 0, 0, 0, 0, 0, 28, - 0, 30, 31, 32, 33, 34, 35, 36, 37, 38, - 39, 0, 40, 41, 42, 0, 0, 43, 0, 0, - 44, 45, 0, 46, 47, 48, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 49, 50, - 0, 0, 0, 0, 0, 51, 0, 0, 52, 53, - 0, 54, 55, 0, 56, 0, 0, 0, 57, 0, - 58, 59, 60, 0, 61, 62, 63, 0, 64, -618, - 0, 0, -618, -618, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 65, 66, - 67, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -618, 295, -618, 5, 6, 7, 8, 9, 10, 11, - 12, 13, 14, 0, 0, -618, 0, 0, -618, 15, - 0, 16, 17, 18, 19, 0, 0, 0, 0, 0, - 20, 21, 22, 23, 24, 25, 26, 0, 0, 27, - 0, 0, 0, 0, 0, 28, 0, 30, 31, 32, - 33, 34, 35, 36, 37, 38, 39, 0, 40, 41, - 42, 0, 0, 43, 0, 0, 44, 45, 0, 46, - 47, 48, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 49, 50, 0, 0, 0, 0, - 0, 51, 0, 0, 52, 53, 0, 54, 55, 0, - 56, 0, 0, 0, 57, 0, 58, 59, 60, 0, - 61, 62, 63, 0, 64, -618, 0, 0, -618, -618, - 4, 0, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 0, 0, 65, 66, 67, 0, 15, 0, - 16, 17, 18, 19, 0, 0, -618, 0, -618, 20, - 21, 22, 23, 24, 25, 26, 0, 0, 27, 0, - 0, 0, 0, 0, 28, 29, 30, 31, 32, 33, - 34, 35, 36, 37, 38, 39, 0, 40, 41, 42, - 0, 0, 43, 0, 0, 44, 45, 0, 46, 47, - 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 49, 50, 0, 0, 0, 0, 0, - 51, 0, 0, 52, 53, 0, 54, 55, 0, 56, - 0, 0, 0, 57, 0, 58, 59, 60, 0, 61, - 62, 63, 0, 64, -618, 0, 0, -618, -618, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 65, 66, 67, 0, 0, -618, 0, - 0, 0, 0, 0, 0, -618, 295, -618, 5, 6, - 7, 8, 9, 10, 11, 12, 13, 14, 0, -618, - -618, 0, 0, 0, 15, 0, 16, 17, 18, 19, - 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, - 25, 26, 0, 0, 27, 0, 0, 0, 0, 0, - 28, 0, 30, 31, 32, 33, 34, 35, 36, 37, - 38, 39, 0, 40, 41, 42, 0, 0, 43, 0, - 0, 44, 45, 0, 46, 47, 48, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, - 50, 0, 0, 0, 0, 0, 51, 0, 0, 52, - 53, 0, 54, 55, 0, 56, 0, 0, 0, 57, - 0, 58, 59, 60, 0, 61, 62, 63, 0, 64, - -618, 0, 0, -618, -618, 295, 0, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 0, 0, 65, - 66, 67, 0, 15, 0, 16, 17, 18, 19, 0, - 0, -618, 0, -618, 20, 21, 22, 23, 24, 25, - 26, 0, 0, 27, 0, 0, 0, 0, 0, 28, - 0, 30, 31, 32, 33, 34, 35, 36, 37, 38, - 39, 0, 40, 41, 42, 0, 0, 43, 0, 0, - 44, 45, 0, 46, 47, 48, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 49, 50, - 0, 0, 0, 0, 0, 51, 0, 0, 296, 53, - 0, 54, 55, 0, 56, 0, 0, 0, 57, 0, - 58, 59, 60, 0, 61, 62, 63, 0, 64, -618, - 0, 0, -618, -618, 295, 0, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 0, 0, 65, 66, - 67, 0, 15, 0, 16, 17, 18, 19, 0, -618, - -618, 0, -618, 20, 21, 22, 23, 24, 25, 26, - 0, 0, 27, 0, 0, 0, 0, 0, 28, 0, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 0, 40, 41, 42, 0, 0, 43, 0, 0, 44, - 45, 0, 46, 47, 48, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 49, 50, 0, - 0, 0, 0, 0, 51, 0, 0, 52, 53, 0, - 54, 55, 0, 56, 0, 0, 0, 57, 0, 58, - 59, 60, 0, 61, 62, 63, 0, 64, -618, 0, - 0, -618, -618, 295, 0, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 0, 0, 65, 66, 67, - 0, 15, 0, 16, 17, 18, 19, 0, -618, -618, - 0, -618, 20, 21, 22, 23, 24, 25, 26, 0, - 0, 27, 0, 0, 0, 0, 0, 28, 0, 30, - 31, 32, 33, 34, 35, 36, 37, 38, 39, 0, - 40, 41, 42, 0, 0, 43, 0, 0, 44, 45, - 0, 46, 47, 48, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 49, 50, 0, 0, - 0, 0, 0, 51, 0, 0, 52, 53, 0, 54, - 55, 0, 56, 0, 0, 0, 57, 0, 58, 59, - 60, 0, 61, 62, 63, 0, 64, -618, 0, 0, - -618, -618, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 65, 66, 67, 0, - 0, -618, 0, 0, 0, 0, 0, 0, -618, 295, - -618, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 0, 0, -618, 0, 0, 0, 15, 0, 16, - 17, 18, 19, 0, 0, 0, 0, 0, 20, 21, - 22, 23, 24, 25, 26, 0, 0, 27, 0, 0, - 0, 0, 0, 28, 0, 30, 31, 32, 33, 34, - 35, 36, 37, 38, 39, 0, 40, 41, 42, 0, - 0, 43, 0, 0, 44, 45, 0, 46, 47, 48, + 0, 0, 0, 0, 0, -669, -669, -669, -669, -669, + -669, -669, -669, -669, -669, -669, -669, 0, 0, 0, + 0, -669, -669, -669, 0, 837, -669, 0, 0, 0, + 0, 0, 0, 0, -669, 0, 0, 0, -669, 0, + 0, 0, 0, 0, 0, 0, 0, 0, -669, 0, + 0, -669, -669, 0, -110, -669, 0, -669, -669, -669, + -669, -669, -669, -669, -669, -669, -669, 0, 0, -305, + 0, -669, -669, -669, 0, -669, 0, 0, -669, -669, + -669, -669, -305, -305, -305, 0, -305, -305, 0, -305, + 0, 0, 0, 0, -305, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, -305, -305, 0, -305, + -305, -305, -305, -305, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 49, 50, 0, 0, 0, 0, 0, 51, - 0, 0, 52, 53, 0, 54, 55, 0, 56, 0, - 0, 0, 57, 0, 58, 59, 60, 0, 61, 62, - 63, 0, 64, -618, 0, 0, -618, -618, 0, 0, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 0, 0, 65, 66, 67, 0, 15, 0, 16, 17, - 18, 19, 0, 0, -618, 0, -618, 20, 21, 22, - 23, 24, 25, 26, 0, 0, 27, 0, 0, 0, - 0, 0, 28, 29, 30, 31, 32, 33, 34, 35, - 36, 37, 38, 39, 0, 40, 41, 42, 0, 0, - 43, 0, 0, 44, 45, 0, 46, 47, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 49, 50, 0, 0, 0, 0, 0, 51, 0, - 0, 52, 53, 0, 54, 55, 0, 56, 0, 0, - 0, 57, 0, 58, 59, 60, 0, 61, 62, 63, - 0, 64, 247, 0, 0, 248, 249, 0, 0, 5, - 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, - 0, 65, 66, 67, 0, 15, 0, 16, 17, 18, - 19, 0, 0, 250, 0, 251, 20, 21, 22, 23, - 24, 25, 26, 0, 0, 27, 0, 0, 0, 0, - 0, 28, 0, 30, 31, 32, 33, 34, 35, 36, - 37, 38, 39, 0, 40, 41, 42, 0, 0, 43, - 0, 0, 44, 45, 0, 46, 47, 48, 0, 0, + 0, -305, -305, -305, -305, -305, -305, -305, -305, -305, + -305, -305, -305, 0, 0, 0, 0, -305, -305, -305, + 0, 838, -305, 0, 0, 0, 0, 0, 0, 0, + -305, 0, 0, 0, -305, 0, 0, 0, 0, 0, + 0, 0, 0, 0, -305, 0, 0, -305, -305, 0, + -112, -305, 0, -305, -305, -305, -305, -305, -305, -305, + -305, -305, -305, 0, 0, -305, 0, 0, -305, -305, + 0, -104, 0, 0, -305, -305, -305, -305, -305, -305, + -305, 0, -305, -305, 0, -305, 0, 0, 0, 0, + -305, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, -305, -305, 0, -305, -305, -305, -305, -305, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 49, 50, 0, 0, 0, 0, 0, 51, 0, 0, - 52, 53, 0, 54, 55, 0, 56, 0, 0, 0, - 57, 0, 58, 59, 60, 0, 61, 62, 63, 0, - 64, 247, 0, 0, 248, 249, 0, 0, 5, 6, - 7, 8, 9, 10, 11, 12, 13, 0, 0, 0, - 65, 66, 67, 0, 15, 0, 16, 17, 18, 19, - 0, 0, 250, 0, 251, 20, 21, 22, 23, 24, - 25, 26, 0, 0, 27, 0, 0, 0, 0, 0, - 0, 0, 0, 31, 32, 33, 34, 35, 36, 37, - 38, 39, 0, 40, 41, 42, 0, 0, 43, 0, - 0, 44, 45, 0, 46, 47, 48, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, - 50, 0, 0, 0, 0, 0, 211, 0, 0, 119, - 53, 0, 54, 55, 0, 0, 0, 0, 0, 57, - 0, 58, 59, 60, 0, 61, 62, 63, 0, 64, - 247, 0, 0, 248, 249, 0, 0, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 0, 0, 0, 65, - 66, 67, 0, 15, 0, 108, 109, 18, 19, 0, - 0, 250, 0, 251, 110, 111, 112, 23, 24, 25, - 26, 0, 0, 113, 0, 0, 0, 0, 0, 0, - 0, 0, 31, 32, 33, 34, 35, 36, 37, 38, - 39, 0, 40, 41, 42, 0, 0, 43, 0, 0, - 44, 45, 0, 46, 47, 48, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 49, 50, - 0, 0, 0, 0, 0, 211, 0, 0, 119, 53, - 0, 54, 55, 0, 0, 0, 0, 0, 57, 0, - 58, 59, 60, 0, 61, 62, 63, 0, 64, 247, - 0, 0, 248, 249, 0, 0, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 0, 0, 0, 65, 265, - 67, 0, 15, 0, 16, 17, 18, 19, 0, 0, - 250, 0, 251, 20, 21, 22, 23, 24, 25, 26, - 0, 0, 27, 0, 0, 0, 0, 0, 0, 0, - 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 0, 40, 41, 42, 0, 0, 43, 0, 0, 44, - 45, 0, 46, 47, 48, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 49, 50, 0, - 0, 0, 0, 0, 211, 0, 0, 119, 53, 0, - 54, 55, 0, 0, 0, 0, 0, 57, 0, 58, - 59, 60, 0, 61, 62, 63, 0, 64, 247, 0, - 0, 248, 249, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 65, 66, 67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 251, 131, 132, 133, 134, 135, 136, 137, 138, - 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, - 149, 150, 151, 152, 153, 154, 0, 0, 0, 155, - 156, 157, 158, 159, 160, 161, 162, 163, 164, 0, - 0, 0, 0, 0, 165, 166, 167, 168, 169, 170, - 171, 172, 36, 37, 173, 39, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 174, - 175, 176, 177, 178, 179, 180, 181, 0, 0, 182, - 183, 0, 0, 0, 0, 184, 185, 186, 187, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 188, - 189, 190, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, -305, -305, -305, + -305, -305, -305, -305, -305, -305, -305, -305, -305, 0, + 0, 0, 0, -305, -305, -305, 0, 838, -305, 0, + 0, 0, 0, 0, 0, 0, -305, 0, 0, 0, + -305, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -305, 0, 0, -305, -305, 0, -112, -305, 0, -305, + -305, -305, -305, -305, -305, -305, -305, -305, -305, 0, + 0, 0, 0, 0, -305, -305, 0, -305, 0, 0, + -305, -305, -305, -305, 294, 0, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, -692, -692, -692, 0, + 0, -692, 14, 0, 15, 16, 17, 18, 0, 0, + 0, 0, 0, 19, 20, 21, 22, 23, 24, 25, + 0, 0, 26, 0, 0, 0, 0, 0, 27, 0, + 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, + 0, 39, 40, 41, 0, 0, 42, 0, 0, 43, + 44, 0, 45, 46, 47, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 48, 49, 0, + 0, 0, 0, 0, 50, 0, 0, 51, 52, 0, + 53, 54, 0, 55, 0, 0, 0, 56, 0, 57, + 58, 59, 0, 60, 61, 62, 0, 63, -692, 0, + 0, -692, -692, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 64, 65, 66, + 0, 0, 0, 0, 0, 0, 0, 0, 0, -692, + 294, -692, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 0, 0, -692, 0, -692, -692, 14, 0, + 15, 16, 17, 18, 0, 0, 0, 0, 0, 19, + 20, 21, 22, 23, 24, 25, 0, 0, 26, 0, + 0, 0, 0, 0, 27, 0, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 0, 39, 40, 41, + 0, 0, 42, 0, 0, 43, 44, 0, 45, 46, + 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 48, 49, 0, 0, 0, 0, 0, + 50, 0, 0, 51, 52, 0, 53, 54, 0, 55, + 0, 0, 0, 56, 0, 57, 58, 59, 0, 60, + 61, 62, 0, 63, -692, 0, 0, -692, -692, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 191, 192, 193, 194, 195, 196, 197, - 198, 199, 200, 0, 201, 202, 0, 0, 0, 0, - 0, 0, 203, 204, -588, -588, -588, -588, -588, -588, - -588, -588, -588, 0, 0, 0, 0, 0, 0, 0, - -588, 0, -588, -588, -588, -588, 0, -588, 0, 0, - 0, -588, -588, -588, -588, -588, -588, -588, 0, 0, - -588, 0, 0, 0, 0, 0, 0, 0, 0, -588, - -588, -588, -588, -588, -588, -588, -588, -588, 0, -588, - -588, -588, 0, 0, -588, 0, 0, -588, -588, 0, - -588, -588, -588, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, -588, -588, 0, 0, 0, - 0, 0, -588, 0, 0, -588, -588, 0, -588, -588, - 0, -588, 0, -588, -588, -588, 0, -588, -588, -588, - 0, -588, -588, -588, 0, -588, 0, 0, 0, 0, + 0, 0, 0, 64, 65, 66, 0, 0, 0, 0, + 0, 0, 0, 0, 0, -692, 294, -692, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, 0, 0, + -692, 0, 0, -692, 14, -692, 15, 16, 17, 18, + 0, 0, 0, 0, 0, 19, 20, 21, 22, 23, + 24, 25, 0, 0, 26, 0, 0, 0, 0, 0, + 27, 0, 29, 30, 31, 32, 33, 34, 35, 36, + 37, 38, 0, 39, 40, 41, 0, 0, 42, 0, + 0, 43, 44, 0, 45, 46, 47, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, + 49, 0, 0, 0, 0, 0, 50, 0, 0, 51, + 52, 0, 53, 54, 0, 55, 0, 0, 0, 56, + 0, 57, 58, 59, 0, 60, 61, 62, 0, 63, + -692, 0, 0, -692, -692, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, + 65, 66, 0, 0, 0, 0, 0, 0, 0, 0, + 0, -692, 294, -692, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 0, 0, -692, 0, 0, -692, + 14, 0, 15, 16, 17, 18, -692, 0, 0, 0, + 0, 19, 20, 21, 22, 23, 24, 25, 0, 0, + 26, 0, 0, 0, 0, 0, 27, 0, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 0, 39, + 40, 41, 0, 0, 42, 0, 0, 43, 44, 0, + 45, 46, 47, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 48, 49, 0, 0, 0, + 0, 0, 50, 0, 0, 51, 52, 0, 53, 54, + 0, 55, 0, 0, 0, 56, 0, 57, 58, 59, + 0, 60, 61, 62, 0, 63, -692, 0, 0, -692, + -692, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 64, 65, 66, 0, 0, + 0, 0, 0, 0, 0, 0, 0, -692, 294, -692, + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 0, 0, -692, 0, 0, -692, 14, 0, 15, 16, + 17, 18, 0, 0, 0, 0, 0, 19, 20, 21, + 22, 23, 24, 25, 0, 0, 26, 0, 0, 0, + 0, 0, 27, 0, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 0, 39, 40, 41, 0, 0, + 42, 0, 0, 43, 44, 0, 45, 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, -588, -588, -588, 0, -588, - 0, 0, 0, 0, 0, -588, -589, -589, -589, -589, - -589, -589, -589, -589, -589, 0, 0, 0, 0, 0, - 0, 0, -589, 0, -589, -589, -589, -589, 0, -589, - 0, 0, 0, -589, -589, -589, -589, -589, -589, -589, - 0, 0, -589, 0, 0, 0, 0, 0, 0, 0, - 0, -589, -589, -589, -589, -589, -589, -589, -589, -589, - 0, -589, -589, -589, 0, 0, -589, 0, 0, -589, - -589, 0, -589, -589, -589, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, -589, -589, 0, - 0, 0, 0, 0, -589, 0, 0, -589, -589, 0, - -589, -589, 0, -589, 0, -589, -589, -589, 0, -589, - -589, -589, 0, -589, -589, -589, 0, -589, 0, 0, - 0, 0, 0, 0, -591, -591, -591, -591, -591, -591, - -591, -591, -591, 0, 0, 0, 0, -589, -589, -589, - -591, -589, -591, -591, -591, -591, 0, -589, 0, 0, - 0, -591, -591, -591, -591, -591, -591, -591, 0, 0, - -591, 0, 0, 0, 0, 0, 0, 0, 0, -591, - -591, -591, -591, -591, -591, -591, -591, -591, 0, -591, - -591, -591, 0, 0, -591, 0, 0, -591, -591, 0, - -591, -591, -591, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, -591, -591, 0, 0, 0, - 0, 0, -591, 837, 0, -591, -591, 0, -591, -591, - 0, -591, 0, -591, -591, -591, 0, -591, -591, -591, - 0, -591, -591, -591, 0, -591, 0, 0, 0, 0, - 0, 0, -107, -592, -592, -592, -592, -592, -592, -592, - -592, -592, 0, 0, 0, -591, -591, -591, 0, -592, - 0, -592, -592, -592, -592, -591, 0, 0, 0, 0, - -592, -592, -592, -592, -592, -592, -592, 0, 0, -592, - 0, 0, 0, 0, 0, 0, 0, 0, -592, -592, - -592, -592, -592, -592, -592, -592, -592, 0, -592, -592, - -592, 0, 0, -592, 0, 0, -592, -592, 0, -592, - -592, -592, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, -592, -592, 0, 0, 0, 0, - 0, -592, 838, 0, -592, -592, 0, -592, -592, 0, - -592, 0, -592, -592, -592, 0, -592, -592, -592, 0, - -592, -592, -592, 0, -592, 0, 0, 0, 0, 0, - 0, -109, -593, -593, -593, -593, -593, -593, -593, -593, - -593, 0, 0, 0, -592, -592, -592, 0, -593, 0, - -593, -593, -593, -593, -592, 0, 0, 0, 0, -593, - -593, -593, -593, -593, -593, -593, 0, 0, -593, 0, - 0, 0, 0, 0, 0, 0, 0, -593, -593, -593, - -593, -593, -593, -593, -593, -593, 0, -593, -593, -593, - 0, 0, -593, 0, 0, -593, -593, 0, -593, -593, - -593, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, -593, -593, 0, 0, 0, 0, 0, - -593, 0, 0, -593, -593, 0, -593, -593, 0, -593, - 0, -593, -593, -593, 0, -593, -593, -593, 0, -593, - -593, -593, 0, -593, 0, 0, 0, 0, 0, 0, - -594, -594, -594, -594, -594, -594, -594, -594, -594, 0, - 0, 0, 0, -593, -593, -593, -594, 0, -594, -594, - -594, -594, 0, -593, 0, 0, 0, -594, -594, -594, - -594, -594, -594, -594, 0, 0, -594, 0, 0, 0, - 0, 0, 0, 0, 0, -594, -594, -594, -594, -594, - -594, -594, -594, -594, 0, -594, -594, -594, 0, 0, - -594, 0, 0, -594, -594, 0, -594, -594, -594, 0, + 0, 48, 49, 0, 0, 0, 0, 0, 50, 0, + 0, 51, 52, 0, 53, 54, 0, 55, 0, 0, + 0, 56, 0, 57, 58, 59, 0, 60, 61, 62, + 0, 63, -692, 0, 0, -692, -692, 3, 0, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, + 0, 64, 65, 66, 0, 14, 0, 15, 16, 17, + 18, 0, 0, -692, 0, -692, 19, 20, 21, 22, + 23, 24, 25, 0, 0, 26, 0, 0, 0, 0, + 0, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 0, 39, 40, 41, 0, 0, 42, + 0, 0, 43, 44, 0, 45, 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, -594, -594, 0, 0, 0, 0, 0, -594, 0, - 0, -594, -594, 0, -594, -594, 0, -594, 0, -594, - -594, -594, 0, -594, -594, -594, 0, -594, -594, -594, - 0, -594, 0, 0, 0, 0, 0, 0, 0, 0, + 48, 49, 0, 0, 0, 0, 0, 50, 0, 0, + 51, 52, 0, 53, 54, 0, 55, 0, 0, 0, + 56, 0, 57, 58, 59, 0, 60, 61, 62, 0, + 63, -692, 0, 0, -692, -692, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, -594, -594, -594, 0, 0, 0, 0, 0, 0, - 0, -594, 131, 132, 133, 134, 135, 136, 137, 138, - 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, - 149, 150, 151, 152, 153, 154, 0, 0, 0, 155, - 156, 157, 233, 234, 235, 236, 162, 163, 164, 0, - 0, 0, 0, 0, 165, 166, 167, 237, 238, 239, - 240, 172, 320, 321, 241, 322, 0, 0, 0, 0, - 0, 0, 323, 0, 0, 0, 0, 0, 0, 174, - 175, 176, 177, 178, 179, 180, 181, 0, 0, 182, - 183, 0, 0, 0, 0, 184, 185, 186, 187, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 188, - 189, 190, 0, 0, 0, 0, 324, 0, 0, 0, + 64, 65, 66, 0, 0, -692, 0, 0, 0, 0, + 0, 0, -692, 294, -692, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 0, -692, -692, 0, 0, + 0, 14, 0, 15, 16, 17, 18, 0, 0, 0, + 0, 0, 19, 20, 21, 22, 23, 24, 25, 0, + 0, 26, 0, 0, 0, 0, 0, 27, 0, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 0, + 39, 40, 41, 0, 0, 42, 0, 0, 43, 44, + 0, 45, 46, 47, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 48, 49, 0, 0, + 0, 0, 0, 50, 0, 0, 51, 52, 0, 53, + 54, 0, 55, 0, 0, 0, 56, 0, 57, 58, + 59, 0, 60, 61, 62, 0, 63, -692, 0, 0, + -692, -692, 294, 0, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 0, 0, 64, 65, 66, 0, + 14, 0, 15, 16, 17, 18, 0, 0, -692, 0, + -692, 19, 20, 21, 22, 23, 24, 25, 0, 0, + 26, 0, 0, 0, 0, 0, 27, 0, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 0, 39, + 40, 41, 0, 0, 42, 0, 0, 43, 44, 0, + 45, 46, 47, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 48, 49, 0, 0, 0, + 0, 0, 50, 0, 0, 295, 52, 0, 53, 54, + 0, 55, 0, 0, 0, 56, 0, 57, 58, 59, + 0, 60, 61, 62, 0, 63, -692, 0, 0, -692, + -692, -299, 0, -299, -299, -299, -299, -299, -299, -299, + -299, -299, -299, 0, 0, 64, 65, 66, 0, -299, + 0, -299, -299, -299, -299, 0, -692, -692, 0, -692, + -299, -299, -299, -299, -299, -299, -299, 0, 0, -299, + 0, 0, 0, 0, 0, -299, 0, -299, -299, -299, + -299, -299, -299, -299, -299, -299, -299, 0, -299, -299, + -299, 0, 0, -299, 0, 0, -299, -299, 0, -299, + -299, -299, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, -299, -299, 0, 0, 0, 0, + 0, -299, 0, 0, -299, -299, 0, -299, -299, 0, + -299, 0, 0, 0, -299, 0, -299, -299, -299, 0, + -299, -299, -299, 0, -299, -299, 0, 0, -299, -299, + 294, 0, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 0, 0, -299, -299, -299, 0, 14, 0, + 15, 16, 17, 18, 0, -299, -299, 0, -299, 19, + 20, 21, 22, 23, 24, 25, 0, 0, 26, 0, + 0, 0, 0, 0, 27, 0, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 0, 39, 40, 41, + 0, 0, 42, 0, 0, 43, 44, 0, 45, 46, + 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 48, 49, 0, 0, 0, 0, 0, + 50, 0, 0, 51, 52, 0, 53, 54, 0, 55, + 0, 0, 0, 56, 0, 57, 58, 59, 0, 60, + 61, 62, 0, 63, -692, 0, 0, -692, -692, 294, + 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 0, 0, 64, 65, 66, 0, 14, 0, 15, + 16, 17, 18, 0, -692, -692, 0, -692, 19, 20, + 21, 22, 23, 24, 25, 0, 0, 26, 0, 0, + 0, 0, 0, 27, 0, 29, 30, 31, 32, 33, + 34, 35, 36, 37, 38, 0, 39, 40, 41, 0, + 0, 42, 0, 0, 43, 44, 0, 45, 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 191, 192, 193, 194, 195, 196, 197, - 198, 199, 200, 0, 201, 202, 0, 0, 0, 0, - 0, 0, 203, 131, 132, 133, 134, 135, 136, 137, - 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, - 148, 149, 150, 151, 152, 153, 154, 0, 0, 0, - 155, 156, 157, 233, 234, 235, 236, 162, 163, 164, - 0, 0, 0, 0, 0, 165, 166, 167, 237, 238, - 239, 240, 172, 320, 321, 241, 322, 0, 0, 0, - 0, 0, 0, 323, 0, 0, 0, 0, 0, 0, - 174, 175, 176, 177, 178, 179, 180, 181, 0, 0, - 182, 183, 0, 0, 0, 0, 184, 185, 186, 187, + 0, 0, 48, 49, 0, 0, 0, 0, 0, 50, + 0, 0, 51, 52, 0, 53, 54, 0, 55, 0, + 0, 0, 56, 0, 57, 58, 59, 0, 60, 61, + 62, 0, 63, -692, 0, 0, -692, -692, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 188, 189, 190, 0, 0, 0, 0, 486, 0, 0, + 0, 0, 64, 65, 66, 0, 0, -692, 0, 0, + 0, 0, 0, 0, -692, 294, -692, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 13, 0, 0, -692, + 0, 0, 0, 14, 0, 15, 16, 17, 18, 0, + 0, 0, 0, 0, 19, 20, 21, 22, 23, 24, + 25, 0, 0, 26, 0, 0, 0, 0, 0, 27, + 0, 29, 30, 31, 32, 33, 34, 35, 36, 37, + 38, 0, 39, 40, 41, 0, 0, 42, 0, 0, + 43, 44, 0, 45, 46, 47, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 48, 49, + 0, 0, 0, 0, 0, 50, 0, 0, 51, 52, + 0, 53, 54, 0, 55, 0, 0, 0, 56, 0, + 57, 58, 59, 0, 60, 61, 62, 0, 63, -692, + 0, 0, -692, -692, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 64, 65, + 66, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -692, 0, -692, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, -679, -679, -679, 0, -679, -679, 14, + -679, 15, 16, 17, 18, -679, 0, 0, 0, 0, + 19, 20, 21, 22, 23, 24, 25, 0, 0, 26, + 0, 0, 0, 0, 0, 27, 0, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 0, 39, 40, + 41, 0, 0, 42, 0, 0, 43, 44, 0, 45, + 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 48, 49, 0, 0, 0, 0, + 0, 50, 0, 0, 51, 52, 0, 53, 54, 0, + 55, 0, 0, 0, 56, 0, 57, 58, 59, 0, + 60, 61, 62, 0, 63, 246, 0, 0, 247, 248, + 0, 0, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 0, 0, 64, 65, 66, 0, 14, -679, + 15, 16, 17, 18, 0, -679, 249, 0, 250, 19, + 20, 21, 22, 23, 24, 25, 0, 0, 26, 0, + 0, 0, 0, 0, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 0, 39, 40, 41, + 0, 0, 42, 0, 0, 43, 44, 0, 45, 46, + 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 48, 49, 0, 0, 0, 0, 0, + 50, 0, 0, 51, 52, 0, 53, 54, 0, 55, + 0, 0, 0, 56, 0, 57, 58, 59, 0, 60, + 61, 62, 0, 63, 246, 0, 0, 247, 248, 0, + 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 0, 0, 0, 64, 65, 66, 0, 14, 0, 15, + 16, 17, 18, 0, 0, 249, 0, 250, 19, 20, + 21, 22, 23, 24, 25, 0, 0, 26, 0, 0, + 0, 0, 0, 0, 0, 0, 30, 31, 32, 33, + 34, 35, 36, 37, 38, 0, 39, 40, 41, 0, + 0, 42, 0, 0, 43, 44, 0, 45, 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 191, 192, 193, 194, 195, 196, - 197, 198, 199, 200, 0, 201, 202, 0, 0, 0, - 0, 0, 0, 203, 131, 132, 133, 134, 135, 136, - 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, - 147, 148, 149, 150, 151, 152, 153, 154, 0, 0, - 0, 155, 156, 157, 233, 234, 235, 236, 162, 163, - 164, 0, 0, 0, 0, 0, 165, 166, 167, 237, - 238, 239, 240, 172, 0, 0, 241, 0, 0, 0, + 0, 0, 48, 49, 0, 0, 0, 0, 0, 211, + 0, 0, 119, 52, 0, 53, 54, 0, 0, 0, + 0, 0, 56, 0, 57, 58, 59, 0, 60, 61, + 62, 0, 63, 246, 0, 0, 247, 248, 0, 0, + 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, + 0, 0, 64, 65, 66, 0, 14, 0, 108, 109, + 17, 18, 0, 0, 249, 0, 250, 110, 111, 112, + 22, 23, 24, 25, 0, 0, 113, 0, 0, 0, + 0, 0, 0, 0, 0, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 0, 39, 40, 41, 0, 0, + 42, 0, 0, 43, 44, 0, 45, 46, 47, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 48, 49, 0, 0, 0, 0, 0, 211, 0, + 0, 119, 52, 0, 53, 54, 0, 0, 0, 0, + 0, 56, 0, 57, 58, 59, 0, 60, 61, 62, + 0, 63, 246, 0, 0, 247, 248, 0, 0, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 0, 0, + 0, 64, 264, 66, 0, 14, 0, 15, 16, 17, + 18, 0, 0, 249, 0, 250, 19, 20, 21, 22, + 23, 24, 25, 0, 0, 26, 0, 0, 0, 0, + 0, 0, 0, 0, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 0, 39, 40, 41, 0, 0, 42, + 0, 0, 43, 44, 0, 45, 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 174, 175, 176, 177, 178, 179, 180, 181, 0, - 0, 182, 183, 0, 0, 0, 0, 184, 185, 186, - 187, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 188, 189, 190, 0, 0, 0, 242, 0, 0, + 48, 49, 0, 0, 0, 0, 0, 211, 0, 0, + 119, 52, 0, 53, 54, 0, 0, 0, 0, 0, + 56, 0, 57, 58, 59, 0, 60, 61, 62, 0, + 63, 246, 0, 0, 247, 248, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 191, 192, 193, 194, 195, - 196, 197, 198, 199, 200, 0, 201, 202, 0, 0, - 0, 0, 0, 0, 203, 131, 132, 133, 134, 135, + 64, 65, 66, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 250, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 0, - 0, 0, 155, 156, 157, 233, 234, 235, 236, 162, + 0, 0, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 0, 0, 0, 0, 0, 165, 166, 167, - 237, 238, 239, 240, 172, 0, 0, 241, 0, 0, + 168, 169, 170, 171, 172, 35, 36, 173, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 174, 175, 176, 177, 178, 179, 180, 181, + 0, 116, 174, 175, 176, 177, 178, 179, 180, 181, 0, 0, 182, 183, 0, 0, 0, 0, 184, 185, 186, 187, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 188, 189, 190, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 0, 201, 202, 0, - 0, 0, 0, 0, 0, 203, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 0, 0, 0, 0, 0, - 0, 0, 15, 0, 108, 109, 18, 19, 0, 0, - 0, 0, 0, 110, 111, 112, 23, 24, 25, 26, - 0, 0, 113, 0, 0, 0, 0, 0, 0, 0, - 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 0, 40, 41, 42, 0, 0, 43, 0, 0, 44, - 45, 0, 116, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 313, 0, 0, 119, 53, 0, - 54, 55, 0, 0, 0, 0, 0, 57, 0, 58, - 59, 60, 0, 61, 62, 63, 0, 64, 0, 0, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, - 0, 0, 0, 0, 0, 0, 15, 120, 108, 109, - 18, 19, 0, 0, 0, 314, 0, 110, 111, 112, - 23, 24, 25, 26, 0, 0, 113, 0, 0, 0, - 0, 0, 0, 0, 0, 31, 32, 33, 34, 35, - 36, 37, 38, 39, 0, 40, 41, 42, 0, 0, - 43, 0, 0, 44, 45, 0, 116, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 313, 0, - 0, 119, 53, 0, 54, 55, 0, 0, 0, 0, - 0, 57, 0, 58, 59, 60, 0, 61, 62, 63, - 0, 64, 0, 0, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 0, 0, 0, 0, 0, 0, - 15, 120, 16, 17, 18, 19, 0, 0, 0, 611, - 0, 20, 21, 22, 23, 24, 25, 26, 0, 0, - 27, 0, 0, 0, 0, 0, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 0, 40, - 41, 42, 0, 0, 43, 0, 0, 44, 45, 0, - 46, 47, 48, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 49, 50, 0, 0, 0, - 0, 0, 51, 0, 0, 52, 53, 0, 54, 55, - 0, 56, 0, 0, 0, 57, 0, 58, 59, 60, - 0, 61, 62, 63, 0, 64, 0, 0, 0, 0, - 0, 0, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 0, 0, 0, 65, 66, 67, 15, 0, - 16, 17, 18, 19, 0, 0, 0, 0, 0, 20, - 21, 22, 23, 24, 25, 26, 0, 0, 27, 0, - 0, 0, 0, 0, 28, 0, 30, 31, 32, 33, - 34, 35, 36, 37, 38, 39, 0, 40, 41, 42, - 0, 0, 43, 0, 0, 44, 45, 0, 46, 47, - 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 49, 50, 0, 0, 0, 0, 0, - 51, 0, 0, 52, 53, 0, 54, 55, 0, 56, - 0, 0, 0, 57, 0, 58, 59, 60, 0, 61, - 62, 63, 0, 64, 0, 0, 0, 0, 0, 0, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, - 0, 0, 0, 65, 66, 67, 15, 0, 16, 17, - 18, 19, 0, 0, 0, 0, 0, 20, 21, 22, - 23, 24, 25, 26, 0, 0, 113, 0, 0, 0, - 0, 0, 0, 0, 0, 31, 32, 33, 260, 35, - 36, 37, 38, 39, 0, 40, 41, 42, 0, 0, - 43, 0, 0, 44, 45, 0, 261, 47, 48, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 49, 50, 0, 0, 0, 0, 0, 211, 0, - 0, 119, 53, 0, 54, 55, 0, 262, 0, 263, - 264, 57, 0, 58, 59, 60, 0, 61, 62, 63, - 0, 64, 0, 0, 0, 0, 0, 0, 5, 6, - 7, 8, 9, 10, 11, 12, 13, 0, 0, 0, - 0, 65, 265, 67, 15, 0, 16, 17, 18, 19, - 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, - 25, 26, 0, 0, 113, 0, 0, 0, 0, 0, - 0, 0, 0, 31, 32, 33, 260, 35, 36, 37, - 38, 39, 0, 40, 41, 42, 0, 0, 43, 0, - 0, 44, 45, 0, 261, 47, 48, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, - 508, 0, 0, 0, 0, 0, 211, 0, 0, 119, - 53, 0, 54, 55, 0, 262, 0, 263, 264, 57, - 0, 58, 59, 60, 0, 61, 62, 63, 0, 64, - 0, 0, 0, 0, 0, 0, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 0, 0, 0, 0, 65, - 265, 67, 15, 0, 108, 109, 18, 19, 0, 0, - 0, 0, 0, 110, 111, 112, 23, 24, 25, 26, - 0, 0, 113, 0, 0, 0, 0, 0, 0, 0, - 0, 31, 32, 33, 260, 35, 36, 37, 38, 39, - 0, 40, 41, 42, 0, 0, 43, 0, 0, 44, - 45, 0, 261, 47, 48, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 49, 50, 0, - 0, 0, 0, 0, 211, 0, 0, 119, 53, 0, - 54, 55, 0, 723, 0, 263, 264, 57, 0, 58, - 59, 60, 0, 61, 62, 63, 0, 64, 0, 0, - 0, 0, 0, 0, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 0, 0, 0, 0, 65, 265, 67, - 15, 0, 108, 109, 18, 19, 0, 0, 0, 0, - 0, 110, 111, 112, 23, 24, 25, 26, 0, 0, - 113, 0, 0, 0, 0, 0, 0, 0, 0, 31, - 32, 33, 260, 35, 36, 37, 38, 39, 0, 40, - 41, 42, 0, 0, 43, 0, 0, 44, 45, 0, - 261, 47, 48, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 49, 854, 0, 0, 0, - 0, 0, 211, 0, 0, 119, 53, 0, 54, 55, - 0, 723, 0, 263, 264, 57, 0, 58, 59, 60, - 0, 61, 62, 63, 0, 64, 0, 0, 0, 0, - 0, 0, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 0, 0, 0, 0, 65, 265, 67, 15, 0, - 108, 109, 18, 19, 0, 0, 0, 0, 0, 110, - 111, 112, 23, 24, 25, 26, 0, 0, 113, 0, - 0, 0, 0, 0, 0, 0, 0, 31, 32, 33, - 260, 35, 36, 37, 38, 39, 0, 40, 41, 42, - 0, 0, 43, 0, 0, 44, 45, 0, 261, 47, - 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 49, 50, 0, 0, 0, 0, 0, - 211, 0, 0, 119, 53, 0, 54, 55, 0, 262, - 0, 263, 0, 57, 0, 58, 59, 60, 0, 61, - 62, 63, 0, 64, 0, 0, 0, 0, 0, 0, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, - 0, 0, 0, 65, 265, 67, 15, 0, 108, 109, - 18, 19, 0, 0, 0, 0, 0, 110, 111, 112, - 23, 24, 25, 26, 0, 0, 113, 0, 0, 0, - 0, 0, 0, 0, 0, 31, 32, 33, 260, 35, - 36, 37, 38, 39, 0, 40, 41, 42, 0, 0, - 43, 0, 0, 44, 45, 0, 261, 47, 48, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 49, 50, 0, 0, 0, 0, 0, 211, 0, - 0, 119, 53, 0, 54, 55, 0, 0, 0, 263, - 264, 57, 0, 58, 59, 60, 0, 61, 62, 63, - 0, 64, 0, 0, 0, 0, 0, 0, 5, 6, - 7, 8, 9, 10, 11, 12, 13, 0, 0, 0, - 0, 65, 265, 67, 15, 0, 108, 109, 18, 19, - 0, 0, 0, 0, 0, 110, 111, 112, 23, 24, - 25, 26, 0, 0, 113, 0, 0, 0, 0, 0, - 0, 0, 0, 31, 32, 33, 260, 35, 36, 37, - 38, 39, 0, 40, 41, 42, 0, 0, 43, 0, - 0, 44, 45, 0, 261, 47, 48, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, - 50, 0, 0, 0, 0, 0, 211, 0, 0, 119, - 53, 0, 54, 55, 0, 723, 0, 263, 0, 57, - 0, 58, 59, 60, 0, 61, 62, 63, 0, 64, - 0, 0, 0, 0, 0, 0, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 0, 0, 0, 0, 65, - 265, 67, 15, 0, 108, 109, 18, 19, 0, 0, - 0, 0, 0, 110, 111, 112, 23, 24, 25, 26, - 0, 0, 113, 0, 0, 0, 0, 0, 0, 0, - 0, 31, 32, 33, 260, 35, 36, 37, 38, 39, - 0, 40, 41, 42, 0, 0, 43, 0, 0, 44, - 45, 0, 261, 47, 48, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 49, 50, 0, - 0, 0, 0, 0, 211, 0, 0, 119, 53, 0, - 54, 55, 0, 0, 0, 263, 0, 57, 0, 58, - 59, 60, 0, 61, 62, 63, 0, 64, 0, 0, - 0, 0, 0, 0, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 0, 0, 0, 0, 65, 265, 67, - 15, 0, 16, 17, 18, 19, 0, 0, 0, 0, - 0, 20, 21, 22, 23, 24, 25, 26, 0, 0, - 113, 0, 0, 0, 0, 0, 0, 0, 0, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 0, 40, - 41, 42, 0, 0, 43, 0, 0, 44, 45, 0, - 46, 47, 48, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 49, 50, 0, 0, 0, - 0, 0, 211, 0, 0, 119, 53, 0, 54, 55, - 0, 605, 0, 0, 0, 57, 0, 58, 59, 60, - 0, 61, 62, 63, 0, 64, 0, 0, 0, 0, - 0, 0, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 0, 0, 0, 0, 65, 265, 67, 15, 0, - 108, 109, 18, 19, 0, 0, 0, 0, 0, 110, - 111, 112, 23, 24, 25, 26, 0, 0, 113, 0, - 0, 0, 0, 0, 0, 0, 0, 31, 32, 33, - 34, 35, 36, 37, 38, 39, 0, 40, 41, 42, - 0, 0, 43, 0, 0, 44, 45, 0, 46, 47, - 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 49, 50, 0, 0, 0, 0, 0, - 211, 0, 0, 119, 53, 0, 54, 55, 0, 262, - 0, 0, 0, 57, 0, 58, 59, 60, 0, 61, - 62, 63, 0, 64, 0, 0, 0, 0, 0, 0, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, - 0, 0, 0, 65, 265, 67, 15, 0, 108, 109, - 18, 19, 0, 0, 0, 0, 0, 110, 111, 112, - 23, 24, 25, 26, 0, 0, 113, 0, 0, 0, - 0, 0, 0, 0, 0, 31, 32, 33, 34, 35, - 36, 37, 38, 39, 0, 40, 41, 42, 0, 0, - 43, 0, 0, 44, 45, 0, 46, 47, 48, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 49, 50, 0, 0, 0, 0, 0, 211, 0, - 0, 119, 53, 0, 54, 55, 0, 605, 0, 0, - 0, 57, 0, 58, 59, 60, 0, 61, 62, 63, - 0, 64, 0, 0, 0, 0, 0, 0, 5, 6, - 7, 8, 9, 10, 11, 12, 13, 0, 0, 0, - 0, 65, 265, 67, 15, 0, 108, 109, 18, 19, - 0, 0, 0, 0, 0, 110, 111, 112, 23, 24, - 25, 26, 0, 0, 113, 0, 0, 0, 0, 0, - 0, 0, 0, 31, 32, 33, 34, 35, 36, 37, - 38, 39, 0, 40, 41, 42, 0, 0, 43, 0, - 0, 44, 45, 0, 46, 47, 48, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, - 50, 0, 0, 0, 0, 0, 211, 0, 0, 119, - 53, 0, 54, 55, 0, 900, 0, 0, 0, 57, - 0, 58, 59, 60, 0, 61, 62, 63, 0, 64, - 0, 0, 0, 0, 0, 0, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 0, 0, 0, 0, 65, - 265, 67, 15, 0, 108, 109, 18, 19, 0, 0, - 0, 0, 0, 110, 111, 112, 23, 24, 25, 26, - 0, 0, 113, 0, 0, 0, 0, 0, 0, 0, - 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 0, 40, 41, 42, 0, 0, 43, 0, 0, 44, - 45, 0, 46, 47, 48, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 49, 50, 0, - 0, 0, 0, 0, 211, 0, 0, 119, 53, 0, - 54, 55, 0, 723, 0, 0, 0, 57, 0, 58, - 59, 60, 0, 61, 62, 63, 0, 64, 0, 0, - 0, 0, 0, 0, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 0, 0, 0, 0, 65, 265, 67, - 15, 0, 16, 17, 18, 19, 0, 0, 0, 0, - 0, 20, 21, 22, 23, 24, 25, 26, 0, 0, - 27, 0, 0, 0, 0, 0, 0, 0, 0, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 0, 40, - 41, 42, 0, 0, 43, 0, 0, 44, 45, 0, - 46, 47, 48, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 49, 50, 0, 0, 0, - 0, 0, 211, 0, 0, 119, 53, 0, 54, 55, - 0, 0, 0, 0, 0, 57, 0, 58, 59, 60, - 0, 61, 62, 63, 0, 64, 0, 0, 0, 0, - 0, 0, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 0, 0, 0, 0, 65, 66, 67, 15, 0, - 108, 109, 18, 19, 0, 0, 0, 0, 0, 110, - 111, 112, 23, 24, 25, 26, 0, 0, 113, 0, - 0, 0, 0, 0, 0, 0, 0, 31, 32, 33, - 34, 35, 36, 37, 38, 39, 0, 40, 41, 42, - 0, 0, 43, 0, 0, 44, 45, 0, 46, 47, - 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 49, 50, 0, 0, 0, 0, 0, - 211, 0, 0, 119, 53, 0, 54, 55, 0, 0, - 0, 0, 0, 57, 0, 58, 59, 60, 0, 61, - 62, 63, 0, 64, 0, 0, 0, 0, 0, 0, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, - 0, 0, 0, 65, 265, 67, 15, 0, 16, 17, - 18, 19, 0, 0, 0, 0, 0, 20, 21, 22, - 23, 24, 25, 26, 0, 0, 113, 0, 0, 0, - 0, 0, 0, 0, 0, 31, 32, 33, 34, 35, - 36, 37, 38, 39, 0, 40, 41, 42, 0, 0, - 43, 0, 0, 44, 45, 0, 46, 47, 48, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 49, 50, 0, 0, 0, 0, 0, 211, 0, - 0, 119, 53, 0, 54, 55, 0, 0, 0, 0, - 0, 57, 0, 58, 59, 60, 0, 61, 62, 63, - 0, 64, 0, 0, 0, 0, 0, 0, 5, 6, - 7, 8, 9, 10, 11, 12, 13, 0, 0, 0, - 0, 65, 265, 67, 15, 0, 108, 109, 18, 19, - 0, 0, 0, 0, 0, 110, 111, 112, 23, 24, - 25, 26, 0, 0, 113, 0, 0, 0, 0, 0, - 0, 0, 0, 31, 32, 33, 114, 35, 36, 37, - 115, 39, 0, 40, 41, 42, 0, 0, 43, 0, - 0, 44, 45, 0, 116, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 117, 0, 0, 118, 0, 0, 119, - 53, 0, 54, 55, 0, 0, 0, 0, 0, 57, - 0, 58, 59, 60, 0, 61, 62, 63, 0, 64, - 0, 0, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 0, 0, 0, 0, 0, 0, 0, 15, 120, - 108, 109, 18, 19, 0, 0, 0, 0, 0, 110, - 111, 112, 23, 24, 25, 26, 0, 0, 113, 0, - 0, 0, 0, 0, 0, 0, 0, 31, 32, 33, - 34, 35, 36, 37, 38, 39, 0, 40, 41, 42, - 0, 0, 43, 0, 0, 44, 45, 0, 225, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 226, 0, 0, 52, 53, 0, 54, 55, 0, 56, - 0, 0, 0, 57, 0, 58, 59, 60, 0, 61, - 62, 63, 0, 64, 0, 0, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 0, 0, 0, 0, 0, - 0, 0, 15, 120, 108, 109, 18, 19, 0, 0, - 0, 0, 0, 110, 111, 112, 23, 24, 25, 26, - 0, 0, 113, 0, 0, 0, 0, 0, 0, 0, - 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 0, 40, 41, 42, 0, 0, 43, 0, 0, 44, - 45, 0, 116, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 313, 0, 0, 399, 53, 0, - 54, 55, 0, 400, 0, 0, 0, 57, 0, 58, - 59, 60, 0, 61, 62, 63, 0, 64, 0, 0, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, - 0, 0, 0, 0, 0, 0, 15, 120, 108, 109, - 18, 19, 0, 0, 0, 0, 0, 110, 111, 112, - 23, 24, 25, 26, 0, 0, 113, 0, 0, 0, - 0, 0, 0, 0, 0, 31, 32, 33, 114, 35, - 36, 37, 115, 39, 0, 40, 41, 42, 0, 0, - 43, 0, 0, 44, 45, 0, 116, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 118, 0, - 0, 119, 53, 0, 54, 55, 0, 0, 0, 0, - 0, 57, 0, 58, 59, 60, 0, 61, 62, 63, - 0, 64, 0, 0, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 0, 0, 0, 0, 0, 0, 0, - 15, 120, 108, 109, 18, 19, 0, 0, 0, 0, - 0, 110, 111, 112, 23, 24, 25, 26, 0, 0, - 113, 0, 0, 0, 0, 0, 0, 0, 0, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 0, 40, - 41, 42, 0, 0, 43, 0, 0, 44, 45, 0, - 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 203, 204, -662, -662, -662, + -662, -662, -662, -662, -662, -662, 0, 0, 0, 0, + 0, 0, 0, -662, 0, -662, -662, -662, -662, 0, + -662, 0, 0, 0, -662, -662, -662, -662, -662, -662, + -662, 0, 0, -662, 0, 0, 0, 0, 0, 0, + 0, 0, -662, -662, -662, -662, -662, -662, -662, -662, + -662, 0, -662, -662, -662, 0, 0, -662, 0, 0, + -662, -662, 0, -662, -662, -662, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, -662, -662, + 0, 0, 0, 0, 0, -662, 0, 0, -662, -662, + 0, -662, -662, 0, -662, 0, -662, -662, -662, 0, + -662, -662, -662, 0, -662, -662, -662, 0, -662, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 313, 0, 0, 399, 53, 0, 54, 55, - 0, 0, 0, 0, 0, 57, 0, 58, 59, 60, - 0, 61, 62, 63, 0, 64, 0, 0, 5, 6, - 7, 8, 9, 10, 11, 12, 13, 0, 0, 0, - 0, 0, 0, 0, 15, 120, 108, 109, 18, 19, - 0, 0, 0, 0, 0, 110, 111, 112, 23, 24, - 25, 26, 0, 0, 113, 0, 0, 0, 0, 0, - 0, 0, 0, 31, 32, 33, 34, 35, 36, 37, - 38, 39, 0, 40, 41, 42, 0, 0, 43, 0, - 0, 44, 45, 0, 116, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 967, 0, 0, 119, - 53, 0, 54, 55, 0, 0, 0, 0, 0, 57, - 0, 58, 59, 60, 0, 61, 62, 63, 0, 64, - 0, 0, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 0, 0, 0, 0, 0, 0, 0, 15, 120, - 108, 109, 18, 19, 0, 0, 0, 0, 0, 110, - 111, 112, 23, 24, 25, 26, 0, 0, 113, 0, - 0, 0, 0, 0, 0, 0, 0, 31, 32, 33, - 34, 35, 36, 37, 38, 39, 0, 40, 41, 42, - 0, 0, 43, 0, 0, 44, 45, 0, 225, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 990, 0, 0, 119, 53, 0, 54, 55, 0, 0, - 659, 660, 0, 57, 661, 58, 59, 60, 0, 61, - 62, 63, 0, 64, 0, 0, 0, 0, 0, 174, - 175, 176, 177, 178, 179, 180, 181, 0, 0, 182, - 183, 0, 0, 120, 0, 184, 185, 186, 187, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 188, - 189, 190, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 191, 192, 193, 194, 195, 196, 197, - 198, 199, 200, 0, 201, 202, 680, 651, 0, 0, - 681, 0, 203, 276, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 174, 175, 176, 177, 178, - 179, 180, 181, 0, 0, 182, 183, 0, 0, 0, - 0, 184, 185, 186, 187, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 188, 189, 190, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 191, - 192, 193, 194, 195, 196, 197, 198, 199, 200, 0, - 201, 202, 665, 660, 0, 0, 666, 0, 203, 276, + 0, 0, 0, 0, 0, 0, 0, 0, -662, -662, + -662, 0, -662, 0, 0, 0, 0, 0, -662, -663, + -663, -663, -663, -663, -663, -663, -663, -663, 0, 0, + 0, 0, 0, 0, 0, -663, 0, -663, -663, -663, + -663, 0, -663, 0, 0, 0, -663, -663, -663, -663, + -663, -663, -663, 0, 0, -663, 0, 0, 0, 0, + 0, 0, 0, 0, -663, -663, -663, -663, -663, -663, + -663, -663, -663, 0, -663, -663, -663, 0, 0, -663, + 0, 0, -663, -663, 0, -663, -663, -663, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 174, 175, 176, 177, 178, 179, 180, 181, 0, - 0, 182, 183, 0, 0, 0, 0, 184, 185, 186, - 187, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 188, 189, 190, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 191, 192, 193, 194, 195, - 196, 197, 198, 199, 200, 0, 201, 202, 697, 651, - 0, 0, 698, 0, 203, 276, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 174, 175, 176, - 177, 178, 179, 180, 181, 0, 0, 182, 183, 0, - 0, 0, 0, 184, 185, 186, 187, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 188, 189, 190, + -663, -663, 0, 0, 0, 0, 0, -663, 0, 0, + -663, -663, 0, -663, -663, 0, -663, 0, -663, -663, + -663, 0, -663, -663, -663, 0, -663, -663, -663, 0, + -663, 0, 0, 0, 0, 0, 0, -665, -665, -665, + -665, -665, -665, -665, -665, -665, 0, 0, 0, 0, + -663, -663, -663, -665, -663, -665, -665, -665, -665, 0, + -663, 0, 0, 0, -665, -665, -665, -665, -665, -665, + -665, 0, 0, -665, 0, 0, 0, 0, 0, 0, + 0, 0, -665, -665, -665, -665, -665, -665, -665, -665, + -665, 0, -665, -665, -665, 0, 0, -665, 0, 0, + -665, -665, 0, -665, -665, -665, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, -665, -665, + 0, 0, 0, 0, 0, -665, 869, 0, -665, -665, + 0, -665, -665, 0, -665, 0, -665, -665, -665, 0, + -665, -665, -665, 0, -665, -665, -665, 0, -665, 0, + 0, 0, 0, 0, 0, -110, -666, -666, -666, -666, + -666, -666, -666, -666, -666, 0, 0, 0, -665, -665, + -665, 0, -666, 0, -666, -666, -666, -666, -665, 0, + 0, 0, 0, -666, -666, -666, -666, -666, -666, -666, + 0, 0, -666, 0, 0, 0, 0, 0, 0, 0, + 0, -666, -666, -666, -666, -666, -666, -666, -666, -666, + 0, -666, -666, -666, 0, 0, -666, 0, 0, -666, + -666, 0, -666, -666, -666, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, -666, -666, 0, + 0, 0, 0, 0, -666, 870, 0, -666, -666, 0, + -666, -666, 0, -666, 0, -666, -666, -666, 0, -666, + -666, -666, 0, -666, -666, -666, 0, -666, 0, 0, + 0, 0, 0, 0, -112, -667, -667, -667, -667, -667, + -667, -667, -667, -667, 0, 0, 0, -666, -666, -666, + 0, -667, 0, -667, -667, -667, -667, -666, 0, 0, + 0, 0, -667, -667, -667, -667, -667, -667, -667, 0, + 0, -667, 0, 0, 0, 0, 0, 0, 0, 0, + -667, -667, -667, -667, -667, -667, -667, -667, -667, 0, + -667, -667, -667, 0, 0, -667, 0, 0, -667, -667, + 0, -667, -667, -667, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, -667, -667, 0, 0, + 0, 0, 0, -667, 0, 0, -667, -667, 0, -667, + -667, 0, -667, 0, -667, -667, -667, 0, -667, -667, + -667, 0, -667, -667, -667, 0, -667, 0, 0, 0, + 0, 0, 0, -668, -668, -668, -668, -668, -668, -668, + -668, -668, 0, 0, 0, 0, -667, -667, -667, -668, + 0, -668, -668, -668, -668, 0, -667, 0, 0, 0, + -668, -668, -668, -668, -668, -668, -668, 0, 0, -668, + 0, 0, 0, 0, 0, 0, 0, 0, -668, -668, + -668, -668, -668, -668, -668, -668, -668, 0, -668, -668, + -668, 0, 0, -668, 0, 0, -668, -668, 0, -668, + -668, -668, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, -668, -668, 0, 0, 0, 0, + 0, -668, 0, 0, -668, -668, 0, -668, -668, 0, + -668, 0, -668, -668, -668, 0, -668, -668, -668, 0, + -668, -668, -668, 0, -668, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, -668, -668, -668, 0, 0, 0, + 0, 0, 0, 0, -668, 131, 132, 133, 134, 135, + 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, + 146, 147, 148, 149, 150, 151, 152, 153, 154, 0, + 0, 0, 155, 156, 157, 232, 233, 234, 235, 162, + 163, 164, 0, 0, 0, 0, 0, 165, 166, 167, + 236, 237, 238, 239, 172, 319, 320, 240, 321, 0, + 0, 0, 0, 0, 0, 322, 0, 0, 0, 0, + 0, 323, 174, 175, 176, 177, 178, 179, 180, 181, + 0, 0, 182, 183, 0, 0, 0, 0, 184, 185, + 186, 187, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 188, 189, 190, 0, 0, 0, 0, 324, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 191, 192, 193, 194, 195, 196, 197, 198, 199, - 200, 0, 201, 202, 700, 660, 0, 0, 701, 0, - 203, 276, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 191, 192, 193, 194, + 195, 196, 197, 198, 199, 200, 0, 201, 202, 0, + 0, 0, 0, 0, 0, 203, 131, 132, 133, 134, + 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, + 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, + 0, 0, 0, 155, 156, 157, 232, 233, 234, 235, + 162, 163, 164, 0, 0, 0, 0, 0, 165, 166, + 167, 236, 237, 238, 239, 172, 319, 320, 240, 321, + 0, 0, 0, 0, 0, 0, 322, 0, 0, 0, 0, 0, 0, 174, 175, 176, 177, 178, 179, 180, 181, 0, 0, 182, 183, 0, 0, 0, 0, 184, 185, 186, 187, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 188, 189, 190, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 487, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 0, 201, 202, - 707, 651, 0, 0, 708, 0, 203, 276, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 174, - 175, 176, 177, 178, 179, 180, 181, 0, 0, 182, - 183, 0, 0, 0, 0, 184, 185, 186, 187, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 188, - 189, 190, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 203, 131, 132, 133, + 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, + 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, + 154, 0, 0, 0, 155, 156, 157, 232, 233, 234, + 235, 162, 163, 164, 0, 0, 0, 0, 0, 165, + 166, 167, 236, 237, 238, 239, 172, 0, 0, 240, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 191, 192, 193, 194, 195, 196, 197, - 198, 199, 200, 0, 201, 202, 710, 660, 0, 0, - 711, 0, 203, 276, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 174, 175, 176, 177, 178, 179, + 180, 181, 0, 0, 182, 183, 0, 0, 0, 0, + 184, 185, 186, 187, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 188, 189, 190, 0, 0, 0, + 241, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 191, 192, + 193, 194, 195, 196, 197, 198, 199, 200, 0, 201, + 202, 0, 0, 0, 0, 0, 0, 203, 131, 132, + 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, + 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, + 153, 154, 0, 0, 0, 155, 156, 157, 232, 233, + 234, 235, 162, 163, 164, 0, 0, 0, 0, 0, + 165, 166, 167, 236, 237, 238, 239, 172, 0, 0, + 240, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 174, 175, 176, 177, 178, 179, 180, 181, 0, 0, 182, 183, 0, 0, 0, 0, 184, 185, 186, 187, 0, 0, 0, 0, 0, @@ -3692,608 +4363,1110 @@ static const yytype_int16 yytable[] = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 0, - 201, 202, 746, 651, 0, 0, 747, 0, 203, 276, + 201, 202, 0, 0, 0, 0, 0, 0, 203, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 0, 0, + 0, 0, 0, 0, 0, 14, 0, 108, 109, 17, + 18, 0, 0, 0, 0, 0, 110, 111, 112, 22, + 23, 24, 25, 0, 0, 113, 0, 0, 0, 0, + 0, 0, 0, 0, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 0, 39, 40, 41, 0, 0, 42, + 0, 0, 43, 44, 0, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 174, 175, 176, 177, 178, 179, 180, 181, 0, - 0, 182, 183, 0, 0, 0, 0, 184, 185, 186, - 187, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 188, 189, 190, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 312, 0, 0, + 119, 52, 0, 53, 54, 0, 0, 0, 0, 0, + 56, 0, 57, 58, 59, 0, 60, 61, 62, 0, + 63, 0, 0, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 0, 0, 0, 0, 0, 0, 0, 14, + 120, 108, 109, 17, 18, 0, 0, 0, 313, 0, + 110, 111, 112, 22, 23, 24, 25, 0, 0, 113, + 0, 0, 0, 0, 0, 0, 0, 0, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 0, 39, 40, + 41, 0, 0, 42, 0, 0, 43, 44, 0, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 191, 192, 193, 194, 195, - 196, 197, 198, 199, 200, 0, 201, 202, 749, 660, - 0, 0, 750, 0, 203, 276, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 174, 175, 176, - 177, 178, 179, 180, 181, 0, 0, 182, 183, 0, - 0, 0, 0, 184, 185, 186, 187, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 188, 189, 190, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 312, 0, 0, 119, 52, 0, 53, 54, 0, + 0, 0, 0, 0, 56, 0, 57, 58, 59, 0, + 60, 61, 62, 0, 63, 0, 0, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 13, 0, 0, 0, + 0, 0, 0, 14, 120, 15, 16, 17, 18, 0, + 0, 0, 613, 0, 19, 20, 21, 22, 23, 24, + 25, 0, 0, 26, 0, 0, 0, 0, 0, 27, + 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, + 38, 0, 39, 40, 41, 0, 0, 42, 0, 0, + 43, 44, 0, 45, 46, 47, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 48, 49, + 0, 0, 0, 0, 0, 50, 0, 0, 51, 52, + 0, 53, 54, 0, 55, 0, 0, 0, 56, 0, + 57, 58, 59, 0, 60, 61, 62, 0, 63, 0, + 0, 0, 0, 0, 0, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 0, 0, 0, 64, 65, + 66, 14, 0, 15, 16, 17, 18, 0, 0, 0, + 0, 0, 19, 20, 21, 22, 23, 24, 25, 0, + 0, 26, 0, 0, 0, 0, 0, 27, 0, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 0, + 39, 40, 41, 0, 0, 42, 0, 0, 43, 44, + 0, 45, 46, 47, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 48, 49, 0, 0, + 0, 0, 0, 50, 0, 0, 51, 52, 0, 53, + 54, 0, 55, 0, 0, 0, 56, 0, 57, 58, + 59, 0, 60, 61, 62, 0, 63, 0, 0, 0, + 0, 0, 0, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 0, 0, 0, 0, 64, 65, 66, 14, + 0, 15, 16, 17, 18, 0, 0, 0, 0, 0, + 19, 20, 21, 22, 23, 24, 25, 0, 0, 113, + 0, 0, 0, 0, 0, 0, 0, 0, 30, 31, + 32, 259, 34, 35, 36, 37, 38, 0, 39, 40, + 41, 0, 0, 42, 0, 0, 43, 44, 0, 260, + 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 48, 49, 0, 0, 0, 0, + 0, 211, 0, 0, 119, 52, 0, 53, 54, 0, + 261, 0, 262, 263, 56, 0, 57, 58, 59, 0, + 60, 61, 62, 0, 63, 0, 0, 0, 0, 0, + 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 0, 0, 0, 0, 64, 264, 66, 14, 0, 15, + 16, 17, 18, 0, 0, 0, 0, 0, 19, 20, + 21, 22, 23, 24, 25, 0, 0, 113, 0, 0, + 0, 0, 0, 0, 0, 0, 30, 31, 32, 259, + 34, 35, 36, 37, 38, 0, 39, 40, 41, 0, + 0, 42, 0, 0, 43, 44, 0, 260, 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 191, 192, 193, 194, 195, 196, 197, 198, 199, - 200, 0, 201, 202, 905, 651, 0, 0, 906, 0, - 203, 276, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 174, 175, 176, 177, 178, 179, 180, - 181, 0, 0, 182, 183, 0, 0, 0, 0, 184, - 185, 186, 187, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 188, 189, 190, 0, 0, 0, 0, + 0, 0, 48, 509, 0, 0, 0, 0, 0, 211, + 0, 0, 119, 52, 0, 53, 54, 0, 261, 0, + 262, 263, 56, 0, 57, 58, 59, 0, 60, 61, + 62, 0, 63, 0, 0, 0, 0, 0, 0, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 0, 0, + 0, 0, 64, 264, 66, 14, 0, 108, 109, 17, + 18, 0, 0, 0, 0, 0, 110, 111, 112, 22, + 23, 24, 25, 0, 0, 113, 0, 0, 0, 0, + 0, 0, 0, 0, 30, 31, 32, 259, 34, 35, + 36, 37, 38, 0, 39, 40, 41, 0, 0, 42, + 0, 0, 43, 44, 0, 260, 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 191, 192, 193, - 194, 195, 196, 197, 198, 199, 200, 0, 201, 202, - 908, 660, 0, 0, 909, 0, 203, 276, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 174, - 175, 176, 177, 178, 179, 180, 181, 0, 0, 182, - 183, 0, 0, 0, 0, 184, 185, 186, 187, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 188, - 189, 190, 0, 0, 0, 0, 0, 0, 0, 0, + 48, 49, 0, 0, 0, 0, 0, 211, 0, 0, + 119, 52, 0, 53, 54, 0, 727, 0, 262, 263, + 56, 0, 57, 58, 59, 0, 60, 61, 62, 0, + 63, 0, 0, 0, 0, 0, 0, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 0, 0, 0, 0, + 64, 264, 66, 14, 0, 108, 109, 17, 18, 0, + 0, 0, 0, 0, 110, 111, 112, 22, 23, 24, + 25, 0, 0, 113, 0, 0, 0, 0, 0, 0, + 0, 0, 30, 31, 32, 259, 34, 35, 36, 37, + 38, 0, 39, 40, 41, 0, 0, 42, 0, 0, + 43, 44, 0, 260, 46, 47, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 48, 887, + 0, 0, 0, 0, 0, 211, 0, 0, 119, 52, + 0, 53, 54, 0, 727, 0, 262, 263, 56, 0, + 57, 58, 59, 0, 60, 61, 62, 0, 63, 0, + 0, 0, 0, 0, 0, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 0, 0, 0, 0, 64, 264, + 66, 14, 0, 108, 109, 17, 18, 0, 0, 0, + 0, 0, 110, 111, 112, 22, 23, 24, 25, 0, + 0, 113, 0, 0, 0, 0, 0, 0, 0, 0, + 30, 31, 32, 259, 34, 35, 36, 37, 38, 0, + 39, 40, 41, 0, 0, 42, 0, 0, 43, 44, + 0, 260, 46, 47, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 48, 49, 0, 0, + 0, 0, 0, 211, 0, 0, 119, 52, 0, 53, + 54, 0, 261, 0, 262, 0, 56, 0, 57, 58, + 59, 0, 60, 61, 62, 0, 63, 0, 0, 0, + 0, 0, 0, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 0, 0, 0, 0, 64, 264, 66, 14, + 0, 108, 109, 17, 18, 0, 0, 0, 0, 0, + 110, 111, 112, 22, 23, 24, 25, 0, 0, 113, + 0, 0, 0, 0, 0, 0, 0, 0, 30, 31, + 32, 259, 34, 35, 36, 37, 38, 0, 39, 40, + 41, 0, 0, 42, 0, 0, 43, 44, 0, 260, + 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 48, 49, 0, 0, 0, 0, + 0, 211, 0, 0, 119, 52, 0, 53, 54, 0, + 0, 0, 262, 263, 56, 0, 57, 58, 59, 0, + 60, 61, 62, 0, 63, 0, 0, 0, 0, 0, + 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 0, 0, 0, 0, 64, 264, 66, 14, 0, 108, + 109, 17, 18, 0, 0, 0, 0, 0, 110, 111, + 112, 22, 23, 24, 25, 0, 0, 113, 0, 0, + 0, 0, 0, 0, 0, 0, 30, 31, 32, 259, + 34, 35, 36, 37, 38, 0, 39, 40, 41, 0, + 0, 42, 0, 0, 43, 44, 0, 260, 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 191, 192, 193, 194, 195, 196, 197, - 198, 199, 200, 0, 201, 202, 1049, 651, 0, 0, - 1050, 0, 203, 276, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 174, 175, 176, 177, 178, - 179, 180, 181, 0, 0, 182, 183, 0, 0, 0, - 0, 184, 185, 186, 187, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 188, 189, 190, 0, 0, + 0, 0, 48, 49, 0, 0, 0, 0, 0, 211, + 0, 0, 119, 52, 0, 53, 54, 0, 727, 0, + 262, 0, 56, 0, 57, 58, 59, 0, 60, 61, + 62, 0, 63, 0, 0, 0, 0, 0, 0, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 0, 0, + 0, 0, 64, 264, 66, 14, 0, 108, 109, 17, + 18, 0, 0, 0, 0, 0, 110, 111, 112, 22, + 23, 24, 25, 0, 0, 113, 0, 0, 0, 0, + 0, 0, 0, 0, 30, 31, 32, 259, 34, 35, + 36, 37, 38, 0, 39, 40, 41, 0, 0, 42, + 0, 0, 43, 44, 0, 260, 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 191, - 192, 193, 194, 195, 196, 197, 198, 199, 200, 0, - 201, 202, 1061, 651, 0, 0, 1062, 0, 203, 276, + 48, 49, 0, 0, 0, 0, 0, 211, 0, 0, + 119, 52, 0, 53, 54, 0, 0, 0, 262, 0, + 56, 0, 57, 58, 59, 0, 60, 61, 62, 0, + 63, 0, 0, 0, 0, 0, 0, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 0, 0, 0, 0, + 64, 264, 66, 14, 0, 15, 16, 17, 18, 0, + 0, 0, 0, 0, 19, 20, 21, 22, 23, 24, + 25, 0, 0, 113, 0, 0, 0, 0, 0, 0, + 0, 0, 30, 31, 32, 33, 34, 35, 36, 37, + 38, 0, 39, 40, 41, 0, 0, 42, 0, 0, + 43, 44, 0, 45, 46, 47, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 48, 49, + 0, 0, 0, 0, 0, 211, 0, 0, 119, 52, + 0, 53, 54, 0, 607, 0, 0, 0, 56, 0, + 57, 58, 59, 0, 60, 61, 62, 0, 63, 0, + 0, 0, 0, 0, 0, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 0, 0, 0, 0, 64, 264, + 66, 14, 0, 108, 109, 17, 18, 0, 0, 0, + 0, 0, 110, 111, 112, 22, 23, 24, 25, 0, + 0, 113, 0, 0, 0, 0, 0, 0, 0, 0, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 0, + 39, 40, 41, 0, 0, 42, 0, 0, 43, 44, + 0, 45, 46, 47, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 48, 49, 0, 0, + 0, 0, 0, 211, 0, 0, 119, 52, 0, 53, + 54, 0, 261, 0, 0, 0, 56, 0, 57, 58, + 59, 0, 60, 61, 62, 0, 63, 0, 0, 0, + 0, 0, 0, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 0, 0, 0, 0, 64, 264, 66, 14, + 0, 108, 109, 17, 18, 0, 0, 0, 0, 0, + 110, 111, 112, 22, 23, 24, 25, 0, 0, 113, + 0, 0, 0, 0, 0, 0, 0, 0, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 0, 39, 40, + 41, 0, 0, 42, 0, 0, 43, 44, 0, 45, + 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 48, 49, 0, 0, 0, 0, + 0, 211, 0, 0, 119, 52, 0, 53, 54, 0, + 607, 0, 0, 0, 56, 0, 57, 58, 59, 0, + 60, 61, 62, 0, 63, 0, 0, 0, 0, 0, + 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 0, 0, 0, 0, 64, 264, 66, 14, 0, 108, + 109, 17, 18, 0, 0, 0, 0, 0, 110, 111, + 112, 22, 23, 24, 25, 0, 0, 113, 0, 0, + 0, 0, 0, 0, 0, 0, 30, 31, 32, 33, + 34, 35, 36, 37, 38, 0, 39, 40, 41, 0, + 0, 42, 0, 0, 43, 44, 0, 45, 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 174, 175, 176, 177, 178, 179, 180, 181, 0, - 0, 182, 183, 0, 0, 0, 0, 184, 185, 186, - 187, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 188, 189, 190, 0, 0, 0, 0, 0, 0, + 0, 0, 48, 49, 0, 0, 0, 0, 0, 211, + 0, 0, 119, 52, 0, 53, 54, 0, 933, 0, + 0, 0, 56, 0, 57, 58, 59, 0, 60, 61, + 62, 0, 63, 0, 0, 0, 0, 0, 0, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 0, 0, + 0, 0, 64, 264, 66, 14, 0, 108, 109, 17, + 18, 0, 0, 0, 0, 0, 110, 111, 112, 22, + 23, 24, 25, 0, 0, 113, 0, 0, 0, 0, + 0, 0, 0, 0, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 0, 39, 40, 41, 0, 0, 42, + 0, 0, 43, 44, 0, 45, 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 191, 192, 193, 194, 195, - 196, 197, 198, 199, 200, 0, 201, 202, 1064, 660, - 0, 0, 1065, 0, 203, 276, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 174, 175, 176, - 177, 178, 179, 180, 181, 0, 0, 182, 183, 0, - 0, 0, 0, 184, 185, 186, 187, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 188, 189, 190, + 48, 49, 0, 0, 0, 0, 0, 211, 0, 0, + 119, 52, 0, 53, 54, 0, 727, 0, 0, 0, + 56, 0, 57, 58, 59, 0, 60, 61, 62, 0, + 63, 0, 0, 0, 0, 0, 0, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 0, 0, 0, 0, + 64, 264, 66, 14, 0, 15, 16, 17, 18, 0, + 0, 0, 0, 0, 19, 20, 21, 22, 23, 24, + 25, 0, 0, 26, 0, 0, 0, 0, 0, 0, + 0, 0, 30, 31, 32, 33, 34, 35, 36, 37, + 38, 0, 39, 40, 41, 0, 0, 42, 0, 0, + 43, 44, 0, 45, 46, 47, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 48, 49, + 0, 0, 0, 0, 0, 211, 0, 0, 119, 52, + 0, 53, 54, 0, 0, 0, 0, 0, 56, 0, + 57, 58, 59, 0, 60, 61, 62, 0, 63, 0, + 0, 0, 0, 0, 0, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 0, 0, 0, 0, 64, 65, + 66, 14, 0, 108, 109, 17, 18, 0, 0, 0, + 0, 0, 110, 111, 112, 22, 23, 24, 25, 0, + 0, 113, 0, 0, 0, 0, 0, 0, 0, 0, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 0, + 39, 40, 41, 0, 0, 42, 0, 0, 43, 44, + 0, 45, 46, 47, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 48, 49, 0, 0, + 0, 0, 0, 211, 0, 0, 119, 52, 0, 53, + 54, 0, 0, 0, 0, 0, 56, 0, 57, 58, + 59, 0, 60, 61, 62, 0, 63, 0, 0, 0, + 0, 0, 0, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 0, 0, 0, 0, 64, 264, 66, 14, + 0, 15, 16, 17, 18, 0, 0, 0, 0, 0, + 19, 20, 21, 22, 23, 24, 25, 0, 0, 113, + 0, 0, 0, 0, 0, 0, 0, 0, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 0, 39, 40, + 41, 0, 0, 42, 0, 0, 43, 44, 0, 45, + 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 48, 49, 0, 0, 0, 0, + 0, 211, 0, 0, 119, 52, 0, 53, 54, 0, + 0, 0, 0, 0, 56, 0, 57, 58, 59, 0, + 60, 61, 62, 0, 63, 0, 0, 0, 0, 0, + 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 0, 0, 0, 0, 64, 264, 66, 14, 0, 108, + 109, 17, 18, 0, 0, 0, 0, 0, 110, 111, + 112, 22, 23, 24, 25, 0, 0, 113, 0, 0, + 0, 0, 0, 0, 0, 0, 30, 31, 32, 114, + 34, 35, 36, 115, 38, 0, 39, 40, 41, 0, + 0, 42, 0, 0, 43, 44, 0, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 117, 0, 0, 118, + 0, 0, 119, 52, 0, 53, 54, 0, 0, 0, + 0, 0, 56, 0, 57, 58, 59, 0, 60, 61, + 62, 0, 63, 0, 0, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 0, 0, 0, 0, 0, 0, + 0, 14, 120, 108, 109, 17, 18, 0, 0, 0, + 0, 0, 110, 111, 112, 22, 23, 24, 25, 0, + 0, 113, 0, 0, 0, 0, 0, 0, 0, 0, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 0, + 39, 40, 41, 0, 0, 42, 0, 0, 43, 44, + 0, 224, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 191, 192, 193, 194, 195, 196, 197, 198, 199, - 200, 0, 201, 202, 665, 660, 0, 0, 666, 0, - 203, 276, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 174, 175, 176, 177, 178, 179, 180, - 181, 0, 0, 182, 183, 0, 0, 0, 0, 184, - 185, 186, 187, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 188, 189, 190, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 872, - 0, 0, 0, 0, 0, 0, 0, 191, 192, 193, - 194, 195, 196, 197, 198, 199, 200, 0, 201, 202, - 0, 0, 0, 0, 0, 0, 203, 403, 404, 405, - 406, 407, 408, 409, 410, 411, 412, 413, 414, 0, - 0, 0, 0, 415, 416, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 418, 0, 0, 0, - 0, 885, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 419, 0, 420, - 421, 422, 423, 424, 425, 426, 427, 428, 429, 403, - 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, - 414, 0, 0, 0, 0, 415, 416, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 418, 0, + 0, 0, 0, 225, 0, 0, 51, 52, 0, 53, + 54, 0, 55, 0, 0, 0, 56, 0, 57, 58, + 59, 0, 60, 61, 62, 0, 63, 0, 0, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 0, 0, + 0, 0, 0, 0, 0, 14, 120, 108, 109, 17, + 18, 0, 0, 0, 0, 0, 110, 111, 112, 22, + 23, 24, 25, 0, 0, 113, 0, 0, 0, 0, + 0, 0, 0, 0, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 0, 39, 40, 41, 0, 0, 42, + 0, 0, 43, 44, 0, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 419, - 0, 420, 421, 422, 423, 424, 425, 426, 427, 428, - 429, 403, 404, 405, 406, 407, 408, 409, 410, 411, - 412, 413, 414, 0, 0, 0, 0, 415, 416, 0, + 0, 0, 0, 0, 0, 0, 0, 312, 0, 0, + 399, 52, 0, 53, 54, 0, 400, 0, 0, 0, + 56, 0, 57, 58, 59, 0, 60, 61, 62, 0, + 63, 0, 0, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 0, 0, 0, 0, 0, 0, 0, 14, + 120, 108, 109, 17, 18, 0, 0, 0, 0, 0, + 110, 111, 112, 22, 23, 24, 25, 0, 0, 113, + 0, 0, 0, 0, 0, 0, 0, 0, 30, 31, + 32, 114, 34, 35, 36, 115, 38, 0, 39, 40, + 41, 0, 0, 42, 0, 0, 43, 44, 0, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 418, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 419, 0, 420, 421, 422, 423, 424, 425, 426, - 427, 428, 429, 0, 0, 0, 0, 0, 0, 0, - 0, -277, 403, 404, 405, 406, 407, 408, 409, 410, - 411, 412, 413, 414, 0, 0, 0, 0, 415, 416, + 0, 118, 0, 0, 119, 52, 0, 53, 54, 0, + 0, 0, 0, 0, 56, 0, 57, 58, 59, 0, + 60, 61, 62, 0, 63, 0, 0, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 0, 0, 0, 0, + 0, 0, 0, 14, 120, 108, 109, 17, 18, 0, + 0, 0, 0, 0, 110, 111, 112, 22, 23, 24, + 25, 0, 0, 113, 0, 0, 0, 0, 0, 0, + 0, 0, 30, 31, 32, 33, 34, 35, 36, 37, + 38, 0, 39, 40, 41, 0, 0, 42, 0, 0, + 43, 44, 0, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 418, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 312, 0, 0, 399, 52, + 0, 53, 54, 0, 0, 0, 0, 0, 56, 0, + 57, 58, 59, 0, 60, 61, 62, 0, 63, 0, + 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 0, 0, 0, 0, 0, 0, 0, 14, 120, 108, + 109, 17, 18, 0, 0, 0, 0, 0, 110, 111, + 112, 22, 23, 24, 25, 0, 0, 113, 0, 0, + 0, 0, 0, 0, 0, 0, 30, 31, 32, 33, + 34, 35, 36, 37, 38, 0, 39, 40, 41, 0, + 0, 42, 0, 0, 43, 44, 0, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 419, 0, 420, 421, 422, 423, 424, 425, - 426, 427, 428, 429, 0, 0, 0, 0, 0, 0, - 0, 0, -279, 403, 404, 405, 406, 407, 408, 409, - 410, 411, 412, 413, 414, 0, 0, 0, 0, 415, - 416, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 418, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1028, + 0, 0, 119, 52, 0, 53, 54, 0, 0, 0, + 0, 0, 56, 0, 57, 58, 59, 0, 60, 61, + 62, 0, 63, 0, 0, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 0, 0, 0, 0, 0, 0, + 0, 14, 120, 108, 109, 17, 18, 0, 0, 0, + 0, 0, 110, 111, 112, 22, 23, 24, 25, 0, + 0, 113, 0, 0, 0, 0, 0, 0, 0, 0, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 0, + 39, 40, 41, 0, 0, 42, 0, 0, 43, 44, + 0, 224, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 419, 0, 420, 421, 422, 423, 424, - 425, 426, 427, 428, 429, 0, 0, 0, 0, 0, - 0, 0, 0, -280, 403, 404, 405, 406, 407, 408, - 409, 410, 411, 412, 413, 414, 0, 0, 0, 0, - 415, 416, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 418, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1066, 0, 0, 119, 52, 0, 53, + 54, 0, 0, 653, 654, 0, 56, 655, 57, 58, + 59, 0, 60, 61, 62, 0, 63, 0, 0, 0, + 0, 0, 174, 175, 176, 177, 178, 179, 180, 181, + 0, 0, 182, 183, 0, 0, 120, 0, 184, 185, + 186, 187, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 188, 189, 190, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 419, 0, 420, 421, 422, 423, - 424, 425, 426, 427, 428, 429, 0, 0, 0, 0, - 0, 0, 0, 0, -281, 403, 404, 405, 406, 407, - 408, 409, 410, 411, 412, 413, 414, 0, 0, 0, - 0, 415, 416, 0, 0, 0, 417, 0, 0, 0, - 0, 0, 0, 0, 418, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 191, 192, 193, 194, + 195, 196, 197, 198, 199, 200, 0, 201, 202, 662, + 663, 0, 0, 664, 0, 203, 275, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 174, 175, + 176, 177, 178, 179, 180, 181, 0, 0, 182, 183, + 0, 0, 0, 0, 184, 185, 186, 187, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 188, 189, + 190, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 419, 0, 420, 421, 422, - 423, 424, 425, 426, 427, 428, 429, 403, 404, 405, - 406, 407, 408, 409, 410, 411, 412, 413, 414, 0, - 0, 0, 0, 415, 416, 0, 0, 0, 500, 0, - 0, 0, 0, 0, 0, 0, 418, 0, 0, 0, + 0, 0, 191, 192, 193, 194, 195, 196, 197, 198, + 199, 200, 0, 201, 202, 683, 654, 0, 0, 684, + 0, 203, 275, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 174, 175, 176, 177, 178, 179, + 180, 181, 0, 0, 182, 183, 0, 0, 0, 0, + 184, 185, 186, 187, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 188, 189, 190, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 419, 0, 420, - 421, 422, 423, 424, 425, 426, 427, 428, 429, 403, - 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, - 414, 0, 0, 0, 0, 415, 416, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 418, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 191, 192, + 193, 194, 195, 196, 197, 198, 199, 200, 0, 201, + 202, 668, 663, 0, 0, 669, 0, 203, 275, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 419, - 0, 420, 421, 422, 423, 424, 425, 426, 427, 428, - 429, 403, 404, 405, 406, 407, 408, 409, 410, 411, - 412, -619, -619, 0, 0, 0, 0, 415, 416, 0, + 174, 175, 176, 177, 178, 179, 180, 181, 0, 0, + 182, 183, 0, 0, 0, 0, 184, 185, 186, 187, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 418, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 188, 189, 190, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 420, 421, 422, 423, 424, 425, 426, - 427, 428, 429 -}; - -static const yytype_int16 yycheck[] = -{ - 2, 285, 16, 17, 69, 27, 20, 66, 89, 28, - 2, 222, 4, 5, 6, 22, 14, 9, 10, 21, - 480, 13, 15, 15, 16, 17, 7, 378, 20, 4, - 28, 16, 17, 14, 118, 20, 7, 593, 489, 272, - 87, 88, 402, 314, 2, 56, 4, 28, 75, 82, - 442, 443, 54, 55, 307, 596, 52, 503, 311, 546, - 52, 507, 5, 6, 56, 29, 789, 21, 22, 54, - 13, 507, 74, 75, 66, 319, 69, 58, 16, 17, - 593, 691, 20, 331, 476, 477, 270, 58, 272, 761, - 82, 373, 702, 16, 10, 75, 668, 669, 105, 15, - 430, 26, 538, 25, 434, 432, 296, 437, 975, 111, - 25, 950, 394, 56, 26, 0, 54, 55, 60, 61, - 62, 63, 306, 90, 611, 117, 370, 119, 458, 62, - 457, 64, 65, 525, 16, 17, 26, 92, 20, 82, - 92, 25, 122, 473, 79, 475, 25, 474, 402, 113, - 57, 105, 58, 59, 484, 26, 483, 111, 112, 25, - 25, 25, 144, 16, 17, 332, 121, 20, 335, 121, - 337, 218, 339, 25, 341, 129, 138, 459, 90, 90, - 147, 57, 229, 116, 117, 1052, 213, 214, 442, 443, - 142, 126, 115, 105, 524, 118, 119, 753, 90, 90, - 90, 54, 55, 664, 1043, 593, 667, 90, 596, 399, - 766, 213, 214, 138, 121, 105, 92, 16, 210, 549, - 142, 548, 16, 146, 685, 148, 138, 142, 140, 442, - 443, 223, 224, 298, 214, 147, 147, 18, 91, 20, - 753, 306, 307, 290, 90, 121, 311, 819, 138, 972, - 140, 92, 975, 766, 144, 147, 147, 147, 142, 252, - 144, 242, 276, 142, 147, 144, 280, 138, 270, 316, - 272, 242, 55, 144, 276, 546, 142, 142, 142, 512, - 121, 314, 90, 955, 276, 92, 732, 733, 280, 90, - 142, 276, 284, 285, 121, 280, 288, 733, 90, 28, - 296, 147, 92, 295, 296, 298, 550, 223, 224, 557, - 797, 303, 92, 869, 121, 92, 115, 51, 90, 118, - 119, 115, 314, 879, 118, 119, 142, 511, 512, 1052, - 690, 121, 121, 105, 875, 69, 946, 295, 397, 147, - 611, 121, 280, 402, 121, 303, 147, 146, 142, 148, - 37, 38, 146, 142, 148, 147, 348, 349, 350, 351, - 352, 353, 354, 355, 810, 753, 103, 101, 102, 103, - 92, 314, 456, 348, 376, 147, 378, 92, 766, 808, - 90, 373, 324, 442, 443, 814, 846, 144, 555, 400, - 348, 128, 92, 121, 128, 353, 115, 679, 280, 118, - 119, 789, 394, 399, 125, 397, 121, 399, 400, 55, - 402, 51, 397, 805, 806, 55, 972, 402, 432, 811, - 812, 121, 773, 276, 608, 92, 121, 280, 115, 148, - 432, 118, 119, 349, 350, 351, 352, 147, 722, 92, - 432, 60, 142, 457, 63, 837, 838, 92, 840, 841, - 442, 443, 55, 434, 121, 457, 437, 400, 25, 146, - 474, 148, 142, 465, 20, 457, 142, 459, 460, 483, - 57, 801, 474, 803, 658, 802, 121, 458, 470, 501, - 434, 483, 474, 16, 101, 57, 478, 875, 107, 1045, - 743, 483, 101, 726, 475, 946, 488, 142, 92, 319, - 1056, 520, 948, 484, 458, 138, 717, 492, 790, 511, - 512, 544, 948, 546, 131, 132, 133, 510, 92, 521, - 804, 475, 520, 145, 92, 917, 797, 121, 115, 521, - 484, 118, 119, 717, 548, 817, 123, 121, 530, 520, - 891, 892, 726, 524, 92, 799, 548, 121, 940, 141, - 370, 805, 544, 121, 546, 55, 548, 811, 812, 74, - 75, 148, 139, 521, 556, 503, 37, 38, 549, 583, - 524, 142, 530, 121, 621, 58, 59, 861, 611, 432, - 862, 101, 115, 101, 972, 118, 119, 975, 602, 17, - 18, 583, 805, 57, 142, 549, 121, 535, 811, 812, - 121, 544, 604, 546, 457, 142, 121, 122, 92, 215, - 602, 887, 888, 146, 51, 148, 222, 639, 142, 611, - 90, 474, 855, 121, 122, 142, 2, 51, 4, 142, - 483, 142, 51, 9, 10, 105, 387, 121, 389, 15, - 16, 17, 121, 51, 20, 142, 928, 27, 655, 99, - 503, 15, 92, 259, 507, 13, 658, 664, 142, 121, - 667, 121, 664, 917, 1052, 667, 668, 669, 611, 90, - 140, 855, 26, 1024, 144, 16, 52, 147, 743, 63, - 15, 121, 535, 685, 105, 538, 678, 679, 690, 691, - 66, 145, 145, 695, 713, 548, 139, 142, 213, 214, - 702, 655, 142, 142, 917, 690, 142, 688, 142, 1039, - 664, 1038, 121, 667, 15, 713, 15, 688, 44, 140, - 712, 932, 15, 26, 726, 141, 147, 938, 141, 683, - 550, 685, 713, 18, 815, 115, 90, 141, 118, 119, - 799, 117, 348, 119, 141, 52, 805, 806, 139, 15, - 139, 105, 811, 812, 141, 44, 362, 139, 90, 579, - 1044, 142, 678, 57, 797, 142, 146, 586, 148, 142, - 142, 773, 142, 105, 380, 44, 596, 9, 10, 599, - 789, 15, 93, 15, 138, 14, 140, 90, 802, 15, - 144, 15, 145, 147, 732, 142, 712, 142, 790, 142, - 802, 15, 105, 142, 319, 797, 798, 799, 140, 142, - 802, 141, 119, 805, 806, 147, 139, 819, 15, 811, - 812, 139, 803, 15, 15, 817, 818, 829, 15, 115, - 832, 126, 118, 119, 210, 138, 142, 140, 126, 831, - 798, 144, 834, 55, 147, 15, 90, 223, 224, 803, - 55, 843, 844, 855, 797, 370, 142, 139, 917, 851, - 146, 105, 148, 51, 142, 53, 54, 55, 56, 142, - 862, 863, 810, 479, 480, 142, 9, 10, 142, 732, - 733, 69, 15, 16, 17, 117, 26, 20, 142, 891, - 892, 90, 142, 142, 15, 115, 140, 889, 118, 119, - 276, 144, 894, 147, 280, 144, 105, 115, 284, 285, - 118, 119, 288, 141, 47, 48, 49, 50, 834, 295, - 296, 54, 55, 142, 530, 917, 146, 303, 148, 13, - 26, 537, 521, 66, 67, 927, 928, 145, 146, 931, - 148, 140, 6, 935, 946, 992, 889, 1041, 147, 802, - 90, 894, 789, 1043, 142, 61, 90, 810, 64, 65, - 969, 816, 1040, 972, 254, 105, 975, 7, 977, 789, - 789, 105, 348, 349, 350, 351, 352, 353, 354, 355, - 90, 288, 586, 1005, 117, 969, 972, -1, 931, 296, - -1, 223, 224, -1, 90, 105, -1, 373, 138, -1, - 140, -1, -1, 995, 144, 997, 140, 147, 1000, 105, - 116, 117, -1, 147, -1, -1, 1025, -1, 394, -1, - -1, 397, 1024, 399, 1038, -1, 402, 778, 779, 780, - 140, 782, -1, 784, -1, 550, 1038, 147, 1040, 1041, - -1, -1, 138, 1052, 140, 1054, 1038, 1056, 144, 1058, - -1, 147, 284, 285, -1, 875, 432, 877, 1039, 115, - -1, 881, 118, 119, 90, -1, 442, 443, -1, 1078, - 51, 677, 53, 54, 55, 56, -1, -1, 90, 105, - -1, 457, -1, 459, 460, 1039, -1, -1, 69, 90, - 223, 224, 399, 105, 470, 948, 115, -1, 474, 118, - 119, -1, 478, -1, 105, 115, 300, 483, 118, 119, - 304, 717, 488, 62, 140, 64, 65, 349, 350, 351, - 352, 147, 354, 355, -1, 90, 90, 90, 140, 262, - 263, 264, 265, 953, 954, 147, 146, -1, 148, 140, - 105, 105, 105, 276, 90, 521, 147, 280, -1, -1, - 969, 284, 285, 972, 530, 975, 975, 977, 977, 105, - -1, 142, -1, 470, -1, -1, -1, 116, 117, 775, - -1, 478, 548, 90, -1, 140, 140, 140, -1, -1, - 556, 488, 147, 147, 147, 1038, -1, 61, 105, -1, - 64, 65, 1012, -1, 140, 1015, -1, 63, 64, 65, - -1, 147, 63, 64, 65, -1, 1025, 583, 959, 960, - 961, 962, -1, -1, -1, 821, 349, 350, 351, 352, - -1, 354, 355, 140, -1, -1, 602, 1047, 460, -1, - 147, -1, 1052, 1052, 1054, 1054, -1, 1056, 1058, 1058, - 846, 374, 116, 117, 40, 41, 42, 43, 44, 556, - 116, 117, 385, -1, -1, 116, 117, -1, 1078, 1078, - -1, 586, -1, -1, 397, 590, 63, 64, 65, 402, - 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, - 413, 414, 415, 416, -1, 418, 419, 420, 421, 422, - 423, 424, 425, 426, 427, 428, 429, 1048, -1, 432, - -1, -1, 678, 679, 63, 64, 65, -1, -1, 442, - 443, 63, 64, 65, 508, 63, 64, 65, -1, 116, - 117, 515, -1, -1, 457, -1, 932, 460, 63, 64, - 65, -1, 938, 527, -1, -1, 712, -1, -1, 472, - -1, 474, -1, 476, 477, 2, -1, 4, 5, 6, - 483, -1, -1, 63, 64, 65, 13, 116, 117, 492, - -1, -1, 495, 496, 116, 117, -1, 500, 116, 117, - 503, -1, 505, -1, 507, 508, 88, 89, -1, -1, - 101, 116, 117, -1, -1, -1, 580, 581, -1, 101, - -1, -1, 525, -1, -1, 52, -1, 896, 897, 56, - -1, -1, 535, -1, -1, 538, 116, 117, 129, 130, - 131, 132, 133, -1, 790, 548, 610, 129, 130, 131, - 132, 133, 798, 799, -1, 82, 802, -1, -1, 805, - 806, -1, 565, 566, -1, 811, 812, -1, -1, -1, - -1, 817, 818, -1, -1, -1, 678, -1, -1, -1, - 583, -1, -1, -1, -1, 831, -1, -1, 834, -1, - -1, -1, 119, -1, 789, 88, 89, 843, 844, 602, - -1, -1, 605, 972, -1, 851, -1, -1, 101, 51, - 712, 53, 54, 55, 56, -1, 862, 863, -1, -1, - -1, -1, -1, 687, -1, -1, 51, 69, 53, 54, - 55, 56, -1, 126, 127, 128, 129, 130, 131, 132, - 133, 818, -1, -1, 69, 2, -1, 4, 5, 6, - -1, -1, 94, -1, 831, -1, 13, -1, 100, 1028, - 1029, 1030, -1, 1032, 1033, -1, 843, 844, -1, 94, - -1, 917, -1, -1, 851, 678, -1, -1, 742, -1, - -1, 927, 928, 210, -1, -1, 863, 690, -1, 935, - 693, 694, -1, -1, -1, 52, -1, -1, 762, 56, - -1, 88, 89, 1072, 1073, 1074, 1075, -1, -1, 712, - -1, -1, -1, 1082, 101, -1, -1, -1, -1, -1, - 723, -1, -1, -1, -1, 82, -1, -1, -1, 732, - 733, -1, 834, -1, -1, 51, -1, 53, 54, 55, - 56, 128, 129, 130, 131, 132, 133, -1, -1, 995, - 927, 997, -1, 69, 1000, -1, -1, -1, 935, -1, - -1, 288, 119, -1, -1, -1, -1, -1, 295, 296, - -1, -1, -1, -1, 969, -1, 303, 972, 94, -1, - 975, -1, 977, -1, 100, -1, -1, 314, -1, -1, - 854, 794, 1038, -1, -1, -1, 799, 800, -1, 802, - -1, -1, 805, 806, -1, -1, 870, 810, 811, 812, - 51, -1, 53, 54, 55, 56, -1, -1, 995, -1, - 997, 348, -1, 1000, -1, -1, 353, -1, 69, -1, - 1025, 834, -1, -1, 837, 838, -1, 840, 841, -1, - -1, -1, -1, -1, -1, -1, 373, 850, -1, -1, - -1, 854, -1, 210, -1, -1, -1, 1052, -1, 1054, - -1, 1056, -1, 1058, -1, -1, -1, 394, 871, 872, - -1, -1, 399, 400, -1, 402, -1, -1, -1, -1, - -1, 884, 885, 1078, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 900, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 910, 911, 2, - -1, 4, 5, 6, 917, 442, 443, -1, -1, -1, - 13, -1, 51, -1, 53, 54, 55, 56, -1, -1, - -1, 288, 459, -1, -1, -1, -1, 940, 295, 296, - 69, -1, -1, 470, -1, 948, 303, -1, -1, -1, - -1, 478, -1, -1, 83, -1, -1, 314, -1, 52, - -1, 488, -1, 56, -1, 94, -1, -1, -1, -1, - -1, 100, 101, 102, 103, -1, -1, -1, -1, -1, - -1, -1, 51, -1, 53, 54, 55, 56, -1, 82, - -1, 348, 121, -1, 521, -1, 353, -1, -1, 128, - 69, -1, 131, 530, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 144, 373, 544, 51, 546, - 53, 54, 55, 56, -1, 94, 119, -1, -1, 556, - -1, 100, 101, 102, 103, 1038, 69, 394, 0, -1, - -1, -1, 399, 400, -1, 402, -1, -1, -1, -1, - 83, 13, 14, 15, 16, 17, 18, -1, 20, 128, - -1, 94, 131, -1, 26, 27, -1, 100, 101, 102, - 103, -1, -1, 142, -1, 37, 38, -1, 40, 41, - 42, 43, 44, -1, 611, 442, 443, -1, -1, -1, - -1, -1, -1, -1, -1, 128, -1, -1, 131, -1, - -1, -1, 459, -1, -1, -1, -1, -1, -1, -1, - -1, 144, -1, 470, -1, -1, -1, 210, -1, -1, - -1, 478, -1, -1, -1, 2, -1, 4, 90, -1, - -1, 488, -1, -1, -1, -1, 13, -1, -1, -1, - -1, -1, -1, 105, -1, -1, -1, -1, -1, -1, - -1, -1, 679, 115, -1, -1, 118, 119, -1, -1, - -1, -1, -1, 51, 521, 53, 54, 55, 56, -1, - -1, -1, -1, 530, -1, 52, 138, 139, -1, -1, - -1, 69, 144, 145, 146, 147, 148, 544, -1, 546, - -1, -1, -1, -1, -1, 288, -1, -1, -1, 556, - -1, -1, 295, 296, -1, -1, 94, -1, -1, -1, - 303, -1, 100, 101, 102, 103, -1, -1, -1, -1, - -1, 314, -1, 51, -1, 53, 54, 55, 56, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 128, 69, 119, 131, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 611, 348, 144, -1, -1, -1, - 353, -1, -1, 790, -1, -1, 94, -1, -1, -1, - 797, 798, 799, 101, 102, 103, -1, -1, 805, -1, - 373, -1, -1, -1, 811, 812, -1, -1, -1, -1, - 817, 818, -1, -1, 51, -1, 53, 54, 55, 56, - 128, 394, -1, -1, 831, -1, 399, 400, -1, 402, - -1, -1, 69, -1, -1, -1, 843, 844, -1, -1, - -1, -1, 679, -1, 851, -1, -1, -1, 85, -1, - -1, -1, -1, 210, -1, 862, 863, 94, -1, -1, - -1, -1, -1, 100, 101, 102, 103, -1, -1, 442, - 443, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 889, -1, -1, -1, 459, 894, -1, -1, - -1, 128, -1, -1, 131, -1, -1, 470, -1, -1, - -1, -1, -1, -1, -1, 478, -1, -1, -1, -1, - 917, -1, -1, -1, -1, 488, -1, -1, -1, -1, - 927, 928, -1, -1, 931, -1, -1, -1, 935, -1, - -1, 288, -1, -1, -1, -1, -1, -1, 295, 296, - -1, -1, -1, -1, -1, -1, 303, -1, 521, -1, - -1, -1, -1, 790, -1, -1, -1, 530, -1, -1, - 797, 798, 799, -1, -1, -1, -1, -1, 805, -1, - -1, 544, -1, 546, 811, 812, -1, -1, -1, -1, - 817, 818, -1, 556, -1, -1, -1, -1, 995, -1, - 997, 348, -1, 1000, 831, -1, 353, -1, -1, 2, - -1, 4, 5, 6, 7, -1, 843, 844, -1, -1, - 13, -1, -1, -1, 851, 51, 373, 53, 54, 55, - 56, -1, -1, -1, -1, 862, 863, -1, -1, -1, - -1, -1, -1, 69, -1, -1, -1, 394, 611, -1, - -1, -1, 399, -1, -1, 402, -1, -1, -1, 52, - -1, -1, 889, 56, -1, -1, -1, 894, 94, -1, - -1, -1, -1, -1, 100, 101, 102, 103, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 82, - 917, -1, -1, -1, -1, 442, 443, -1, -1, -1, - 927, 928, 128, -1, 931, 131, -1, -1, 935, -1, - -1, -1, 459, -1, 44, -1, 679, -1, -1, -1, - -1, -1, -1, 470, -1, -1, 119, -1, -1, -1, - -1, 478, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 488, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, -1, -1, -1, -1, 88, 89, - -1, -1, -1, -1, -1, -1, -1, -1, 995, -1, - 997, 101, -1, 1000, 521, -1, -1, -1, -1, -1, - -1, -1, -1, 530, -1, -1, -1, -1, -1, -1, - -1, -1, 122, -1, 124, 125, 126, 127, 128, 129, - 130, 131, 132, 133, -1, -1, -1, -1, -1, 556, - -1, -1, 142, -1, -1, -1, -1, 210, -1, -1, - 2, -1, 4, -1, -1, -1, -1, 790, -1, -1, - -1, -1, -1, -1, 797, 798, 799, -1, -1, -1, - -1, -1, 805, -1, -1, -1, -1, -1, 811, 812, - -1, -1, -1, -1, 817, 818, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 831, -1, - 52, -1, -1, 51, -1, 53, 54, 55, 56, -1, - 843, 844, -1, -1, -1, -1, -1, -1, 851, -1, - -1, 69, -1, -1, -1, 288, -1, -1, -1, 862, - 863, -1, 295, 296, -1, -1, -1, 85, -1, -1, - 303, -1, -1, -1, -1, -1, 94, -1, -1, -1, - -1, 314, 100, 101, 102, 103, 889, -1, -1, -1, - -1, 894, 679, -1, -1, -1, -1, 119, -1, 51, - -1, 53, 54, 55, 56, -1, -1, -1, -1, -1, - 128, -1, -1, 131, 917, 348, -1, 69, -1, -1, - 353, -1, -1, -1, 927, 928, -1, -1, 931, -1, - -1, -1, 935, 85, -1, -1, -1, -1, -1, -1, - 373, -1, 94, -1, -1, -1, -1, -1, 100, 101, - 102, 103, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 394, -1, -1, -1, -1, 399, 400, -1, -1, - -1, -1, -1, -1, -1, -1, 128, -1, -1, 131, - -1, -1, -1, -1, -1, -1, -1, -1, 210, -1, - -1, -1, 995, -1, 997, -1, -1, 1000, -1, -1, - -1, -1, -1, 790, -1, -1, -1, -1, -1, -1, - -1, 798, 799, -1, -1, -1, -1, -1, 805, -1, - -1, -1, -1, -1, 811, 812, 459, -1, -1, -1, - 817, 818, -1, -1, -1, -1, -1, 470, -1, -1, - -1, -1, -1, -1, 831, 478, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 488, 843, 844, -1, -1, - -1, -1, -1, -1, 851, -1, 288, -1, -1, -1, - -1, -1, -1, 295, 296, 862, 863, -1, -1, -1, - -1, 303, -1, -1, -1, -1, -1, -1, 521, -1, - -1, -1, -1, -1, -1, -1, -1, 530, -1, -1, - 72, 73, 74, 75, 76, 77, 78, -1, 80, 81, - -1, 544, -1, 546, -1, -1, 88, 89, -1, -1, - -1, -1, -1, 556, -1, -1, 348, -1, -1, 101, - 917, 353, -1, -1, -1, -1, -1, -1, -1, -1, - 927, 928, -1, -1, 931, -1, -1, -1, 935, -1, - 0, 373, 124, 125, 126, 127, 128, 129, 130, 131, - 132, 133, -1, 13, 14, 15, -1, 17, 18, -1, - 20, -1, 394, -1, -1, -1, 26, 399, 611, -1, - 402, -1, -1, -1, -1, -1, -1, 37, 38, -1, - 40, 41, 42, 43, 44, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 995, -1, - 997, -1, -1, 1000, -1, -1, -1, -1, -1, -1, - 442, 443, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, -1, -1, -1, 459, 88, 89, - 90, -1, 92, 93, -1, -1, 679, -1, 470, -1, - -1, 101, -1, -1, -1, 105, 478, -1, -1, -1, - -1, -1, -1, -1, -1, 115, 488, -1, 118, 119, - -1, 121, 122, -1, 124, 125, 126, 127, 128, 129, - 130, 131, 132, 133, -1, -1, -1, -1, 138, 139, - 140, 44, 142, -1, -1, 145, 146, 147, 148, 521, - -1, -1, -1, -1, -1, -1, -1, -1, 530, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 72, - 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, - 83, -1, -1, -1, 556, 88, 89, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 101, -1, - -1, -1, -1, -1, -1, -1, -1, 790, -1, -1, - -1, -1, -1, -1, 797, 798, -1, -1, -1, 122, - -1, 124, 125, 126, 127, 128, 129, 130, 131, 132, - 133, -1, -1, -1, 817, 818, -1, -1, -1, -1, - -1, -1, 72, 73, 74, 75, 76, 77, 831, 0, - 80, 81, -1, -1, -1, -1, -1, -1, 88, 89, - 843, 844, 13, 14, 15, -1, 17, 18, 851, 20, - -1, 101, -1, -1, -1, 26, -1, -1, -1, 862, - 863, -1, -1, -1, -1, -1, 37, 38, -1, 40, - 41, 42, 43, 44, 124, 125, 126, 127, 128, 129, - 130, 131, 132, 133, -1, -1, 889, 679, -1, -1, - -1, 894, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 0, 0, 0, 191, 192, 193, 194, 195, 196, + 197, 198, 199, 200, 0, 201, 202, 700, 654, 0, + 0, 701, 0, 203, 275, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 174, 175, 176, 177, + 178, 179, 180, 181, 0, 0, 182, 183, 0, 0, + 0, 0, 184, 185, 186, 187, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 188, 189, 190, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, + 0, 201, 202, 703, 663, 0, 0, 704, 0, 203, + 275, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 174, 175, 176, 177, 178, 179, 180, 181, + 0, 0, 182, 183, 0, 0, 0, 0, 184, 185, + 186, 187, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 188, 189, 190, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 191, 192, 193, 194, + 195, 196, 197, 198, 199, 200, 0, 201, 202, 710, + 654, 0, 0, 711, 0, 203, 275, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 174, 175, + 176, 177, 178, 179, 180, 181, 0, 0, 182, 183, + 0, 0, 0, 0, 184, 185, 186, 187, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 188, 189, + 190, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 191, 192, 193, 194, 195, 196, 197, 198, + 199, 200, 0, 201, 202, 713, 663, 0, 0, 714, + 0, 203, 275, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 174, 175, 176, 177, 178, 179, + 180, 181, 0, 0, 182, 183, 0, 0, 0, 0, + 184, 185, 186, 187, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 188, 189, 190, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 191, 192, + 193, 194, 195, 196, 197, 198, 199, 200, 0, 201, + 202, 750, 654, 0, 0, 751, 0, 203, 275, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 174, 175, 176, 177, 178, 179, 180, 181, 0, 0, + 182, 183, 0, 0, 0, 0, 184, 185, 186, 187, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 188, 189, 190, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 191, 192, 193, 194, 195, 196, + 197, 198, 199, 200, 0, 201, 202, 753, 663, 0, + 0, 754, 0, 203, 275, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 174, 175, 176, 177, + 178, 179, 180, 181, 0, 0, 182, 183, 0, 0, + 0, 0, 184, 185, 186, 187, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 188, 189, 190, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, + 0, 201, 202, 938, 654, 0, 0, 939, 0, 203, + 275, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 174, 175, 176, 177, 178, 179, 180, 181, + 0, 0, 182, 183, 0, 0, 0, 0, 184, 185, + 186, 187, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 188, 189, 190, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 191, 192, 193, 194, + 195, 196, 197, 198, 199, 200, 0, 201, 202, 941, + 663, 0, 0, 942, 0, 203, 275, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 174, 175, + 176, 177, 178, 179, 180, 181, 0, 0, 182, 183, + 0, 0, 0, 0, 184, 185, 186, 187, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 188, 189, + 190, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 191, 192, 193, 194, 195, 196, 197, 198, + 199, 200, 0, 201, 202, 1139, 654, 0, 0, 1140, + 0, 203, 275, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 174, 175, 176, 177, 178, 179, + 180, 181, 0, 0, 182, 183, 0, 0, 0, 0, + 184, 185, 186, 187, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 188, 189, 190, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 191, 192, + 193, 194, 195, 196, 197, 198, 199, 200, 0, 201, + 202, 1154, 654, 0, 0, 1155, 0, 203, 275, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 174, 175, 176, 177, 178, 179, 180, 181, 0, 0, + 182, 183, 0, 0, 0, 0, 184, 185, 186, 187, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 188, 189, 190, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 191, 192, 193, 194, 195, 196, + 197, 198, 199, 200, 0, 201, 202, 1157, 663, 0, + 0, 1158, 0, 203, 275, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 174, 175, 176, 177, + 178, 179, 180, 181, 0, 0, 182, 183, 0, 0, + 0, 0, 184, 185, 186, 187, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 188, 189, 190, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, + 0, 201, 202, 668, 663, 0, 0, 669, 0, 203, + 275, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 174, 175, 176, 177, 178, 179, 180, 181, + 0, 0, 182, 183, 0, 0, 0, 0, 184, 185, + 186, 187, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 188, 189, 190, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 804, 0, + 0, 0, 0, 0, 0, 0, 191, 192, 193, 194, + 195, 196, 197, 198, 199, 200, 0, 201, 202, 0, + 0, 0, 0, 0, 0, 203, 404, 405, 406, 407, + 408, 409, 410, 411, 412, 413, 414, 415, 0, 0, + 0, 0, 416, 417, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 419, 0, 0, 0, 0, + 905, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 420, 0, 421, 422, + 423, 424, 425, 426, 427, 428, 429, 430, 404, 405, + 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, + 0, 0, 0, 0, 416, 417, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 419, 0, 0, + 0, 0, 918, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 420, 0, + 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, + 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, + 414, 415, 0, 0, 0, 0, 416, 417, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 419, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 420, 0, 421, 422, 423, 424, 425, 426, 427, 428, + 429, 430, 404, 405, 406, 407, 408, 409, 410, 411, + 412, 413, 414, 415, 0, 0, 0, 0, 416, 417, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 419, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 420, 0, 421, 422, 423, 424, 425, 426, + 427, 428, 429, 430, 0, 0, 0, 0, 0, 0, + 0, 0, -280, 404, 405, 406, 407, 408, 409, 410, + 411, 412, 413, 414, 415, 0, 0, 0, 0, 416, + 417, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 419, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 420, 0, 421, 422, 423, 424, 425, + 426, 427, 428, 429, 430, 0, 0, 0, 0, 0, + 0, 0, 0, -282, 404, 405, 406, 407, 408, 409, + 410, 411, 412, 413, 414, 415, 0, 0, 0, 0, + 416, 417, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 419, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 420, 0, 421, 422, 423, 424, + 425, 426, 427, 428, 429, 430, 0, 0, 0, 0, + 0, 0, 0, 0, -283, 404, 405, 406, 407, 408, + 409, 410, 411, 412, 413, 414, 415, 0, 0, 0, + 0, 416, 417, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 419, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 420, 0, 421, 422, 423, + 424, 425, 426, 427, 428, 429, 430, 0, 0, 0, + 0, 0, 0, 0, 0, -285, 404, 405, 406, 407, + 408, 409, 410, 411, 412, 413, 414, 415, 0, 0, + 0, 0, 416, 417, 0, 0, 0, 501, 0, 0, + 0, 0, 0, 0, 0, 419, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 420, 0, 421, 422, + 423, 424, 425, 426, 427, 428, 429, 430, 404, 405, + 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, + 0, 0, 0, 0, 416, 417, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 419, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 420, 0, + 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, + 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, + -693, -693, 0, 0, 0, 0, 416, 417, 404, 405, + 406, 407, 408, 409, 0, 0, 412, 413, 0, 419, + 0, 0, 0, 0, 416, 417, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 419, 0, 0, + 0, 0, 421, 422, 423, 424, 425, 426, 427, 428, + 429, 430, 0, 0, 0, 0, 0, 0, 0, 0, + 421, 422, 423, 424, 425, 426, 427, 428, 429, 430 +}; + +static const yytype_int16 yycheck[] = +{ + 1, 26, 271, 8, 9, 86, 87, 27, 88, 14, + 378, 490, 21, 481, 13, 313, 284, 3, 6, 20, + 65, 118, 597, 814, 793, 13, 6, 9, 27, 820, + 4, 5, 14, 508, 671, 672, 55, 402, 12, 27, + 594, 640, 765, 81, 15, 16, 331, 68, 19, 433, + 504, 74, 53, 54, 508, 51, 1036, 306, 15, 16, + 594, 310, 19, 597, 539, 1011, 1, 815, 3, 57, + 221, 26, 73, 74, 458, 431, 318, 57, 967, 435, + 594, 55, 438, 25, 402, 34, 547, 0, 25, 14, + 74, 475, 26, 20, 21, 104, 53, 54, 144, 18, + 484, 20, 51, 459, 694, 295, 373, 81, 698, 25, + 111, 57, 117, 28, 25, 705, 443, 444, 474, 718, + 476, 59, 60, 61, 62, 443, 444, 394, 370, 485, + 25, 15, 16, 15, 16, 19, 217, 19, 122, 142, + 73, 74, 1, 68, 3, 4, 5, 228, 25, 8, + 9, 29, 613, 12, 25, 14, 15, 16, 1046, 1048, + 19, 138, 1142, 57, 51, 549, 55, 90, 387, 525, + 389, 53, 90, 332, 79, 121, 335, 104, 337, 25, + 339, 90, 341, 138, 111, 112, 51, 1133, 121, 122, + 213, 214, 51, 460, 550, 16, 55, 121, 92, 121, + 142, 92, 129, 757, 138, 142, 65, 144, 289, 399, + 144, 16, 213, 214, 851, 102, 770, 222, 223, 967, + 90, 126, 81, 757, 147, 34, 142, 121, 144, 147, + 214, 142, 144, 90, 315, 113, 770, 102, 147, 125, + 222, 223, 51, 757, 513, 90, 90, 142, 105, 547, + 92, 90, 139, 241, 1045, 90, 770, 55, 117, 793, + 119, 241, 737, 1151, 1033, 142, 105, 1036, 269, 60, + 271, 142, 63, 558, 275, 313, 297, 147, 283, 284, + 213, 214, 736, 737, 305, 306, 269, 93, 271, 310, + 147, 15, 16, 1016, 115, 19, 142, 118, 119, 295, + 1048, 140, 147, 147, 275, 144, 92, 278, 147, 551, + 115, 121, 147, 118, 119, 613, 107, 58, 59, 90, + 126, 278, 305, 90, 92, 146, 251, 148, 693, 53, + 54, 121, 142, 908, 25, 121, 92, 142, 90, 313, + 801, 146, 55, 148, 349, 350, 351, 352, 902, 354, + 355, 210, 397, 121, 27, 92, 142, 402, 912, 294, + 457, 1152, 348, 222, 223, 121, 90, 349, 350, 351, + 352, 92, 297, 1142, 908, 376, 147, 378, 92, 142, + 147, 400, 92, 92, 121, 318, 324, 51, 842, 53, + 54, 55, 56, 275, 278, 147, 278, 556, 443, 444, + 121, 20, 92, 399, 20, 69, 25, 121, 51, 777, + 878, 121, 121, 348, 103, 682, 275, 1007, 353, 278, + 667, 142, 806, 670, 283, 284, 400, 62, 287, 64, + 65, 121, 433, 142, 92, 294, 295, 370, 101, 128, + 51, 688, 115, 302, 55, 118, 119, 435, 18, 805, + 438, 807, 142, 840, 313, 25, 461, 458, 726, 846, + 142, 730, 433, 121, 57, 466, 129, 130, 131, 132, + 133, 459, 138, 146, 475, 148, 119, 502, 142, 1033, + 101, 116, 117, 484, 142, 803, 145, 458, 476, 348, + 349, 350, 351, 352, 353, 354, 355, 485, 747, 1033, + 141, 521, 1036, 801, 475, 58, 59, 545, 435, 547, + 837, 512, 513, 484, 373, 397, 843, 844, 587, 837, + 402, 522, 521, 121, 26, 843, 844, 794, 1007, 512, + 513, 1130, 459, 521, 1009, 394, 1126, 525, 397, 139, + 399, 400, 623, 402, 37, 38, 55, 504, 549, 476, + 142, 275, 16, 101, 278, 1009, 924, 925, 485, 101, + 92, 92, 550, 782, 783, 784, 92, 786, 836, 788, + 721, 545, 57, 547, 433, 613, 40, 41, 549, 536, + 92, 1135, 849, 121, 443, 444, 511, 522, 90, 121, + 121, 121, 1146, 1162, 1163, 121, 531, 142, 525, 458, + 121, 460, 461, 105, 51, 606, 92, 142, 1142, 121, + 142, 493, 471, 584, 17, 18, 475, 642, 551, 888, + 479, 92, 142, 550, 1193, 484, 894, 610, 895, 51, + 489, 1200, 1201, 604, 142, 121, 138, 51, 140, 613, + 99, 26, 144, 101, 287, 147, 142, 69, 975, 658, + 121, 115, 295, 142, 118, 119, 92, 975, 667, 302, + 661, 670, 13, 522, 115, 15, 667, 118, 119, 670, + 671, 672, 531, 131, 132, 133, 681, 121, 661, 101, + 102, 103, 146, 121, 148, 121, 545, 688, 547, 16, + 549, 115, 693, 694, 118, 119, 716, 698, 557, 681, + 1, 63, 3, 691, 705, 90, 128, 8, 9, 433, + 715, 691, 15, 14, 15, 16, 145, 716, 19, 986, + 105, 90, 40, 41, 793, 584, 747, 16, 716, 730, + 881, 658, 145, 715, 458, 1103, 105, 139, 721, 1123, + 667, 16, 142, 670, 142, 604, 142, 730, 920, 921, + 51, 475, 15, 138, 613, 140, 399, 90, 803, 686, + 484, 688, 147, 801, 65, 121, 122, 847, 1124, 15, + 142, 140, 105, 44, 26, 121, 777, 141, 147, 736, + 504, 141, 15, 18, 508, 26, 141, 141, 15, 141, + 26, 90, 837, 838, 139, 139, 139, 44, 843, 844, + 142, 1020, 1021, 1022, 1023, 806, 105, 140, 57, 57, + 142, 693, 536, 142, 147, 539, 117, 587, 119, 807, + 142, 591, 681, 682, 44, 549, 115, 801, 471, 118, + 119, 142, 55, 51, 51, 806, 479, 26, 90, 990, + 115, 140, 142, 118, 119, 996, 489, 142, 147, 90, + 851, 90, 142, 105, 90, 15, 715, 146, 14, 148, + 861, 866, 93, 864, 105, 15, 1134, 802, 15, 105, + 142, 146, 145, 148, 40, 41, 42, 43, 44, 1, + 807, 3, 4, 5, 866, 842, 138, 888, 140, 142, + 12, 142, 144, 51, 15, 147, 141, 138, 142, 140, + 142, 90, 138, 144, 140, 888, 147, 55, 144, 210, + 142, 147, 141, 90, 557, 139, 105, 142, 15, 1138, + 142, 222, 223, 924, 925, 139, 1077, 1078, 105, 51, + 975, 142, 142, 55, 15, 794, 15, 15, 139, 793, + 142, 126, 801, 802, 803, 90, 299, 806, 922, 138, + 303, 140, 126, 927, 62, 144, 64, 65, 147, 81, + 105, 1030, 55, 140, 1033, 55, 142, 1036, 139, 1038, + 147, 15, 115, 142, 275, 118, 119, 278, 837, 838, + 37, 38, 283, 284, 843, 844, 287, 1068, 90, 142, + 849, 850, 144, 294, 295, 140, 142, 119, 1003, 1004, + 142, 302, 147, 105, 863, 148, 1007, 866, 116, 117, + 142, 142, 736, 737, 142, 989, 875, 876, 142, 90, + 15, 1003, 1004, 793, 90, 884, 142, 1178, 51, 15, + 53, 54, 55, 56, 105, 1104, 895, 896, 140, 105, + 90, 144, 90, 1194, 1195, 147, 69, 348, 349, 350, + 351, 352, 353, 354, 355, 105, 141, 105, 115, 1084, + 142, 118, 119, 922, 814, 142, 15, 15, 927, 140, + 820, 40, 373, 1142, 140, 1144, 147, 1146, 1079, 1148, + 41, 147, 806, 61, 12, 522, 64, 65, 210, 146, + 140, 148, 140, 394, 443, 444, 397, 147, 399, 147, + 115, 402, 1103, 118, 119, 5, 793, 1133, 832, 1126, + 848, 1050, 964, 1182, 815, 1125, 975, 115, 842, 142, + 118, 119, 1123, 253, 1125, 1126, 985, 986, 477, 478, + 989, 146, 433, 148, 993, 6, 1124, 587, 116, 117, + 1030, 1033, 443, 444, 1003, 1004, 1005, -1, 146, -1, + 148, -1, 1123, 90, -1, -1, 509, 458, -1, 460, + 461, 1162, 1163, 516, -1, 287, 90, -1, 105, -1, + 471, -1, 294, 295, 475, 528, 1030, 526, 479, 1033, + 302, 105, 1036, 484, 1038, 1190, 1191, 90, 489, -1, + -1, 313, 1193, 943, -1, -1, 57, 1124, -1, 1200, + 1201, -1, 105, 140, 929, 930, -1, 850, 1190, 1191, + 147, -1, 1071, -1, 1073, 965, 140, 1076, -1, -1, + 863, 522, -1, 147, -1, -1, 348, -1, 581, 582, + 531, 353, 875, 876, 90, -1, -1, 140, -1, -1, + -1, 884, 61, -1, 147, 64, 65, -1, 549, 105, + 1104, 373, -1, 896, 115, -1, 557, 118, 119, 612, + 1030, -1, 123, 1033, 1123, -1, 1036, -1, 1038, 1128, + 1129, -1, 394, -1, -1, 215, -1, 399, 400, -1, + 402, 221, -1, 584, 140, 1009, -1, 148, 1142, -1, + 1144, 147, 1146, -1, 1148, 1045, 1046, 116, 117, -1, + 1050, -1, 51, 604, 53, 54, 55, 56, 1033, -1, + -1, 63, 64, 65, -1, -1, -1, -1, 258, -1, + 69, 443, 444, -1, -1, -1, -1, -1, 1182, -1, + 1189, 1190, 1191, -1, 1104, -1, -1, 690, 460, 1198, + 1199, -1, 985, -1, 115, 94, -1, 118, 119, 471, + 993, -1, 101, 102, 103, -1, -1, 479, -1, -1, + -1, -1, 1005, 318, 116, 117, -1, 489, 1118, -1, + -1, 142, 1142, -1, 1144, 146, 1146, 148, 1148, 128, + 681, 682, 1107, 1108, 1109, -1, 1111, 1112, 63, 64, + 65, 625, 51, 746, 53, 54, 55, 56, -1, 1123, + 522, 1151, 1152, 63, 64, 65, 640, -1, 348, 531, + 69, -1, 1182, 766, 715, 370, 63, 64, 65, -1, + -1, -1, 362, 545, -1, 547, -1, -1, 1071, -1, + 1073, -1, -1, 1076, -1, 557, 1186, -1, -1, -1, + 380, 116, 117, -1, 1169, 1170, 1171, 1172, -1, 1, + -1, 3, 4, 5, -1, -1, 116, 117, -1, -1, + 12, 63, 64, 65, 63, 64, 65, 1192, -1, 116, + 117, 13, 14, 15, -1, 17, 18, -1, 20, 63, + 64, 65, -1, 25, 718, 1128, 1129, -1, 837, 838, + -1, 613, -1, 794, 843, 844, -1, -1, -1, 51, + -1, 802, 803, 55, 51, 806, 53, 54, 55, 56, + 63, 64, 65, -1, 116, 117, -1, 116, 117, -1, + 869, 870, 69, 872, 873, -1, -1, -1, -1, 81, + -1, -1, 116, 117, 887, -1, 837, 838, -1, -1, + 480, 481, 843, 844, -1, -1, 1189, 94, 849, 850, + 903, -1, -1, 100, -1, 1198, 1199, -1, -1, -1, + 682, -1, 863, 116, 117, 866, -1, 119, -1, -1, + -1, -1, -1, 115, 875, 876, 118, 119, -1, 44, + 814, -1, -1, 884, -1, -1, 820, -1, -1, -1, + -1, 531, -1, -1, 895, 896, 551, 139, 538, -1, + -1, -1, -1, 145, 146, -1, 148, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 82, 83, -1, + -1, -1, -1, 88, 89, 580, 975, -1, 1, -1, + 3, 4, 5, -1, -1, -1, 101, -1, -1, 12, + -1, -1, 597, -1, -1, 600, -1, -1, -1, 998, + -1, 51, -1, 53, 54, 55, 56, 122, 210, 124, + 125, 126, 127, 128, 129, 130, 131, 132, 133, 69, + -1, 625, 794, -1, 975, 88, 89, 142, 51, 801, + 802, 803, 55, -1, 985, 986, 640, -1, 101, -1, + -1, -1, 993, 51, 94, 53, 54, 55, 56, -1, + 100, -1, 1003, 1004, 1005, -1, -1, -1, 81, 943, + -1, 69, -1, -1, -1, 837, 129, 130, 131, 132, + 133, 843, 844, -1, -1, -1, -1, 849, 850, -1, + 964, 965, -1, -1, -1, 287, 94, -1, -1, -1, + 680, 863, 294, 295, 88, 89, 119, -1, -1, -1, + 302, -1, -1, 875, 876, -1, -1, 101, -1, -1, + -1, 313, 884, -1, 718, -1, -1, -1, -1, -1, + 1071, -1, 1073, 895, 896, 1076, -1, -1, -1, 625, + -1, 721, 126, 127, 128, 129, 130, 131, 132, 133, + 88, 89, -1, -1, 640, -1, 348, -1, -1, -1, + 922, 353, -1, 101, -1, 927, -1, -1, -1, -1, + -1, 1045, 1046, -1, -1, -1, 1050, -1, -1, -1, + -1, 373, 1123, -1, -1, -1, -1, 1128, 1129, -1, + 128, 129, 130, 131, 132, 133, -1, 210, 793, 779, + -1, -1, 394, -1, -1, -1, -1, 399, 400, -1, + 402, -1, -1, 975, -1, -1, -1, -1, -1, 1, + 814, 3, -1, 985, 986, -1, 820, 989, -1, -1, + 12, 993, 718, -1, -1, -1, 1, -1, 3, 4, + 5, 6, -1, 1005, 1118, -1, -1, 12, 1189, 1190, + 1191, 443, 444, -1, -1, -1, 1130, 1198, 1199, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 460, 51, + -1, -1, -1, 853, 287, -1, -1, 1151, 1152, 471, + -1, 294, 295, -1, -1, -1, 51, 479, -1, 302, + 55, -1, -1, -1, -1, -1, -1, 489, 878, -1, + 313, 881, -1, -1, -1, -1, -1, -1, -1, 1071, + -1, 1073, 1186, 908, 1076, 910, 81, -1, -1, 914, + -1, -1, -1, -1, -1, -1, -1, -1, 814, -1, + 522, -1, -1, -1, 820, 348, -1, 119, -1, 531, + 353, -1, -1, -1, -1, -1, -1, -1, -1, 943, + -1, -1, -1, 545, 119, 547, -1, -1, -1, -1, + 373, -1, -1, -1, -1, 557, 1128, 1129, -1, -1, + 964, 965, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 394, -1, -1, -1, -1, 399, 400, -1, 402, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 990, -1, -1, -1, -1, -1, 996, -1, -1, 1014, + 1015, 613, -1, -1, -1, -1, -1, 1189, 210, -1, + 443, 444, -1, -1, -1, -1, 1198, 1199, 34, 35, + 36, 1036, -1, 1038, -1, 210, -1, 460, -1, -1, + -1, 1045, 1046, -1, -1, 51, 1050, 943, 471, 55, + -1, -1, 58, 59, 60, -1, 479, 63, -1, -1, + -1, -1, -1, -1, -1, -1, 489, -1, 964, 965, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 682, -1, -1, -1, -1, 91, 1091, 1077, 1078, 1094, + -1, 97, 98, -1, 100, 287, -1, -1, -1, 522, + 106, 107, 294, 295, -1, -1, -1, -1, 531, -1, + 302, -1, 287, -1, 1118, -1, -1, -1, -1, 294, + 295, 127, 545, -1, 547, -1, 1130, 302, 134, -1, + -1, -1, 1137, -1, 557, 141, -1, 1142, 313, 1144, + -1, -1, -1, 1148, -1, -1, -1, 1151, 1152, 1045, + 1046, -1, -1, -1, 1050, -1, 348, -1, -1, -1, + -1, 353, -1, -1, -1, -1, 51, -1, 53, 54, + 55, 56, -1, 348, -1, -1, -1, 1182, 353, -1, + -1, 373, 1186, -1, 69, -1, -1, -1, 1178, -1, + 613, -1, 794, -1, -1, -1, -1, -1, 373, 801, + 802, 803, 394, -1, 1194, 1195, -1, 399, -1, 94, + 402, -1, -1, -1, -1, 100, 101, 102, 103, 394, + -1, -1, 1118, -1, 399, 400, 51, -1, 53, 54, + 55, 56, -1, -1, 1130, 837, -1, -1, -1, -1, + -1, 843, 844, 128, 69, -1, 131, 849, 850, -1, + -1, 443, 444, -1, -1, 1151, 1152, 142, 83, 682, + -1, 863, -1, -1, -1, -1, -1, -1, 460, 94, + -1, -1, -1, 875, 876, 100, 101, 102, 103, 471, + -1, -1, 884, -1, -1, 460, -1, 479, -1, -1, + 1186, -1, -1, 895, 896, -1, 471, 489, -1, -1, + -1, -1, -1, 128, 479, -1, 131, -1, 51, -1, + 53, 54, 55, 56, 489, -1, -1, -1, -1, 144, + 922, -1, -1, -1, -1, 927, 69, -1, -1, -1, + 522, -1, -1, -1, -1, -1, -1, -1, -1, 531, + -1, -1, -1, -1, -1, -1, -1, 522, -1, -1, + -1, 94, -1, -1, -1, -1, 531, 100, 101, 102, + 103, -1, -1, -1, -1, 557, -1, -1, -1, -1, + 545, 794, 547, 975, -1, -1, -1, -1, 801, 802, + 803, -1, 557, 985, 986, 128, -1, 989, 131, -1, + -1, 993, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 144, -1, 1005, -1, -1, -1, 51, -1, 53, + 54, 55, 56, -1, 837, -1, -1, -1, -1, -1, + 843, 844, 34, 35, 36, 69, 849, 850, -1, -1, + 1, -1, 3, -1, -1, -1, -1, -1, 613, 51, + 863, 85, -1, 55, -1, -1, 58, 59, 60, -1, + 94, 63, 875, 876, -1, -1, 100, 101, 102, 103, + 51, 884, 53, 54, 55, 56, -1, -1, -1, 1071, + -1, 1073, 895, 896, 1076, -1, -1, -1, 69, 91, + 51, -1, -1, -1, 128, 97, 98, 131, 100, -1, + 682, -1, -1, -1, 106, 107, -1, -1, -1, 922, + -1, -1, -1, 94, 927, -1, -1, 682, -1, 100, + 101, 102, 103, -1, -1, 127, 0, -1, -1, -1, + -1, -1, 134, -1, -1, -1, 1128, 1129, -1, 13, + 14, 15, 16, 17, 18, -1, 20, 128, -1, -1, + 131, 25, 26, 27, -1, -1, -1, -1, 119, -1, + -1, -1, 975, 37, 38, -1, 40, 41, 42, 43, + 44, -1, 985, 986, -1, -1, 989, -1, -1, -1, + 993, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 1005, -1, -1, -1, -1, 1189, -1, -1, + -1, -1, -1, -1, -1, -1, 1198, 1199, -1, -1, + -1, -1, 794, -1, -1, -1, 90, -1, -1, -1, + 802, 803, -1, -1, -1, -1, -1, -1, -1, 794, + -1, 105, -1, -1, -1, -1, 801, 802, -1, -1, + -1, 115, -1, -1, 118, 119, -1, -1, -1, 210, + -1, -1, -1, -1, -1, 837, -1, -1, 1071, -1, + 1073, 843, 844, 1076, 138, 139, -1, 849, 850, -1, + 144, 145, 146, 147, 148, -1, -1, -1, -1, -1, + -1, 863, -1, -1, 849, 850, -1, -1, -1, -1, + -1, -1, -1, 875, 876, -1, -1, -1, 863, -1, + -1, -1, 884, -1, -1, 15, 16, -1, -1, 19, + 875, 876, -1, 895, 896, 1128, 1129, -1, -1, 884, + -1, -1, -1, -1, -1, -1, 287, -1, -1, -1, + 895, 896, -1, 294, 295, -1, 46, 47, 48, 49, + -1, 302, -1, 53, 54, 51, -1, 53, 54, 55, + 56, -1, -1, -1, -1, 65, 66, 922, -1, -1, + -1, -1, 927, 69, -1, 51, -1, 53, 54, 55, + 56, -1, -1, -1, -1, -1, 1189, -1, -1, 85, + -1, -1, -1, 69, -1, 1198, 1199, 348, 94, -1, + -1, -1, 353, 975, 100, 101, 102, 103, -1, 85, + -1, -1, -1, 985, 986, -1, -1, 989, 94, -1, + -1, 993, 373, -1, 100, 101, 102, 103, -1, -1, + 985, 986, 128, 1005, 989, 131, -1, -1, 993, 34, + 35, 36, -1, 394, -1, -1, -1, -1, 399, -1, + 1005, 402, 128, -1, -1, 131, 51, -1, -1, -1, + 55, -1, -1, 58, 59, 60, -1, -1, 63, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 443, 444, -1, -1, 91, -1, -1, 1071, + -1, 1073, 97, 98, 1076, 100, -1, 102, -1, 460, + -1, 106, 107, -1, -1, -1, 1071, -1, 1073, -1, + 471, 1076, -1, -1, -1, 34, 35, 36, 479, -1, + -1, -1, 127, -1, -1, -1, -1, -1, 489, 134, + -1, -1, 51, -1, -1, -1, 55, -1, -1, 58, + 59, 60, -1, -1, 63, -1, 1128, 1129, -1, -1, + -1, 261, 262, 263, 264, -1, -1, -1, -1, -1, + -1, 522, -1, 1128, 1129, 275, -1, -1, 278, -1, + 531, -1, 91, -1, -1, -1, -1, -1, 97, 98, + 51, -1, 53, 54, 55, 56, -1, 106, 107, -1, + -1, -1, -1, -1, -1, -1, 557, -1, 69, -1, + -1, -1, -1, -1, -1, -1, -1, 1189, 127, -1, + -1, -1, 83, -1, -1, 134, 1198, 1199, -1, -1, + -1, -1, -1, 94, 1189, -1, -1, -1, -1, 100, + 101, 102, 103, 1198, 1199, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 121, -1, -1, -1, -1, -1, -1, 128, -1, -1, + 131, -1, -1, -1, 374, -1, -1, -1, -1, -1, + -1, -1, -1, 144, -1, 385, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 397, -1, -1, + -1, -1, 402, -1, 404, 405, 406, 407, 408, 409, + 410, 411, 412, 413, 414, 415, 416, 417, -1, 419, + 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, + 430, 682, -1, 433, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 443, 444, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 458, -1, + -1, -1, -1, 72, 73, 74, 75, 76, 77, 78, + -1, 80, 81, 473, -1, 475, -1, 477, 478, 88, + 89, -1, -1, -1, 484, -1, -1, -1, -1, -1, + -1, -1, 101, 493, -1, -1, 496, 497, -1, -1, + -1, 501, -1, -1, 504, -1, 506, -1, 508, 509, + -1, -1, -1, -1, -1, 124, 125, 126, 127, 128, + 129, 130, 131, 132, 133, -1, 526, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 536, -1, -1, 539, + -1, -1, -1, 794, -1, -1, -1, -1, -1, 549, + -1, 802, 803, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 566, 567, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 584, -1, 837, -1, -1, -1, + -1, -1, 843, 844, -1, -1, -1, -1, 849, 850, + -1, -1, -1, -1, 604, -1, -1, 607, -1, -1, + -1, -1, 863, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 875, 876, -1, -1, -1, -1, + -1, -1, -1, 884, -1, -1, -1, -1, -1, -1, + 0, -1, -1, -1, 895, 896, -1, -1, -1, -1, + -1, -1, -1, 13, 14, 15, 16, 17, 18, -1, + 20, -1, -1, -1, -1, 25, 26, 27, 28, -1, + -1, -1, -1, -1, -1, -1, -1, 37, 38, -1, + 40, 41, 42, 43, 44, -1, -1, -1, -1, -1, + -1, -1, -1, 693, -1, -1, 696, 697, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 975, -1, -1, 727, 88, 89, + 90, -1, -1, 93, 985, 986, 736, 737, -1, 99, + -1, 101, 993, -1, -1, 105, -1, -1, -1, -1, + -1, -1, -1, -1, 1005, 115, -1, -1, 118, 119, + 25, -1, 122, -1, 124, 125, 126, 127, 128, 129, + 130, 131, 132, 133, -1, -1, -1, -1, 138, 139, + 140, 141, 142, -1, 144, 145, 146, 147, 148, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 798, -1, + -1, -1, -1, 803, 804, -1, 806, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 82, 83, -1, + 1071, -1, 1073, 88, 89, 1076, -1, -1, 93, -1, + -1, -1, -1, -1, -1, -1, 101, 837, 838, -1, + -1, -1, 842, 843, 844, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 122, -1, 124, + 125, 126, 127, 128, 129, 130, 131, 132, 133, 869, + 870, -1, 872, 873, -1, -1, -1, 1128, 1129, -1, + -1, -1, -1, 883, -1, -1, -1, 887, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 72, 73, + 74, 75, 76, 77, 904, 905, 80, 81, -1, -1, + -1, -1, -1, -1, 88, 89, -1, 917, 918, -1, + -1, -1, -1, -1, -1, -1, -1, 101, -1, -1, + -1, -1, -1, 933, -1, -1, -1, -1, 1189, -1, + -1, -1, -1, -1, -1, -1, -1, 1198, 1199, -1, + 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, + -1, -1, -1, -1, -1, -1, -1, -1, 968, 969, + -1, -1, -1, -1, -1, 975, -1, -1, -1, -1, + 0, 1, -1, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, -1, -1, -1, -1, -1, 998, 19, + -1, 21, 22, 23, 24, -1, -1, -1, -1, 1009, + 30, 31, 32, 33, 34, 35, 36, -1, -1, 39, + -1, -1, -1, -1, -1, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, -1, 58, 59, + 60, -1, -1, 63, -1, -1, 66, 67, -1, 69, + 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 84, 85, -1, -1, -1, -1, + -1, 91, -1, -1, 94, 95, -1, 97, 98, -1, + 100, -1, -1, -1, 104, -1, 106, 107, 108, -1, + 110, 111, 112, 0, 114, 115, -1, -1, 118, 119, + -1, -1, -1, -1, -1, -1, 13, 14, 15, 16, + 17, 18, -1, 20, 134, 135, 136, -1, 25, -1, + 27, 28, 29, 1123, -1, -1, 146, -1, 148, -1, + 37, 38, -1, 40, 41, 42, 43, 44, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 57, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 72, 73, 74, 75, 76, + 77, 78, 79, 80, 81, 82, 83, -1, -1, -1, + -1, 88, 89, 90, -1, -1, 93, -1, -1, -1, + -1, -1, 99, -1, 101, -1, -1, -1, 105, -1, + -1, -1, -1, -1, -1, -1, 113, -1, 115, -1, + -1, 118, 119, -1, -1, 122, 123, 124, 125, 126, + 127, 128, 129, 130, 131, 132, 133, -1, -1, 0, + -1, -1, 139, 140, 141, 142, -1, -1, 145, 146, + 147, 148, 13, 14, 15, 16, 17, 18, -1, 20, + -1, -1, -1, -1, 25, -1, 27, 28, -1, -1, + -1, -1, -1, -1, -1, -1, 37, 38, -1, 40, + 41, 42, 43, 44, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 57, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, -1, -1, -1, -1, 88, 89, 90, - -1, 92, 93, -1, 927, 928, -1, -1, 931, -1, - 101, -1, 935, -1, 105, -1, -1, -1, -1, -1, + -1, 92, 93, -1, -1, -1, -1, -1, 99, -1, + 101, -1, -1, -1, 105, -1, -1, -1, -1, -1, -1, -1, -1, -1, 115, -1, -1, 118, 119, -1, 121, 122, -1, 124, 125, 126, 127, 128, 129, 130, - 131, 132, 133, -1, -1, -1, -1, 138, 139, 140, - -1, 142, -1, -1, 145, 146, 147, 148, -1, -1, + 131, 132, 133, -1, -1, 0, -1, -1, 139, 140, + 141, 142, -1, -1, 145, 146, 147, 148, 13, 14, + 15, 16, 17, 18, -1, 20, -1, -1, -1, -1, + 25, 26, 27, 28, -1, -1, -1, -1, -1, -1, + -1, -1, 37, 38, -1, 40, 41, 42, 43, 44, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 995, -1, 997, -1, -1, 1000, 790, -1, - -1, -1, -1, -1, -1, -1, 798, 799, -1, -1, - -1, 51, 52, 805, -1, 55, -1, -1, -1, 811, - 812, -1, -1, -1, -1, 817, 818, -1, -1, -1, - 70, 71, 72, 73, 74, 75, 76, 77, -1, 831, - 80, 81, -1, -1, -1, -1, 86, 87, 88, 89, - -1, 843, 844, -1, -1, -1, -1, -1, -1, 851, - 100, 101, 102, -1, -1, -1, -1, -1, -1, -1, - 862, 863, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 124, 125, 126, 127, 128, 129, - 130, 131, 132, 133, -1, 135, 136, 72, 73, 74, - 75, 76, 77, 143, 144, 80, 81, -1, -1, -1, - -1, -1, -1, 88, 89, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 917, 101, -1, -1, -1, - -1, -1, -1, -1, -1, 927, 928, -1, -1, -1, - -1, -1, -1, 935, -1, -1, -1, -1, -1, 124, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 82, 83, -1, + -1, -1, -1, 88, 89, 90, -1, -1, 93, -1, + -1, -1, -1, -1, 99, -1, 101, -1, -1, -1, + 105, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 115, -1, -1, 118, 119, -1, -1, 122, -1, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, -1, - -1, -1, -1, -1, -1, 0, 1, -1, 3, 4, - 5, 6, 7, 8, 9, 10, 11, 12, -1, -1, - -1, -1, -1, -1, 19, -1, 21, 22, 23, 24, - -1, -1, -1, -1, -1, 30, 31, 32, 33, 34, - 35, 36, -1, 995, 39, 997, -1, -1, 1000, -1, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, - 55, 56, -1, 58, 59, 60, -1, -1, 63, -1, - -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 84, - 85, -1, -1, -1, -1, -1, 91, -1, -1, 94, - 95, -1, 97, 98, -1, 100, -1, -1, -1, 104, - -1, 106, 107, 108, -1, 110, 111, 112, 0, 114, - 115, -1, -1, 118, 119, -1, -1, -1, -1, -1, - -1, 13, 14, 15, 16, 17, 18, -1, 20, 134, - 135, 136, -1, -1, -1, 27, 28, 29, -1, -1, - -1, 146, -1, 148, -1, 37, 38, -1, 40, 41, - 42, 43, 44, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 57, -1, -1, -1, -1, + -1, 0, -1, 138, 139, 140, 141, 142, -1, 144, + 145, 146, 147, 148, 13, 14, 15, 16, 17, 18, + -1, 20, -1, -1, -1, -1, 25, -1, 27, 28, + -1, -1, -1, -1, -1, -1, -1, -1, 37, 38, + -1, 40, 41, 42, 43, 44, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, - 82, 83, -1, -1, -1, -1, 88, 89, 90, -1, - -1, 93, -1, -1, -1, -1, -1, 99, -1, 101, - -1, -1, -1, 105, -1, -1, -1, -1, -1, -1, - -1, 113, -1, 115, -1, -1, 118, 119, -1, -1, - 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, - 132, 133, -1, -1, 0, -1, -1, 139, 140, 141, - 142, -1, -1, 145, 146, 147, 148, 13, 14, 15, - 16, 17, 18, -1, 20, -1, -1, -1, -1, -1, - 26, 27, 28, -1, -1, -1, -1, -1, -1, -1, - -1, 37, 38, -1, 40, 41, 42, 43, 44, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 72, 73, 74, 75, 76, 77, 78, + 79, 80, 81, 82, 83, -1, -1, -1, -1, 88, + 89, 90, -1, -1, 93, -1, -1, -1, -1, -1, + 99, -1, 101, -1, -1, -1, 105, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 115, -1, -1, 118, + 119, -1, -1, 122, -1, 124, 125, 126, 127, 128, + 129, 130, 131, 132, 133, -1, -1, 0, -1, -1, + 139, 140, 141, 142, -1, 144, 145, 146, 147, 148, + 13, 14, 15, -1, 17, 18, -1, 20, -1, -1, + -1, -1, 25, 26, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 37, 38, -1, 40, 41, 42, + 43, 44, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 72, 73, 74, 75, - 76, 77, 78, 79, 80, 81, 82, 83, -1, -1, - -1, -1, 88, 89, 90, -1, -1, 93, -1, -1, - -1, -1, -1, 99, -1, 101, -1, -1, -1, 105, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 115, - -1, -1, 118, 119, -1, -1, 122, -1, 124, 125, - 126, 127, 128, 129, 130, 131, 132, 133, -1, -1, - 0, -1, 138, 139, 140, 141, 142, -1, 144, 145, - 146, 147, 148, 13, 14, 15, 16, 17, 18, -1, - 20, -1, -1, -1, -1, -1, -1, 27, 28, -1, - -1, -1, -1, -1, -1, -1, -1, 37, 38, -1, - 40, 41, 42, 43, 44, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 57, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 72, + 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, + 83, -1, -1, -1, -1, 88, 89, 90, -1, 92, + 93, -1, -1, -1, -1, -1, -1, -1, 101, -1, + -1, -1, 105, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 115, -1, -1, 118, 119, -1, 121, 122, + -1, 124, 125, 126, 127, 128, 129, 130, 131, 132, + 133, -1, -1, 0, -1, 138, 139, 140, -1, 142, + -1, -1, 145, 146, 147, 148, 13, 14, 15, -1, + 17, 18, -1, 20, -1, -1, -1, -1, 25, 26, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, -1, -1, -1, -1, 88, 89, - 90, -1, 92, 93, -1, -1, -1, -1, -1, 99, - -1, 101, -1, -1, -1, 105, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 115, -1, -1, 118, 119, - -1, 121, 122, -1, 124, 125, 126, 127, 128, 129, - 130, 131, 132, 133, -1, -1, 0, -1, -1, 139, - 140, 141, 142, -1, -1, 145, 146, 147, 148, 13, - 14, 15, 16, 17, 18, -1, 20, -1, -1, -1, - -1, -1, 26, 27, 28, -1, -1, -1, -1, -1, - -1, -1, -1, 37, 38, -1, 40, 41, 42, 43, - 44, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 37, 38, -1, 40, 41, 42, 43, 44, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 72, 73, - 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, - -1, -1, -1, -1, 88, 89, 90, -1, -1, 93, - -1, -1, -1, -1, -1, 99, -1, 101, -1, -1, - -1, 105, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 115, -1, -1, 118, 119, -1, -1, 122, -1, - 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, - -1, -1, 0, -1, 138, 139, 140, 141, 142, -1, - 144, 145, 146, 147, 148, 13, 14, 15, 16, 17, - 18, -1, 20, -1, -1, -1, -1, -1, -1, 27, - 28, -1, -1, -1, -1, -1, -1, -1, -1, 37, - 38, -1, 40, 41, 42, 43, 44, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 72, 73, 74, 75, 76, + 77, 78, 79, 80, 81, 82, 83, -1, -1, -1, + -1, 88, 89, 90, -1, 92, 93, -1, -1, -1, + -1, -1, -1, -1, 101, -1, -1, -1, 105, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 115, -1, + -1, 118, 119, -1, 121, 122, -1, 124, 125, 126, + 127, 128, 129, 130, 131, 132, 133, -1, -1, 0, + -1, 138, 139, 140, -1, 142, -1, -1, 145, 146, + 147, 148, 13, 14, 15, -1, 17, 18, -1, 20, + -1, -1, -1, -1, 25, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 37, 38, -1, 40, + 41, 42, 43, 44, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 72, 73, 74, 75, 76, 77, - 78, 79, 80, 81, 82, 83, -1, -1, -1, -1, - 88, 89, 90, -1, -1, 93, -1, -1, -1, -1, - -1, 99, -1, 101, -1, -1, -1, 105, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 115, -1, -1, - 118, 119, -1, -1, 122, -1, 124, 125, 126, 127, - 128, 129, 130, 131, 132, 133, -1, -1, 0, -1, - -1, 139, 140, 141, 142, -1, 144, 145, 146, 147, - 148, 13, 14, 15, -1, 17, 18, -1, 20, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 37, 38, -1, 40, 41, - 42, 43, 44, -1, -1, -1, -1, -1, -1, -1, + -1, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, -1, -1, -1, -1, 88, 89, 90, + -1, 92, 93, -1, -1, -1, -1, -1, -1, -1, + 101, -1, -1, -1, 105, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 115, -1, -1, 118, 119, -1, + 121, 122, -1, 124, 125, 126, 127, 128, 129, 130, + 131, 132, 133, -1, -1, 0, -1, -1, 139, 140, + -1, 142, -1, -1, 145, 146, 147, 148, 13, 14, + 15, -1, 17, 18, -1, 20, -1, -1, -1, -1, + 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 37, 38, -1, 40, 41, 42, 43, 44, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, - 82, 83, -1, -1, -1, -1, 88, 89, 90, -1, - 92, 93, -1, -1, -1, -1, -1, -1, -1, 101, - -1, -1, -1, 105, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 115, -1, -1, 118, 119, -1, 121, - 122, -1, 124, 125, 126, 127, 128, 129, 130, 131, - 132, 133, -1, -1, 0, -1, -1, 139, 140, -1, - 142, -1, -1, 145, 146, 147, 148, 13, 14, 15, - -1, 17, 18, -1, 20, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 82, 83, -1, + -1, -1, -1, 88, 89, 90, -1, 92, 93, -1, + -1, -1, -1, -1, -1, -1, 101, -1, -1, -1, + 105, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 115, -1, -1, 118, 119, -1, 121, 122, -1, 124, + 125, 126, 127, 128, 129, 130, 131, 132, 133, -1, + -1, -1, -1, -1, 139, 140, -1, 142, -1, -1, + 145, 146, 147, 148, 1, -1, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 13, 14, 15, -1, + -1, 18, 19, -1, 21, 22, 23, 24, -1, -1, + -1, -1, -1, 30, 31, 32, 33, 34, 35, 36, + -1, -1, 39, -1, -1, -1, -1, -1, 45, -1, + 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, + -1, 58, 59, 60, -1, -1, 63, -1, -1, 66, + 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 84, 85, -1, + -1, -1, -1, -1, 91, -1, -1, 94, 95, -1, + 97, 98, -1, 100, -1, -1, -1, 104, -1, 106, + 107, 108, -1, 110, 111, 112, -1, 114, 115, -1, + -1, 118, 119, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 134, 135, 136, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 146, + 1, 148, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, -1, -1, 15, -1, 17, 18, 19, -1, + 21, 22, 23, 24, -1, -1, -1, -1, -1, 30, + 31, 32, 33, 34, 35, 36, -1, -1, 39, -1, + -1, -1, -1, -1, 45, -1, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, -1, 58, 59, 60, + -1, -1, 63, -1, -1, 66, 67, -1, 69, 70, + 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 84, 85, -1, -1, -1, -1, -1, + 91, -1, -1, 94, 95, -1, 97, 98, -1, 100, + -1, -1, -1, 104, -1, 106, 107, 108, -1, 110, + 111, 112, -1, 114, 115, -1, -1, 118, 119, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 134, 135, 136, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 146, 1, 148, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, -1, -1, + 15, -1, -1, 18, 19, 20, 21, 22, 23, 24, + -1, -1, -1, -1, -1, 30, 31, 32, 33, 34, + 35, 36, -1, -1, 39, -1, -1, -1, -1, -1, + 45, -1, 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, -1, 58, 59, 60, -1, -1, 63, -1, + -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 84, + 85, -1, -1, -1, -1, -1, 91, -1, -1, 94, + 95, -1, 97, 98, -1, 100, -1, -1, -1, 104, + -1, 106, 107, 108, -1, 110, 111, 112, -1, 114, + 115, -1, -1, 118, 119, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 134, + 135, 136, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 146, 1, 148, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, -1, -1, 15, -1, -1, 18, + 19, -1, 21, 22, 23, 24, 25, -1, -1, -1, + -1, 30, 31, 32, 33, 34, 35, 36, -1, -1, + 39, -1, -1, -1, -1, -1, 45, -1, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, -1, 58, + 59, 60, -1, -1, 63, -1, -1, 66, 67, -1, + 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 84, 85, -1, -1, -1, + -1, -1, 91, -1, -1, 94, 95, -1, 97, 98, + -1, 100, -1, -1, -1, 104, -1, 106, 107, 108, + -1, 110, 111, 112, -1, 114, 115, -1, -1, 118, + 119, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 134, 135, 136, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 146, 1, 148, + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + -1, -1, 15, -1, -1, 18, 19, -1, 21, 22, + 23, 24, -1, -1, -1, -1, -1, 30, 31, 32, + 33, 34, 35, 36, -1, -1, 39, -1, -1, -1, + -1, -1, 45, -1, 47, 48, 49, 50, 51, 52, + 53, 54, 55, 56, -1, 58, 59, 60, -1, -1, + 63, -1, -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 37, 38, -1, 40, 41, 42, 43, 44, -1, + -1, 84, 85, -1, -1, -1, -1, -1, 91, -1, + -1, 94, 95, -1, 97, 98, -1, 100, -1, -1, + -1, 104, -1, 106, 107, 108, -1, 110, 111, 112, + -1, 114, 115, -1, -1, 118, 119, 1, -1, 3, + 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, + -1, 134, 135, 136, -1, 19, -1, 21, 22, 23, + 24, -1, -1, 146, -1, 148, 30, 31, 32, 33, + 34, 35, 36, -1, -1, 39, -1, -1, -1, -1, + -1, 45, 46, 47, 48, 49, 50, 51, 52, 53, + 54, 55, 56, -1, 58, 59, 60, -1, -1, 63, + -1, -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 84, 85, -1, -1, -1, -1, -1, 91, -1, -1, + 94, 95, -1, 97, 98, -1, 100, -1, -1, -1, + 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, + 114, 115, -1, -1, 118, 119, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 72, 73, 74, 75, - 76, 77, 78, 79, 80, 81, 82, 83, -1, -1, - -1, -1, 88, 89, 90, -1, 92, 93, -1, -1, - -1, -1, -1, -1, -1, 101, -1, -1, -1, 105, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 115, - -1, -1, 118, 119, -1, 121, 122, -1, 124, 125, - 126, 127, 128, 129, 130, 131, 132, 133, -1, -1, - -1, -1, -1, 139, 140, -1, 142, -1, -1, 145, - 146, 147, 148, 1, -1, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, -1, -1, - 18, 19, -1, 21, 22, 23, 24, -1, -1, -1, + 134, 135, 136, -1, -1, 139, -1, -1, -1, -1, + -1, -1, 146, 1, 148, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, -1, 14, 15, -1, -1, + -1, 19, -1, 21, 22, 23, 24, -1, -1, -1, -1, -1, 30, 31, 32, 33, 34, 35, 36, -1, -1, 39, -1, -1, -1, -1, -1, 45, -1, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, -1, @@ -4303,12 +5476,45 @@ static const yytype_int16 yycheck[] = -1, -1, -1, 91, -1, -1, 94, 95, -1, 97, 98, -1, 100, -1, -1, -1, 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, 114, 115, -1, -1, - 118, 119, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 134, 135, 136, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 146, 1, - 148, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 12, -1, -1, 15, -1, 17, 18, 19, -1, 21, - 22, 23, 24, -1, -1, -1, -1, -1, 30, 31, + 118, 119, 1, -1, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, -1, -1, 134, 135, 136, -1, + 19, -1, 21, 22, 23, 24, -1, -1, 146, -1, + 148, 30, 31, 32, 33, 34, 35, 36, -1, -1, + 39, -1, -1, -1, -1, -1, 45, -1, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, -1, 58, + 59, 60, -1, -1, 63, -1, -1, 66, 67, -1, + 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 84, 85, -1, -1, -1, + -1, -1, 91, -1, -1, 94, 95, -1, 97, 98, + -1, 100, -1, -1, -1, 104, -1, 106, 107, 108, + -1, 110, 111, 112, -1, 114, 115, -1, -1, 118, + 119, 1, -1, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, -1, -1, 134, 135, 136, -1, 19, + -1, 21, 22, 23, 24, -1, 145, 146, -1, 148, + 30, 31, 32, 33, 34, 35, 36, -1, -1, 39, + -1, -1, -1, -1, -1, 45, -1, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, -1, 58, 59, + 60, -1, -1, 63, -1, -1, 66, 67, -1, 69, + 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 84, 85, -1, -1, -1, -1, + -1, 91, -1, -1, 94, 95, -1, 97, 98, -1, + 100, -1, -1, -1, 104, -1, 106, 107, 108, -1, + 110, 111, 112, -1, 114, 115, -1, -1, 118, 119, + 1, -1, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, -1, -1, 134, 135, 136, -1, 19, -1, + 21, 22, 23, 24, -1, 145, 146, -1, 148, 30, + 31, 32, 33, 34, 35, 36, -1, -1, 39, -1, + -1, -1, -1, -1, 45, -1, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, -1, 58, 59, 60, + -1, -1, 63, -1, -1, 66, 67, -1, 69, 70, + 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 84, 85, -1, -1, -1, -1, -1, + 91, -1, -1, 94, 95, -1, 97, 98, -1, 100, + -1, -1, -1, 104, -1, 106, 107, 108, -1, 110, + 111, 112, -1, 114, 115, -1, -1, 118, 119, 1, + -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, -1, -1, 134, 135, 136, -1, 19, -1, 21, + 22, 23, 24, -1, 145, 146, -1, 148, 30, 31, 32, 33, 34, 35, 36, -1, -1, 39, -1, -1, -1, -1, -1, 45, -1, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, -1, 58, 59, 60, -1, @@ -4319,10 +5525,10 @@ static const yytype_int16 yycheck[] = -1, -1, 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, 114, 115, -1, -1, 118, 119, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 134, 135, 136, -1, -1, -1, -1, -1, + -1, -1, 134, 135, 136, -1, -1, 139, -1, -1, -1, -1, -1, -1, 146, 1, 148, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1, 15, - -1, -1, 18, 19, 20, 21, 22, 23, 24, -1, + -1, -1, -1, 19, -1, 21, 22, 23, 24, -1, -1, -1, -1, -1, 30, 31, 32, 33, 34, 35, 36, -1, -1, 39, -1, -1, -1, -1, -1, 45, -1, 47, 48, 49, 50, 51, 52, 53, 54, 55, @@ -4335,9 +5541,9 @@ static const yytype_int16 yycheck[] = -1, -1, 118, 119, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 134, 135, 136, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 146, 1, 148, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, -1, -1, 15, -1, -1, 18, 19, - -1, 21, 22, 23, 24, -1, -1, -1, -1, -1, + 146, -1, 148, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, -1, 17, 18, 19, + 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 30, 31, 32, 33, 34, 35, 36, -1, -1, 39, -1, -1, -1, -1, -1, 45, -1, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, -1, 58, 59, @@ -4347,9 +5553,9 @@ static const yytype_int16 yycheck[] = -1, 91, -1, -1, 94, 95, -1, 97, 98, -1, 100, -1, -1, -1, 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, 114, 115, -1, -1, 118, 119, - 1, -1, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, -1, -1, 134, 135, 136, -1, 19, -1, - 21, 22, 23, 24, -1, -1, 146, -1, 148, 30, + -1, -1, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, -1, -1, 134, 135, 136, -1, 19, 139, + 21, 22, 23, 24, -1, 145, 146, -1, 148, 30, 31, 32, 33, 34, 35, 36, -1, -1, 39, -1, -1, -1, -1, -1, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, -1, 58, 59, 60, @@ -4359,743 +5565,662 @@ static const yytype_int16 yycheck[] = 91, -1, -1, 94, 95, -1, 97, 98, -1, 100, -1, -1, -1, 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, 114, 115, -1, -1, 118, 119, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 134, 135, 136, -1, -1, 139, -1, - -1, -1, -1, -1, -1, 146, 1, 148, 3, 4, - 5, 6, 7, 8, 9, 10, 11, 12, -1, 14, - 15, -1, -1, -1, 19, -1, 21, 22, 23, 24, - -1, -1, -1, -1, -1, 30, 31, 32, 33, 34, - 35, 36, -1, -1, 39, -1, -1, -1, -1, -1, - 45, -1, 47, 48, 49, 50, 51, 52, 53, 54, - 55, 56, -1, 58, 59, 60, -1, -1, 63, -1, - -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 84, - 85, -1, -1, -1, -1, -1, 91, -1, -1, 94, - 95, -1, 97, 98, -1, 100, -1, -1, -1, 104, - -1, 106, 107, 108, -1, 110, 111, 112, -1, 114, - 115, -1, -1, 118, 119, 1, -1, 3, 4, 5, - 6, 7, 8, 9, 10, 11, 12, -1, -1, 134, - 135, 136, -1, 19, -1, 21, 22, 23, 24, -1, - -1, 146, -1, 148, 30, 31, 32, 33, 34, 35, - 36, -1, -1, 39, -1, -1, -1, -1, -1, 45, - -1, 47, 48, 49, 50, 51, 52, 53, 54, 55, - 56, -1, 58, 59, 60, -1, -1, 63, -1, -1, - 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 84, 85, - -1, -1, -1, -1, -1, 91, -1, -1, 94, 95, - -1, 97, 98, -1, 100, -1, -1, -1, 104, -1, - 106, 107, 108, -1, 110, 111, 112, -1, 114, 115, - -1, -1, 118, 119, 1, -1, 3, 4, 5, 6, - 7, 8, 9, 10, 11, 12, -1, -1, 134, 135, - 136, -1, 19, -1, 21, 22, 23, 24, -1, 145, - 146, -1, 148, 30, 31, 32, 33, 34, 35, 36, - -1, -1, 39, -1, -1, -1, -1, -1, 45, -1, - 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, - -1, 58, 59, 60, -1, -1, 63, -1, -1, 66, - 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 84, 85, -1, - -1, -1, -1, -1, 91, -1, -1, 94, 95, -1, - 97, 98, -1, 100, -1, -1, -1, 104, -1, 106, - 107, 108, -1, 110, 111, 112, -1, 114, 115, -1, - -1, 118, 119, 1, -1, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, -1, -1, 134, 135, 136, - -1, 19, -1, 21, 22, 23, 24, -1, 145, 146, - -1, 148, 30, 31, 32, 33, 34, 35, 36, -1, - -1, 39, -1, -1, -1, -1, -1, 45, -1, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, -1, - 58, 59, 60, -1, -1, 63, -1, -1, 66, 67, - -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 84, 85, -1, -1, - -1, -1, -1, 91, -1, -1, 94, 95, -1, 97, - 98, -1, 100, -1, -1, -1, 104, -1, 106, 107, - 108, -1, 110, 111, 112, -1, 114, 115, -1, -1, - 118, 119, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 134, 135, 136, -1, - -1, 139, -1, -1, -1, -1, -1, -1, 146, 1, - 148, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 12, -1, -1, 15, -1, -1, -1, 19, -1, 21, - 22, 23, 24, -1, -1, -1, -1, -1, 30, 31, + -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, + -1, -1, -1, 134, 135, 136, -1, 19, -1, 21, + 22, 23, 24, -1, -1, 146, -1, 148, 30, 31, 32, 33, 34, 35, 36, -1, -1, 39, -1, -1, - -1, -1, -1, 45, -1, 47, 48, 49, 50, 51, + -1, -1, -1, -1, -1, -1, 48, 49, 50, 51, 52, 53, 54, 55, 56, -1, 58, 59, 60, -1, -1, 63, -1, -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 84, 85, -1, -1, -1, -1, -1, 91, - -1, -1, 94, 95, -1, 97, 98, -1, 100, -1, + -1, -1, 94, 95, -1, 97, 98, -1, -1, -1, -1, -1, 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, 114, 115, -1, -1, 118, 119, -1, -1, - 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, -1, -1, 134, 135, 136, -1, 19, -1, 21, 22, 23, 24, -1, -1, 146, -1, 148, 30, 31, 32, 33, 34, 35, 36, -1, -1, 39, -1, -1, -1, - -1, -1, 45, 46, 47, 48, 49, 50, 51, 52, + -1, -1, -1, -1, -1, 48, 49, 50, 51, 52, 53, 54, 55, 56, -1, 58, 59, 60, -1, -1, 63, -1, -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 84, 85, -1, -1, -1, -1, -1, 91, -1, - -1, 94, 95, -1, 97, 98, -1, 100, -1, -1, + -1, 94, 95, -1, 97, 98, -1, -1, -1, -1, -1, 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, 114, 115, -1, -1, 118, 119, -1, -1, 3, - 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, + 4, 5, 6, 7, 8, 9, 10, 11, -1, -1, -1, 134, 135, 136, -1, 19, -1, 21, 22, 23, 24, -1, -1, 146, -1, 148, 30, 31, 32, 33, 34, 35, 36, -1, -1, 39, -1, -1, -1, -1, - -1, 45, -1, 47, 48, 49, 50, 51, 52, 53, + -1, -1, -1, -1, 48, 49, 50, 51, 52, 53, 54, 55, 56, -1, 58, 59, 60, -1, -1, 63, -1, -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 84, 85, -1, -1, -1, -1, -1, 91, -1, -1, - 94, 95, -1, 97, 98, -1, 100, -1, -1, -1, + 94, 95, -1, 97, 98, -1, -1, -1, -1, -1, 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, - 114, 115, -1, -1, 118, 119, -1, -1, 3, 4, - 5, 6, 7, 8, 9, 10, 11, -1, -1, -1, - 134, 135, 136, -1, 19, -1, 21, 22, 23, 24, - -1, -1, 146, -1, 148, 30, 31, 32, 33, 34, - 35, 36, -1, -1, 39, -1, -1, -1, -1, -1, - -1, -1, -1, 48, 49, 50, 51, 52, 53, 54, - 55, 56, -1, 58, 59, 60, -1, -1, 63, -1, - -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 84, - 85, -1, -1, -1, -1, -1, 91, -1, -1, 94, - 95, -1, 97, 98, -1, -1, -1, -1, -1, 104, - -1, 106, 107, 108, -1, 110, 111, 112, -1, 114, - 115, -1, -1, 118, 119, -1, -1, 3, 4, 5, - 6, 7, 8, 9, 10, 11, -1, -1, -1, 134, - 135, 136, -1, 19, -1, 21, 22, 23, 24, -1, - -1, 146, -1, 148, 30, 31, 32, 33, 34, 35, + 114, 115, -1, -1, 118, 119, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 134, 135, 136, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 148, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, -1, + -1, -1, 30, 31, 32, 33, 34, 35, 36, 37, + 38, 39, -1, -1, -1, -1, -1, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 69, 70, 71, 72, 73, 74, 75, 76, 77, + -1, -1, 80, 81, -1, -1, -1, -1, 86, 87, + 88, 89, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 100, 101, 102, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 124, 125, 126, 127, + 128, 129, 130, 131, 132, 133, -1, 135, 136, -1, + -1, -1, -1, -1, -1, 143, 144, 3, 4, 5, + 6, 7, 8, 9, 10, 11, -1, -1, -1, -1, + -1, -1, -1, 19, -1, 21, 22, 23, 24, -1, + 26, -1, -1, -1, 30, 31, 32, 33, 34, 35, 36, -1, -1, 39, -1, -1, -1, -1, -1, -1, -1, -1, 48, 49, 50, 51, 52, 53, 54, 55, 56, -1, 58, 59, 60, -1, -1, 63, -1, -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 84, 85, -1, -1, -1, -1, -1, 91, -1, -1, 94, 95, - -1, 97, 98, -1, -1, -1, -1, -1, 104, -1, - 106, 107, 108, -1, 110, 111, 112, -1, 114, 115, - -1, -1, 118, 119, -1, -1, 3, 4, 5, 6, - 7, 8, 9, 10, 11, -1, -1, -1, 134, 135, - 136, -1, 19, -1, 21, 22, 23, 24, -1, -1, - 146, -1, 148, 30, 31, 32, 33, 34, 35, 36, - -1, -1, 39, -1, -1, -1, -1, -1, -1, -1, - -1, 48, 49, 50, 51, 52, 53, 54, 55, 56, - -1, 58, 59, 60, -1, -1, 63, -1, -1, 66, - 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 84, 85, -1, - -1, -1, -1, -1, 91, -1, -1, 94, 95, -1, - 97, 98, -1, -1, -1, -1, -1, 104, -1, 106, - 107, 108, -1, 110, 111, 112, -1, 114, 115, -1, - -1, 118, 119, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 134, 135, 136, + -1, 97, 98, -1, 100, -1, 102, 103, 104, -1, + 106, 107, 108, -1, 110, 111, 112, -1, 114, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 148, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, 26, -1, -1, -1, 30, - 31, 32, 33, 34, 35, 36, 37, 38, 39, -1, - -1, -1, -1, -1, 45, 46, 47, 48, 49, 50, - 51, 52, 53, 54, 55, 56, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 70, - 71, 72, 73, 74, 75, 76, 77, -1, -1, 80, - 81, -1, -1, -1, -1, 86, 87, 88, 89, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 100, - 101, 102, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 124, 125, 126, 127, 128, 129, 130, - 131, 132, 133, -1, 135, 136, -1, -1, -1, -1, - -1, -1, 143, 144, 3, 4, 5, 6, 7, 8, - 9, 10, 11, -1, -1, -1, -1, -1, -1, -1, - 19, -1, 21, 22, 23, 24, -1, 26, -1, -1, - -1, 30, 31, 32, 33, 34, 35, 36, -1, -1, - 39, -1, -1, -1, -1, -1, -1, -1, -1, 48, - 49, 50, 51, 52, 53, 54, 55, 56, -1, 58, - 59, 60, -1, -1, 63, -1, -1, 66, 67, -1, - 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 84, 85, -1, -1, -1, - -1, -1, 91, -1, -1, 94, 95, -1, 97, 98, - -1, 100, -1, 102, 103, 104, -1, 106, 107, 108, - -1, 110, 111, 112, -1, 114, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 134, 135, + 136, -1, 138, -1, -1, -1, -1, -1, 144, 3, + 4, 5, 6, 7, 8, 9, 10, 11, -1, -1, + -1, -1, -1, -1, -1, 19, -1, 21, 22, 23, + 24, -1, 26, -1, -1, -1, 30, 31, 32, 33, + 34, 35, 36, -1, -1, 39, -1, -1, -1, -1, + -1, -1, -1, -1, 48, 49, 50, 51, 52, 53, + 54, 55, 56, -1, 58, 59, 60, -1, -1, 63, + -1, -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 134, 135, 136, -1, 138, - -1, -1, -1, -1, -1, 144, 3, 4, 5, 6, - 7, 8, 9, 10, 11, -1, -1, -1, -1, -1, - -1, -1, 19, -1, 21, 22, 23, 24, -1, 26, + 84, 85, -1, -1, -1, -1, -1, 91, -1, -1, + 94, 95, -1, 97, 98, -1, 100, -1, 102, 103, + 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, + 114, -1, -1, -1, -1, -1, -1, 3, 4, 5, + 6, 7, 8, 9, 10, 11, -1, -1, -1, -1, + 134, 135, 136, 19, 138, 21, 22, 23, 24, -1, + 144, -1, -1, -1, 30, 31, 32, 33, 34, 35, + 36, -1, -1, 39, -1, -1, -1, -1, -1, -1, + -1, -1, 48, 49, 50, 51, 52, 53, 54, 55, + 56, -1, 58, 59, 60, -1, -1, 63, -1, -1, + 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 84, 85, + -1, -1, -1, -1, -1, 91, 92, -1, 94, 95, + -1, 97, 98, -1, 100, -1, 102, 103, 104, -1, + 106, 107, 108, -1, 110, 111, 112, -1, 114, -1, + -1, -1, -1, -1, -1, 121, 3, 4, 5, 6, + 7, 8, 9, 10, 11, -1, -1, -1, 134, 135, + 136, -1, 19, -1, 21, 22, 23, 24, 144, -1, -1, -1, -1, 30, 31, 32, 33, 34, 35, 36, -1, -1, 39, -1, -1, -1, -1, -1, -1, -1, -1, 48, 49, 50, 51, 52, 53, 54, 55, 56, -1, 58, 59, 60, -1, -1, 63, -1, -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 84, 85, -1, - -1, -1, -1, -1, 91, -1, -1, 94, 95, -1, + -1, -1, -1, -1, 91, 92, -1, 94, 95, -1, 97, 98, -1, 100, -1, 102, 103, 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, 114, -1, -1, - -1, -1, -1, -1, 3, 4, 5, 6, 7, 8, - 9, 10, 11, -1, -1, -1, -1, 134, 135, 136, - 19, 138, 21, 22, 23, 24, -1, 144, -1, -1, - -1, 30, 31, 32, 33, 34, 35, 36, -1, -1, - 39, -1, -1, -1, -1, -1, -1, -1, -1, 48, - 49, 50, 51, 52, 53, 54, 55, 56, -1, 58, - 59, 60, -1, -1, 63, -1, -1, 66, 67, -1, - 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 84, 85, -1, -1, -1, - -1, -1, 91, 92, -1, 94, 95, -1, 97, 98, - -1, 100, -1, 102, 103, 104, -1, 106, 107, 108, - -1, 110, 111, 112, -1, 114, -1, -1, -1, -1, - -1, -1, 121, 3, 4, 5, 6, 7, 8, 9, - 10, 11, -1, -1, -1, 134, 135, 136, -1, 19, - -1, 21, 22, 23, 24, 144, -1, -1, -1, -1, + -1, -1, -1, -1, 121, 3, 4, 5, 6, 7, + 8, 9, 10, 11, -1, -1, -1, 134, 135, 136, + -1, 19, -1, 21, 22, 23, 24, 144, -1, -1, + -1, -1, 30, 31, 32, 33, 34, 35, 36, -1, + -1, 39, -1, -1, -1, -1, -1, -1, -1, -1, + 48, 49, 50, 51, 52, 53, 54, 55, 56, -1, + 58, 59, 60, -1, -1, 63, -1, -1, 66, 67, + -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 84, 85, -1, -1, + -1, -1, -1, 91, -1, -1, 94, 95, -1, 97, + 98, -1, 100, -1, 102, 103, 104, -1, 106, 107, + 108, -1, 110, 111, 112, -1, 114, -1, -1, -1, + -1, -1, -1, 3, 4, 5, 6, 7, 8, 9, + 10, 11, -1, -1, -1, -1, 134, 135, 136, 19, + -1, 21, 22, 23, 24, -1, 144, -1, -1, -1, 30, 31, 32, 33, 34, 35, 36, -1, -1, 39, -1, -1, -1, -1, -1, -1, -1, -1, 48, 49, 50, 51, 52, 53, 54, 55, 56, -1, 58, 59, 60, -1, -1, 63, -1, -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 84, 85, -1, -1, -1, -1, - -1, 91, 92, -1, 94, 95, -1, 97, 98, -1, + -1, 91, -1, -1, 94, 95, -1, 97, 98, -1, 100, -1, 102, 103, 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, 114, -1, -1, -1, -1, -1, - -1, 121, 3, 4, 5, 6, 7, 8, 9, 10, - 11, -1, -1, -1, 134, 135, 136, -1, 19, -1, - 21, 22, 23, 24, 144, -1, -1, -1, -1, 30, - 31, 32, 33, 34, 35, 36, -1, -1, 39, -1, - -1, -1, -1, -1, -1, -1, -1, 48, 49, 50, - 51, 52, 53, 54, 55, 56, -1, 58, 59, 60, - -1, -1, 63, -1, -1, 66, 67, -1, 69, 70, - 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 84, 85, -1, -1, -1, -1, -1, - 91, -1, -1, 94, 95, -1, 97, 98, -1, 100, - -1, 102, 103, 104, -1, 106, 107, 108, -1, 110, - 111, 112, -1, 114, -1, -1, -1, -1, -1, -1, - 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, - -1, -1, -1, 134, 135, 136, 19, -1, 21, 22, - 23, 24, -1, 144, -1, -1, -1, 30, 31, 32, - 33, 34, 35, 36, -1, -1, 39, -1, -1, -1, - -1, -1, -1, -1, -1, 48, 49, 50, 51, 52, - 53, 54, 55, 56, -1, 58, 59, 60, -1, -1, - 63, -1, -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 84, 85, -1, -1, -1, -1, -1, 91, -1, - -1, 94, 95, -1, 97, 98, -1, 100, -1, 102, - 103, 104, -1, 106, 107, 108, -1, 110, 111, 112, - -1, 114, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 134, 135, 136, -1, -1, -1, -1, -1, -1, - -1, 144, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, 26, -1, -1, -1, 30, - 31, 32, 33, 34, 35, 36, 37, 38, 39, -1, - -1, -1, -1, -1, 45, 46, 47, 48, 49, 50, - 51, 52, 53, 54, 55, 56, -1, -1, -1, -1, - -1, -1, 63, -1, -1, -1, -1, -1, -1, 70, - 71, 72, 73, 74, 75, 76, 77, -1, -1, 80, - 81, -1, -1, -1, -1, 86, 87, 88, 89, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 100, - 101, 102, -1, -1, -1, -1, 107, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 124, 125, 126, 127, 128, 129, 130, - 131, 132, 133, -1, 135, 136, -1, -1, -1, -1, - -1, -1, 143, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, -1, -1, -1, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - -1, -1, -1, -1, -1, 45, 46, 47, 48, 49, - 50, 51, 52, 53, 54, 55, 56, -1, -1, -1, - -1, -1, -1, 63, -1, -1, -1, -1, -1, -1, - 70, 71, 72, 73, 74, 75, 76, 77, -1, -1, - 80, 81, -1, -1, -1, -1, 86, 87, 88, 89, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 100, 101, 102, -1, -1, -1, -1, 107, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 124, 125, 126, 127, 128, 129, - 130, 131, 132, 133, -1, 135, 136, -1, -1, -1, - -1, -1, -1, 143, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, -1, -1, - -1, 30, 31, 32, 33, 34, 35, 36, 37, 38, - 39, -1, -1, -1, -1, -1, 45, 46, 47, 48, - 49, 50, 51, 52, -1, -1, 55, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 70, 71, 72, 73, 74, 75, 76, 77, -1, - -1, 80, 81, -1, -1, -1, -1, 86, 87, 88, - 89, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 100, 101, 102, -1, -1, -1, 106, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 124, 125, 126, 127, 128, - 129, 130, 131, 132, 133, -1, 135, 136, -1, -1, - -1, -1, -1, -1, 143, 3, 4, 5, 6, 7, + -1, -1, -1, -1, 134, 135, 136, -1, -1, -1, + -1, -1, -1, -1, 144, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, -1, -1, -1, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, -1, -1, -1, -1, -1, 45, 46, 47, - 48, 49, 50, 51, 52, -1, -1, 55, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 70, 71, 72, 73, 74, 75, 76, 77, + 48, 49, 50, 51, 52, 53, 54, 55, 56, -1, + -1, -1, -1, -1, -1, 63, -1, -1, -1, -1, + -1, 69, 70, 71, 72, 73, 74, 75, 76, 77, -1, -1, 80, 81, -1, -1, -1, -1, 86, 87, 88, 89, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 100, 101, 102, -1, -1, -1, -1, -1, + -1, -1, 100, 101, 102, -1, -1, -1, -1, 107, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, -1, 135, 136, -1, -1, -1, -1, -1, -1, 143, 3, 4, 5, 6, - 7, 8, 9, 10, 11, -1, -1, -1, -1, -1, - -1, -1, 19, -1, 21, 22, 23, 24, -1, -1, + 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, -1, -1, -1, 30, 31, 32, 33, 34, 35, 36, - -1, -1, 39, -1, -1, -1, -1, -1, -1, -1, - -1, 48, 49, 50, 51, 52, 53, 54, 55, 56, - -1, 58, 59, 60, -1, -1, 63, -1, -1, 66, - 67, -1, 69, -1, -1, -1, -1, -1, -1, -1, + 37, 38, 39, -1, -1, -1, -1, -1, 45, 46, + 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, + -1, -1, -1, -1, -1, -1, 63, -1, -1, -1, + -1, -1, -1, 70, 71, 72, 73, 74, 75, 76, + 77, -1, -1, 80, 81, -1, -1, -1, -1, 86, + 87, 88, 89, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 100, 101, 102, -1, -1, -1, -1, + 107, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 124, 125, 126, + 127, 128, 129, 130, 131, 132, 133, -1, 135, 136, + -1, -1, -1, -1, -1, -1, 143, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + 26, -1, -1, -1, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, -1, -1, -1, -1, -1, 45, + 46, 47, 48, 49, 50, 51, 52, -1, -1, 55, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 91, -1, -1, 94, 95, -1, - 97, 98, -1, -1, -1, -1, -1, 104, -1, 106, - 107, 108, -1, 110, 111, 112, -1, 114, -1, -1, - 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, - -1, -1, -1, -1, -1, -1, 19, 134, 21, 22, - 23, 24, -1, -1, -1, 142, -1, 30, 31, 32, - 33, 34, 35, 36, -1, -1, 39, -1, -1, -1, - -1, -1, -1, -1, -1, 48, 49, 50, 51, 52, - 53, 54, 55, 56, -1, 58, 59, 60, -1, -1, - 63, -1, -1, 66, 67, -1, 69, -1, -1, -1, + -1, -1, -1, -1, 70, 71, 72, 73, 74, 75, + 76, 77, -1, -1, 80, 81, -1, -1, -1, -1, + 86, 87, 88, 89, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 100, 101, 102, -1, -1, -1, + 106, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 124, 125, + 126, 127, 128, 129, 130, 131, 132, 133, -1, 135, + 136, -1, -1, -1, -1, -1, -1, 143, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, -1, -1, -1, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, -1, -1, -1, -1, -1, + 45, 46, 47, 48, 49, 50, 51, 52, -1, -1, + 55, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 70, 71, 72, 73, 74, + 75, 76, 77, -1, -1, 80, 81, -1, -1, -1, + -1, 86, 87, 88, 89, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 100, 101, 102, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 91, -1, - -1, 94, 95, -1, 97, 98, -1, -1, -1, -1, - -1, 104, -1, 106, 107, 108, -1, 110, 111, 112, - -1, 114, -1, -1, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, -1, -1, -1, -1, -1, -1, - 19, 134, 21, 22, 23, 24, -1, -1, -1, 142, - -1, 30, 31, 32, 33, 34, 35, 36, -1, -1, - 39, -1, -1, -1, -1, -1, 45, 46, 47, 48, - 49, 50, 51, 52, 53, 54, 55, 56, -1, 58, - 59, 60, -1, -1, 63, -1, -1, 66, 67, -1, - 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 84, 85, -1, -1, -1, - -1, -1, 91, -1, -1, 94, 95, -1, 97, 98, - -1, 100, -1, -1, -1, 104, -1, 106, 107, 108, - -1, 110, 111, 112, -1, 114, -1, -1, -1, -1, - -1, -1, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, -1, -1, -1, 134, 135, 136, 19, -1, - 21, 22, 23, 24, -1, -1, -1, -1, -1, 30, - 31, 32, 33, 34, 35, 36, -1, -1, 39, -1, - -1, -1, -1, -1, 45, -1, 47, 48, 49, 50, - 51, 52, 53, 54, 55, 56, -1, 58, 59, 60, - -1, -1, 63, -1, -1, 66, 67, -1, 69, 70, - 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 84, 85, -1, -1, -1, -1, -1, - 91, -1, -1, 94, 95, -1, 97, 98, -1, 100, - -1, -1, -1, 104, -1, 106, 107, 108, -1, 110, - 111, 112, -1, 114, -1, -1, -1, -1, -1, -1, - 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, - -1, -1, -1, 134, 135, 136, 19, -1, 21, 22, - 23, 24, -1, -1, -1, -1, -1, 30, 31, 32, - 33, 34, 35, 36, -1, -1, 39, -1, -1, -1, - -1, -1, -1, -1, -1, 48, 49, 50, 51, 52, - 53, 54, 55, 56, -1, 58, 59, 60, -1, -1, - 63, -1, -1, 66, 67, -1, 69, 70, 71, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 124, + 125, 126, 127, 128, 129, 130, 131, 132, 133, -1, + 135, 136, -1, -1, -1, -1, -1, -1, 143, 3, + 4, 5, 6, 7, 8, 9, 10, 11, -1, -1, + -1, -1, -1, -1, -1, 19, -1, 21, 22, 23, + 24, -1, -1, -1, -1, -1, 30, 31, 32, 33, + 34, 35, 36, -1, -1, 39, -1, -1, -1, -1, + -1, -1, -1, -1, 48, 49, 50, 51, 52, 53, + 54, 55, 56, -1, 58, 59, 60, -1, -1, 63, + -1, -1, 66, 67, -1, 69, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 84, 85, -1, -1, -1, -1, -1, 91, -1, - -1, 94, 95, -1, 97, 98, -1, 100, -1, 102, - 103, 104, -1, 106, 107, 108, -1, 110, 111, 112, - -1, 114, -1, -1, -1, -1, -1, -1, 3, 4, - 5, 6, 7, 8, 9, 10, 11, -1, -1, -1, - -1, 134, 135, 136, 19, -1, 21, 22, 23, 24, - -1, -1, -1, -1, -1, 30, 31, 32, 33, 34, - 35, 36, -1, -1, 39, -1, -1, -1, -1, -1, - -1, -1, -1, 48, 49, 50, 51, 52, 53, 54, - 55, 56, -1, 58, 59, 60, -1, -1, 63, -1, - -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 84, - 85, -1, -1, -1, -1, -1, 91, -1, -1, 94, - 95, -1, 97, 98, -1, 100, -1, 102, 103, 104, - -1, 106, 107, 108, -1, 110, 111, 112, -1, 114, - -1, -1, -1, -1, -1, -1, 3, 4, 5, 6, - 7, 8, 9, 10, 11, -1, -1, -1, -1, 134, - 135, 136, 19, -1, 21, 22, 23, 24, -1, -1, - -1, -1, -1, 30, 31, 32, 33, 34, 35, 36, - -1, -1, 39, -1, -1, -1, -1, -1, -1, -1, - -1, 48, 49, 50, 51, 52, 53, 54, 55, 56, - -1, 58, 59, 60, -1, -1, 63, -1, -1, 66, - 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 84, 85, -1, - -1, -1, -1, -1, 91, -1, -1, 94, 95, -1, - 97, 98, -1, 100, -1, 102, 103, 104, -1, 106, - 107, 108, -1, 110, 111, 112, -1, 114, -1, -1, - -1, -1, -1, -1, 3, 4, 5, 6, 7, 8, - 9, 10, 11, -1, -1, -1, -1, 134, 135, 136, - 19, -1, 21, 22, 23, 24, -1, -1, -1, -1, - -1, 30, 31, 32, 33, 34, 35, 36, -1, -1, - 39, -1, -1, -1, -1, -1, -1, -1, -1, 48, - 49, 50, 51, 52, 53, 54, 55, 56, -1, 58, - 59, 60, -1, -1, 63, -1, -1, 66, 67, -1, - 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 84, 85, -1, -1, -1, - -1, -1, 91, -1, -1, 94, 95, -1, 97, 98, - -1, 100, -1, 102, 103, 104, -1, 106, 107, 108, - -1, 110, 111, 112, -1, 114, -1, -1, -1, -1, - -1, -1, 3, 4, 5, 6, 7, 8, 9, 10, - 11, -1, -1, -1, -1, 134, 135, 136, 19, -1, - 21, 22, 23, 24, -1, -1, -1, -1, -1, 30, - 31, 32, 33, 34, 35, 36, -1, -1, 39, -1, - -1, -1, -1, -1, -1, -1, -1, 48, 49, 50, - 51, 52, 53, 54, 55, 56, -1, 58, 59, 60, - -1, -1, 63, -1, -1, 66, 67, -1, 69, 70, - 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 84, 85, -1, -1, -1, -1, -1, - 91, -1, -1, 94, 95, -1, 97, 98, -1, 100, - -1, 102, -1, 104, -1, 106, 107, 108, -1, 110, - 111, 112, -1, 114, -1, -1, -1, -1, -1, -1, - 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, - -1, -1, -1, 134, 135, 136, 19, -1, 21, 22, - 23, 24, -1, -1, -1, -1, -1, 30, 31, 32, - 33, 34, 35, 36, -1, -1, 39, -1, -1, -1, - -1, -1, -1, -1, -1, 48, 49, 50, 51, 52, - 53, 54, 55, 56, -1, 58, 59, 60, -1, -1, - 63, -1, -1, 66, 67, -1, 69, 70, 71, -1, + -1, -1, -1, -1, -1, -1, -1, 91, -1, -1, + 94, 95, -1, 97, 98, -1, -1, -1, -1, -1, + 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, + 114, -1, -1, 3, 4, 5, 6, 7, 8, 9, + 10, 11, -1, -1, -1, -1, -1, -1, -1, 19, + 134, 21, 22, 23, 24, -1, -1, -1, 142, -1, + 30, 31, 32, 33, 34, 35, 36, -1, -1, 39, + -1, -1, -1, -1, -1, -1, -1, -1, 48, 49, + 50, 51, 52, 53, 54, 55, 56, -1, 58, 59, + 60, -1, -1, 63, -1, -1, 66, 67, -1, 69, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 84, 85, -1, -1, -1, -1, -1, 91, -1, - -1, 94, 95, -1, 97, 98, -1, -1, -1, 102, - 103, 104, -1, 106, 107, 108, -1, 110, 111, 112, - -1, 114, -1, -1, -1, -1, -1, -1, 3, 4, - 5, 6, 7, 8, 9, 10, 11, -1, -1, -1, - -1, 134, 135, 136, 19, -1, 21, 22, 23, 24, - -1, -1, -1, -1, -1, 30, 31, 32, 33, 34, - 35, 36, -1, -1, 39, -1, -1, -1, -1, -1, - -1, -1, -1, 48, 49, 50, 51, 52, 53, 54, - 55, 56, -1, 58, 59, 60, -1, -1, 63, -1, - -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 84, - 85, -1, -1, -1, -1, -1, 91, -1, -1, 94, - 95, -1, 97, 98, -1, 100, -1, 102, -1, 104, - -1, 106, 107, 108, -1, 110, 111, 112, -1, 114, - -1, -1, -1, -1, -1, -1, 3, 4, 5, 6, - 7, 8, 9, 10, 11, -1, -1, -1, -1, 134, - 135, 136, 19, -1, 21, 22, 23, 24, -1, -1, - -1, -1, -1, 30, 31, 32, 33, 34, 35, 36, - -1, -1, 39, -1, -1, -1, -1, -1, -1, -1, - -1, 48, 49, 50, 51, 52, 53, 54, 55, 56, - -1, 58, 59, 60, -1, -1, 63, -1, -1, 66, - 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 84, 85, -1, - -1, -1, -1, -1, 91, -1, -1, 94, 95, -1, - 97, 98, -1, -1, -1, 102, -1, 104, -1, 106, - 107, 108, -1, 110, 111, 112, -1, 114, -1, -1, - -1, -1, -1, -1, 3, 4, 5, 6, 7, 8, - 9, 10, 11, -1, -1, -1, -1, 134, 135, 136, - 19, -1, 21, 22, 23, 24, -1, -1, -1, -1, - -1, 30, 31, 32, 33, 34, 35, 36, -1, -1, - 39, -1, -1, -1, -1, -1, -1, -1, -1, 48, - 49, 50, 51, 52, 53, 54, 55, 56, -1, 58, - 59, 60, -1, -1, 63, -1, -1, 66, 67, -1, - 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 84, 85, -1, -1, -1, - -1, -1, 91, -1, -1, 94, 95, -1, 97, 98, - -1, 100, -1, -1, -1, 104, -1, 106, 107, 108, - -1, 110, 111, 112, -1, 114, -1, -1, -1, -1, - -1, -1, 3, 4, 5, 6, 7, 8, 9, 10, - 11, -1, -1, -1, -1, 134, 135, 136, 19, -1, - 21, 22, 23, 24, -1, -1, -1, -1, -1, 30, - 31, 32, 33, 34, 35, 36, -1, -1, 39, -1, - -1, -1, -1, -1, -1, -1, -1, 48, 49, 50, - 51, 52, 53, 54, 55, 56, -1, 58, 59, 60, - -1, -1, 63, -1, -1, 66, 67, -1, 69, 70, - 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 84, 85, -1, -1, -1, -1, -1, - 91, -1, -1, 94, 95, -1, 97, 98, -1, 100, - -1, -1, -1, 104, -1, 106, 107, 108, -1, 110, - 111, 112, -1, 114, -1, -1, -1, -1, -1, -1, - 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, - -1, -1, -1, 134, 135, 136, 19, -1, 21, 22, - 23, 24, -1, -1, -1, -1, -1, 30, 31, 32, - 33, 34, 35, 36, -1, -1, 39, -1, -1, -1, - -1, -1, -1, -1, -1, 48, 49, 50, 51, 52, - 53, 54, 55, 56, -1, 58, 59, 60, -1, -1, - 63, -1, -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 84, 85, -1, -1, -1, -1, -1, 91, -1, - -1, 94, 95, -1, 97, 98, -1, 100, -1, -1, - -1, 104, -1, 106, 107, 108, -1, 110, 111, 112, - -1, 114, -1, -1, -1, -1, -1, -1, 3, 4, - 5, 6, 7, 8, 9, 10, 11, -1, -1, -1, - -1, 134, 135, 136, 19, -1, 21, 22, 23, 24, - -1, -1, -1, -1, -1, 30, 31, 32, 33, 34, - 35, 36, -1, -1, 39, -1, -1, -1, -1, -1, - -1, -1, -1, 48, 49, 50, 51, 52, 53, 54, - 55, 56, -1, 58, 59, 60, -1, -1, 63, -1, - -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 84, - 85, -1, -1, -1, -1, -1, 91, -1, -1, 94, - 95, -1, 97, 98, -1, 100, -1, -1, -1, 104, - -1, 106, 107, 108, -1, 110, 111, 112, -1, 114, - -1, -1, -1, -1, -1, -1, 3, 4, 5, 6, - 7, 8, 9, 10, 11, -1, -1, -1, -1, 134, - 135, 136, 19, -1, 21, 22, 23, 24, -1, -1, - -1, -1, -1, 30, 31, 32, 33, 34, 35, 36, - -1, -1, 39, -1, -1, -1, -1, -1, -1, -1, - -1, 48, 49, 50, 51, 52, 53, 54, 55, 56, - -1, 58, 59, 60, -1, -1, 63, -1, -1, 66, - 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 84, 85, -1, - -1, -1, -1, -1, 91, -1, -1, 94, 95, -1, - 97, 98, -1, 100, -1, -1, -1, 104, -1, 106, - 107, 108, -1, 110, 111, 112, -1, 114, -1, -1, - -1, -1, -1, -1, 3, 4, 5, 6, 7, 8, - 9, 10, 11, -1, -1, -1, -1, 134, 135, 136, - 19, -1, 21, 22, 23, 24, -1, -1, -1, -1, - -1, 30, 31, 32, 33, 34, 35, 36, -1, -1, - 39, -1, -1, -1, -1, -1, -1, -1, -1, 48, - 49, 50, 51, 52, 53, 54, 55, 56, -1, 58, - 59, 60, -1, -1, 63, -1, -1, 66, 67, -1, - 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 84, 85, -1, -1, -1, - -1, -1, 91, -1, -1, 94, 95, -1, 97, 98, - -1, -1, -1, -1, -1, 104, -1, 106, 107, 108, - -1, 110, 111, 112, -1, 114, -1, -1, -1, -1, - -1, -1, 3, 4, 5, 6, 7, 8, 9, 10, - 11, -1, -1, -1, -1, 134, 135, 136, 19, -1, - 21, 22, 23, 24, -1, -1, -1, -1, -1, 30, - 31, 32, 33, 34, 35, 36, -1, -1, 39, -1, - -1, -1, -1, -1, -1, -1, -1, 48, 49, 50, - 51, 52, 53, 54, 55, 56, -1, 58, 59, 60, - -1, -1, 63, -1, -1, 66, 67, -1, 69, 70, - 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 84, 85, -1, -1, -1, -1, -1, - 91, -1, -1, 94, 95, -1, 97, 98, -1, -1, - -1, -1, -1, 104, -1, 106, 107, 108, -1, 110, - 111, 112, -1, 114, -1, -1, -1, -1, -1, -1, - 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, - -1, -1, -1, 134, 135, 136, 19, -1, 21, 22, - 23, 24, -1, -1, -1, -1, -1, 30, 31, 32, - 33, 34, 35, 36, -1, -1, 39, -1, -1, -1, - -1, -1, -1, -1, -1, 48, 49, 50, 51, 52, - 53, 54, 55, 56, -1, 58, 59, 60, -1, -1, - 63, -1, -1, 66, 67, -1, 69, 70, 71, -1, + -1, 91, -1, -1, 94, 95, -1, 97, 98, -1, + -1, -1, -1, -1, 104, -1, 106, 107, 108, -1, + 110, 111, 112, -1, 114, -1, -1, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 12, -1, -1, -1, + -1, -1, -1, 19, 134, 21, 22, 23, 24, -1, + -1, -1, 142, -1, 30, 31, 32, 33, 34, 35, + 36, -1, -1, 39, -1, -1, -1, -1, -1, 45, + 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, + 56, -1, 58, 59, 60, -1, -1, 63, -1, -1, + 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 84, 85, + -1, -1, -1, -1, -1, 91, -1, -1, 94, 95, + -1, 97, 98, -1, 100, -1, -1, -1, 104, -1, + 106, 107, 108, -1, 110, 111, 112, -1, 114, -1, + -1, -1, -1, -1, -1, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, -1, -1, -1, 134, 135, + 136, 19, -1, 21, 22, 23, 24, -1, -1, -1, + -1, -1, 30, 31, 32, 33, 34, 35, 36, -1, + -1, 39, -1, -1, -1, -1, -1, 45, -1, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, -1, + 58, 59, 60, -1, -1, 63, -1, -1, 66, 67, + -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 84, 85, -1, -1, + -1, -1, -1, 91, -1, -1, 94, 95, -1, 97, + 98, -1, 100, -1, -1, -1, 104, -1, 106, 107, + 108, -1, 110, 111, 112, -1, 114, -1, -1, -1, + -1, -1, -1, 3, 4, 5, 6, 7, 8, 9, + 10, 11, -1, -1, -1, -1, 134, 135, 136, 19, + -1, 21, 22, 23, 24, -1, -1, -1, -1, -1, + 30, 31, 32, 33, 34, 35, 36, -1, -1, 39, + -1, -1, -1, -1, -1, -1, -1, -1, 48, 49, + 50, 51, 52, 53, 54, 55, 56, -1, 58, 59, + 60, -1, -1, 63, -1, -1, 66, 67, -1, 69, + 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 84, 85, -1, -1, -1, -1, + -1, 91, -1, -1, 94, 95, -1, 97, 98, -1, + 100, -1, 102, 103, 104, -1, 106, 107, 108, -1, + 110, 111, 112, -1, 114, -1, -1, -1, -1, -1, + -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, + -1, -1, -1, -1, 134, 135, 136, 19, -1, 21, + 22, 23, 24, -1, -1, -1, -1, -1, 30, 31, + 32, 33, 34, 35, 36, -1, -1, 39, -1, -1, + -1, -1, -1, -1, -1, -1, 48, 49, 50, 51, + 52, 53, 54, 55, 56, -1, 58, 59, 60, -1, + -1, 63, -1, -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 84, 85, -1, -1, -1, -1, -1, 91, -1, - -1, 94, 95, -1, 97, 98, -1, -1, -1, -1, - -1, 104, -1, 106, 107, 108, -1, 110, 111, 112, - -1, 114, -1, -1, -1, -1, -1, -1, 3, 4, - 5, 6, 7, 8, 9, 10, 11, -1, -1, -1, - -1, 134, 135, 136, 19, -1, 21, 22, 23, 24, - -1, -1, -1, -1, -1, 30, 31, 32, 33, 34, - 35, 36, -1, -1, 39, -1, -1, -1, -1, -1, - -1, -1, -1, 48, 49, 50, 51, 52, 53, 54, - 55, 56, -1, 58, 59, 60, -1, -1, 63, -1, - -1, 66, 67, -1, 69, -1, -1, -1, -1, -1, + -1, -1, 84, 85, -1, -1, -1, -1, -1, 91, + -1, -1, 94, 95, -1, 97, 98, -1, 100, -1, + 102, 103, 104, -1, 106, 107, 108, -1, 110, 111, + 112, -1, 114, -1, -1, -1, -1, -1, -1, 3, + 4, 5, 6, 7, 8, 9, 10, 11, -1, -1, + -1, -1, 134, 135, 136, 19, -1, 21, 22, 23, + 24, -1, -1, -1, -1, -1, 30, 31, 32, 33, + 34, 35, 36, -1, -1, 39, -1, -1, -1, -1, + -1, -1, -1, -1, 48, 49, 50, 51, 52, 53, + 54, 55, 56, -1, 58, 59, 60, -1, -1, 63, + -1, -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 88, -1, -1, 91, -1, -1, 94, - 95, -1, 97, 98, -1, -1, -1, -1, -1, 104, - -1, 106, 107, 108, -1, 110, 111, 112, -1, 114, - -1, -1, 3, 4, 5, 6, 7, 8, 9, 10, - 11, -1, -1, -1, -1, -1, -1, -1, 19, 134, - 21, 22, 23, 24, -1, -1, -1, -1, -1, 30, - 31, 32, 33, 34, 35, 36, -1, -1, 39, -1, - -1, -1, -1, -1, -1, -1, -1, 48, 49, 50, - 51, 52, 53, 54, 55, 56, -1, 58, 59, 60, - -1, -1, 63, -1, -1, 66, 67, -1, 69, -1, + 84, 85, -1, -1, -1, -1, -1, 91, -1, -1, + 94, 95, -1, 97, 98, -1, 100, -1, 102, 103, + 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, + 114, -1, -1, -1, -1, -1, -1, 3, 4, 5, + 6, 7, 8, 9, 10, 11, -1, -1, -1, -1, + 134, 135, 136, 19, -1, 21, 22, 23, 24, -1, + -1, -1, -1, -1, 30, 31, 32, 33, 34, 35, + 36, -1, -1, 39, -1, -1, -1, -1, -1, -1, + -1, -1, 48, 49, 50, 51, 52, 53, 54, 55, + 56, -1, 58, 59, 60, -1, -1, 63, -1, -1, + 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 84, 85, + -1, -1, -1, -1, -1, 91, -1, -1, 94, 95, + -1, 97, 98, -1, 100, -1, 102, 103, 104, -1, + 106, 107, 108, -1, 110, 111, 112, -1, 114, -1, + -1, -1, -1, -1, -1, 3, 4, 5, 6, 7, + 8, 9, 10, 11, -1, -1, -1, -1, 134, 135, + 136, 19, -1, 21, 22, 23, 24, -1, -1, -1, + -1, -1, 30, 31, 32, 33, 34, 35, 36, -1, + -1, 39, -1, -1, -1, -1, -1, -1, -1, -1, + 48, 49, 50, 51, 52, 53, 54, 55, 56, -1, + 58, 59, 60, -1, -1, 63, -1, -1, 66, 67, + -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 84, 85, -1, -1, + -1, -1, -1, 91, -1, -1, 94, 95, -1, 97, + 98, -1, 100, -1, 102, -1, 104, -1, 106, 107, + 108, -1, 110, 111, 112, -1, 114, -1, -1, -1, + -1, -1, -1, 3, 4, 5, 6, 7, 8, 9, + 10, 11, -1, -1, -1, -1, 134, 135, 136, 19, + -1, 21, 22, 23, 24, -1, -1, -1, -1, -1, + 30, 31, 32, 33, 34, 35, 36, -1, -1, 39, + -1, -1, -1, -1, -1, -1, -1, -1, 48, 49, + 50, 51, 52, 53, 54, 55, 56, -1, 58, 59, + 60, -1, -1, 63, -1, -1, 66, 67, -1, 69, + 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 84, 85, -1, -1, -1, -1, + -1, 91, -1, -1, 94, 95, -1, 97, 98, -1, + -1, -1, 102, 103, 104, -1, 106, 107, 108, -1, + 110, 111, 112, -1, 114, -1, -1, -1, -1, -1, + -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, + -1, -1, -1, -1, 134, 135, 136, 19, -1, 21, + 22, 23, 24, -1, -1, -1, -1, -1, 30, 31, + 32, 33, 34, 35, 36, -1, -1, 39, -1, -1, + -1, -1, -1, -1, -1, -1, 48, 49, 50, 51, + 52, 53, 54, 55, 56, -1, 58, 59, 60, -1, + -1, 63, -1, -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 84, 85, -1, -1, -1, -1, -1, 91, + -1, -1, 94, 95, -1, 97, 98, -1, 100, -1, + 102, -1, 104, -1, 106, 107, 108, -1, 110, 111, + 112, -1, 114, -1, -1, -1, -1, -1, -1, 3, + 4, 5, 6, 7, 8, 9, 10, 11, -1, -1, + -1, -1, 134, 135, 136, 19, -1, 21, 22, 23, + 24, -1, -1, -1, -1, -1, 30, 31, 32, 33, + 34, 35, 36, -1, -1, 39, -1, -1, -1, -1, + -1, -1, -1, -1, 48, 49, 50, 51, 52, 53, + 54, 55, 56, -1, 58, 59, 60, -1, -1, 63, + -1, -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 91, -1, -1, 94, 95, -1, 97, 98, -1, 100, - -1, -1, -1, 104, -1, 106, 107, 108, -1, 110, - 111, 112, -1, 114, -1, -1, 3, 4, 5, 6, - 7, 8, 9, 10, 11, -1, -1, -1, -1, -1, - -1, -1, 19, 134, 21, 22, 23, 24, -1, -1, - -1, -1, -1, 30, 31, 32, 33, 34, 35, 36, - -1, -1, 39, -1, -1, -1, -1, -1, -1, -1, - -1, 48, 49, 50, 51, 52, 53, 54, 55, 56, - -1, 58, 59, 60, -1, -1, 63, -1, -1, 66, - 67, -1, 69, -1, -1, -1, -1, -1, -1, -1, + 84, 85, -1, -1, -1, -1, -1, 91, -1, -1, + 94, 95, -1, 97, 98, -1, -1, -1, 102, -1, + 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, + 114, -1, -1, -1, -1, -1, -1, 3, 4, 5, + 6, 7, 8, 9, 10, 11, -1, -1, -1, -1, + 134, 135, 136, 19, -1, 21, 22, 23, 24, -1, + -1, -1, -1, -1, 30, 31, 32, 33, 34, 35, + 36, -1, -1, 39, -1, -1, -1, -1, -1, -1, + -1, -1, 48, 49, 50, 51, 52, 53, 54, 55, + 56, -1, 58, 59, 60, -1, -1, 63, -1, -1, + 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 84, 85, + -1, -1, -1, -1, -1, 91, -1, -1, 94, 95, + -1, 97, 98, -1, 100, -1, -1, -1, 104, -1, + 106, 107, 108, -1, 110, 111, 112, -1, 114, -1, + -1, -1, -1, -1, -1, 3, 4, 5, 6, 7, + 8, 9, 10, 11, -1, -1, -1, -1, 134, 135, + 136, 19, -1, 21, 22, 23, 24, -1, -1, -1, + -1, -1, 30, 31, 32, 33, 34, 35, 36, -1, + -1, 39, -1, -1, -1, -1, -1, -1, -1, -1, + 48, 49, 50, 51, 52, 53, 54, 55, 56, -1, + 58, 59, 60, -1, -1, 63, -1, -1, 66, 67, + -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 84, 85, -1, -1, + -1, -1, -1, 91, -1, -1, 94, 95, -1, 97, + 98, -1, 100, -1, -1, -1, 104, -1, 106, 107, + 108, -1, 110, 111, 112, -1, 114, -1, -1, -1, + -1, -1, -1, 3, 4, 5, 6, 7, 8, 9, + 10, 11, -1, -1, -1, -1, 134, 135, 136, 19, + -1, 21, 22, 23, 24, -1, -1, -1, -1, -1, + 30, 31, 32, 33, 34, 35, 36, -1, -1, 39, + -1, -1, -1, -1, -1, -1, -1, -1, 48, 49, + 50, 51, 52, 53, 54, 55, 56, -1, 58, 59, + 60, -1, -1, 63, -1, -1, 66, 67, -1, 69, + 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 84, 85, -1, -1, -1, -1, + -1, 91, -1, -1, 94, 95, -1, 97, 98, -1, + 100, -1, -1, -1, 104, -1, 106, 107, 108, -1, + 110, 111, 112, -1, 114, -1, -1, -1, -1, -1, + -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, + -1, -1, -1, -1, 134, 135, 136, 19, -1, 21, + 22, 23, 24, -1, -1, -1, -1, -1, 30, 31, + 32, 33, 34, 35, 36, -1, -1, 39, -1, -1, + -1, -1, -1, -1, -1, -1, 48, 49, 50, 51, + 52, 53, 54, 55, 56, -1, 58, 59, 60, -1, + -1, 63, -1, -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 91, -1, -1, 94, 95, -1, - 97, 98, -1, 100, -1, -1, -1, 104, -1, 106, - 107, 108, -1, 110, 111, 112, -1, 114, -1, -1, - 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, - -1, -1, -1, -1, -1, -1, 19, 134, 21, 22, - 23, 24, -1, -1, -1, -1, -1, 30, 31, 32, - 33, 34, 35, 36, -1, -1, 39, -1, -1, -1, - -1, -1, -1, -1, -1, 48, 49, 50, 51, 52, - 53, 54, 55, 56, -1, 58, 59, 60, -1, -1, - 63, -1, -1, 66, 67, -1, 69, -1, -1, -1, + -1, -1, 84, 85, -1, -1, -1, -1, -1, 91, + -1, -1, 94, 95, -1, 97, 98, -1, 100, -1, + -1, -1, 104, -1, 106, 107, 108, -1, 110, 111, + 112, -1, 114, -1, -1, -1, -1, -1, -1, 3, + 4, 5, 6, 7, 8, 9, 10, 11, -1, -1, + -1, -1, 134, 135, 136, 19, -1, 21, 22, 23, + 24, -1, -1, -1, -1, -1, 30, 31, 32, 33, + 34, 35, 36, -1, -1, 39, -1, -1, -1, -1, + -1, -1, -1, -1, 48, 49, 50, 51, 52, 53, + 54, 55, 56, -1, 58, 59, 60, -1, -1, 63, + -1, -1, 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 91, -1, - -1, 94, 95, -1, 97, 98, -1, -1, -1, -1, - -1, 104, -1, 106, 107, 108, -1, 110, 111, 112, - -1, 114, -1, -1, 3, 4, 5, 6, 7, 8, - 9, 10, 11, -1, -1, -1, -1, -1, -1, -1, - 19, 134, 21, 22, 23, 24, -1, -1, -1, -1, - -1, 30, 31, 32, 33, 34, 35, 36, -1, -1, - 39, -1, -1, -1, -1, -1, -1, -1, -1, 48, - 49, 50, 51, 52, 53, 54, 55, 56, -1, 58, - 59, 60, -1, -1, 63, -1, -1, 66, 67, -1, - 69, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 84, 85, -1, -1, -1, -1, -1, 91, -1, -1, + 94, 95, -1, 97, 98, -1, 100, -1, -1, -1, + 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, + 114, -1, -1, -1, -1, -1, -1, 3, 4, 5, + 6, 7, 8, 9, 10, 11, -1, -1, -1, -1, + 134, 135, 136, 19, -1, 21, 22, 23, 24, -1, + -1, -1, -1, -1, 30, 31, 32, 33, 34, 35, + 36, -1, -1, 39, -1, -1, -1, -1, -1, -1, + -1, -1, 48, 49, 50, 51, 52, 53, 54, 55, + 56, -1, 58, 59, 60, -1, -1, 63, -1, -1, + 66, 67, -1, 69, 70, 71, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 84, 85, + -1, -1, -1, -1, -1, 91, -1, -1, 94, 95, + -1, 97, 98, -1, -1, -1, -1, -1, 104, -1, + 106, 107, 108, -1, 110, 111, 112, -1, 114, -1, + -1, -1, -1, -1, -1, 3, 4, 5, 6, 7, + 8, 9, 10, 11, -1, -1, -1, -1, 134, 135, + 136, 19, -1, 21, 22, 23, 24, -1, -1, -1, + -1, -1, 30, 31, 32, 33, 34, 35, 36, -1, + -1, 39, -1, -1, -1, -1, -1, -1, -1, -1, + 48, 49, 50, 51, 52, 53, 54, 55, 56, -1, + 58, 59, 60, -1, -1, 63, -1, -1, 66, 67, + -1, 69, 70, 71, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 84, 85, -1, -1, + -1, -1, -1, 91, -1, -1, 94, 95, -1, 97, + 98, -1, -1, -1, -1, -1, 104, -1, 106, 107, + 108, -1, 110, 111, 112, -1, 114, -1, -1, -1, + -1, -1, -1, 3, 4, 5, 6, 7, 8, 9, + 10, 11, -1, -1, -1, -1, 134, 135, 136, 19, + -1, 21, 22, 23, 24, -1, -1, -1, -1, -1, + 30, 31, 32, 33, 34, 35, 36, -1, -1, 39, + -1, -1, -1, -1, -1, -1, -1, -1, 48, 49, + 50, 51, 52, 53, 54, 55, 56, -1, 58, 59, + 60, -1, -1, 63, -1, -1, 66, 67, -1, 69, + 70, 71, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 84, 85, -1, -1, -1, -1, + -1, 91, -1, -1, 94, 95, -1, 97, 98, -1, + -1, -1, -1, -1, 104, -1, 106, 107, 108, -1, + 110, 111, 112, -1, 114, -1, -1, -1, -1, -1, + -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, + -1, -1, -1, -1, 134, 135, 136, 19, -1, 21, + 22, 23, 24, -1, -1, -1, -1, -1, 30, 31, + 32, 33, 34, 35, 36, -1, -1, 39, -1, -1, + -1, -1, -1, -1, -1, -1, 48, 49, 50, 51, + 52, 53, 54, 55, 56, -1, 58, 59, 60, -1, + -1, 63, -1, -1, 66, 67, -1, 69, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 91, -1, -1, 94, 95, -1, 97, 98, - -1, -1, -1, -1, -1, 104, -1, 106, 107, 108, - -1, 110, 111, 112, -1, 114, -1, -1, 3, 4, - 5, 6, 7, 8, 9, 10, 11, -1, -1, -1, - -1, -1, -1, -1, 19, 134, 21, 22, 23, 24, - -1, -1, -1, -1, -1, 30, 31, 32, 33, 34, - 35, 36, -1, -1, 39, -1, -1, -1, -1, -1, - -1, -1, -1, 48, 49, 50, 51, 52, 53, 54, - 55, 56, -1, 58, 59, 60, -1, -1, 63, -1, - -1, 66, 67, -1, 69, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 88, -1, -1, 91, + -1, -1, 94, 95, -1, 97, 98, -1, -1, -1, + -1, -1, 104, -1, 106, 107, 108, -1, 110, 111, + 112, -1, 114, -1, -1, 3, 4, 5, 6, 7, + 8, 9, 10, 11, -1, -1, -1, -1, -1, -1, + -1, 19, 134, 21, 22, 23, 24, -1, -1, -1, + -1, -1, 30, 31, 32, 33, 34, 35, 36, -1, + -1, 39, -1, -1, -1, -1, -1, -1, -1, -1, + 48, 49, 50, 51, 52, 53, 54, 55, 56, -1, + 58, 59, 60, -1, -1, 63, -1, -1, 66, 67, + -1, 69, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 91, -1, -1, 94, - 95, -1, 97, 98, -1, -1, -1, -1, -1, 104, - -1, 106, 107, 108, -1, 110, 111, 112, -1, 114, - -1, -1, 3, 4, 5, 6, 7, 8, 9, 10, - 11, -1, -1, -1, -1, -1, -1, -1, 19, 134, - 21, 22, 23, 24, -1, -1, -1, -1, -1, 30, - 31, 32, 33, 34, 35, 36, -1, -1, 39, -1, - -1, -1, -1, -1, -1, -1, -1, 48, 49, 50, - 51, 52, 53, 54, 55, 56, -1, 58, 59, 60, - -1, -1, 63, -1, -1, 66, 67, -1, 69, -1, + -1, -1, -1, 91, -1, -1, 94, 95, -1, 97, + 98, -1, 100, -1, -1, -1, 104, -1, 106, 107, + 108, -1, 110, 111, 112, -1, 114, -1, -1, 3, + 4, 5, 6, 7, 8, 9, 10, 11, -1, -1, + -1, -1, -1, -1, -1, 19, 134, 21, 22, 23, + 24, -1, -1, -1, -1, -1, 30, 31, 32, 33, + 34, 35, 36, -1, -1, 39, -1, -1, -1, -1, + -1, -1, -1, -1, 48, 49, 50, 51, 52, 53, + 54, 55, 56, -1, 58, 59, 60, -1, -1, 63, + -1, -1, 66, 67, -1, 69, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 91, -1, -1, + 94, 95, -1, 97, 98, -1, 100, -1, -1, -1, + 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, + 114, -1, -1, 3, 4, 5, 6, 7, 8, 9, + 10, 11, -1, -1, -1, -1, -1, -1, -1, 19, + 134, 21, 22, 23, 24, -1, -1, -1, -1, -1, + 30, 31, 32, 33, 34, 35, 36, -1, -1, 39, + -1, -1, -1, -1, -1, -1, -1, -1, 48, 49, + 50, 51, 52, 53, 54, 55, 56, -1, 58, 59, + 60, -1, -1, 63, -1, -1, 66, 67, -1, 69, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 91, -1, -1, 94, 95, -1, 97, 98, -1, -1, - 51, 52, -1, 104, 55, 106, 107, 108, -1, 110, - 111, 112, -1, 114, -1, -1, -1, -1, -1, 70, - 71, 72, 73, 74, 75, 76, 77, -1, -1, 80, - 81, -1, -1, 134, -1, 86, 87, 88, 89, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 100, - 101, 102, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 124, 125, 126, 127, 128, 129, 130, - 131, 132, 133, -1, 135, 136, 51, 52, -1, -1, - 55, -1, 143, 144, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 70, 71, 72, 73, 74, - 75, 76, 77, -1, -1, 80, 81, -1, -1, -1, - -1, 86, 87, 88, 89, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 100, 101, 102, -1, -1, + -1, 91, -1, -1, 94, 95, -1, 97, 98, -1, + -1, -1, -1, -1, 104, -1, 106, 107, 108, -1, + 110, 111, 112, -1, 114, -1, -1, 3, 4, 5, + 6, 7, 8, 9, 10, 11, -1, -1, -1, -1, + -1, -1, -1, 19, 134, 21, 22, 23, 24, -1, + -1, -1, -1, -1, 30, 31, 32, 33, 34, 35, + 36, -1, -1, 39, -1, -1, -1, -1, -1, -1, + -1, -1, 48, 49, 50, 51, 52, 53, 54, 55, + 56, -1, 58, 59, 60, -1, -1, 63, -1, -1, + 66, 67, -1, 69, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 124, - 125, 126, 127, 128, 129, 130, 131, 132, 133, -1, - 135, 136, 51, 52, -1, -1, 55, -1, 143, 144, + -1, -1, -1, -1, -1, 91, -1, -1, 94, 95, + -1, 97, 98, -1, -1, -1, -1, -1, 104, -1, + 106, 107, 108, -1, 110, 111, 112, -1, 114, -1, + -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, + -1, -1, -1, -1, -1, -1, -1, 19, 134, 21, + 22, 23, 24, -1, -1, -1, -1, -1, 30, 31, + 32, 33, 34, 35, 36, -1, -1, 39, -1, -1, + -1, -1, -1, -1, -1, -1, 48, 49, 50, 51, + 52, 53, 54, 55, 56, -1, 58, 59, 60, -1, + -1, 63, -1, -1, 66, 67, -1, 69, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 70, 71, 72, 73, 74, 75, 76, 77, -1, - -1, 80, 81, -1, -1, -1, -1, 86, 87, 88, - 89, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 100, 101, 102, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 91, + -1, -1, 94, 95, -1, 97, 98, -1, -1, -1, + -1, -1, 104, -1, 106, 107, 108, -1, 110, 111, + 112, -1, 114, -1, -1, 3, 4, 5, 6, 7, + 8, 9, 10, 11, -1, -1, -1, -1, -1, -1, + -1, 19, 134, 21, 22, 23, 24, -1, -1, -1, + -1, -1, 30, 31, 32, 33, 34, 35, 36, -1, + -1, 39, -1, -1, -1, -1, -1, -1, -1, -1, + 48, 49, 50, 51, 52, 53, 54, 55, 56, -1, + 58, 59, 60, -1, -1, 63, -1, -1, 66, 67, + -1, 69, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 124, 125, 126, 127, 128, - 129, 130, 131, 132, 133, -1, 135, 136, 51, 52, - -1, -1, 55, -1, 143, 144, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 70, 71, 72, - 73, 74, 75, 76, 77, -1, -1, 80, 81, -1, - -1, -1, -1, 86, 87, 88, 89, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 100, 101, 102, + -1, -1, -1, 91, -1, -1, 94, 95, -1, 97, + 98, -1, -1, 51, 52, -1, 104, 55, 106, 107, + 108, -1, 110, 111, 112, -1, 114, -1, -1, -1, + -1, -1, 70, 71, 72, 73, 74, 75, 76, 77, + -1, -1, 80, 81, -1, -1, 134, -1, 86, 87, + 88, 89, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 100, 101, 102, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 124, 125, 126, 127, + 128, 129, 130, 131, 132, 133, -1, 135, 136, 51, + 52, -1, -1, 55, -1, 143, 144, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 70, 71, + 72, 73, 74, 75, 76, 77, -1, -1, 80, 81, + -1, -1, -1, -1, 86, 87, 88, 89, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 100, 101, + 102, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 124, 125, 126, 127, 128, 129, 130, 131, 132, - 133, -1, 135, 136, 51, 52, -1, -1, 55, -1, - 143, 144, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 70, 71, 72, 73, 74, 75, 76, - 77, -1, -1, 80, 81, -1, -1, -1, -1, 86, - 87, 88, 89, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 100, 101, 102, -1, -1, -1, -1, + -1, -1, 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, -1, 135, 136, 51, 52, -1, -1, 55, + -1, 143, 144, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 70, 71, 72, 73, 74, 75, + 76, 77, -1, -1, 80, 81, -1, -1, -1, -1, + 86, 87, 88, 89, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 100, 101, 102, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 124, 125, 126, - 127, 128, 129, 130, 131, 132, 133, -1, 135, 136, - 51, 52, -1, -1, 55, -1, 143, 144, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 70, - 71, 72, 73, 74, 75, 76, 77, -1, -1, 80, - 81, -1, -1, -1, -1, 86, 87, 88, 89, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 100, - 101, 102, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 124, 125, + 126, 127, 128, 129, 130, 131, 132, 133, -1, 135, + 136, 51, 52, -1, -1, 55, -1, 143, 144, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 124, 125, 126, 127, 128, 129, 130, - 131, 132, 133, -1, 135, 136, 51, 52, -1, -1, - 55, -1, 143, 144, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 70, 71, 72, 73, 74, - 75, 76, 77, -1, -1, 80, 81, -1, -1, -1, - -1, 86, 87, 88, 89, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 100, 101, 102, -1, -1, + 70, 71, 72, 73, 74, 75, 76, 77, -1, -1, + 80, 81, -1, -1, -1, -1, 86, 87, 88, 89, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 124, - 125, 126, 127, 128, 129, 130, 131, 132, 133, -1, - 135, 136, 51, 52, -1, -1, 55, -1, 143, 144, + 100, 101, 102, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 70, 71, 72, 73, 74, 75, 76, 77, -1, - -1, 80, 81, -1, -1, -1, -1, 86, 87, 88, - 89, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 100, 101, 102, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 124, 125, 126, 127, 128, 129, + 130, 131, 132, 133, -1, 135, 136, 51, 52, -1, + -1, 55, -1, 143, 144, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 70, 71, 72, 73, + 74, 75, 76, 77, -1, -1, 80, 81, -1, -1, + -1, -1, 86, 87, 88, 89, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 100, 101, 102, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 124, 125, 126, 127, 128, - 129, 130, 131, 132, 133, -1, 135, 136, 51, 52, - -1, -1, 55, -1, 143, 144, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 70, 71, 72, - 73, 74, 75, 76, 77, -1, -1, 80, 81, -1, - -1, -1, -1, 86, 87, 88, 89, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 100, 101, 102, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, + -1, 135, 136, 51, 52, -1, -1, 55, -1, 143, + 144, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 70, 71, 72, 73, 74, 75, 76, 77, + -1, -1, 80, 81, -1, -1, -1, -1, 86, 87, + 88, 89, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 100, 101, 102, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 124, 125, 126, 127, 128, 129, 130, 131, 132, - 133, -1, 135, 136, 51, 52, -1, -1, 55, -1, - 143, 144, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 70, 71, 72, 73, 74, 75, 76, - 77, -1, -1, 80, 81, -1, -1, -1, -1, 86, - 87, 88, 89, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 100, 101, 102, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 124, 125, 126, 127, + 128, 129, 130, 131, 132, 133, -1, 135, 136, 51, + 52, -1, -1, 55, -1, 143, 144, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 70, 71, + 72, 73, 74, 75, 76, 77, -1, -1, 80, 81, + -1, -1, -1, -1, 86, 87, 88, 89, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 100, 101, + 102, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 124, 125, 126, - 127, 128, 129, 130, 131, 132, 133, -1, 135, 136, - 51, 52, -1, -1, 55, -1, 143, 144, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 70, - 71, 72, 73, 74, 75, 76, 77, -1, -1, 80, - 81, -1, -1, -1, -1, 86, 87, 88, 89, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 100, - 101, 102, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, -1, 135, 136, 51, 52, -1, -1, 55, + -1, 143, 144, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 70, 71, 72, 73, 74, 75, + 76, 77, -1, -1, 80, 81, -1, -1, -1, -1, + 86, 87, 88, 89, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 100, 101, 102, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 124, 125, 126, 127, 128, 129, 130, - 131, 132, 133, -1, 135, 136, 51, 52, -1, -1, - 55, -1, 143, 144, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 70, 71, 72, 73, 74, - 75, 76, 77, -1, -1, 80, 81, -1, -1, -1, - -1, 86, 87, 88, 89, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 100, 101, 102, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 124, 125, + 126, 127, 128, 129, 130, 131, 132, 133, -1, 135, + 136, 51, 52, -1, -1, 55, -1, 143, 144, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 124, - 125, 126, 127, 128, 129, 130, 131, 132, 133, -1, - 135, 136, 51, 52, -1, -1, 55, -1, 143, 144, + 70, 71, 72, 73, 74, 75, 76, 77, -1, -1, + 80, 81, -1, -1, -1, -1, 86, 87, 88, 89, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 70, 71, 72, 73, 74, 75, 76, 77, -1, - -1, 80, 81, -1, -1, -1, -1, 86, 87, 88, - 89, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 100, 101, 102, -1, -1, -1, -1, -1, -1, + 100, 101, 102, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 124, 125, 126, 127, 128, - 129, 130, 131, 132, 133, -1, 135, 136, 51, 52, - -1, -1, 55, -1, 143, 144, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 70, 71, 72, - 73, 74, 75, 76, 77, -1, -1, 80, 81, -1, - -1, -1, -1, 86, 87, 88, 89, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 100, 101, 102, + -1, -1, -1, -1, 124, 125, 126, 127, 128, 129, + 130, 131, 132, 133, -1, 135, 136, 51, 52, -1, + -1, 55, -1, 143, 144, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 70, 71, 72, 73, + 74, 75, 76, 77, -1, -1, 80, 81, -1, -1, + -1, -1, 86, 87, 88, 89, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 100, 101, 102, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 124, 125, 126, 127, 128, 129, 130, 131, 132, - 133, -1, 135, 136, 51, 52, -1, -1, 55, -1, - 143, 144, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 70, 71, 72, 73, 74, 75, 76, - 77, -1, -1, 80, 81, -1, -1, -1, -1, 86, - 87, 88, 89, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 100, 101, 102, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 44, - -1, -1, -1, -1, -1, -1, -1, 124, 125, 126, - 127, 128, 129, 130, 131, 132, 133, -1, 135, 136, - -1, -1, -1, -1, -1, -1, 143, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 83, -1, - -1, -1, -1, 88, 89, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 101, -1, -1, -1, - -1, 44, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 122, -1, 124, - 125, 126, 127, 128, 129, 130, 131, 132, 133, 72, - 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, - 83, -1, -1, -1, -1, 88, 89, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 101, -1, + 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, + -1, 135, 136, 51, 52, -1, -1, 55, -1, 143, + 144, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 70, 71, 72, 73, 74, 75, 76, 77, + -1, -1, 80, 81, -1, -1, -1, -1, 86, 87, + 88, 89, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 100, 101, 102, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 122, - -1, 124, 125, 126, 127, 128, 129, 130, 131, 132, - 133, 72, 73, 74, 75, 76, 77, 78, 79, 80, - 81, 82, 83, -1, -1, -1, -1, 88, 89, -1, + -1, -1, -1, -1, -1, -1, 124, 125, 126, 127, + 128, 129, 130, 131, 132, 133, -1, 135, 136, 51, + 52, -1, -1, 55, -1, 143, 144, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 70, 71, + 72, 73, 74, 75, 76, 77, -1, -1, 80, 81, + -1, -1, -1, -1, 86, 87, 88, 89, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 100, 101, + 102, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, -1, 135, 136, 51, 52, -1, -1, 55, + -1, 143, 144, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 70, 71, 72, 73, 74, 75, + 76, 77, -1, -1, 80, 81, -1, -1, -1, -1, + 86, 87, 88, 89, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 100, 101, 102, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 124, 125, + 126, 127, 128, 129, 130, 131, 132, 133, -1, 135, + 136, 51, 52, -1, -1, 55, -1, 143, 144, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 70, 71, 72, 73, 74, 75, 76, 77, -1, -1, + 80, 81, -1, -1, -1, -1, 86, 87, 88, 89, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 100, 101, 102, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 124, 125, 126, 127, 128, 129, + 130, 131, 132, 133, -1, 135, 136, 51, 52, -1, + -1, 55, -1, 143, 144, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 70, 71, 72, 73, + 74, 75, 76, 77, -1, -1, 80, 81, -1, -1, + -1, -1, 86, 87, 88, 89, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 100, 101, 102, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, + -1, 135, 136, 51, 52, -1, -1, 55, -1, 143, + 144, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 70, 71, 72, 73, 74, 75, 76, 77, + -1, -1, 80, 81, -1, -1, -1, -1, 86, 87, + 88, 89, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 100, 101, 102, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 44, -1, + -1, -1, -1, -1, -1, -1, 124, 125, 126, 127, + 128, 129, 130, 131, 132, 133, -1, 135, 136, -1, + -1, -1, -1, -1, -1, 143, 72, 73, 74, 75, + 76, 77, 78, 79, 80, 81, 82, 83, -1, -1, + -1, -1, 88, 89, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 101, -1, -1, -1, -1, + 44, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 122, -1, 124, 125, + 126, 127, 128, 129, 130, 131, 132, 133, 72, 73, + 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, + -1, -1, -1, -1, 88, 89, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 101, -1, -1, + -1, -1, 44, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 122, -1, + 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, + 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, + 82, 83, -1, -1, -1, -1, 88, 89, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 101, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 101, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 122, -1, 124, 125, 126, 127, 128, 129, 130, - 131, 132, 133, -1, -1, -1, -1, -1, -1, -1, - -1, 142, 72, 73, 74, 75, 76, 77, 78, 79, + 122, -1, 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, -1, -1, -1, -1, 88, 89, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 101, -1, -1, -1, -1, -1, -1, -1, -1, @@ -5118,290 +6243,321 @@ static const yytype_int16 yycheck[] = 128, 129, 130, 131, 132, 133, -1, -1, -1, -1, -1, -1, -1, -1, 142, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, -1, -1, -1, - -1, 88, 89, -1, -1, -1, 93, -1, -1, -1, + -1, 88, 89, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 101, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 122, -1, 124, 125, 126, - 127, 128, 129, 130, 131, 132, 133, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 83, -1, - -1, -1, -1, 88, 89, -1, -1, -1, 93, -1, - -1, -1, -1, -1, -1, -1, 101, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 122, -1, 124, - 125, 126, 127, 128, 129, 130, 131, 132, 133, 72, - 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, - 83, -1, -1, -1, -1, 88, 89, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 101, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 122, - -1, 124, 125, 126, 127, 128, 129, 130, 131, 132, - 133, 72, 73, 74, 75, 76, 77, 78, 79, 80, - 81, 82, 83, -1, -1, -1, -1, 88, 89, -1, + 127, 128, 129, 130, 131, 132, 133, -1, -1, -1, + -1, -1, -1, -1, -1, 142, 72, 73, 74, 75, + 76, 77, 78, 79, 80, 81, 82, 83, -1, -1, + -1, -1, 88, 89, -1, -1, -1, 93, -1, -1, + -1, -1, -1, -1, -1, 101, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 101, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 122, -1, 124, 125, + 126, 127, 128, 129, 130, 131, 132, 133, 72, 73, + 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, + -1, -1, -1, -1, 88, 89, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 101, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 124, 125, 126, 127, 128, 129, 130, - 131, 132, 133 + -1, -1, -1, -1, -1, -1, -1, -1, 122, -1, + 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, + 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, + 82, 83, -1, -1, -1, -1, 88, 89, 72, 73, + 74, 75, 76, 77, -1, -1, 80, 81, -1, 101, + -1, -1, -1, -1, 88, 89, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 101, -1, -1, + -1, -1, 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, -1, -1, -1, -1, -1, -1, -1, -1, + 124, 125, 126, 127, 128, 129, 130, 131, 132, 133 }; - /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing - symbol of state STATE-NUM. */ +/* YYSTOS[STATE-NUM] -- The symbol kind of the accessing symbol of + state STATE-NUM. */ static const yytype_int16 yystos[] = { - 0, 150, 151, 0, 1, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 19, 21, 22, 23, 24, - 30, 31, 32, 33, 34, 35, 36, 39, 45, 46, - 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, - 58, 59, 60, 63, 66, 67, 69, 70, 71, 84, - 85, 91, 94, 95, 97, 98, 100, 104, 106, 107, - 108, 110, 111, 112, 114, 134, 135, 136, 152, 153, - 154, 159, 161, 163, 164, 165, 168, 169, 172, 173, - 175, 176, 177, 179, 180, 189, 203, 220, 241, 242, - 252, 253, 254, 258, 259, 260, 266, 267, 268, 270, - 271, 272, 273, 274, 275, 311, 324, 154, 21, 22, + 0, 150, 151, 1, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 19, 21, 22, 23, 24, 30, + 31, 32, 33, 34, 35, 36, 39, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 58, + 59, 60, 63, 66, 67, 69, 70, 71, 84, 85, + 91, 94, 95, 97, 98, 100, 104, 106, 107, 108, + 110, 111, 112, 114, 134, 135, 136, 152, 153, 154, + 160, 161, 163, 166, 168, 170, 171, 174, 175, 177, + 178, 179, 181, 182, 191, 205, 222, 243, 244, 275, + 276, 277, 281, 282, 283, 289, 290, 291, 293, 294, + 295, 296, 297, 298, 334, 347, 0, 154, 21, 22, 30, 31, 32, 39, 51, 55, 69, 88, 91, 94, - 134, 164, 165, 181, 182, 203, 220, 272, 275, 311, - 182, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 134, 166, 168, 183, 184, 205, 222, 295, 298, 334, + 184, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 45, 46, 47, 48, 49, 50, 51, 52, 55, 70, 71, 72, 73, 74, 75, 76, 77, 80, 81, 86, 87, 88, 89, 100, 101, 102, 124, 125, 126, 127, 128, 129, 130, 131, 132, - 133, 135, 136, 143, 144, 183, 187, 188, 274, 306, - 204, 91, 163, 164, 165, 167, 180, 189, 220, 272, - 273, 275, 167, 210, 212, 69, 91, 173, 180, 220, - 225, 272, 275, 33, 34, 35, 36, 48, 49, 50, - 51, 55, 106, 183, 184, 185, 268, 115, 118, 119, - 146, 148, 167, 262, 263, 264, 317, 321, 322, 323, - 51, 69, 100, 102, 103, 135, 172, 189, 195, 198, - 201, 254, 309, 310, 195, 195, 144, 192, 193, 196, - 197, 324, 192, 196, 144, 318, 184, 155, 138, 189, - 220, 189, 189, 189, 55, 1, 94, 157, 158, 159, - 174, 175, 324, 205, 207, 190, 201, 309, 324, 189, - 308, 309, 324, 91, 142, 179, 220, 272, 275, 208, - 53, 54, 56, 63, 107, 183, 269, 63, 64, 65, - 116, 117, 255, 256, 61, 255, 62, 255, 63, 255, - 63, 255, 58, 59, 168, 189, 189, 317, 323, 40, + 133, 135, 136, 143, 144, 185, 189, 190, 297, 329, + 206, 91, 163, 166, 168, 169, 182, 222, 295, 296, + 298, 169, 212, 214, 69, 91, 175, 182, 222, 227, + 295, 298, 33, 34, 35, 36, 48, 49, 50, 51, + 55, 106, 185, 186, 187, 291, 115, 118, 119, 146, + 148, 169, 285, 286, 287, 340, 344, 345, 346, 51, + 69, 100, 102, 103, 135, 174, 191, 197, 200, 203, + 277, 332, 333, 197, 197, 144, 194, 195, 198, 199, + 347, 194, 199, 144, 341, 186, 155, 138, 191, 222, + 191, 191, 191, 55, 1, 94, 157, 158, 160, 176, + 177, 347, 207, 209, 192, 203, 332, 347, 191, 331, + 332, 347, 91, 142, 181, 222, 295, 298, 210, 53, + 54, 56, 63, 69, 107, 185, 292, 63, 64, 65, + 116, 117, 278, 279, 61, 278, 62, 278, 63, 278, + 63, 278, 58, 59, 170, 191, 191, 340, 346, 40, 41, 42, 43, 44, 37, 38, 51, 53, 54, 55, 56, 69, 83, 94, 100, 101, 102, 103, 128, 131, - 144, 278, 279, 280, 281, 282, 285, 286, 287, 288, - 290, 291, 292, 293, 295, 296, 297, 300, 301, 302, - 303, 304, 324, 278, 280, 28, 239, 121, 142, 94, - 100, 176, 121, 72, 73, 74, 75, 76, 77, 78, - 79, 80, 81, 82, 83, 88, 89, 93, 101, 122, - 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, - 90, 105, 140, 147, 315, 90, 315, 316, 26, 138, - 243, 254, 92, 92, 192, 196, 243, 163, 51, 55, - 181, 58, 59, 279, 125, 276, 90, 140, 315, 219, - 307, 90, 147, 314, 156, 157, 55, 278, 278, 16, - 221, 321, 121, 90, 140, 315, 92, 92, 221, 167, - 167, 55, 90, 140, 315, 25, 107, 142, 265, 317, - 115, 264, 20, 246, 321, 57, 57, 189, 189, 189, - 93, 142, 199, 200, 324, 57, 199, 200, 85, 194, - 195, 201, 309, 324, 195, 163, 317, 319, 163, 322, - 160, 138, 157, 90, 315, 92, 159, 174, 145, 317, - 323, 319, 159, 319, 141, 200, 320, 323, 200, 320, - 139, 320, 55, 176, 177, 178, 142, 90, 140, 315, - 144, 237, 290, 295, 63, 255, 257, 261, 262, 63, - 256, 61, 62, 63, 63, 101, 101, 154, 167, 167, - 167, 167, 159, 163, 163, 57, 121, 57, 321, 294, - 85, 290, 295, 121, 156, 189, 142, 305, 324, 51, - 142, 305, 321, 142, 289, 189, 142, 289, 51, 142, - 289, 51, 121, 156, 240, 100, 168, 189, 201, 202, - 174, 142, 179, 142, 161, 162, 168, 180, 189, 191, - 202, 220, 275, 189, 189, 189, 189, 189, 189, 189, - 189, 189, 189, 189, 189, 189, 189, 51, 189, 189, - 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, - 51, 52, 55, 187, 192, 312, 313, 194, 201, 51, - 52, 55, 187, 192, 312, 51, 55, 312, 245, 244, - 162, 189, 191, 162, 191, 99, 170, 217, 277, 216, - 51, 55, 181, 312, 194, 312, 156, 163, 166, 15, - 13, 248, 324, 121, 121, 157, 16, 51, 55, 194, - 51, 55, 157, 27, 222, 321, 222, 51, 55, 194, - 51, 55, 214, 186, 157, 246, 189, 201, 15, 189, - 189, 189, 318, 100, 189, 198, 309, 189, 310, 319, - 145, 317, 200, 200, 319, 145, 184, 152, 139, 191, - 319, 159, 206, 309, 176, 178, 51, 55, 194, 51, - 55, 290, 209, 142, 63, 157, 262, 189, 189, 51, - 69, 100, 226, 295, 319, 319, 142, 172, 189, 15, - 51, 69, 282, 287, 304, 85, 288, 293, 300, 302, - 295, 297, 302, 51, 295, 172, 189, 15, 79, 126, - 231, 232, 324, 189, 200, 319, 178, 142, 44, 121, - 44, 90, 140, 315, 318, 92, 92, 192, 196, 141, - 200, 92, 92, 193, 196, 193, 196, 231, 231, 171, - 321, 167, 156, 141, 15, 319, 183, 189, 202, 249, - 324, 18, 224, 324, 17, 223, 224, 92, 92, 141, - 92, 92, 224, 211, 213, 141, 167, 184, 139, 15, - 200, 221, 189, 199, 85, 309, 139, 319, 320, 141, - 234, 318, 29, 113, 238, 139, 142, 292, 319, 142, - 85, 44, 44, 305, 321, 142, 289, 142, 289, 142, - 289, 142, 289, 289, 44, 44, 228, 230, 233, 281, - 283, 284, 287, 295, 296, 298, 299, 302, 304, 156, - 100, 189, 178, 159, 189, 51, 55, 194, 51, 55, - 57, 123, 162, 191, 168, 191, 170, 92, 162, 191, - 162, 191, 170, 243, 239, 156, 157, 231, 218, 321, - 15, 93, 250, 324, 157, 14, 251, 324, 167, 15, - 92, 15, 157, 157, 222, 189, 157, 319, 200, 145, - 146, 156, 157, 227, 142, 100, 319, 189, 189, 295, - 302, 295, 295, 189, 189, 234, 234, 91, 220, 142, - 305, 305, 142, 229, 220, 142, 229, 142, 229, 15, - 189, 141, 189, 189, 162, 191, 15, 139, 157, 156, - 91, 180, 220, 272, 275, 221, 157, 221, 15, 15, - 215, 224, 246, 247, 51, 235, 236, 291, 15, 139, - 295, 295, 142, 292, 289, 142, 289, 289, 289, 126, - 126, 55, 90, 283, 287, 142, 228, 229, 299, 302, - 295, 298, 302, 295, 139, 15, 55, 90, 140, 315, - 157, 157, 157, 142, 318, 142, 295, 142, 295, 51, - 55, 305, 142, 229, 142, 229, 142, 229, 142, 229, - 229, 51, 55, 194, 51, 55, 248, 223, 15, 236, - 295, 289, 295, 302, 295, 295, 141, 229, 142, 229, - 229, 229, 295, 229 + 144, 301, 302, 303, 304, 305, 308, 309, 310, 311, + 313, 314, 315, 316, 318, 319, 320, 323, 324, 325, + 326, 327, 347, 301, 303, 28, 242, 121, 142, 94, + 100, 178, 121, 25, 72, 73, 74, 75, 76, 77, + 78, 79, 80, 81, 82, 83, 88, 89, 93, 101, + 122, 124, 125, 126, 127, 128, 129, 130, 131, 132, + 133, 90, 105, 140, 147, 338, 90, 338, 339, 26, + 138, 246, 277, 92, 92, 194, 199, 246, 163, 51, + 55, 183, 58, 59, 302, 125, 299, 90, 140, 338, + 221, 330, 90, 147, 337, 156, 157, 55, 301, 301, + 16, 223, 344, 121, 90, 140, 338, 92, 92, 223, + 169, 169, 55, 90, 140, 338, 25, 107, 142, 288, + 340, 115, 287, 20, 248, 344, 57, 57, 191, 191, + 191, 93, 142, 201, 202, 347, 57, 201, 202, 85, + 196, 197, 203, 332, 347, 197, 163, 340, 342, 163, + 345, 159, 138, 157, 90, 338, 92, 160, 176, 145, + 340, 346, 342, 157, 342, 141, 202, 343, 346, 202, + 343, 139, 343, 55, 178, 179, 180, 142, 90, 140, + 338, 144, 239, 313, 318, 63, 278, 280, 284, 285, + 63, 279, 61, 62, 63, 63, 101, 101, 154, 169, + 169, 169, 169, 160, 163, 163, 57, 121, 57, 344, + 317, 85, 313, 318, 121, 156, 191, 142, 328, 347, + 51, 142, 328, 344, 142, 312, 191, 142, 312, 51, + 142, 312, 34, 51, 121, 156, 241, 100, 170, 191, + 203, 204, 176, 142, 181, 142, 161, 162, 170, 182, + 191, 193, 204, 222, 298, 165, 191, 191, 191, 191, + 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, + 164, 191, 191, 191, 191, 191, 191, 191, 191, 191, + 191, 191, 191, 51, 52, 55, 189, 194, 335, 336, + 196, 203, 51, 52, 55, 189, 194, 335, 51, 55, + 335, 247, 245, 162, 191, 193, 162, 193, 99, 173, + 219, 300, 218, 51, 55, 183, 335, 196, 335, 156, + 163, 167, 15, 13, 271, 347, 121, 121, 157, 16, + 51, 55, 196, 51, 55, 157, 27, 224, 344, 224, + 51, 55, 196, 51, 55, 216, 188, 157, 25, 248, + 191, 203, 15, 191, 191, 191, 341, 100, 191, 200, + 332, 191, 333, 342, 145, 340, 202, 202, 342, 145, + 186, 152, 139, 193, 342, 160, 208, 332, 178, 180, + 51, 55, 196, 51, 55, 313, 211, 142, 63, 157, + 285, 191, 191, 51, 69, 100, 228, 318, 342, 342, + 142, 174, 191, 15, 51, 69, 305, 310, 327, 85, + 311, 316, 323, 325, 318, 320, 325, 51, 318, 174, + 191, 15, 79, 126, 233, 235, 347, 191, 202, 342, + 180, 142, 44, 121, 44, 90, 140, 338, 34, 35, + 36, 51, 55, 91, 97, 98, 100, 102, 127, 254, + 255, 257, 258, 259, 260, 263, 264, 265, 267, 268, + 269, 270, 276, 290, 294, 254, 341, 92, 92, 194, + 199, 141, 202, 92, 92, 195, 199, 195, 199, 233, + 233, 172, 344, 169, 156, 141, 15, 342, 185, 191, + 204, 272, 347, 18, 226, 347, 17, 225, 226, 92, + 92, 141, 92, 92, 226, 213, 215, 141, 169, 186, + 139, 254, 15, 202, 223, 191, 201, 85, 332, 139, + 342, 343, 141, 236, 341, 29, 113, 240, 139, 142, + 315, 342, 142, 85, 44, 44, 328, 344, 142, 312, + 142, 312, 142, 312, 142, 312, 312, 44, 44, 230, + 232, 234, 304, 306, 307, 310, 318, 319, 321, 322, + 325, 327, 156, 100, 191, 180, 160, 191, 51, 55, + 196, 51, 55, 57, 55, 51, 141, 257, 261, 262, + 263, 51, 139, 266, 267, 269, 51, 34, 51, 51, + 257, 263, 142, 93, 126, 142, 90, 142, 57, 123, + 162, 193, 170, 193, 173, 92, 162, 193, 162, 193, + 173, 246, 242, 156, 157, 233, 220, 344, 15, 93, + 273, 347, 157, 14, 274, 347, 169, 15, 92, 15, + 157, 157, 224, 40, 41, 223, 191, 157, 342, 202, + 145, 146, 156, 157, 229, 142, 100, 342, 191, 191, + 318, 325, 318, 318, 191, 191, 236, 236, 91, 222, + 142, 328, 328, 142, 231, 222, 142, 231, 142, 231, + 15, 191, 141, 257, 141, 142, 142, 139, 142, 142, + 142, 51, 259, 256, 257, 55, 268, 269, 191, 191, + 162, 193, 15, 139, 157, 156, 91, 182, 222, 295, + 298, 223, 157, 223, 15, 15, 217, 169, 169, 157, + 226, 248, 249, 51, 237, 238, 314, 15, 139, 318, + 318, 142, 315, 312, 142, 312, 312, 312, 126, 126, + 55, 90, 306, 310, 142, 230, 231, 322, 325, 318, + 321, 325, 318, 257, 263, 262, 269, 256, 142, 139, + 15, 55, 90, 140, 338, 157, 157, 157, 223, 223, + 25, 226, 250, 142, 341, 142, 318, 142, 318, 51, + 55, 328, 142, 231, 142, 231, 142, 231, 142, 231, + 231, 142, 142, 257, 51, 55, 196, 51, 55, 271, + 225, 15, 157, 157, 254, 15, 238, 318, 312, 318, + 325, 318, 318, 262, 263, 141, 250, 250, 251, 252, + 253, 231, 142, 231, 231, 231, 142, 15, 15, 223, + 40, 41, 318, 157, 169, 169, 231, 250, 223, 223, + 157, 157, 250, 250 }; - /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ +/* YYR1[RULE-NUM] -- Symbol kind of the left-hand side of rule RULE-NUM. */ static const yytype_int16 yyr1[] = { - 0, 149, 151, 150, 152, 153, 153, 153, 153, 154, - 155, 154, 156, 157, 158, 158, 158, 158, 160, 159, - 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, - 159, 159, 159, 159, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 162, 162, 162, 163, - 163, 163, 163, 163, 163, 164, 166, 165, 167, 168, - 168, 169, 169, 171, 170, 172, 172, 172, 172, 172, - 172, 172, 172, 172, 172, 172, 173, 173, 174, 174, - 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, - 176, 176, 177, 177, 178, 178, 179, 179, 179, 179, - 179, 179, 179, 179, 180, 180, 180, 180, 180, 180, - 180, 180, 180, 181, 181, 182, 182, 182, 183, 183, - 183, 183, 183, 184, 184, 185, 186, 185, 187, 187, - 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, - 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, - 187, 187, 187, 187, 187, 187, 187, 187, 188, 188, - 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, - 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, - 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, - 188, 188, 188, 188, 188, 188, 188, 188, 189, 189, - 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, + 0, 149, 150, 151, 152, 153, 153, 153, 153, 154, + 155, 154, 156, 157, 158, 158, 158, 158, 159, 160, + 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 161, 161, 161, 161, 161, 161, 161, + 161, 161, 161, 161, 161, 162, 162, 162, 163, 163, + 163, 163, 163, 164, 163, 165, 163, 163, 166, 167, + 168, 169, 170, 170, 171, 171, 172, 173, 174, 174, + 174, 174, 174, 174, 174, 174, 174, 174, 174, 175, + 175, 176, 176, 177, 177, 177, 177, 177, 177, 177, + 177, 177, 177, 178, 178, 179, 179, 180, 180, 181, + 181, 181, 181, 181, 181, 181, 181, 182, 182, 182, + 182, 182, 182, 182, 182, 182, 183, 183, 184, 184, + 184, 185, 185, 185, 185, 185, 186, 186, 187, 188, + 187, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, - 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, - 189, 189, 189, 189, 189, 189, 189, 189, 189, 190, - 190, 190, 190, 191, 191, 192, 192, 192, 193, 193, - 194, 194, 194, 194, 194, 195, 195, 195, 195, 195, - 197, 196, 198, 198, 199, 199, 200, 201, 201, 201, - 201, 201, 202, 202, 202, 203, 203, 203, 203, 203, - 203, 203, 203, 203, 204, 203, 205, 206, 203, 207, - 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, - 203, 203, 203, 208, 209, 203, 203, 203, 210, 211, - 203, 212, 213, 203, 203, 203, 214, 215, 203, 216, - 203, 217, 218, 203, 219, 203, 203, 203, 203, 203, - 203, 203, 220, 221, 221, 221, 222, 222, 223, 223, - 224, 224, 225, 225, 226, 226, 226, 226, 226, 226, - 226, 226, 227, 226, 228, 228, 228, 228, 229, 229, - 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, - 230, 230, 230, 230, 230, 231, 231, 233, 232, 232, - 232, 234, 234, 235, 235, 236, 236, 237, 237, 238, - 238, 240, 239, 241, 241, 241, 241, 242, 242, 242, - 242, 242, 242, 242, 242, 242, 244, 243, 245, 243, - 246, 247, 247, 248, 248, 249, 249, 249, 250, 250, - 251, 251, 252, 252, 252, 252, 253, 253, 254, 254, - 254, 254, 255, 255, 256, 257, 256, 256, 256, 258, - 258, 259, 259, 260, 261, 261, 262, 262, 263, 263, - 264, 265, 264, 266, 266, 267, 267, 268, 269, 269, - 269, 269, 269, 269, 270, 270, 271, 271, 271, 271, - 272, 272, 272, 272, 272, 273, 273, 274, 274, 274, - 274, 274, 274, 274, 274, 275, 275, 276, 277, 276, - 278, 278, 279, 279, 279, 280, 280, 280, 280, 281, - 281, 282, 282, 283, 283, 284, 284, 285, 285, 286, - 286, 287, 287, 288, 288, 288, 288, 289, 289, 290, - 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, - 290, 290, 290, 290, 291, 291, 291, 291, 291, 292, - 292, 293, 294, 293, 295, 295, 296, 297, 298, 299, - 299, 300, 300, 301, 301, 302, 302, 303, 303, 304, - 304, 305, 305, 306, 307, 306, 308, 308, 309, 309, - 310, 310, 310, 310, 310, 310, 310, 310, 311, 311, - 311, 312, 312, 312, 312, 313, 313, 313, 314, 314, - 315, 315, 316, 316, 317, 317, 318, 318, 319, 320, - 320, 320, 321, 321, 322, 322, 323, 323, 324 + 189, 190, 190, 190, 190, 190, 190, 190, 190, 190, + 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, + 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, + 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, + 190, 191, 191, 191, 191, 191, 191, 191, 191, 191, + 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, + 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, + 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, + 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, + 191, 191, 192, 192, 192, 192, 193, 193, 194, 194, + 194, 195, 195, 196, 196, 196, 196, 196, 197, 197, + 197, 197, 197, 198, 199, 200, 200, 201, 201, 202, + 203, 203, 203, 203, 203, 203, 204, 204, 204, 205, + 205, 205, 205, 205, 205, 205, 205, 206, 205, 207, + 208, 205, 209, 205, 205, 205, 205, 205, 205, 205, + 205, 205, 205, 205, 205, 205, 210, 211, 205, 205, + 205, 212, 213, 205, 214, 215, 205, 205, 205, 205, + 205, 205, 216, 217, 205, 218, 205, 219, 220, 205, + 221, 205, 205, 205, 205, 205, 205, 205, 222, 223, + 223, 223, 224, 224, 225, 225, 226, 226, 227, 227, + 228, 228, 228, 228, 228, 228, 228, 228, 229, 228, + 230, 230, 230, 230, 231, 231, 232, 232, 232, 232, + 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, + 232, 233, 233, 234, 235, 235, 235, 236, 236, 237, + 237, 238, 238, 239, 239, 240, 240, 241, 242, 243, + 243, 243, 243, 244, 244, 244, 244, 244, 244, 244, + 244, 244, 245, 246, 247, 246, 248, 249, 249, 250, + 251, 250, 252, 250, 253, 250, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 255, 255, 256, 256, 257, + 257, 258, 258, 259, 259, 259, 259, 259, 259, 259, + 259, 259, 259, 259, 260, 260, 261, 261, 261, 261, + 261, 261, 262, 262, 263, 263, 264, 264, 264, 265, + 265, 266, 266, 266, 267, 267, 268, 268, 269, 269, + 269, 270, 271, 271, 272, 272, 272, 273, 273, 274, + 274, 275, 275, 275, 275, 276, 276, 277, 277, 277, + 277, 278, 278, 279, 280, 279, 279, 279, 281, 281, + 282, 282, 283, 284, 284, 285, 285, 286, 286, 287, + 288, 287, 289, 289, 290, 290, 290, 291, 292, 292, + 292, 292, 292, 292, 293, 293, 294, 294, 294, 294, + 295, 295, 295, 295, 295, 296, 296, 297, 297, 297, + 297, 297, 297, 297, 297, 297, 298, 298, 299, 300, + 299, 301, 301, 302, 302, 302, 303, 303, 303, 303, + 304, 304, 305, 305, 306, 306, 307, 307, 308, 308, + 309, 309, 310, 310, 311, 311, 311, 311, 312, 312, + 312, 313, 313, 313, 313, 313, 313, 313, 313, 313, + 313, 313, 313, 313, 313, 313, 314, 314, 314, 314, + 314, 315, 315, 316, 317, 316, 318, 318, 319, 320, + 321, 322, 322, 323, 323, 324, 324, 325, 325, 326, + 326, 327, 327, 327, 328, 328, 328, 329, 330, 329, + 331, 331, 332, 332, 333, 333, 333, 333, 333, 333, + 333, 333, 334, 334, 334, 335, 335, 335, 335, 336, + 336, 336, 337, 337, 338, 338, 339, 339, 340, 340, + 341, 341, 342, 343, 343, 343, 344, 344, 345, 345, + 346, 346, 347 }; - /* YYR2[YYN] -- Number of symbols on the right hand side of rule YYN. */ +/* YYR2[RULE-NUM] -- Number of symbols on the right-hand side of rule RULE-NUM. */ static const yytype_int8 yyr2[] = { 0, 2, 0, 2, 2, 1, 1, 3, 2, 1, 0, 5, 4, 2, 1, 1, 3, 2, 0, 4, 2, 3, 3, 3, 3, 3, 4, 1, 3, 3, - 3, 3, 3, 1, 3, 3, 6, 5, 5, 5, - 5, 4, 6, 4, 6, 3, 1, 3, 1, 1, - 3, 3, 3, 2, 1, 2, 0, 5, 1, 1, - 1, 1, 4, 0, 5, 2, 3, 4, 5, 4, - 5, 2, 2, 2, 2, 2, 1, 3, 1, 3, - 1, 2, 3, 5, 2, 4, 2, 4, 1, 3, - 1, 3, 2, 3, 1, 2, 1, 4, 3, 3, - 3, 3, 2, 1, 1, 4, 3, 3, 3, 3, - 2, 1, 1, 1, 1, 2, 1, 3, 1, 1, - 1, 1, 1, 1, 1, 1, 0, 4, 1, 1, + 3, 3, 1, 3, 3, 6, 5, 5, 5, 5, + 4, 6, 4, 6, 3, 1, 3, 1, 1, 3, + 3, 3, 2, 0, 4, 0, 4, 1, 2, 0, + 5, 1, 1, 1, 1, 4, 0, 5, 2, 3, + 4, 5, 4, 5, 2, 2, 2, 2, 2, 1, + 3, 1, 3, 1, 2, 3, 5, 2, 4, 2, + 4, 1, 3, 1, 3, 2, 3, 1, 2, 1, + 4, 3, 3, 3, 3, 2, 1, 1, 4, 3, + 3, 3, 3, 2, 1, 1, 1, 1, 2, 1, + 3, 1, 1, 1, 1, 1, 1, 1, 1, 0, + 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, - 6, 5, 5, 5, 5, 4, 3, 3, 2, 2, - 3, 2, 2, 3, 3, 3, 3, 3, 3, 4, - 4, 2, 2, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, - 3, 3, 6, 6, 4, 6, 4, 6, 1, 1, - 2, 4, 2, 1, 3, 3, 5, 3, 1, 1, - 1, 2, 2, 4, 2, 1, 2, 2, 4, 1, - 0, 2, 2, 1, 2, 1, 2, 1, 1, 2, - 3, 4, 3, 4, 2, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 0, 4, 0, 0, 5, 0, - 3, 3, 3, 2, 3, 3, 1, 2, 4, 3, - 2, 1, 2, 0, 0, 5, 6, 6, 0, 0, - 7, 0, 0, 7, 5, 4, 0, 0, 9, 0, - 6, 0, 0, 8, 0, 5, 4, 4, 1, 1, - 1, 1, 1, 1, 1, 2, 1, 1, 1, 5, - 1, 2, 1, 1, 1, 4, 6, 3, 5, 2, - 4, 1, 0, 4, 4, 2, 2, 1, 2, 0, - 6, 8, 4, 6, 4, 3, 6, 2, 4, 6, - 2, 4, 2, 4, 1, 1, 1, 0, 4, 1, - 4, 1, 4, 1, 3, 1, 1, 4, 1, 3, - 3, 0, 5, 2, 4, 5, 5, 2, 4, 4, - 3, 3, 3, 2, 1, 4, 0, 5, 0, 5, - 5, 1, 1, 6, 1, 1, 1, 1, 2, 1, - 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, - 2, 3, 1, 2, 1, 0, 4, 1, 2, 2, - 3, 2, 3, 1, 1, 2, 1, 2, 1, 2, - 1, 0, 4, 2, 3, 1, 4, 2, 1, 1, + 1, 3, 3, 6, 5, 5, 5, 5, 4, 3, + 3, 2, 2, 3, 2, 2, 3, 3, 3, 3, + 3, 3, 4, 4, 2, 2, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, + 2, 3, 3, 3, 3, 6, 6, 4, 6, 4, + 6, 1, 1, 2, 4, 2, 1, 3, 3, 5, + 3, 1, 1, 1, 2, 2, 4, 2, 1, 2, + 2, 4, 1, 0, 2, 2, 1, 2, 1, 2, + 1, 1, 2, 3, 3, 4, 3, 4, 2, 1, + 1, 1, 1, 1, 1, 1, 1, 0, 4, 0, + 0, 5, 0, 3, 3, 3, 2, 3, 3, 1, + 2, 4, 3, 2, 1, 2, 0, 0, 5, 6, + 6, 0, 0, 7, 0, 0, 7, 5, 4, 9, + 11, 11, 0, 0, 9, 0, 6, 0, 0, 8, + 0, 5, 4, 4, 1, 1, 1, 1, 1, 1, + 1, 2, 1, 1, 1, 5, 1, 2, 1, 1, + 1, 4, 6, 3, 5, 2, 4, 1, 0, 4, + 4, 2, 2, 1, 2, 0, 6, 8, 4, 6, + 4, 3, 6, 2, 4, 6, 2, 4, 2, 4, + 1, 1, 1, 0, 4, 1, 4, 1, 4, 1, + 3, 1, 1, 4, 1, 3, 3, 0, 5, 2, + 4, 5, 5, 2, 4, 4, 3, 3, 3, 2, + 1, 4, 0, 5, 0, 5, 5, 1, 1, 1, + 0, 6, 0, 8, 0, 8, 1, 2, 2, 4, + 1, 3, 1, 3, 1, 2, 3, 1, 3, 1, + 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 2, 3, 2, 1, 3, 5, 1, + 3, 5, 1, 3, 2, 1, 1, 3, 2, 3, + 2, 1, 3, 1, 1, 3, 3, 2, 2, 2, + 1, 1, 6, 1, 1, 1, 1, 2, 1, 2, + 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, + 3, 1, 2, 1, 0, 4, 1, 2, 2, 3, + 2, 3, 1, 1, 2, 1, 2, 1, 2, 1, + 0, 4, 2, 3, 1, 4, 2, 2, 1, 1, 1, 1, 1, 2, 2, 3, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 0, 0, 4, - 1, 1, 3, 5, 3, 1, 2, 4, 2, 2, - 2, 2, 1, 2, 1, 1, 3, 1, 3, 1, - 1, 2, 1, 4, 2, 2, 1, 2, 0, 6, - 8, 4, 6, 4, 6, 2, 4, 6, 2, 4, - 2, 4, 1, 0, 1, 1, 1, 1, 1, 1, - 1, 1, 0, 4, 1, 3, 2, 2, 2, 1, - 3, 1, 3, 1, 1, 2, 1, 1, 1, 2, - 1, 2, 1, 1, 0, 4, 1, 2, 1, 3, - 3, 3, 2, 2, 3, 3, 2, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 0, 1, 0, 2, 2, 0, - 1, 1, 1, 1, 1, 1, 1, 2, 0 + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, + 4, 1, 1, 3, 5, 3, 1, 2, 4, 2, + 2, 2, 2, 1, 2, 1, 1, 3, 1, 3, + 1, 1, 2, 1, 4, 2, 2, 1, 2, 1, + 0, 6, 8, 4, 6, 4, 6, 2, 4, 6, + 2, 4, 2, 4, 1, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 0, 4, 1, 3, 2, 2, + 2, 1, 3, 1, 3, 1, 1, 2, 1, 1, + 1, 2, 2, 1, 2, 1, 1, 1, 0, 4, + 1, 2, 1, 3, 3, 3, 2, 2, 3, 3, + 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, + 0, 2, 2, 0, 1, 1, 1, 1, 1, 1, + 1, 2, 0 }; +enum { YYENOMEM = -2 }; + #define yyerrok (yyerrstatus = 0) #define yyclearin (yychar = YYEMPTY) -#define YYEMPTY (-2) -#define YYEOF 0 #define YYACCEPT goto yyacceptlab #define YYABORT goto yyabortlab #define YYERROR goto yyerrorlab +#define YYNOMEM goto yyexhaustedlab #define YYRECOVERING() (!!yyerrstatus) @@ -5418,15 +6574,40 @@ static const yytype_int8 yyr2[] = } \ else \ { \ - yyerror (p, YY_("syntax error: cannot back up")); \ + yyerror (&yylloc, p, YY_("syntax error: cannot back up")); \ YYERROR; \ } \ while (0) -/* Error token number */ -#define YYTERROR 1 -#define YYERRCODE 256 +/* Backward compatibility with an undocumented macro. + Use YYerror or YYUNDEF. */ +#define YYERRCODE YYUNDEF +/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N]. + If N is 0, then set CURRENT to the empty location which ends + the previous symbol: RHS[0] (always defined). */ + +#ifndef YYLLOC_DEFAULT +# define YYLLOC_DEFAULT(Current, Rhs, N) \ + do \ + if (N) \ + { \ + (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \ + (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \ + (Current).last_line = YYRHSLOC (Rhs, N).last_line; \ + (Current).last_column = YYRHSLOC (Rhs, N).last_column; \ + } \ + else \ + { \ + (Current).first_line = (Current).last_line = \ + YYRHSLOC (Rhs, 0).last_line; \ + (Current).first_column = (Current).last_column = \ + YYRHSLOC (Rhs, 0).last_column; \ + } \ + while (0) +#endif + +#define YYRHSLOC(Rhs, K) ((Rhs)[K]) /* Enable debugging if requested. */ @@ -5443,19 +6624,73 @@ do { \ YYFPRINTF Args; \ } while (0) -/* This macro is provided for backward compatibility. */ -#ifndef YY_LOCATION_PRINT -# define YY_LOCATION_PRINT(File, Loc) ((void) 0) -#endif +/* YYLOCATION_PRINT -- Print the location on the stream. + This macro was not mandated originally: define only if we know + we won't break user code: when these are the locations we know. */ + +# ifndef YYLOCATION_PRINT + +# if defined YY_LOCATION_PRINT -# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \ + /* Temporary convenience wrapper in case some people defined the + undocumented and private YY_LOCATION_PRINT macros. */ +# define YYLOCATION_PRINT(File, Loc, p) YY_LOCATION_PRINT(File, *(Loc), p) + +# elif defined YYLTYPE_IS_TRIVIAL && YYLTYPE_IS_TRIVIAL + +/* Print *YYLOCP on YYO. Private, do not rely on its existence. */ + +YY_ATTRIBUTE_UNUSED +static int +yy_location_print_ (FILE *yyo, YYLTYPE const * const yylocp) +{ + int res = 0; + int end_col = 0 != yylocp->last_column ? yylocp->last_column - 1 : 0; + if (0 <= yylocp->first_line) + { + res += YYFPRINTF (yyo, "%d", yylocp->first_line); + if (0 <= yylocp->first_column) + res += YYFPRINTF (yyo, ".%d", yylocp->first_column); + } + if (0 <= yylocp->last_line) + { + if (yylocp->first_line < yylocp->last_line) + { + res += YYFPRINTF (yyo, "-%d", yylocp->last_line); + if (0 <= end_col) + res += YYFPRINTF (yyo, ".%d", end_col); + } + else if (0 <= end_col && yylocp->first_column < end_col) + res += YYFPRINTF (yyo, "-%d", end_col); + } + return res; +} + +# define YYLOCATION_PRINT yy_location_print_ + + /* Temporary convenience wrapper in case some people defined the + undocumented and private YY_LOCATION_PRINT macros. */ +# define YY_LOCATION_PRINT(File, Loc, p) YYLOCATION_PRINT(File, &(Loc), p) + +# else + +# define YYLOCATION_PRINT(File, Loc, p) ((void) 0) + /* Temporary convenience wrapper in case some people defined the + undocumented and private YY_LOCATION_PRINT macros. */ +# define YY_LOCATION_PRINT YYLOCATION_PRINT + +# endif +# endif /* !defined YYLOCATION_PRINT */ + + +# define YY_SYMBOL_PRINT(Title, Kind, Value, Location, p) \ do { \ if (yydebug) \ { \ YYFPRINTF (stderr, "%s ", Title); \ yy_symbol_print (stderr, \ - Type, Value, p); \ + Kind, Value, Location, p); \ YYFPRINTF (stderr, "\n"); \ } \ } while (0) @@ -5466,19 +6701,21 @@ do { \ `-----------------------------------*/ static void -yy_symbol_value_print (FILE *yyo, int yytype, YYSTYPE const * const yyvaluep, parser_state *p) +yy_symbol_value_print (FILE *yyo, + yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep, YYLTYPE const * const yylocationp, parser_state *p) { FILE *yyoutput = yyo; - YYUSE (yyoutput); - YYUSE (p); + YY_USE (yyoutput); + YY_USE (yylocationp); + YY_USE (p); if (!yyvaluep) return; -# ifdef YYPRINT - if (yytype < YYNTOKENS) - YYPRINT (yyo, yytoknum[yytype], *yyvaluep); -# endif YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN - YYUSE (yytype); +switch (yykind) + { + default: + break; + } YY_IGNORE_MAYBE_UNINITIALIZED_END } @@ -5488,12 +6725,15 @@ yy_symbol_value_print (FILE *yyo, int yytype, YYSTYPE const * const yyvaluep, pa `---------------------------*/ static void -yy_symbol_print (FILE *yyo, int yytype, YYSTYPE const * const yyvaluep, parser_state *p) +yy_symbol_print (FILE *yyo, + yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep, YYLTYPE const * const yylocationp, parser_state *p) { YYFPRINTF (yyo, "%s %s (", - yytype < YYNTOKENS ? "token" : "nterm", yytname[yytype]); + yykind < YYNTOKENS ? "token" : "nterm", yysymbol_name (yykind)); - yy_symbol_value_print (yyo, yytype, yyvaluep, p); + YYLOCATION_PRINT (yyo, yylocationp, p); + YYFPRINTF (yyo, ": "); + yy_symbol_value_print (yyo, yykind, yyvaluep, yylocationp, p); YYFPRINTF (yyo, ")"); } @@ -5503,7 +6743,7 @@ yy_symbol_print (FILE *yyo, int yytype, YYSTYPE const * const yyvaluep, parser_s `------------------------------------------------------------------*/ static void -yy_stack_print (yy_state_t *yybottom, yy_state_t *yytop) +yy_stack_print (yy_state_t *yybottom, yy_state_t *yytop, parser_state *p) { YYFPRINTF (stderr, "Stack now"); for (; yybottom <= yytop; yybottom++) @@ -5514,10 +6754,10 @@ yy_stack_print (yy_state_t *yybottom, yy_state_t *yytop) YYFPRINTF (stderr, "\n"); } -# define YY_STACK_PRINT(Bottom, Top) \ +# define YY_STACK_PRINT(Bottom, Top, p) \ do { \ if (yydebug) \ - yy_stack_print ((Bottom), (Top)); \ + yy_stack_print ((Bottom), (Top), p); \ } while (0) @@ -5526,7 +6766,8 @@ do { \ `------------------------------------------------*/ static void -yy_reduce_print (yy_state_t *yyssp, YYSTYPE *yyvsp, int yyrule, parser_state *p) +yy_reduce_print (yy_state_t *yyssp, YYSTYPE *yyvsp, YYLTYPE *yylsp, + int yyrule, parser_state *p) { int yylno = yyrline[yyrule]; int yynrhs = yyr2[yyrule]; @@ -5538,27 +6779,29 @@ yy_reduce_print (yy_state_t *yyssp, YYSTYPE *yyvsp, int yyrule, parser_state *p) { YYFPRINTF (stderr, " $%d = ", yyi + 1); yy_symbol_print (stderr, - yystos[+yyssp[yyi + 1 - yynrhs]], - &yyvsp[(yyi + 1) - (yynrhs)] - , p); + YY_ACCESSING_SYMBOL (+yyssp[yyi + 1 - yynrhs]), + &yyvsp[(yyi + 1) - (yynrhs)], + &(yylsp[(yyi + 1) - (yynrhs)]), p); YYFPRINTF (stderr, "\n"); } } -# define YY_REDUCE_PRINT(Rule) \ +# define YY_REDUCE_PRINT(Rule, p) \ do { \ if (yydebug) \ - yy_reduce_print (yyssp, yyvsp, Rule, p); \ + yy_reduce_print (yyssp, yyvsp, yylsp, Rule, p); \ } while (0) /* Nonzero means print parse trace. It is left uninitialized so that multiple parsers can coexist. */ +#ifndef yydebug int yydebug; +#endif #else /* !YYDEBUG */ -# define YYDPRINTF(Args) -# define YY_SYMBOL_PRINT(Title, Type, Value, Location) -# define YY_STACK_PRINT(Bottom, Top) -# define YY_REDUCE_PRINT(Rule) +# define YYDPRINTF(Args) ((void) 0) +# define YY_SYMBOL_PRINT(Title, Kind, Value, Location, p) +# define YY_STACK_PRINT(Bottom, Top, p) +# define YY_REDUCE_PRINT(Rule, p) #endif /* !YYDEBUG */ @@ -5579,12 +6822,61 @@ int yydebug; #endif -#if YYERROR_VERBOSE +/* Context of a parse error. */ +typedef struct +{ + yy_state_t *yyssp; + yysymbol_kind_t yytoken; + YYLTYPE *yylloc; +} yypcontext_t; -# ifndef yystrlen -# if defined __GLIBC__ && defined _STRING_H -# define yystrlen(S) (YY_CAST (YYPTRDIFF_T, strlen (S))) -# else +/* Put in YYARG at most YYARGN of the expected tokens given the + current YYCTX, and return the number of tokens stored in YYARG. If + YYARG is null, return the number of expected tokens (guaranteed to + be less than YYNTOKENS). Return YYENOMEM on memory exhaustion. + Return 0 if there are more than YYARGN expected tokens, yet fill + YYARG up to YYARGN. */ +static int +yypcontext_expected_tokens (const yypcontext_t *yyctx, + yysymbol_kind_t yyarg[], int yyargn) +{ + /* Actual size of YYARG. */ + int yycount = 0; + int yyn = yypact[+*yyctx->yyssp]; + if (!yypact_value_is_default (yyn)) + { + /* Start YYX at -YYN if negative to avoid negative indexes in + YYCHECK. In other words, skip the first -YYN actions for + this state because they are default actions. */ + int yyxbegin = yyn < 0 ? -yyn : 0; + /* Stay within bounds of both yycheck and yytname. */ + int yychecklim = YYLAST - yyn + 1; + int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS; + int yyx; + for (yyx = yyxbegin; yyx < yyxend; ++yyx) + if (yycheck[yyx + yyn] == yyx && yyx != YYSYMBOL_YYerror + && !yytable_value_is_error (yytable[yyx + yyn])) + { + if (!yyarg) + ++yycount; + else if (yycount == yyargn) + return 0; + else + yyarg[yycount++] = YY_CAST (yysymbol_kind_t, yyx); + } + } + if (yyarg && yycount == 0 && 0 < yyargn) + yyarg[0] = YYSYMBOL_YYEMPTY; + return yycount; +} + + + + +#ifndef yystrlen +# if defined __GLIBC__ && defined _STRING_H +# define yystrlen(S) (YY_CAST (YYPTRDIFF_T, strlen (S))) +# else /* Return the length of YYSTR. */ static YYPTRDIFF_T yystrlen (const char *yystr) @@ -5594,13 +6886,13 @@ yystrlen (const char *yystr) continue; return yylen; } -# endif # endif +#endif -# ifndef yystpcpy -# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE -# define yystpcpy stpcpy -# else +#ifndef yystpcpy +# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE +# define yystpcpy stpcpy +# else /* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in YYDEST. */ static char * @@ -5614,10 +6906,10 @@ yystpcpy (char *yydest, const char *yysrc) return yyd - 1; } -# endif # endif +#endif -# ifndef yytnamerr +#ifndef yytnamerr /* Copy to YYRES the contents of YYSTR after stripping away unnecessary quotes and backslashes, so that it's suitable for yyerror. The heuristic is that double-quoting is unnecessary unless the string @@ -5632,7 +6924,6 @@ yytnamerr (char *yyres, const char *yystr) { YYPTRDIFF_T yyn = 0; char const *yyp = yystr; - for (;;) switch (*++yyp) { @@ -5666,31 +6957,15 @@ yytnamerr (char *yyres, const char *yystr) else return yystrlen (yystr); } -# endif +#endif -/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message - about the unexpected token YYTOKEN for the state stack whose top is - YYSSP. - Return 0 if *YYMSG was successfully written. Return 1 if *YYMSG is - not large enough to hold the message. In that case, also set - *YYMSG_ALLOC to the required number of bytes. Return 2 if the - required number of bytes is too large to store. */ static int -yysyntax_error (YYPTRDIFF_T *yymsg_alloc, char **yymsg, - yy_state_t *yyssp, int yytoken) +yy_syntax_error_arguments (const yypcontext_t *yyctx, + yysymbol_kind_t yyarg[], int yyargn) { - enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; - /* Internationalized format string. */ - const char *yyformat = YY_NULLPTR; - /* Arguments of yyformat: reported tokens (one for the "unexpected", - one per "expected"). */ - char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM]; /* Actual size of YYARG. */ int yycount = 0; - /* Cumulated lengths of YYARG. */ - YYPTRDIFF_T yysize = 0; - /* There are many possibilities here to consider: - If this state is a consistent state with a default action, then the only way this function was invoked is if the default action @@ -5714,52 +6989,54 @@ yysyntax_error (YYPTRDIFF_T *yymsg_alloc, char **yymsg, one exception: it will still contain any token that will not be accepted due to an error action in a later state. */ - if (yytoken != YYEMPTY) + if (yyctx->yytoken != YYSYMBOL_YYEMPTY) { - int yyn = yypact[+*yyssp]; - YYPTRDIFF_T yysize0 = yytnamerr (YY_NULLPTR, yytname[yytoken]); - yysize = yysize0; - yyarg[yycount++] = yytname[yytoken]; - if (!yypact_value_is_default (yyn)) - { - /* Start YYX at -YYN if negative to avoid negative indexes in - YYCHECK. In other words, skip the first -YYN actions for - this state because they are default actions. */ - int yyxbegin = yyn < 0 ? -yyn : 0; - /* Stay within bounds of both yycheck and yytname. */ - int yychecklim = YYLAST - yyn + 1; - int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS; - int yyx; - - for (yyx = yyxbegin; yyx < yyxend; ++yyx) - if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR - && !yytable_value_is_error (yytable[yyx + yyn])) - { - if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM) - { - yycount = 1; - yysize = yysize0; - break; - } - yyarg[yycount++] = yytname[yyx]; - { - YYPTRDIFF_T yysize1 - = yysize + yytnamerr (YY_NULLPTR, yytname[yyx]); - if (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM) - yysize = yysize1; - else - return 2; - } - } - } + int yyn; + if (yyarg) + yyarg[yycount] = yyctx->yytoken; + ++yycount; + yyn = yypcontext_expected_tokens (yyctx, + yyarg ? yyarg + 1 : yyarg, yyargn - 1); + if (yyn == YYENOMEM) + return YYENOMEM; + else + yycount += yyn; } + return yycount; +} + +/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message + about the unexpected token YYTOKEN for the state stack whose top is + YYSSP. + + Return 0 if *YYMSG was successfully written. Return -1 if *YYMSG is + not large enough to hold the message. In that case, also set + *YYMSG_ALLOC to the required number of bytes. Return YYENOMEM if the + required number of bytes is too large to store. */ +static int +yysyntax_error (YYPTRDIFF_T *yymsg_alloc, char **yymsg, + const yypcontext_t *yyctx, parser_state *p) +{ + enum { YYARGS_MAX = 5 }; + /* Internationalized format string. */ + const char *yyformat = YY_NULLPTR; + /* Arguments of yyformat: reported tokens (one for the "unexpected", + one per "expected"). */ + yysymbol_kind_t yyarg[YYARGS_MAX]; + /* Cumulated lengths of YYARG. */ + YYPTRDIFF_T yysize = 0; + + /* Actual size of YYARG. */ + int yycount = yy_syntax_error_arguments (yyctx, yyarg, YYARGS_MAX); + if (yycount == YYENOMEM) + return YYENOMEM; switch (yycount) { -# define YYCASE_(N, S) \ +#define YYCASE_(N, S) \ case N: \ yyformat = S; \ - break + break default: /* Avoid compiler warnings. */ YYCASE_(0, YY_("syntax error")); YYCASE_(1, YY_("syntax error, unexpected %s")); @@ -5767,17 +7044,23 @@ yysyntax_error (YYPTRDIFF_T *yymsg_alloc, char **yymsg, YYCASE_(3, YY_("syntax error, unexpected %s, expecting %s or %s")); YYCASE_(4, YY_("syntax error, unexpected %s, expecting %s or %s or %s")); YYCASE_(5, YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s")); -# undef YYCASE_ +#undef YYCASE_ } + /* Compute error message size. Don't count the "%s"s, but reserve + room for the terminator. */ + yysize = yystrlen (yyformat) - 2 * yycount + 1; { - /* Don't count the "%s"s in the final size, but reserve room for - the terminator. */ - YYPTRDIFF_T yysize1 = yysize + (yystrlen (yyformat) - 2 * yycount) + 1; - if (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM) - yysize = yysize1; - else - return 2; + int yyi; + for (yyi = 0; yyi < yycount; ++yyi) + { + YYPTRDIFF_T yysize1 + = yysize + yytnamerr (YY_NULLPTR, yytname[yyarg[yyi]]); + if (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM) + yysize = yysize1; + else + return YYENOMEM; + } } if (*yymsg_alloc < yysize) @@ -5786,7 +7069,7 @@ yysyntax_error (YYPTRDIFF_T *yymsg_alloc, char **yymsg, if (! (yysize <= *yymsg_alloc && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM)) *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM; - return 1; + return -1; } /* Avoid sprintf, as that infringes on the user's name space. @@ -5798,7 +7081,7 @@ yysyntax_error (YYPTRDIFF_T *yymsg_alloc, char **yymsg, while ((*yyp = *yyformat) != '\0') if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount) { - yyp += yytnamerr (yyp, yyarg[yyi++]); + yyp += yytnamerr (yyp, yytname[yyarg[yyi++]]); yyformat += 2; } else @@ -5809,4309 +7092,4926 @@ yysyntax_error (YYPTRDIFF_T *yymsg_alloc, char **yymsg, } return 0; } -#endif /* YYERROR_VERBOSE */ -/*-----------------------------------------------. -| Release the memory associated to this symbol. | -`-----------------------------------------------*/ -static void -yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep, parser_state *p) -{ - YYUSE (yyvaluep); - YYUSE (p); - if (!yymsg) - yymsg = "Deleting"; - YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp); +/*-----------------------------------------------. +| Release the memory associated to this symbol. | +`-----------------------------------------------*/ + +static void +yydestruct (const char *yymsg, + yysymbol_kind_t yykind, YYSTYPE *yyvaluep, YYLTYPE *yylocationp, parser_state *p) +{ + YY_USE (yyvaluep); + YY_USE (yylocationp); + YY_USE (p); + if (!yymsg) + yymsg = "Deleting"; + YY_SYMBOL_PRINT (yymsg, yykind, yyvaluep, yylocationp, p); + + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + switch (yykind) + { + default: + break; + } + YY_IGNORE_MAYBE_UNINITIALIZED_END +} + + + + + + +/*----------. +| yyparse. | +`----------*/ + +int +yyparse (parser_state *p) +{ +/* Lookahead token kind. */ +int yychar; + + +/* The semantic value of the lookahead symbol. */ +/* Default value used for initialization, for pacifying older GCCs + or non-GCC compilers. */ +#ifdef __cplusplus +static const YYSTYPE yyval_default = {}; +(void) yyval_default; +#else +YY_INITIAL_VALUE (static const YYSTYPE yyval_default;) +#endif +YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); + +/* Location data for the lookahead symbol. */ +static const YYLTYPE yyloc_default +# if defined YYLTYPE_IS_TRIVIAL && YYLTYPE_IS_TRIVIAL + = { 1, 1, 1, 1 } +# endif +; +YYLTYPE yylloc = yyloc_default; + + /* Number of syntax errors so far. */ + int yynerrs = 0; + YY_USE (yynerrs); /* Silence compiler warning. */ + + yy_state_fast_t yystate = 0; + /* Number of tokens to shift before error messages enabled. */ + int yyerrstatus = 0; + + /* Refer to the stacks through separate pointers, to allow yyoverflow + to reallocate them elsewhere. */ + + /* Their size. */ + YYPTRDIFF_T yystacksize = YYINITDEPTH; + + /* The state stack: array, bottom, top. */ + yy_state_t yyssa[YYINITDEPTH]; + yy_state_t *yyss = yyssa; + yy_state_t *yyssp = yyss; + + /* The semantic value stack: array, bottom, top. */ + YYSTYPE yyvsa[YYINITDEPTH]; + YYSTYPE *yyvs = yyvsa; + YYSTYPE *yyvsp = yyvs; + + /* The location stack: array, bottom, top. */ + YYLTYPE yylsa[YYINITDEPTH]; + YYLTYPE *yyls = yylsa; + YYLTYPE *yylsp = yyls; + + int yyn; + /* The return value of yyparse. */ + int yyresult; + /* Lookahead symbol kind. */ + yysymbol_kind_t yytoken = YYSYMBOL_YYEMPTY; + /* The variables used to return semantic value and location from the + action routines. */ + YYSTYPE yyval; + YYLTYPE yyloc; + + /* The locations where the error started and ended. */ + YYLTYPE yyerror_range[3]; + + /* Buffer for error messages, and its allocated size. */ + char yymsgbuf[128]; + char *yymsg = yymsgbuf; + YYPTRDIFF_T yymsg_alloc = sizeof yymsgbuf; + +#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N), yylsp -= (N)) + + /* The number of symbols on the RHS of the reduced rule. + Keep to zero when no symbol should be popped. */ + int yylen = 0; + + YYDPRINTF ((stderr, "Starting parse\n")); + + yychar = YYEMPTY; /* Cause a token to be read. */ + + + +#line 7216 "mrbgems/mruby-compiler/core/y.tab.c" + + yylsp[0] = yylloc; + goto yysetstate; + + +/*------------------------------------------------------------. +| yynewstate -- push a new state, which is found in yystate. | +`------------------------------------------------------------*/ +yynewstate: + /* In all cases, when you get here, the value and location stacks + have just been pushed. So pushing a state here evens the stacks. */ + yyssp++; + + +/*--------------------------------------------------------------------. +| yysetstate -- set current state (the top of the stack) to yystate. | +`--------------------------------------------------------------------*/ +yysetstate: + YYDPRINTF ((stderr, "Entering state %d\n", yystate)); + YY_ASSERT (0 <= yystate && yystate < YYNSTATES); + YY_IGNORE_USELESS_CAST_BEGIN + *yyssp = YY_CAST (yy_state_t, yystate); + YY_IGNORE_USELESS_CAST_END + YY_STACK_PRINT (yyss, yyssp, p); + + if (yyss + yystacksize - 1 <= yyssp) +#if !defined yyoverflow && !defined YYSTACK_RELOCATE + YYNOMEM; +#else + { + /* Get the current used size of the three stacks, in elements. */ + YYPTRDIFF_T yysize = yyssp - yyss + 1; + +# if defined yyoverflow + { + /* Give user a chance to reallocate the stack. Use copies of + these so that the &'s don't force the real ones into + memory. */ + yy_state_t *yyss1 = yyss; + YYSTYPE *yyvs1 = yyvs; + YYLTYPE *yyls1 = yyls; + + /* Each stack pointer address is followed by the size of the + data in use in that stack, in bytes. This used to be a + conditional around just the two extra args, but that might + be undefined if yyoverflow is a macro. */ + yyoverflow (YY_("memory exhausted"), + &yyss1, yysize * YYSIZEOF (*yyssp), + &yyvs1, yysize * YYSIZEOF (*yyvsp), + &yyls1, yysize * YYSIZEOF (*yylsp), + &yystacksize); + yyss = yyss1; + yyvs = yyvs1; + yyls = yyls1; + } +# else /* defined YYSTACK_RELOCATE */ + /* Extend the stack our own way. */ + if (YYMAXDEPTH <= yystacksize) + YYNOMEM; + yystacksize *= 2; + if (YYMAXDEPTH < yystacksize) + yystacksize = YYMAXDEPTH; + + { + yy_state_t *yyss1 = yyss; + union yyalloc *yyptr = + YY_CAST (union yyalloc *, + YYSTACK_ALLOC (YY_CAST (YYSIZE_T, YYSTACK_BYTES (yystacksize)))); + if (! yyptr) + YYNOMEM; + YYSTACK_RELOCATE (yyss_alloc, yyss); + YYSTACK_RELOCATE (yyvs_alloc, yyvs); + YYSTACK_RELOCATE (yyls_alloc, yyls); +# undef YYSTACK_RELOCATE + if (yyss1 != yyssa) + YYSTACK_FREE (yyss1); + } +# endif + + yyssp = yyss + yysize - 1; + yyvsp = yyvs + yysize - 1; + yylsp = yyls + yysize - 1; + + YY_IGNORE_USELESS_CAST_BEGIN + YYDPRINTF ((stderr, "Stack size increased to %ld\n", + YY_CAST (long, yystacksize))); + YY_IGNORE_USELESS_CAST_END + + if (yyss + yystacksize - 1 <= yyssp) + YYABORT; + } +#endif /* !defined yyoverflow && !defined YYSTACK_RELOCATE */ + + + if (yystate == YYFINAL) + YYACCEPT; + + goto yybackup; + + +/*-----------. +| yybackup. | +`-----------*/ +yybackup: + /* Do appropriate processing given the current state. Read a + lookahead token if we need one and don't already have one. */ + + /* First try to decide what to do without reference to lookahead token. */ + yyn = yypact[yystate]; + if (yypact_value_is_default (yyn)) + goto yydefault; + + /* Not known => get a lookahead token if don't already have one. */ + + /* YYCHAR is either empty, or end-of-input, or a valid lookahead. */ + if (yychar == YYEMPTY) + { + YYDPRINTF ((stderr, "Reading a token\n")); + yychar = yylex (&yylval, &yylloc, p); + } + + if (yychar <= YYEOF) + { + yychar = YYEOF; + yytoken = YYSYMBOL_YYEOF; + YYDPRINTF ((stderr, "Now at end of input.\n")); + } + else if (yychar == YYerror) + { + /* The scanner already issued an error message, process directly + to error recovery. But do not keep the error token as + lookahead, it is too special and may lead us to an endless + loop in error recovery. */ + yychar = YYUNDEF; + yytoken = YYSYMBOL_YYerror; + yyerror_range[1] = yylloc; + goto yyerrlab1; + } + else + { + yytoken = YYTRANSLATE (yychar); + YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc, p); + } + + /* If the proper action on seeing token YYTOKEN is to reduce or to + detect an error, take that action. */ + yyn += yytoken; + if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) + goto yydefault; + yyn = yytable[yyn]; + if (yyn <= 0) + { + if (yytable_value_is_error (yyn)) + goto yyerrlab; + yyn = -yyn; + goto yyreduce; + } + + /* Count tokens shifted since error; after three, turn off error + status. */ + if (yyerrstatus) + yyerrstatus--; + + /* Shift the lookahead token. */ + YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc, p); + yystate = yyn; + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + *++yyvsp = yylval; + YY_IGNORE_MAYBE_UNINITIALIZED_END + *++yylsp = yylloc; + + + /* Discard the shifted token. */ + yychar = YYEMPTY; + goto yynewstate; + + +/*-----------------------------------------------------------. +| yydefault -- do the default action for the current state. | +`-----------------------------------------------------------*/ +yydefault: + yyn = yydefact[yystate]; + if (yyn == 0) + goto yyerrlab; + goto yyreduce; + + +/*-----------------------------. +| yyreduce -- do a reduction. | +`-----------------------------*/ +yyreduce: + /* yyn is the number of a rule to reduce with. */ + yylen = yyr2[yyn]; + + /* If YYLEN is nonzero, implement the default value of the action: + '$$ = $1'. + + Otherwise, the following line sets YYVAL to garbage. + This behavior is undocumented and Bison + users should not rely upon it. Assigning to YYVAL + unconditionally makes the parser a bit smaller, and it avoids a + GCC warning that YYVAL may be used uninitialized. */ + yyval = yyvsp[1-yylen]; - YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN - YYUSE (yytype); - YY_IGNORE_MAYBE_UNINITIALIZED_END -} + /* Default location. */ + YYLLOC_DEFAULT (yyloc, (yylsp - yylen), yylen); + yyerror_range[1] = yyloc; + YY_REDUCE_PRINT (yyn, p); + switch (yyn) + { + case 2: /* $@1: %empty */ +#line 2182 "mrbgems/mruby-compiler/core/parse.y" + { + p->lstate = EXPR_BEG; + if (!p->locals) p->locals = cons(0,0); + } +#line 7434 "mrbgems/mruby-compiler/core/y.tab.c" + break; + case 3: /* program: $@1 top_compstmt */ +#line 2187 "mrbgems/mruby-compiler/core/parse.y" + { + p->tree = new_scope(p, (yyvsp[0].nd)); + } +#line 7442 "mrbgems/mruby-compiler/core/y.tab.c" + break; + case 4: /* top_compstmt: top_stmts opt_terms */ +#line 2193 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = (yyvsp[-1].nd); + } +#line 7450 "mrbgems/mruby-compiler/core/y.tab.c" + break; -/*----------. -| yyparse. | -`----------*/ + case 5: /* top_stmts: none */ +#line 2199 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_stmts(p, 0); + } +#line 7458 "mrbgems/mruby-compiler/core/y.tab.c" + break; -int -yyparse (parser_state *p) -{ -/* The lookahead symbol. */ -int yychar; + case 6: /* top_stmts: top_stmt */ +#line 2203 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_stmts(p, (yyvsp[0].nd)); + } +#line 7466 "mrbgems/mruby-compiler/core/y.tab.c" + break; + case 7: /* top_stmts: top_stmts terms top_stmt */ +#line 2207 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = stmts_push(p, (yyvsp[-2].nd), newline_node((yyvsp[0].nd))); + } +#line 7474 "mrbgems/mruby-compiler/core/y.tab.c" + break; -/* The semantic value of the lookahead symbol. */ -/* Default value used for initialization, for pacifying older GCCs - or non-GCC compilers. */ -YY_INITIAL_VALUE (static YYSTYPE yyval_default;) -YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); + case 8: /* top_stmts: error top_stmt */ +#line 2211 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_stmts(p, 0); + } +#line 7482 "mrbgems/mruby-compiler/core/y.tab.c" + break; - /* Number of syntax errors so far. */ - int yynerrs; + case 10: /* @2: %empty */ +#line 2218 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = local_switch(p); + nvars_block(p); + } +#line 7491 "mrbgems/mruby-compiler/core/y.tab.c" + break; - yy_state_fast_t yystate; - /* Number of tokens to shift before error messages enabled. */ - int yyerrstatus; + case 11: /* top_stmt: "'BEGIN'" @2 '{' top_compstmt '}' */ +#line 2223 "mrbgems/mruby-compiler/core/parse.y" + { + yyerror(&(yylsp[-4]), p, "BEGIN not supported"); + local_resume(p, (yyvsp[-3].nd)); + nvars_unnest(p); + (yyval.nd) = 0; + } +#line 7502 "mrbgems/mruby-compiler/core/y.tab.c" + break; - /* The stacks and their tools: - 'yyss': related to states. - 'yyvs': related to semantic values. + case 12: /* bodystmt: compstmt opt_rescue opt_else opt_ensure */ +#line 2235 "mrbgems/mruby-compiler/core/parse.y" + { + if ((yyvsp[-2].nd)) { + (yyval.nd) = new_rescue(p, (yyvsp[-3].nd), (yyvsp[-2].nd), (yyvsp[-1].nd)); + } + else if ((yyvsp[-1].nd)) { + yywarning(p, "else without rescue is useless"); + (yyval.nd) = stmts_push(p, (yyvsp[-3].nd), (yyvsp[-1].nd)); + } + else { + (yyval.nd) = (yyvsp[-3].nd); + } + if ((yyvsp[0].nd)) { + if ((yyval.nd)) { + (yyval.nd) = new_ensure(p, (yyval.nd), (yyvsp[0].nd)); + } + else { + (yyval.nd) = push((yyvsp[0].nd), new_nil(p)); + } + } + } +#line 7527 "mrbgems/mruby-compiler/core/y.tab.c" + break; - Refer to the stacks through separate pointers, to allow yyoverflow - to reallocate them elsewhere. */ + case 13: /* compstmt: stmts opt_terms */ +#line 2258 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = (yyvsp[-1].nd); + } +#line 7535 "mrbgems/mruby-compiler/core/y.tab.c" + break; - /* The state stack. */ - yy_state_t yyssa[YYINITDEPTH]; - yy_state_t *yyss; - yy_state_t *yyssp; + case 14: /* stmts: none */ +#line 2264 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_stmts(p, 0); + } +#line 7543 "mrbgems/mruby-compiler/core/y.tab.c" + break; - /* The semantic value stack. */ - YYSTYPE yyvsa[YYINITDEPTH]; - YYSTYPE *yyvs; - YYSTYPE *yyvsp; + case 15: /* stmts: stmt */ +#line 2268 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_stmts(p, (yyvsp[0].nd)); + } +#line 7551 "mrbgems/mruby-compiler/core/y.tab.c" + break; - YYPTRDIFF_T yystacksize; + case 16: /* stmts: stmts terms stmt */ +#line 2272 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = stmts_push(p, (yyvsp[-2].nd), newline_node((yyvsp[0].nd))); + } +#line 7559 "mrbgems/mruby-compiler/core/y.tab.c" + break; - int yyn; - int yyresult; - /* Lookahead token as an internal (translated) token number. */ - int yytoken = 0; - /* The variables used to return semantic value and location from the - action routines. */ - YYSTYPE yyval; + case 17: /* stmts: error stmt */ +#line 2276 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_stmts(p, (yyvsp[0].nd)); + } +#line 7567 "mrbgems/mruby-compiler/core/y.tab.c" + break; -#if YYERROR_VERBOSE - /* Buffer for error messages, and its allocated size. */ - char yymsgbuf[128]; - char *yymsg = yymsgbuf; - YYPTRDIFF_T yymsg_alloc = sizeof yymsgbuf; -#endif + case 18: /* $@3: %empty */ +#line 2281 "mrbgems/mruby-compiler/core/parse.y" + {p->lstate = EXPR_FNAME;} +#line 7573 "mrbgems/mruby-compiler/core/y.tab.c" + break; -#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N)) + case 19: /* stmt: "'alias'" fsym $@3 fsym */ +#line 2282 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_alias(p, (yyvsp[-2].id), (yyvsp[0].id)); + } +#line 7581 "mrbgems/mruby-compiler/core/y.tab.c" + break; - /* The number of symbols on the RHS of the reduced rule. - Keep to zero when no symbol should be popped. */ - int yylen = 0; + case 20: /* stmt: "'undef'" undef_list */ +#line 2286 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_undef(p, (yyvsp[0].nd)); + } +#line 7589 "mrbgems/mruby-compiler/core/y.tab.c" + break; - yyssp = yyss = yyssa; - yyvsp = yyvs = yyvsa; - yystacksize = YYINITDEPTH; + case 21: /* stmt: stmt "'if' modifier" expr_value */ +#line 2290 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_if(p, cond((yyvsp[0].nd)), (yyvsp[-2].nd), 0); + } +#line 7597 "mrbgems/mruby-compiler/core/y.tab.c" + break; - YYDPRINTF ((stderr, "Starting parse\n")); + case 22: /* stmt: stmt "'unless' modifier" expr_value */ +#line 2294 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_if(p, cond((yyvsp[0].nd)), 0, (yyvsp[-2].nd)); + } +#line 7605 "mrbgems/mruby-compiler/core/y.tab.c" + break; - yystate = 0; - yyerrstatus = 0; - yynerrs = 0; - yychar = YYEMPTY; /* Cause a token to be read. */ - goto yysetstate; + case 23: /* stmt: stmt "'while' modifier" expr_value */ +#line 2298 "mrbgems/mruby-compiler/core/parse.y" + { + if ((yyvsp[-2].nd) && node_type_p((yyvsp[-2].nd), NODE_BEGIN)) { + (yyval.nd) = new_while_mod(p, cond((yyvsp[0].nd)), (yyvsp[-2].nd)); + } + else { + (yyval.nd) = new_while(p, cond((yyvsp[0].nd)), (yyvsp[-2].nd)); + } + } +#line 7618 "mrbgems/mruby-compiler/core/y.tab.c" + break; + case 24: /* stmt: stmt "'until' modifier" expr_value */ +#line 2307 "mrbgems/mruby-compiler/core/parse.y" + { + if ((yyvsp[-2].nd) && node_type_p((yyvsp[-2].nd), NODE_BEGIN)) { + (yyval.nd) = new_until_mod(p, cond((yyvsp[0].nd)), (yyvsp[-2].nd)); + } + else { + (yyval.nd) = new_until(p, cond((yyvsp[0].nd)), (yyvsp[-2].nd)); + } + } +#line 7631 "mrbgems/mruby-compiler/core/y.tab.c" + break; -/*------------------------------------------------------------. -| yynewstate -- push a new state, which is found in yystate. | -`------------------------------------------------------------*/ -yynewstate: - /* In all cases, when you get here, the value and location stacks - have just been pushed. So pushing a state here evens the stacks. */ - yyssp++; + case 25: /* stmt: stmt "'rescue' modifier" stmt */ +#line 2316 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_mod_rescue(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + } +#line 7639 "mrbgems/mruby-compiler/core/y.tab.c" + break; + case 26: /* stmt: "'END'" '{' compstmt '}' */ +#line 2320 "mrbgems/mruby-compiler/core/parse.y" + { + yyerror(&(yylsp[-3]), p, "END not supported"); + (yyval.nd) = new_postexe(p, (yyvsp[-1].nd)); + } +#line 7648 "mrbgems/mruby-compiler/core/y.tab.c" + break; -/*--------------------------------------------------------------------. -| yysetstate -- set current state (the top of the stack) to yystate. | -`--------------------------------------------------------------------*/ -yysetstate: - YYDPRINTF ((stderr, "Entering state %d\n", yystate)); - YY_ASSERT (0 <= yystate && yystate < YYNSTATES); - YY_IGNORE_USELESS_CAST_BEGIN - *yyssp = YY_CAST (yy_state_t, yystate); - YY_IGNORE_USELESS_CAST_END + case 28: /* stmt: mlhs '=' command_call */ +#line 2326 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_masgn(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + } +#line 7656 "mrbgems/mruby-compiler/core/y.tab.c" + break; - if (yyss + yystacksize - 1 <= yyssp) -#if !defined yyoverflow && !defined YYSTACK_RELOCATE - goto yyexhaustedlab; -#else - { - /* Get the current used size of the three stacks, in elements. */ - YYPTRDIFF_T yysize = yyssp - yyss + 1; + case 29: /* stmt: lhs '=' mrhs */ +#line 2330 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_asgn(p, (yyvsp[-2].nd), new_array(p, (yyvsp[0].nd))); + } +#line 7664 "mrbgems/mruby-compiler/core/y.tab.c" + break; -# if defined yyoverflow - { - /* Give user a chance to reallocate the stack. Use copies of - these so that the &'s don't force the real ones into - memory. */ - yy_state_t *yyss1 = yyss; - YYSTYPE *yyvs1 = yyvs; + case 30: /* stmt: mlhs '=' arg */ +#line 2334 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_masgn(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + } +#line 7672 "mrbgems/mruby-compiler/core/y.tab.c" + break; - /* Each stack pointer address is followed by the size of the - data in use in that stack, in bytes. This used to be a - conditional around just the two extra args, but that might - be undefined if yyoverflow is a macro. */ - yyoverflow (YY_("memory exhausted"), - &yyss1, yysize * YYSIZEOF (*yyssp), - &yyvs1, yysize * YYSIZEOF (*yyvsp), - &yystacksize); - yyss = yyss1; - yyvs = yyvs1; - } -# else /* defined YYSTACK_RELOCATE */ - /* Extend the stack our own way. */ - if (YYMAXDEPTH <= yystacksize) - goto yyexhaustedlab; - yystacksize *= 2; - if (YYMAXDEPTH < yystacksize) - yystacksize = YYMAXDEPTH; + case 31: /* stmt: mlhs '=' mrhs */ +#line 2338 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_masgn(p, (yyvsp[-2].nd), new_array(p, (yyvsp[0].nd))); + } +#line 7680 "mrbgems/mruby-compiler/core/y.tab.c" + break; - { - yy_state_t *yyss1 = yyss; - union yyalloc *yyptr = - YY_CAST (union yyalloc *, - YYSTACK_ALLOC (YY_CAST (YYSIZE_T, YYSTACK_BYTES (yystacksize)))); - if (! yyptr) - goto yyexhaustedlab; - YYSTACK_RELOCATE (yyss_alloc, yyss); - YYSTACK_RELOCATE (yyvs_alloc, yyvs); -# undef YYSTACK_RELOCATE - if (yyss1 != yyssa) - YYSTACK_FREE (yyss1); - } -# endif + case 33: /* command_asgn: lhs '=' command_rhs */ +#line 2345 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_asgn(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + } +#line 7688 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 34: /* command_asgn: var_lhs tOP_ASGN command_rhs */ +#line 2349 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_op_asgn(p, (yyvsp[-2].nd), (yyvsp[-1].id), (yyvsp[0].nd)); + } +#line 7696 "mrbgems/mruby-compiler/core/y.tab.c" + break; - yyssp = yyss + yysize - 1; - yyvsp = yyvs + yysize - 1; + case 35: /* command_asgn: primary_value '[' opt_call_args ']' tOP_ASGN command_rhs */ +#line 2353 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_op_asgn(p, new_call(p, (yyvsp[-5].nd), intern_op(aref), (yyvsp[-3].nd), '.'), (yyvsp[-1].id), (yyvsp[0].nd)); + } +#line 7704 "mrbgems/mruby-compiler/core/y.tab.c" + break; - YY_IGNORE_USELESS_CAST_BEGIN - YYDPRINTF ((stderr, "Stack size increased to %ld\n", - YY_CAST (long, yystacksize))); - YY_IGNORE_USELESS_CAST_END + case 36: /* command_asgn: primary_value call_op "local variable or method" tOP_ASGN command_rhs */ +#line 2357 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_op_asgn(p, new_call(p, (yyvsp[-4].nd), (yyvsp[-2].id), 0, (yyvsp[-3].num)), (yyvsp[-1].id), (yyvsp[0].nd)); + } +#line 7712 "mrbgems/mruby-compiler/core/y.tab.c" + break; - if (yyss + yystacksize - 1 <= yyssp) - YYABORT; - } -#endif /* !defined yyoverflow && !defined YYSTACK_RELOCATE */ + case 37: /* command_asgn: primary_value call_op "constant" tOP_ASGN command_rhs */ +#line 2361 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_op_asgn(p, new_call(p, (yyvsp[-4].nd), (yyvsp[-2].id), 0, (yyvsp[-3].num)), (yyvsp[-1].id), (yyvsp[0].nd)); + } +#line 7720 "mrbgems/mruby-compiler/core/y.tab.c" + break; - if (yystate == YYFINAL) - YYACCEPT; + case 38: /* command_asgn: primary_value "::" "constant" tOP_ASGN command_call */ +#line 2365 "mrbgems/mruby-compiler/core/parse.y" + { + yyerror(&(yylsp[-4]), p, "constant re-assignment"); + (yyval.nd) = 0; + } +#line 7729 "mrbgems/mruby-compiler/core/y.tab.c" + break; - goto yybackup; + case 39: /* command_asgn: primary_value "::" "local variable or method" tOP_ASGN command_rhs */ +#line 2370 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_op_asgn(p, new_call(p, (yyvsp[-4].nd), (yyvsp[-2].id), 0, tCOLON2), (yyvsp[-1].id), (yyvsp[0].nd)); + } +#line 7737 "mrbgems/mruby-compiler/core/y.tab.c" + break; + case 40: /* command_asgn: defn_head f_opt_arglist_paren '=' command */ +#line 2374 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = (yyvsp[-3].nd); + endless_method_name(p, (yyvsp[-3].nd)); + void_expr_error(p, (yyvsp[0].nd)); + defn_setup(p, (yyval.nd), (yyvsp[-2].nd), (yyvsp[0].nd)); + nvars_unnest(p); + p->in_def--; + } +#line 7750 "mrbgems/mruby-compiler/core/y.tab.c" + break; -/*-----------. -| yybackup. | -`-----------*/ -yybackup: - /* Do appropriate processing given the current state. Read a - lookahead token if we need one and don't already have one. */ + case 41: /* command_asgn: defn_head f_opt_arglist_paren '=' command "'rescue' modifier" arg */ +#line 2383 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = (yyvsp[-5].nd); + endless_method_name(p, (yyvsp[-5].nd)); + void_expr_error(p, (yyvsp[-2].nd)); + defn_setup(p, (yyval.nd), (yyvsp[-4].nd), new_mod_rescue(p, (yyvsp[-2].nd), (yyvsp[0].nd))); + nvars_unnest(p); + p->in_def--; + } +#line 7763 "mrbgems/mruby-compiler/core/y.tab.c" + break; - /* First try to decide what to do without reference to lookahead token. */ - yyn = yypact[yystate]; - if (yypact_value_is_default (yyn)) - goto yydefault; + case 42: /* command_asgn: defs_head f_opt_arglist_paren '=' command */ +#line 2392 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = (yyvsp[-3].nd); + void_expr_error(p, (yyvsp[0].nd)); + defn_setup(p, (yyval.nd), (yyvsp[-2].nd), (yyvsp[0].nd)); + nvars_unnest(p); + p->in_def--; + p->in_single--; + } +#line 7776 "mrbgems/mruby-compiler/core/y.tab.c" + break; - /* Not known => get a lookahead token if don't already have one. */ + case 43: /* command_asgn: defs_head f_opt_arglist_paren '=' command "'rescue' modifier" arg */ +#line 2401 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = (yyvsp[-5].nd); + void_expr_error(p, (yyvsp[-2].nd)); + defn_setup(p, (yyval.nd), (yyvsp[-4].nd), new_mod_rescue(p, (yyvsp[-2].nd), (yyvsp[0].nd))); + nvars_unnest(p); + p->in_def--; + p->in_single--; + } +#line 7789 "mrbgems/mruby-compiler/core/y.tab.c" + break; - /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */ - if (yychar == YYEMPTY) - { - YYDPRINTF ((stderr, "Reading a token: ")); - yychar = yylex (&yylval, p); - } + case 44: /* command_asgn: backref tOP_ASGN command_rhs */ +#line 2410 "mrbgems/mruby-compiler/core/parse.y" + { + backref_error(p, (yyvsp[-2].nd)); + (yyval.nd) = new_stmts(p, 0); + } +#line 7798 "mrbgems/mruby-compiler/core/y.tab.c" + break; - if (yychar <= YYEOF) - { - yychar = yytoken = YYEOF; - YYDPRINTF ((stderr, "Now at end of input.\n")); - } - else - { - yytoken = YYTRANSLATE (yychar); - YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc); - } + case 46: /* command_rhs: command_call "'rescue' modifier" stmt */ +#line 2418 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_mod_rescue(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + } +#line 7806 "mrbgems/mruby-compiler/core/y.tab.c" + break; - /* If the proper action on seeing token YYTOKEN is to reduce or to - detect an error, take that action. */ - yyn += yytoken; - if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) - goto yydefault; - yyn = yytable[yyn]; - if (yyn <= 0) - { - if (yytable_value_is_error (yyn)) - goto yyerrlab; - yyn = -yyn; - goto yyreduce; - } + case 49: /* expr: expr "'and'" expr */ +#line 2426 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_and(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + } +#line 7814 "mrbgems/mruby-compiler/core/y.tab.c" + break; - /* Count tokens shifted since error; after three, turn off error - status. */ - if (yyerrstatus) - yyerrstatus--; + case 50: /* expr: expr "'or'" expr */ +#line 2430 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_or(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + } +#line 7822 "mrbgems/mruby-compiler/core/y.tab.c" + break; - /* Shift the lookahead token. */ - YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); - yystate = yyn; - YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN - *++yyvsp = yylval; - YY_IGNORE_MAYBE_UNINITIALIZED_END + case 51: /* expr: "'not'" opt_nl expr */ +#line 2434 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = call_uni_op(p, cond((yyvsp[0].nd)), "!"); + } +#line 7830 "mrbgems/mruby-compiler/core/y.tab.c" + break; - /* Discard the shifted token. */ - yychar = YYEMPTY; - goto yynewstate; + case 52: /* expr: '!' command_call */ +#line 2438 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = call_uni_op(p, cond((yyvsp[0].nd)), "!"); + } +#line 7838 "mrbgems/mruby-compiler/core/y.tab.c" + break; + case 53: /* $@4: %empty */ +#line 2441 "mrbgems/mruby-compiler/core/parse.y" + {p->in_kwarg++;} +#line 7844 "mrbgems/mruby-compiler/core/y.tab.c" + break; -/*-----------------------------------------------------------. -| yydefault -- do the default action for the current state. | -`-----------------------------------------------------------*/ -yydefault: - yyn = yydefact[yystate]; - if (yyn == 0) - goto yyerrlab; - goto yyreduce; + case 54: /* expr: arg "=>" $@4 p_expr */ +#line 2442 "mrbgems/mruby-compiler/core/parse.y" + { + /* expr => pattern (raises NoMatchingPatternError on failure) */ + p->in_kwarg--; + (yyval.nd) = new_match_pat(p, (yyvsp[-3].nd), (yyvsp[0].nd), TRUE); + } +#line 7854 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 55: /* $@5: %empty */ +#line 2447 "mrbgems/mruby-compiler/core/parse.y" + {p->in_kwarg++;} +#line 7860 "mrbgems/mruby-compiler/core/y.tab.c" + break; + case 56: /* expr: arg "'in'" $@5 p_expr */ +#line 2448 "mrbgems/mruby-compiler/core/parse.y" + { + /* expr in pattern (returns true/false) */ + p->in_kwarg--; + (yyval.nd) = new_match_pat(p, (yyvsp[-3].nd), (yyvsp[0].nd), FALSE); + } +#line 7870 "mrbgems/mruby-compiler/core/y.tab.c" + break; -/*-----------------------------. -| yyreduce -- do a reduction. | -`-----------------------------*/ -yyreduce: - /* yyn is the number of a rule to reduce with. */ - yylen = yyr2[yyn]; + case 58: /* defn_head: "'def'" fname */ +#line 2457 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_def(p, (yyvsp[0].id)); + p->cmdarg_stack = 0; + p->in_def++; + nvars_block(p); + } +#line 7881 "mrbgems/mruby-compiler/core/y.tab.c" + break; - /* If YYLEN is nonzero, implement the default value of the action: - '$$ = $1'. + case 59: /* $@6: %empty */ +#line 2466 "mrbgems/mruby-compiler/core/parse.y" + { + p->lstate = EXPR_FNAME; + } +#line 7889 "mrbgems/mruby-compiler/core/y.tab.c" + break; - Otherwise, the following line sets YYVAL to garbage. - This behavior is undocumented and Bison - users should not rely upon it. Assigning to YYVAL - unconditionally makes the parser a bit smaller, and it avoids a - GCC warning that YYVAL may be used uninitialized. */ - yyval = yyvsp[1-yylen]; + case 60: /* defs_head: "'def'" singleton dot_or_colon $@6 fname */ +#line 2470 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_sdef(p, (yyvsp[-3].nd), (yyvsp[0].id)); + p->cmdarg_stack = 0; + p->in_def++; + p->in_single++; + nvars_block(p); + p->lstate = EXPR_ENDFN; /* force for args */ + } +#line 7902 "mrbgems/mruby-compiler/core/y.tab.c" + break; + case 61: /* expr_value: expr */ +#line 2481 "mrbgems/mruby-compiler/core/parse.y" + { + if (!(yyvsp[0].nd)) (yyval.nd) = new_nil(p); + else { + (yyval.nd) = (yyvsp[0].nd); + } + } +#line 7913 "mrbgems/mruby-compiler/core/y.tab.c" + break; - YY_REDUCE_PRINT (yyn); - switch (yyn) - { - case 2: -#line 1620 "mrbgems/mruby-compiler/core/parse.y" + case 65: /* block_command: block_call call_op2 operation2 command_args */ +#line 2495 "mrbgems/mruby-compiler/core/parse.y" { - p->lstate = EXPR_BEG; - if (!p->locals) p->locals = cons(0,0); + (yyval.nd) = new_call(p, (yyvsp[-3].nd), (yyvsp[-1].id), (yyvsp[0].nd), (yyvsp[-2].num)); } -#line 6101 "mrbgems/mruby-compiler/core/y.tab.c" +#line 7921 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 3: -#line 1625 "mrbgems/mruby-compiler/core/parse.y" + case 66: /* $@7: %empty */ +#line 2501 "mrbgems/mruby-compiler/core/parse.y" { - p->tree = new_scope(p, (yyvsp[0].nd)); - NODE_LINENO(p->tree, (yyvsp[0].nd)); + local_nest(p); + nvars_nest(p); } -#line 6110 "mrbgems/mruby-compiler/core/y.tab.c" +#line 7930 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 4: -#line 1632 "mrbgems/mruby-compiler/core/parse.y" + case 67: /* cmd_brace_block: "{" $@7 opt_block_param compstmt '}' */ +#line 2508 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[-1].nd); + (yyval.nd) = new_block(p, (yyvsp[-2].nd), (yyvsp[-1].nd)); + local_unnest(p); + nvars_unnest(p); } -#line 6118 "mrbgems/mruby-compiler/core/y.tab.c" +#line 7940 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 5: -#line 1638 "mrbgems/mruby-compiler/core/parse.y" + case 68: /* command: operation command_args */ +#line 2516 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_begin(p, 0); + (yyval.nd) = new_fcall(p, (yyvsp[-1].id), (yyvsp[0].nd)); } -#line 6126 "mrbgems/mruby-compiler/core/y.tab.c" +#line 7948 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 6: -#line 1642 "mrbgems/mruby-compiler/core/parse.y" + case 69: /* command: operation command_args cmd_brace_block */ +#line 2520 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_begin(p, (yyvsp[0].nd)); - NODE_LINENO((yyval.nd), (yyvsp[0].nd)); + args_with_block(p, (yyvsp[-1].nd), (yyvsp[0].nd)); + (yyval.nd) = new_fcall(p, (yyvsp[-2].id), (yyvsp[-1].nd)); } -#line 6135 "mrbgems/mruby-compiler/core/y.tab.c" +#line 7957 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 7: -#line 1647 "mrbgems/mruby-compiler/core/parse.y" + case 70: /* command: primary_value call_op operation2 command_args */ +#line 2525 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = push((yyvsp[-2].nd), newline_node((yyvsp[0].nd))); + (yyval.nd) = new_call(p, (yyvsp[-3].nd), (yyvsp[-1].id), (yyvsp[0].nd), (yyvsp[-2].num)); } -#line 6143 "mrbgems/mruby-compiler/core/y.tab.c" +#line 7965 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 71: /* command: primary_value call_op operation2 command_args cmd_brace_block */ +#line 2529 "mrbgems/mruby-compiler/core/parse.y" + { + args_with_block(p, (yyvsp[-1].nd), (yyvsp[0].nd)); + (yyval.nd) = new_call(p, (yyvsp[-4].nd), (yyvsp[-2].id), (yyvsp[-1].nd), (yyvsp[-3].num)); + } +#line 7974 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 8: -#line 1651 "mrbgems/mruby-compiler/core/parse.y" + case 72: /* command: primary_value "::" operation2 command_args */ +#line 2534 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_begin(p, 0); + (yyval.nd) = new_call(p, (yyvsp[-3].nd), (yyvsp[-1].id), (yyvsp[0].nd), tCOLON2); } -#line 6151 "mrbgems/mruby-compiler/core/y.tab.c" +#line 7982 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 10: -#line 1658 "mrbgems/mruby-compiler/core/parse.y" + case 73: /* command: primary_value "::" operation2 command_args cmd_brace_block */ +#line 2538 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = local_switch(p); - nvars_block(p); + args_with_block(p, (yyvsp[-1].nd), (yyvsp[0].nd)); + (yyval.nd) = new_call(p, (yyvsp[-4].nd), (yyvsp[-2].id), (yyvsp[-1].nd), tCOLON2); } -#line 6160 "mrbgems/mruby-compiler/core/y.tab.c" +#line 7991 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 11: -#line 1663 "mrbgems/mruby-compiler/core/parse.y" + case 74: /* command: "'super'" command_args */ +#line 2543 "mrbgems/mruby-compiler/core/parse.y" { - yyerror(p, "BEGIN not supported"); - local_resume(p, (yyvsp[-3].nd)); - nvars_unnest(p); - (yyval.nd) = 0; + (yyval.nd) = new_super(p, (yyvsp[0].nd)); } -#line 6171 "mrbgems/mruby-compiler/core/y.tab.c" +#line 7999 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 12: -#line 1675 "mrbgems/mruby-compiler/core/parse.y" + case 75: /* command: "'yield'" command_args */ +#line 2547 "mrbgems/mruby-compiler/core/parse.y" { - if ((yyvsp[-2].nd)) { - (yyval.nd) = new_rescue(p, (yyvsp[-3].nd), (yyvsp[-2].nd), (yyvsp[-1].nd)); - NODE_LINENO((yyval.nd), (yyvsp[-3].nd)); - } - else if ((yyvsp[-1].nd)) { - yywarning(p, "else without rescue is useless"); - (yyval.nd) = push((yyvsp[-3].nd), (yyvsp[-1].nd)); - } - else { - (yyval.nd) = (yyvsp[-3].nd); - } - if ((yyvsp[0].nd)) { - if ((yyval.nd)) { - (yyval.nd) = new_ensure(p, (yyval.nd), (yyvsp[0].nd)); - } - else { - (yyval.nd) = push((yyvsp[0].nd), new_nil(p)); - } - } + (yyval.nd) = new_yield(p, (yyvsp[0].nd)); } -#line 6197 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8007 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 13: -#line 1699 "mrbgems/mruby-compiler/core/parse.y" + case 76: /* command: "'return'" call_args */ +#line 2551 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[-1].nd); + (yyval.nd) = new_return(p, ret_args(p, (yyvsp[0].nd))); } -#line 6205 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8015 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 14: -#line 1705 "mrbgems/mruby-compiler/core/parse.y" + case 77: /* command: "'break'" call_args */ +#line 2555 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_begin(p, 0); + (yyval.nd) = new_break(p, ret_args(p, (yyvsp[0].nd))); } -#line 6213 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8023 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 15: -#line 1709 "mrbgems/mruby-compiler/core/parse.y" + case 78: /* command: "'next'" call_args */ +#line 2559 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_begin(p, (yyvsp[0].nd)); - NODE_LINENO((yyval.nd), (yyvsp[0].nd)); + (yyval.nd) = new_next(p, ret_args(p, (yyvsp[0].nd))); } -#line 6222 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8031 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 16: -#line 1714 "mrbgems/mruby-compiler/core/parse.y" + case 79: /* mlhs: mlhs_basic */ +#line 2565 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = push((yyvsp[-2].nd), newline_node((yyvsp[0].nd))); + (yyval.nd) = (yyvsp[0].nd); } -#line 6230 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8039 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 17: -#line 1718 "mrbgems/mruby-compiler/core/parse.y" + case 80: /* mlhs: tLPAREN mlhs_inner rparen */ +#line 2569 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_begin(p, (yyvsp[0].nd)); + (yyval.nd) = (yyvsp[-1].nd); } -#line 6238 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8047 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 18: -#line 1723 "mrbgems/mruby-compiler/core/parse.y" - {p->lstate = EXPR_FNAME;} -#line 6244 "mrbgems/mruby-compiler/core/y.tab.c" + case 82: /* mlhs_inner: tLPAREN mlhs_inner rparen */ +#line 2576 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = (yyvsp[-1].nd); + } +#line 8055 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 19: -#line 1724 "mrbgems/mruby-compiler/core/parse.y" + case 83: /* mlhs_basic: mlhs_list */ +#line 2582 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_alias(p, (yyvsp[-2].id), (yyvsp[0].id)); + (yyval.nd) = list1((yyvsp[0].nd)); } -#line 6252 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8063 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 20: -#line 1728 "mrbgems/mruby-compiler/core/parse.y" + case 84: /* mlhs_basic: mlhs_list mlhs_item */ +#line 2586 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[0].nd); + (yyval.nd) = list1(push((yyvsp[-1].nd),(yyvsp[0].nd))); } -#line 6260 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8071 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 21: -#line 1732 "mrbgems/mruby-compiler/core/parse.y" + case 85: /* mlhs_basic: mlhs_list "*" mlhs_node */ +#line 2590 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_if(p, cond((yyvsp[0].nd)), (yyvsp[-2].nd), 0); + (yyval.nd) = list2((yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 6268 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8079 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 22: -#line 1736 "mrbgems/mruby-compiler/core/parse.y" + case 86: /* mlhs_basic: mlhs_list "*" mlhs_node ',' mlhs_post */ +#line 2594 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_unless(p, cond((yyvsp[0].nd)), (yyvsp[-2].nd), 0); + (yyval.nd) = list3((yyvsp[-4].nd), (yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 6276 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8087 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 23: -#line 1740 "mrbgems/mruby-compiler/core/parse.y" + case 87: /* mlhs_basic: mlhs_list "*" */ +#line 2598 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_while(p, cond((yyvsp[0].nd)), (yyvsp[-2].nd)); + (yyval.nd) = list2((yyvsp[-1].nd), new_nil(p)); } -#line 6284 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8095 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 24: -#line 1744 "mrbgems/mruby-compiler/core/parse.y" + case 88: /* mlhs_basic: mlhs_list "*" ',' mlhs_post */ +#line 2602 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_until(p, cond((yyvsp[0].nd)), (yyvsp[-2].nd)); + (yyval.nd) = list3((yyvsp[-3].nd), new_nil(p), (yyvsp[0].nd)); } -#line 6292 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8103 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 25: -#line 1748 "mrbgems/mruby-compiler/core/parse.y" + case 89: /* mlhs_basic: "*" mlhs_node */ +#line 2606 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_mod_rescue(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + (yyval.nd) = list2(0, (yyvsp[0].nd)); } -#line 6300 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8111 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 26: -#line 1752 "mrbgems/mruby-compiler/core/parse.y" + case 90: /* mlhs_basic: "*" mlhs_node ',' mlhs_post */ +#line 2610 "mrbgems/mruby-compiler/core/parse.y" { - yyerror(p, "END not supported"); - (yyval.nd) = new_postexe(p, (yyvsp[-1].nd)); + (yyval.nd) = list3(0, (yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 6309 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8119 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 28: -#line 1758 "mrbgems/mruby-compiler/core/parse.y" + case 91: /* mlhs_basic: "*" */ +#line 2614 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_masgn(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + (yyval.nd) = list2(0, new_nil(p)); } -#line 6317 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8127 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 29: -#line 1762 "mrbgems/mruby-compiler/core/parse.y" + case 92: /* mlhs_basic: "*" ',' mlhs_post */ +#line 2618 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_asgn(p, (yyvsp[-2].nd), new_array(p, (yyvsp[0].nd))); + (yyval.nd) = list3(0, new_nil(p), (yyvsp[0].nd)); } -#line 6325 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8135 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 30: -#line 1766 "mrbgems/mruby-compiler/core/parse.y" + case 94: /* mlhs_item: tLPAREN mlhs_inner rparen */ +#line 2625 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_masgn(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + (yyval.nd) = new_masgn(p, (yyvsp[-1].nd), NULL); } -#line 6333 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8143 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 31: -#line 1770 "mrbgems/mruby-compiler/core/parse.y" + case 95: /* mlhs_list: mlhs_item ',' */ +#line 2631 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_masgn(p, (yyvsp[-2].nd), new_array(p, (yyvsp[0].nd))); + (yyval.nd) = list1((yyvsp[-1].nd)); } -#line 6341 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8151 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 32: -#line 1774 "mrbgems/mruby-compiler/core/parse.y" + case 96: /* mlhs_list: mlhs_list mlhs_item ',' */ +#line 2635 "mrbgems/mruby-compiler/core/parse.y" { - node *lhs = new_lvar(p, (yyvsp[0].id)); - assignable(p, lhs); - (yyval.nd) = new_asgn(p, lhs, (yyvsp[-2].nd)); + (yyval.nd) = push((yyvsp[-2].nd), (yyvsp[-1].nd)); } -#line 6351 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8159 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 34: -#line 1783 "mrbgems/mruby-compiler/core/parse.y" + case 97: /* mlhs_post: mlhs_item */ +#line 2641 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_asgn(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + (yyval.nd) = list1((yyvsp[0].nd)); } -#line 6359 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8167 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 35: -#line 1787 "mrbgems/mruby-compiler/core/parse.y" + case 98: /* mlhs_post: mlhs_list mlhs_item */ +#line 2645 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_op_asgn(p, (yyvsp[-2].nd), (yyvsp[-1].id), (yyvsp[0].nd)); + (yyval.nd) = push((yyvsp[-1].nd), (yyvsp[0].nd)); } -#line 6367 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8175 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 36: -#line 1791 "mrbgems/mruby-compiler/core/parse.y" + case 99: /* mlhs_node: variable */ +#line 2651 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_op_asgn(p, new_call(p, (yyvsp[-5].nd), intern_op(aref), (yyvsp[-3].nd), '.'), (yyvsp[-1].id), (yyvsp[0].nd)); + assignable(p, (yyvsp[0].nd)); } -#line 6375 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8183 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 37: -#line 1795 "mrbgems/mruby-compiler/core/parse.y" + case 100: /* mlhs_node: primary_value '[' opt_call_args ']' */ +#line 2655 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_op_asgn(p, new_call(p, (yyvsp[-4].nd), (yyvsp[-2].id), 0, (yyvsp[-3].num)), (yyvsp[-1].id), (yyvsp[0].nd)); + (yyval.nd) = new_call(p, (yyvsp[-3].nd), intern_op(aref), (yyvsp[-1].nd), '.'); } -#line 6383 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8191 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 38: -#line 1799 "mrbgems/mruby-compiler/core/parse.y" + case 101: /* mlhs_node: primary_value call_op "local variable or method" */ +#line 2659 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_op_asgn(p, new_call(p, (yyvsp[-4].nd), (yyvsp[-2].id), 0, (yyvsp[-3].num)), (yyvsp[-1].id), (yyvsp[0].nd)); + (yyval.nd) = new_call(p, (yyvsp[-2].nd), (yyvsp[0].id), 0, (yyvsp[-1].num)); } -#line 6391 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8199 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 39: -#line 1803 "mrbgems/mruby-compiler/core/parse.y" + case 102: /* mlhs_node: primary_value "::" "local variable or method" */ +#line 2663 "mrbgems/mruby-compiler/core/parse.y" { - yyerror(p, "constant re-assignment"); - (yyval.nd) = 0; + (yyval.nd) = new_call(p, (yyvsp[-2].nd), (yyvsp[0].id), 0, tCOLON2); } -#line 6400 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8207 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 40: -#line 1808 "mrbgems/mruby-compiler/core/parse.y" + case 103: /* mlhs_node: primary_value call_op "constant" */ +#line 2667 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_op_asgn(p, new_call(p, (yyvsp[-4].nd), (yyvsp[-2].id), 0, tCOLON2), (yyvsp[-1].id), (yyvsp[0].nd)); + (yyval.nd) = new_call(p, (yyvsp[-2].nd), (yyvsp[0].id), 0, (yyvsp[-1].num)); } -#line 6408 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8215 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 41: -#line 1812 "mrbgems/mruby-compiler/core/parse.y" + case 104: /* mlhs_node: primary_value "::" "constant" */ +#line 2671 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[-3].nd); - endless_method_name(p, (yyvsp[-3].nd)); - void_expr_error(p, (yyvsp[0].nd)); - defn_setup(p, (yyval.nd), (yyvsp[-2].nd), (yyvsp[0].nd)); - nvars_unnest(p); - p->in_def--; + if (p->in_def || p->in_single) + yyerror(&(yylsp[-2]), p, "dynamic constant assignment"); + (yyval.nd) = new_colon2(p, (yyvsp[-2].nd), (yyvsp[0].id)); } -#line 6421 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8225 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 42: -#line 1821 "mrbgems/mruby-compiler/core/parse.y" + case 105: /* mlhs_node: tCOLON3 "constant" */ +#line 2677 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[-5].nd); - endless_method_name(p, (yyvsp[-5].nd)); - void_expr_error(p, (yyvsp[-2].nd)); - defn_setup(p, (yyval.nd), (yyvsp[-4].nd), new_mod_rescue(p, (yyvsp[-2].nd), (yyvsp[0].nd))); - nvars_unnest(p); - p->in_def--; + if (p->in_def || p->in_single) + yyerror(&(yylsp[-1]), p, "dynamic constant assignment"); + (yyval.nd) = new_colon3(p, (yyvsp[0].id)); } -#line 6434 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8235 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 43: -#line 1830 "mrbgems/mruby-compiler/core/parse.y" + case 106: /* mlhs_node: backref */ +#line 2683 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[-3].nd); - void_expr_error(p, (yyvsp[0].nd)); - defs_setup(p, (yyval.nd), (yyvsp[-2].nd), (yyvsp[0].nd)); - nvars_unnest(p); - p->in_def--; - p->in_single--; + backref_error(p, (yyvsp[0].nd)); + (yyval.nd) = 0; } -#line 6447 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8244 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 44: -#line 1839 "mrbgems/mruby-compiler/core/parse.y" + case 107: /* lhs: variable */ +#line 2690 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[-5].nd); - void_expr_error(p, (yyvsp[-2].nd)); - defs_setup(p, (yyval.nd), (yyvsp[-4].nd), new_mod_rescue(p, (yyvsp[-2].nd), (yyvsp[0].nd))); - nvars_unnest(p); - p->in_def--; - p->in_single--; + assignable(p, (yyvsp[0].nd)); } -#line 6460 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8252 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 45: -#line 1848 "mrbgems/mruby-compiler/core/parse.y" + case 108: /* lhs: primary_value '[' opt_call_args ']' */ +#line 2694 "mrbgems/mruby-compiler/core/parse.y" { - backref_error(p, (yyvsp[-2].nd)); - (yyval.nd) = new_begin(p, 0); + (yyval.nd) = new_call(p, (yyvsp[-3].nd), intern_op(aref), (yyvsp[-1].nd), '.'); } -#line 6469 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8260 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 47: -#line 1856 "mrbgems/mruby-compiler/core/parse.y" + case 109: /* lhs: primary_value call_op "local variable or method" */ +#line 2698 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_mod_rescue(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + (yyval.nd) = new_call(p, (yyvsp[-2].nd), (yyvsp[0].id), 0, (yyvsp[-1].num)); } -#line 6477 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8268 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 50: -#line 1865 "mrbgems/mruby-compiler/core/parse.y" + case 110: /* lhs: primary_value "::" "local variable or method" */ +#line 2702 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_and(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + (yyval.nd) = new_call(p, (yyvsp[-2].nd), (yyvsp[0].id), 0, tCOLON2); } -#line 6485 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8276 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 51: -#line 1869 "mrbgems/mruby-compiler/core/parse.y" + case 111: /* lhs: primary_value call_op "constant" */ +#line 2706 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_or(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + (yyval.nd) = new_call(p, (yyvsp[-2].nd), (yyvsp[0].id), 0, (yyvsp[-1].num)); } -#line 6493 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8284 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 52: -#line 1873 "mrbgems/mruby-compiler/core/parse.y" + case 112: /* lhs: primary_value "::" "constant" */ +#line 2710 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_uni_op(p, cond((yyvsp[0].nd)), "!"); + if (p->in_def || p->in_single) + yyerror(&(yylsp[-2]), p, "dynamic constant assignment"); + (yyval.nd) = new_colon2(p, (yyvsp[-2].nd), (yyvsp[0].id)); } -#line 6501 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8294 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 53: -#line 1877 "mrbgems/mruby-compiler/core/parse.y" + case 113: /* lhs: tCOLON3 "constant" */ +#line 2716 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_uni_op(p, cond((yyvsp[0].nd)), "!"); + if (p->in_def || p->in_single) + yyerror(&(yylsp[-1]), p, "dynamic constant assignment"); + (yyval.nd) = new_colon3(p, (yyvsp[0].id)); } -#line 6509 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8304 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 55: -#line 1885 "mrbgems/mruby-compiler/core/parse.y" + case 114: /* lhs: backref */ +#line 2722 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_def(p, (yyvsp[0].id), nint(p->cmdarg_stack), local_switch(p)); - p->cmdarg_stack = 0; - p->in_def++; - nvars_block(p); + backref_error(p, (yyvsp[0].nd)); + (yyval.nd) = 0; } -#line 6520 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8313 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 56: -#line 1894 "mrbgems/mruby-compiler/core/parse.y" + case 115: /* lhs: "numbered parameter" */ +#line 2727 "mrbgems/mruby-compiler/core/parse.y" { - p->lstate = EXPR_FNAME; + yyerror(&(yylsp[0]), p, "can't assign to numbered parameter"); } -#line 6528 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8321 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 57: -#line 1898 "mrbgems/mruby-compiler/core/parse.y" + case 116: /* cname: "local variable or method" */ +#line 2733 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_sdef(p, (yyvsp[-3].nd), (yyvsp[0].id), nint(p->cmdarg_stack), local_switch(p)); - p->cmdarg_stack = 0; - p->in_def++; - p->in_single++; - nvars_block(p); - p->lstate = EXPR_ENDFN; /* force for args */ + yyerror(&(yylsp[0]), p, "class/module name must be CONSTANT"); } -#line 6541 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8329 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 58: -#line 1909 "mrbgems/mruby-compiler/core/parse.y" + case 118: /* cpath: tCOLON3 cname */ +#line 2740 "mrbgems/mruby-compiler/core/parse.y" { - if (!(yyvsp[0].nd)) (yyval.nd) = new_nil(p); - else { - (yyval.nd) = (yyvsp[0].nd); - } + (yyval.nd) = cons(int_to_node(1), sym_to_node((yyvsp[0].id))); } -#line 6552 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8337 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 62: -#line 1923 "mrbgems/mruby-compiler/core/parse.y" + case 119: /* cpath: cname */ +#line 2744 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_call(p, (yyvsp[-3].nd), (yyvsp[-1].id), (yyvsp[0].nd), (yyvsp[-2].num)); + (yyval.nd) = cons(int_to_node(0), sym_to_node((yyvsp[0].id))); } -#line 6560 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8345 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 63: -#line 1929 "mrbgems/mruby-compiler/core/parse.y" + case 120: /* cpath: primary_value "::" cname */ +#line 2748 "mrbgems/mruby-compiler/core/parse.y" { - local_nest(p); - nvars_nest(p); + void_expr_error(p, (yyvsp[-2].nd)); + (yyval.nd) = cons((yyvsp[-2].nd), sym_to_node((yyvsp[0].id))); } -#line 6569 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8354 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 64: -#line 1936 "mrbgems/mruby-compiler/core/parse.y" + case 124: /* fname: op */ +#line 2758 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_block(p, (yyvsp[-2].nd), (yyvsp[-1].nd)); - local_unnest(p); - nvars_unnest(p); + p->lstate = EXPR_ENDFN; + (yyval.id) = (yyvsp[0].id); } -#line 6579 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8363 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 65: -#line 1944 "mrbgems/mruby-compiler/core/parse.y" + case 125: /* fname: reswords */ +#line 2763 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_fcall(p, (yyvsp[-1].id), (yyvsp[0].nd)); + p->lstate = EXPR_ENDFN; + (yyval.id) = (yyvsp[0].id); } -#line 6587 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8372 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 66: -#line 1948 "mrbgems/mruby-compiler/core/parse.y" + case 128: /* undef_list: fsym */ +#line 2774 "mrbgems/mruby-compiler/core/parse.y" { - args_with_block(p, (yyvsp[-1].nd), (yyvsp[0].nd)); - (yyval.nd) = new_fcall(p, (yyvsp[-2].id), (yyvsp[-1].nd)); + (yyval.nd) = cons(sym_to_node((yyvsp[0].id)), 0); } -#line 6596 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8380 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 129: /* $@8: %empty */ +#line 2777 "mrbgems/mruby-compiler/core/parse.y" + {p->lstate = EXPR_FNAME;} +#line 8386 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 67: -#line 1953 "mrbgems/mruby-compiler/core/parse.y" + case 130: /* undef_list: undef_list ',' $@8 fsym */ +#line 2778 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_call(p, (yyvsp[-3].nd), (yyvsp[-1].id), (yyvsp[0].nd), (yyvsp[-2].num)); + (yyval.nd) = push((yyvsp[-3].nd), sym_to_node((yyvsp[0].id))); + } +#line 8394 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 131: /* op: '|' */ +#line 2783 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(or); } +#line 8400 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 132: /* op: '^' */ +#line 2784 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(xor); } +#line 8406 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 133: /* op: '&' */ +#line 2785 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(and); } +#line 8412 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 134: /* op: "<=>" */ +#line 2786 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(cmp); } +#line 8418 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 135: /* op: "==" */ +#line 2787 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(eq); } +#line 8424 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 136: /* op: "===" */ +#line 2788 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(eqq); } +#line 8430 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 137: /* op: "=~" */ +#line 2789 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(match); } +#line 8436 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 138: /* op: "!~" */ +#line 2790 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(nmatch); } +#line 8442 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 139: /* op: '>' */ +#line 2791 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(gt); } +#line 8448 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 140: /* op: ">=" */ +#line 2792 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(ge); } +#line 8454 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 141: /* op: '<' */ +#line 2793 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(lt); } +#line 8460 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 142: /* op: "<=" */ +#line 2794 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(le); } +#line 8466 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 143: /* op: "!=" */ +#line 2795 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(neq); } +#line 8472 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 144: /* op: "<<" */ +#line 2796 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(lshift); } +#line 8478 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 145: /* op: ">>" */ +#line 2797 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(rshift); } +#line 8484 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 146: /* op: '+' */ +#line 2798 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(add); } +#line 8490 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 147: /* op: '-' */ +#line 2799 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(sub); } +#line 8496 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 148: /* op: '*' */ +#line 2800 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(mul); } +#line 8502 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 149: /* op: "*" */ +#line 2801 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(mul); } +#line 8508 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 150: /* op: '/' */ +#line 2802 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(div); } +#line 8514 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 151: /* op: '%' */ +#line 2803 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(mod); } +#line 8520 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 152: /* op: tPOW */ +#line 2804 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(pow); } +#line 8526 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 153: /* op: "**" */ +#line 2805 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(pow); } +#line 8532 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 154: /* op: '!' */ +#line 2806 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(not); } +#line 8538 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 155: /* op: '~' */ +#line 2807 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(neg); } +#line 8544 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 156: /* op: "unary plus" */ +#line 2808 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(plus); } +#line 8550 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 157: /* op: "unary minus" */ +#line 2809 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(minus); } +#line 8556 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 158: /* op: tAREF */ +#line 2810 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(aref); } +#line 8562 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 159: /* op: tASET */ +#line 2811 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(aset); } +#line 8568 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 160: /* op: '`' */ +#line 2812 "mrbgems/mruby-compiler/core/parse.y" + { (yyval.id) = intern_op(tick); } +#line 8574 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 201: /* arg: lhs '=' arg_rhs */ +#line 2830 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_asgn(p, (yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 6604 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8582 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 68: -#line 1957 "mrbgems/mruby-compiler/core/parse.y" + case 202: /* arg: var_lhs tOP_ASGN arg_rhs */ +#line 2834 "mrbgems/mruby-compiler/core/parse.y" { - args_with_block(p, (yyvsp[-1].nd), (yyvsp[0].nd)); - (yyval.nd) = new_call(p, (yyvsp[-4].nd), (yyvsp[-2].id), (yyvsp[-1].nd), (yyvsp[-3].num)); - } -#line 6613 "mrbgems/mruby-compiler/core/y.tab.c" + (yyval.nd) = new_op_asgn(p, (yyvsp[-2].nd), (yyvsp[-1].id), (yyvsp[0].nd)); + } +#line 8590 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 69: -#line 1962 "mrbgems/mruby-compiler/core/parse.y" + case 203: /* arg: primary_value '[' opt_call_args ']' tOP_ASGN arg_rhs */ +#line 2838 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_call(p, (yyvsp[-3].nd), (yyvsp[-1].id), (yyvsp[0].nd), tCOLON2); + (yyval.nd) = new_op_asgn(p, new_call(p, (yyvsp[-5].nd), intern_op(aref), (yyvsp[-3].nd), '.'), (yyvsp[-1].id), (yyvsp[0].nd)); } -#line 6621 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8598 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 70: -#line 1966 "mrbgems/mruby-compiler/core/parse.y" + case 204: /* arg: primary_value call_op "local variable or method" tOP_ASGN arg_rhs */ +#line 2842 "mrbgems/mruby-compiler/core/parse.y" { - args_with_block(p, (yyvsp[-1].nd), (yyvsp[0].nd)); - (yyval.nd) = new_call(p, (yyvsp[-4].nd), (yyvsp[-2].id), (yyvsp[-1].nd), tCOLON2); + (yyval.nd) = new_op_asgn(p, new_call(p, (yyvsp[-4].nd), (yyvsp[-2].id), 0, (yyvsp[-3].num)), (yyvsp[-1].id), (yyvsp[0].nd)); } -#line 6630 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8606 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 71: -#line 1971 "mrbgems/mruby-compiler/core/parse.y" + case 205: /* arg: primary_value call_op "constant" tOP_ASGN arg_rhs */ +#line 2846 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_super(p, (yyvsp[0].nd)); + (yyval.nd) = new_op_asgn(p, new_call(p, (yyvsp[-4].nd), (yyvsp[-2].id), 0, (yyvsp[-3].num)), (yyvsp[-1].id), (yyvsp[0].nd)); } -#line 6638 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8614 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 72: -#line 1975 "mrbgems/mruby-compiler/core/parse.y" + case 206: /* arg: primary_value "::" "local variable or method" tOP_ASGN arg_rhs */ +#line 2850 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_yield(p, (yyvsp[0].nd)); + (yyval.nd) = new_op_asgn(p, new_call(p, (yyvsp[-4].nd), (yyvsp[-2].id), 0, tCOLON2), (yyvsp[-1].id), (yyvsp[0].nd)); } -#line 6646 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8622 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 73: -#line 1979 "mrbgems/mruby-compiler/core/parse.y" + case 207: /* arg: primary_value "::" "constant" tOP_ASGN arg_rhs */ +#line 2854 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_return(p, ret_args(p, (yyvsp[0].nd))); + yyerror(&(yylsp[-4]), p, "constant re-assignment"); + (yyval.nd) = new_stmts(p, 0); } -#line 6654 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8631 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 74: -#line 1983 "mrbgems/mruby-compiler/core/parse.y" + case 208: /* arg: tCOLON3 "constant" tOP_ASGN arg_rhs */ +#line 2859 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_break(p, ret_args(p, (yyvsp[0].nd))); + yyerror(&(yylsp[-3]), p, "constant re-assignment"); + (yyval.nd) = new_stmts(p, 0); } -#line 6662 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8640 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 75: -#line 1987 "mrbgems/mruby-compiler/core/parse.y" + case 209: /* arg: backref tOP_ASGN arg_rhs */ +#line 2864 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_next(p, ret_args(p, (yyvsp[0].nd))); + backref_error(p, (yyvsp[-2].nd)); + (yyval.nd) = new_stmts(p, 0); } -#line 6670 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8649 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 76: -#line 1993 "mrbgems/mruby-compiler/core/parse.y" + case 210: /* arg: arg ".." arg */ +#line 2869 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[0].nd); + (yyval.nd) = new_dot2(p, (yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 6678 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8657 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 77: -#line 1997 "mrbgems/mruby-compiler/core/parse.y" + case 211: /* arg: arg ".." */ +#line 2873 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[-1].nd); + (yyval.nd) = new_dot2(p, (yyvsp[-1].nd), new_nil(p)); } -#line 6686 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8665 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 79: -#line 2004 "mrbgems/mruby-compiler/core/parse.y" + case 212: /* arg: tBDOT2 arg */ +#line 2877 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[-1].nd); + (yyval.nd) = new_dot2(p, new_nil(p), (yyvsp[0].nd)); } -#line 6694 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8673 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 80: -#line 2010 "mrbgems/mruby-compiler/core/parse.y" + case 213: /* arg: arg "..." arg */ +#line 2881 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list1((yyvsp[0].nd)); + (yyval.nd) = new_dot3(p, (yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 6702 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8681 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 81: -#line 2014 "mrbgems/mruby-compiler/core/parse.y" + case 214: /* arg: arg "..." */ +#line 2885 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list1(push((yyvsp[-1].nd),(yyvsp[0].nd))); + (yyval.nd) = new_dot3(p, (yyvsp[-1].nd), new_nil(p)); } -#line 6710 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8689 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 82: -#line 2018 "mrbgems/mruby-compiler/core/parse.y" + case 215: /* arg: tBDOT3 arg */ +#line 2889 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list2((yyvsp[-2].nd), (yyvsp[0].nd)); + (yyval.nd) = new_dot3(p, new_nil(p), (yyvsp[0].nd)); } -#line 6718 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8697 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 83: -#line 2022 "mrbgems/mruby-compiler/core/parse.y" + case 216: /* arg: arg '+' arg */ +#line 2893 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list3((yyvsp[-4].nd), (yyvsp[-2].nd), (yyvsp[0].nd)); + (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "+", (yyvsp[0].nd)); } -#line 6726 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8705 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 84: -#line 2026 "mrbgems/mruby-compiler/core/parse.y" + case 217: /* arg: arg '-' arg */ +#line 2897 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list2((yyvsp[-1].nd), new_nil(p)); + (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "-", (yyvsp[0].nd)); } -#line 6734 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8713 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 85: -#line 2030 "mrbgems/mruby-compiler/core/parse.y" + case 218: /* arg: arg '*' arg */ +#line 2901 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list3((yyvsp[-3].nd), new_nil(p), (yyvsp[0].nd)); + (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "*", (yyvsp[0].nd)); } -#line 6742 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8721 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 86: -#line 2034 "mrbgems/mruby-compiler/core/parse.y" + case 219: /* arg: arg '/' arg */ +#line 2905 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list2(0, (yyvsp[0].nd)); + (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "/", (yyvsp[0].nd)); } -#line 6750 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8729 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 87: -#line 2038 "mrbgems/mruby-compiler/core/parse.y" + case 220: /* arg: arg '%' arg */ +#line 2909 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list3(0, (yyvsp[-2].nd), (yyvsp[0].nd)); + (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "%", (yyvsp[0].nd)); } -#line 6758 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8737 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 88: -#line 2042 "mrbgems/mruby-compiler/core/parse.y" + case 221: /* arg: arg tPOW arg */ +#line 2913 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list2(0, new_nil(p)); + (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "**", (yyvsp[0].nd)); } -#line 6766 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8745 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 89: -#line 2046 "mrbgems/mruby-compiler/core/parse.y" + case 222: /* arg: tUMINUS_NUM "integer literal" tPOW arg */ +#line 2917 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list3(0, new_nil(p), (yyvsp[0].nd)); + (yyval.nd) = new_negate(p, call_bin_op(p, (yyvsp[-2].nd), "**", (yyvsp[0].nd))); } -#line 6774 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8753 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 91: -#line 2053 "mrbgems/mruby-compiler/core/parse.y" + case 223: /* arg: tUMINUS_NUM "float literal" tPOW arg */ +#line 2921 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_masgn(p, (yyvsp[-1].nd), NULL); + (yyval.nd) = new_negate(p, call_bin_op(p, (yyvsp[-2].nd), "**", (yyvsp[0].nd))); } -#line 6782 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8761 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 92: -#line 2059 "mrbgems/mruby-compiler/core/parse.y" + case 224: /* arg: "unary plus" arg */ +#line 2925 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list1((yyvsp[-1].nd)); + (yyval.nd) = call_uni_op(p, (yyvsp[0].nd), "+@"); } -#line 6790 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8769 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 93: -#line 2063 "mrbgems/mruby-compiler/core/parse.y" + case 225: /* arg: "unary minus" arg */ +#line 2929 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = push((yyvsp[-2].nd), (yyvsp[-1].nd)); + (yyval.nd) = new_negate(p, (yyvsp[0].nd)); } -#line 6798 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8777 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 94: -#line 2069 "mrbgems/mruby-compiler/core/parse.y" + case 226: /* arg: arg '|' arg */ +#line 2933 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list1((yyvsp[0].nd)); + (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "|", (yyvsp[0].nd)); } -#line 6806 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8785 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 95: -#line 2073 "mrbgems/mruby-compiler/core/parse.y" + case 227: /* arg: arg '^' arg */ +#line 2937 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = push((yyvsp[-1].nd), (yyvsp[0].nd)); + (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "^", (yyvsp[0].nd)); } -#line 6814 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8793 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 96: -#line 2079 "mrbgems/mruby-compiler/core/parse.y" + case 228: /* arg: arg '&' arg */ +#line 2941 "mrbgems/mruby-compiler/core/parse.y" { - assignable(p, (yyvsp[0].nd)); + (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "&", (yyvsp[0].nd)); } -#line 6822 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8801 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 97: -#line 2083 "mrbgems/mruby-compiler/core/parse.y" + case 229: /* arg: arg "<=>" arg */ +#line 2945 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_call(p, (yyvsp[-3].nd), intern_op(aref), (yyvsp[-1].nd), '.'); + (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "<=>", (yyvsp[0].nd)); } -#line 6830 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8809 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 98: -#line 2087 "mrbgems/mruby-compiler/core/parse.y" + case 230: /* arg: arg '>' arg */ +#line 2949 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_call(p, (yyvsp[-2].nd), (yyvsp[0].id), 0, (yyvsp[-1].num)); + (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), ">", (yyvsp[0].nd)); } -#line 6838 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8817 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 99: -#line 2091 "mrbgems/mruby-compiler/core/parse.y" + case 231: /* arg: arg ">=" arg */ +#line 2953 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_call(p, (yyvsp[-2].nd), (yyvsp[0].id), 0, tCOLON2); + (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), ">=", (yyvsp[0].nd)); } -#line 6846 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8825 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 100: -#line 2095 "mrbgems/mruby-compiler/core/parse.y" + case 232: /* arg: arg '<' arg */ +#line 2957 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_call(p, (yyvsp[-2].nd), (yyvsp[0].id), 0, (yyvsp[-1].num)); + (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "<", (yyvsp[0].nd)); } -#line 6854 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8833 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 101: -#line 2099 "mrbgems/mruby-compiler/core/parse.y" + case 233: /* arg: arg "<=" arg */ +#line 2961 "mrbgems/mruby-compiler/core/parse.y" { - if (p->in_def || p->in_single) - yyerror(p, "dynamic constant assignment"); - (yyval.nd) = new_colon2(p, (yyvsp[-2].nd), (yyvsp[0].id)); + (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "<=", (yyvsp[0].nd)); } -#line 6864 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8841 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 102: -#line 2105 "mrbgems/mruby-compiler/core/parse.y" + case 234: /* arg: arg "==" arg */ +#line 2965 "mrbgems/mruby-compiler/core/parse.y" { - if (p->in_def || p->in_single) - yyerror(p, "dynamic constant assignment"); - (yyval.nd) = new_colon3(p, (yyvsp[0].id)); + (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "==", (yyvsp[0].nd)); } -#line 6874 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8849 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 103: -#line 2111 "mrbgems/mruby-compiler/core/parse.y" + case 235: /* arg: arg "===" arg */ +#line 2969 "mrbgems/mruby-compiler/core/parse.y" { - backref_error(p, (yyvsp[0].nd)); - (yyval.nd) = 0; + (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "===", (yyvsp[0].nd)); } -#line 6883 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8857 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 104: -#line 2118 "mrbgems/mruby-compiler/core/parse.y" + case 236: /* arg: arg "!=" arg */ +#line 2973 "mrbgems/mruby-compiler/core/parse.y" { - assignable(p, (yyvsp[0].nd)); + (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "!=", (yyvsp[0].nd)); } -#line 6891 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8865 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 105: -#line 2122 "mrbgems/mruby-compiler/core/parse.y" + case 237: /* arg: arg "=~" arg */ +#line 2977 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_call(p, (yyvsp[-3].nd), intern_op(aref), (yyvsp[-1].nd), '.'); + (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "=~", (yyvsp[0].nd)); } -#line 6899 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8873 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 106: -#line 2126 "mrbgems/mruby-compiler/core/parse.y" + case 238: /* arg: arg "!~" arg */ +#line 2981 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_call(p, (yyvsp[-2].nd), (yyvsp[0].id), 0, (yyvsp[-1].num)); + (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "!~", (yyvsp[0].nd)); } -#line 6907 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8881 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 107: -#line 2130 "mrbgems/mruby-compiler/core/parse.y" + case 239: /* arg: '!' arg */ +#line 2985 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_call(p, (yyvsp[-2].nd), (yyvsp[0].id), 0, tCOLON2); + (yyval.nd) = call_uni_op(p, cond((yyvsp[0].nd)), "!"); } -#line 6915 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8889 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 108: -#line 2134 "mrbgems/mruby-compiler/core/parse.y" + case 240: /* arg: '~' arg */ +#line 2989 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_call(p, (yyvsp[-2].nd), (yyvsp[0].id), 0, (yyvsp[-1].num)); + (yyval.nd) = call_uni_op(p, cond((yyvsp[0].nd)), "~"); } -#line 6923 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8897 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 109: -#line 2138 "mrbgems/mruby-compiler/core/parse.y" + case 241: /* arg: arg "<<" arg */ +#line 2993 "mrbgems/mruby-compiler/core/parse.y" { - if (p->in_def || p->in_single) - yyerror(p, "dynamic constant assignment"); - (yyval.nd) = new_colon2(p, (yyvsp[-2].nd), (yyvsp[0].id)); + (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "<<", (yyvsp[0].nd)); } -#line 6933 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8905 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 110: -#line 2144 "mrbgems/mruby-compiler/core/parse.y" + case 242: /* arg: arg ">>" arg */ +#line 2997 "mrbgems/mruby-compiler/core/parse.y" { - if (p->in_def || p->in_single) - yyerror(p, "dynamic constant assignment"); - (yyval.nd) = new_colon3(p, (yyvsp[0].id)); + (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), ">>", (yyvsp[0].nd)); } -#line 6943 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8913 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 111: -#line 2150 "mrbgems/mruby-compiler/core/parse.y" + case 243: /* arg: arg "&&" arg */ +#line 3001 "mrbgems/mruby-compiler/core/parse.y" { - backref_error(p, (yyvsp[0].nd)); - (yyval.nd) = 0; + (yyval.nd) = new_and(p, (yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 6952 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8921 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 112: -#line 2155 "mrbgems/mruby-compiler/core/parse.y" + case 244: /* arg: arg "||" arg */ +#line 3005 "mrbgems/mruby-compiler/core/parse.y" { - yyerror(p, "can't assign to numbered parameter"); + (yyval.nd) = new_or(p, (yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 6960 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8929 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 113: -#line 2161 "mrbgems/mruby-compiler/core/parse.y" + case 245: /* arg: arg '?' arg opt_nl ':' arg */ +#line 3009 "mrbgems/mruby-compiler/core/parse.y" { - yyerror(p, "class/module name must be CONSTANT"); + (yyval.nd) = new_if(p, cond((yyvsp[-5].nd)), (yyvsp[-3].nd), (yyvsp[0].nd)); } -#line 6968 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8937 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 115: -#line 2168 "mrbgems/mruby-compiler/core/parse.y" + case 246: /* arg: arg '?' arg opt_nl "label" arg */ +#line 3013 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = cons(nint(1), nsym((yyvsp[0].id))); + (yyval.nd) = new_if(p, cond((yyvsp[-5].nd)), (yyvsp[-3].nd), (yyvsp[0].nd)); } -#line 6976 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8945 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 116: -#line 2172 "mrbgems/mruby-compiler/core/parse.y" + case 247: /* arg: defn_head f_opt_arglist_paren '=' arg */ +#line 3017 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = cons(nint(0), nsym((yyvsp[0].id))); + (yyval.nd) = (yyvsp[-3].nd); + endless_method_name(p, (yyvsp[-3].nd)); + void_expr_error(p, (yyvsp[0].nd)); + defn_setup(p, (yyval.nd), (yyvsp[-2].nd), (yyvsp[0].nd)); + nvars_unnest(p); + p->in_def--; } -#line 6984 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8958 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 117: -#line 2176 "mrbgems/mruby-compiler/core/parse.y" + case 248: /* arg: defn_head f_opt_arglist_paren '=' arg "'rescue' modifier" arg */ +#line 3026 "mrbgems/mruby-compiler/core/parse.y" { + (yyval.nd) = (yyvsp[-5].nd); + endless_method_name(p, (yyvsp[-5].nd)); void_expr_error(p, (yyvsp[-2].nd)); - (yyval.nd) = cons((yyvsp[-2].nd), nsym((yyvsp[0].id))); + defn_setup(p, (yyval.nd), (yyvsp[-4].nd), new_mod_rescue(p, (yyvsp[-2].nd), (yyvsp[0].nd))); + nvars_unnest(p); + p->in_def--; } -#line 6993 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8971 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 121: -#line 2186 "mrbgems/mruby-compiler/core/parse.y" + case 249: /* arg: defs_head f_opt_arglist_paren '=' arg */ +#line 3035 "mrbgems/mruby-compiler/core/parse.y" { - p->lstate = EXPR_ENDFN; - (yyval.id) = (yyvsp[0].id); + (yyval.nd) = (yyvsp[-3].nd); + void_expr_error(p, (yyvsp[0].nd)); + defn_setup(p, (yyval.nd), (yyvsp[-2].nd), (yyvsp[0].nd)); + nvars_unnest(p); + p->in_def--; + p->in_single--; } -#line 7002 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8984 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 122: -#line 2191 "mrbgems/mruby-compiler/core/parse.y" + case 250: /* arg: defs_head f_opt_arglist_paren '=' arg "'rescue' modifier" arg */ +#line 3044 "mrbgems/mruby-compiler/core/parse.y" { - p->lstate = EXPR_ENDFN; - (yyval.id) = (yyvsp[0].id); + (yyval.nd) = (yyvsp[-5].nd); + void_expr_error(p, (yyvsp[-2].nd)); + defn_setup(p, (yyval.nd), (yyvsp[-4].nd), new_mod_rescue(p, (yyvsp[-2].nd), (yyvsp[0].nd))); + nvars_unnest(p); + p->in_def--; + p->in_single--; } -#line 7011 "mrbgems/mruby-compiler/core/y.tab.c" +#line 8997 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 125: -#line 2202 "mrbgems/mruby-compiler/core/parse.y" + case 251: /* arg: primary */ +#line 3053 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_undef(p, (yyvsp[0].id)); + (yyval.nd) = (yyvsp[0].nd); } -#line 7019 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 126: -#line 2205 "mrbgems/mruby-compiler/core/parse.y" - {p->lstate = EXPR_FNAME;} -#line 7025 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9005 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 127: -#line 2206 "mrbgems/mruby-compiler/core/parse.y" + case 253: /* aref_args: args trailer */ +#line 3060 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = push((yyvsp[-3].nd), nsym((yyvsp[0].id))); + (yyval.nd) = (yyvsp[-1].nd); } -#line 7033 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 128: -#line 2211 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(or); } -#line 7039 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 129: -#line 2212 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(xor); } -#line 7045 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 130: -#line 2213 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(and); } -#line 7051 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 131: -#line 2214 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(cmp); } -#line 7057 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 132: -#line 2215 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(eq); } -#line 7063 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 133: -#line 2216 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(eqq); } -#line 7069 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 134: -#line 2217 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(match); } -#line 7075 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 135: -#line 2218 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(nmatch); } -#line 7081 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 136: -#line 2219 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(gt); } -#line 7087 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 137: -#line 2220 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(ge); } -#line 7093 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 138: -#line 2221 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(lt); } -#line 7099 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9013 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 139: -#line 2222 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(le); } -#line 7105 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 140: -#line 2223 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(neq); } -#line 7111 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 141: -#line 2224 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(lshift); } -#line 7117 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 142: -#line 2225 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(rshift); } -#line 7123 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 143: -#line 2226 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(add); } -#line 7129 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 144: -#line 2227 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(sub); } -#line 7135 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 145: -#line 2228 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(mul); } -#line 7141 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 146: -#line 2229 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(mul); } -#line 7147 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 147: -#line 2230 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(div); } -#line 7153 "mrbgems/mruby-compiler/core/y.tab.c" + case 254: /* aref_args: args comma assocs trailer */ +#line 3064 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = push((yyvsp[-3].nd), new_hash(p, (yyvsp[-1].nd))); + } +#line 9021 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 148: -#line 2231 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(mod); } -#line 7159 "mrbgems/mruby-compiler/core/y.tab.c" + case 255: /* aref_args: assocs trailer */ +#line 3068 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = cons(new_hash(p, (yyvsp[-1].nd)), 0); + } +#line 9029 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 149: -#line 2232 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(pow); } -#line 7165 "mrbgems/mruby-compiler/core/y.tab.c" + case 256: /* arg_rhs: arg */ +#line 3074 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = (yyvsp[0].nd); + } +#line 9037 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 150: -#line 2233 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(pow); } -#line 7171 "mrbgems/mruby-compiler/core/y.tab.c" + case 257: /* arg_rhs: arg "'rescue' modifier" arg */ +#line 3078 "mrbgems/mruby-compiler/core/parse.y" + { + void_expr_error(p, (yyvsp[-2].nd)); + (yyval.nd) = new_mod_rescue(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + } +#line 9046 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 151: -#line 2234 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(not); } -#line 7177 "mrbgems/mruby-compiler/core/y.tab.c" + case 258: /* paren_args: '(' opt_call_args ')' */ +#line 3085 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = (yyvsp[-1].nd); + } +#line 9054 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 152: -#line 2235 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(neg); } -#line 7183 "mrbgems/mruby-compiler/core/y.tab.c" + case 259: /* paren_args: '(' args comma tBDOT3 rparen */ +#line 3089 "mrbgems/mruby-compiler/core/parse.y" + { + mrb_sym r = intern_op(mul); + mrb_sym k = intern_op(pow); + mrb_sym b = intern_op(and); + (yyval.nd) = new_callargs(p, push((yyvsp[-3].nd), new_splat(p, new_lvar(p, r))), + list1(cons(new_kw_rest_args(p, 0), new_lvar(p, k))), + new_block_arg(p, new_lvar(p, b))); + } +#line 9067 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 153: -#line 2236 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(plus); } -#line 7189 "mrbgems/mruby-compiler/core/y.tab.c" + case 260: /* paren_args: '(' tBDOT3 rparen */ +#line 3098 "mrbgems/mruby-compiler/core/parse.y" + { + mrb_sym r = intern_op(mul); + mrb_sym k = intern_op(pow); + mrb_sym b = intern_op(and); + if (local_var_p(p, r) && local_var_p(p, k) && local_var_p(p, b)) { + (yyval.nd) = new_callargs(p, list1(new_splat(p, new_lvar(p, r))), + list1(cons(new_kw_rest_args(p, 0), new_lvar(p, k))), + new_block_arg(p, new_lvar(p, b))); + } + else { + yyerror(&(yylsp[-2]), p, "unexpected argument forwarding ..."); + (yyval.nd) = 0; + } + } +#line 9086 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 154: -#line 2237 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(minus); } -#line 7195 "mrbgems/mruby-compiler/core/y.tab.c" + case 265: /* opt_call_args: args comma */ +#line 3121 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_callargs(p,(yyvsp[-1].nd),0,0); + } +#line 9094 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 155: -#line 2238 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(aref); } -#line 7201 "mrbgems/mruby-compiler/core/y.tab.c" + case 266: /* opt_call_args: args comma assocs comma */ +#line 3125 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_callargs(p,(yyvsp[-3].nd),(yyvsp[-1].nd),0); + } +#line 9102 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 156: -#line 2239 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(aset); } -#line 7207 "mrbgems/mruby-compiler/core/y.tab.c" + case 267: /* opt_call_args: assocs comma */ +#line 3129 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_callargs(p,0,(yyvsp[-1].nd),0); + } +#line 9110 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 157: -#line 2240 "mrbgems/mruby-compiler/core/parse.y" - { (yyval.id) = intern_op(tick); } -#line 7213 "mrbgems/mruby-compiler/core/y.tab.c" + case 268: /* call_args: command */ +#line 3135 "mrbgems/mruby-compiler/core/parse.y" + { + void_expr_error(p, (yyvsp[0].nd)); + (yyval.nd) = new_callargs(p, list1((yyvsp[0].nd)), 0, 0); + } +#line 9119 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 198: -#line 2258 "mrbgems/mruby-compiler/core/parse.y" + case 269: /* call_args: args opt_block_arg */ +#line 3140 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_asgn(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + (yyval.nd) = new_callargs(p, (yyvsp[-1].nd), 0, (yyvsp[0].nd)); } -#line 7221 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9127 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 199: -#line 2262 "mrbgems/mruby-compiler/core/parse.y" + case 270: /* call_args: assocs opt_block_arg */ +#line 3144 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_op_asgn(p, (yyvsp[-2].nd), (yyvsp[-1].id), (yyvsp[0].nd)); + (yyval.nd) = new_callargs(p, 0, (yyvsp[-1].nd), (yyvsp[0].nd)); } -#line 7229 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9135 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 200: -#line 2266 "mrbgems/mruby-compiler/core/parse.y" + case 271: /* call_args: args comma assocs opt_block_arg */ +#line 3148 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_op_asgn(p, new_call(p, (yyvsp[-5].nd), intern_op(aref), (yyvsp[-3].nd), '.'), (yyvsp[-1].id), (yyvsp[0].nd)); + (yyval.nd) = new_callargs(p, (yyvsp[-3].nd), (yyvsp[-1].nd), (yyvsp[0].nd)); } -#line 7237 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9143 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 201: -#line 2270 "mrbgems/mruby-compiler/core/parse.y" + case 272: /* call_args: block_arg */ +#line 3152 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_op_asgn(p, new_call(p, (yyvsp[-4].nd), (yyvsp[-2].id), 0, (yyvsp[-3].num)), (yyvsp[-1].id), (yyvsp[0].nd)); + (yyval.nd) = new_callargs(p, 0, 0, (yyvsp[0].nd)); } -#line 7245 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9151 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 202: -#line 2274 "mrbgems/mruby-compiler/core/parse.y" - { - (yyval.nd) = new_op_asgn(p, new_call(p, (yyvsp[-4].nd), (yyvsp[-2].id), 0, (yyvsp[-3].num)), (yyvsp[-1].id), (yyvsp[0].nd)); + case 273: /* @9: %empty */ +#line 3157 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.stack) = p->cmdarg_stack; + CMDARG_PUSH(1); } -#line 7253 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9160 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 203: -#line 2278 "mrbgems/mruby-compiler/core/parse.y" + case 274: /* command_args: @9 call_args */ +#line 3162 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_op_asgn(p, new_call(p, (yyvsp[-4].nd), (yyvsp[-2].id), 0, tCOLON2), (yyvsp[-1].id), (yyvsp[0].nd)); + p->cmdarg_stack = (yyvsp[-1].stack); + (yyval.nd) = (yyvsp[0].nd); } -#line 7261 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9169 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 204: -#line 2282 "mrbgems/mruby-compiler/core/parse.y" + case 275: /* block_arg: "&" arg */ +#line 3169 "mrbgems/mruby-compiler/core/parse.y" { - yyerror(p, "constant re-assignment"); - (yyval.nd) = new_begin(p, 0); + (yyval.nd) = new_block_arg(p, (yyvsp[0].nd)); } -#line 7270 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9177 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 205: -#line 2287 "mrbgems/mruby-compiler/core/parse.y" + case 276: /* block_arg: "&" */ +#line 3173 "mrbgems/mruby-compiler/core/parse.y" { - yyerror(p, "constant re-assignment"); - (yyval.nd) = new_begin(p, 0); + (yyval.nd) = new_block_arg(p, 0); } -#line 7279 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9185 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 206: -#line 2292 "mrbgems/mruby-compiler/core/parse.y" + case 277: /* opt_block_arg: comma block_arg */ +#line 3179 "mrbgems/mruby-compiler/core/parse.y" { - backref_error(p, (yyvsp[-2].nd)); - (yyval.nd) = new_begin(p, 0); + (yyval.nd) = (yyvsp[0].nd); } -#line 7288 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9193 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 207: -#line 2297 "mrbgems/mruby-compiler/core/parse.y" + case 278: /* opt_block_arg: none */ +#line 3183 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_dot2(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + (yyval.nd) = 0; } -#line 7296 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9201 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 208: -#line 2301 "mrbgems/mruby-compiler/core/parse.y" + case 280: /* args: arg */ +#line 3192 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_dot2(p, (yyvsp[-1].nd), new_nil(p)); + void_expr_error(p, (yyvsp[0].nd)); + (yyval.nd) = list1((yyvsp[0].nd)); } -#line 7304 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9210 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 209: -#line 2305 "mrbgems/mruby-compiler/core/parse.y" + case 281: /* args: "*" */ +#line 3197 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_dot2(p, new_nil(p), (yyvsp[0].nd)); + (yyval.nd) = list1(new_splat(p, new_lvar(p, intern_op(mul)))); } -#line 7312 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9218 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 210: -#line 2309 "mrbgems/mruby-compiler/core/parse.y" + case 282: /* args: "*" arg */ +#line 3201 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_dot3(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + (yyval.nd) = list1(new_splat(p, (yyvsp[0].nd))); } -#line 7320 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9226 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 211: -#line 2313 "mrbgems/mruby-compiler/core/parse.y" + case 283: /* args: args comma arg */ +#line 3205 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_dot3(p, (yyvsp[-1].nd), new_nil(p)); + void_expr_error(p, (yyvsp[0].nd)); + (yyval.nd) = push((yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 7328 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9235 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 212: -#line 2317 "mrbgems/mruby-compiler/core/parse.y" + case 284: /* args: args comma "*" */ +#line 3210 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_dot3(p, new_nil(p), (yyvsp[0].nd)); + (yyval.nd) = push((yyvsp[-2].nd), new_splat(p, new_lvar(p, intern_op(mul)))); } -#line 7336 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9243 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 213: -#line 2321 "mrbgems/mruby-compiler/core/parse.y" + case 285: /* args: args comma "*" arg */ +#line 3214 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "+", (yyvsp[0].nd)); + (yyval.nd) = push((yyvsp[-3].nd), new_splat(p, (yyvsp[0].nd))); } -#line 7344 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9251 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 214: -#line 2325 "mrbgems/mruby-compiler/core/parse.y" + case 286: /* mrhs: args comma arg */ +#line 3220 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "-", (yyvsp[0].nd)); + void_expr_error(p, (yyvsp[0].nd)); + (yyval.nd) = push((yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 7352 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9260 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 215: -#line 2329 "mrbgems/mruby-compiler/core/parse.y" + case 287: /* mrhs: args comma "*" arg */ +#line 3225 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "*", (yyvsp[0].nd)); + (yyval.nd) = push((yyvsp[-3].nd), new_splat(p, (yyvsp[0].nd))); } -#line 7360 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9268 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 216: -#line 2333 "mrbgems/mruby-compiler/core/parse.y" + case 288: /* mrhs: "*" arg */ +#line 3229 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "/", (yyvsp[0].nd)); + (yyval.nd) = list1(new_splat(p, (yyvsp[0].nd))); } -#line 7368 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9276 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 217: -#line 2337 "mrbgems/mruby-compiler/core/parse.y" + case 290: /* primary: string */ +#line 3236 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "%", (yyvsp[0].nd)); + (yyval.nd) = new_str(p, (yyvsp[0].nd)); } -#line 7376 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9284 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 218: -#line 2341 "mrbgems/mruby-compiler/core/parse.y" + case 291: /* primary: xstring */ +#line 3240 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "**", (yyvsp[0].nd)); + (yyval.nd) = new_xstr(p, (yyvsp[0].nd)); } -#line 7384 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9292 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 219: -#line 2345 "mrbgems/mruby-compiler/core/parse.y" + case 296: /* primary: "method" */ +#line 3248 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_negate(p, call_bin_op(p, (yyvsp[-2].nd), "**", (yyvsp[0].nd))); + (yyval.nd) = new_fcall(p, (yyvsp[0].id), 0); } -#line 7392 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9300 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 220: -#line 2349 "mrbgems/mruby-compiler/core/parse.y" + case 297: /* @10: %empty */ +#line 3252 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_negate(p, call_bin_op(p, (yyvsp[-2].nd), "**", (yyvsp[0].nd))); + (yyval.stack) = p->cmdarg_stack; + p->cmdarg_stack = 0; } -#line 7400 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9309 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 221: -#line 2353 "mrbgems/mruby-compiler/core/parse.y" + case 298: /* primary: "'begin'" @10 bodystmt "'end'" */ +#line 3258 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_uni_op(p, (yyvsp[0].nd), "+@"); + p->cmdarg_stack = (yyvsp[-2].stack); + (yyval.nd) = new_begin(p, (yyvsp[-1].nd)); } -#line 7408 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9318 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 222: -#line 2357 "mrbgems/mruby-compiler/core/parse.y" + case 299: /* @11: %empty */ +#line 3263 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_negate(p, (yyvsp[0].nd)); + (yyval.stack) = p->cmdarg_stack; + p->cmdarg_stack = 0; } -#line 7416 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9327 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 223: -#line 2361 "mrbgems/mruby-compiler/core/parse.y" - { - (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "|", (yyvsp[0].nd)); - } -#line 7424 "mrbgems/mruby-compiler/core/y.tab.c" + case 300: /* $@12: %empty */ +#line 3267 "mrbgems/mruby-compiler/core/parse.y" + {p->lstate = EXPR_ENDARG;} +#line 9333 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 224: -#line 2365 "mrbgems/mruby-compiler/core/parse.y" + case 301: /* primary: "(" @11 compstmt $@12 rparen */ +#line 3268 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "^", (yyvsp[0].nd)); + p->cmdarg_stack = (yyvsp[-3].stack); + (yyval.nd) = (yyvsp[-2].nd); } -#line 7432 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9342 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 225: -#line 2369 "mrbgems/mruby-compiler/core/parse.y" - { - (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "&", (yyvsp[0].nd)); - } -#line 7440 "mrbgems/mruby-compiler/core/y.tab.c" + case 302: /* $@13: %empty */ +#line 3272 "mrbgems/mruby-compiler/core/parse.y" + {p->lstate = EXPR_ENDARG;} +#line 9348 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 226: -#line 2373 "mrbgems/mruby-compiler/core/parse.y" + case 303: /* primary: "(" $@13 rparen */ +#line 3273 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "<=>", (yyvsp[0].nd)); + (yyval.nd) = new_nil(p); } -#line 7448 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9356 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 227: -#line 2377 "mrbgems/mruby-compiler/core/parse.y" + case 304: /* primary: tLPAREN compstmt ')' */ +#line 3277 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), ">", (yyvsp[0].nd)); + (yyval.nd) = (yyvsp[-1].nd); } -#line 7456 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9364 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 228: -#line 2381 "mrbgems/mruby-compiler/core/parse.y" + case 305: /* primary: primary_value "::" "constant" */ +#line 3281 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), ">=", (yyvsp[0].nd)); + (yyval.nd) = new_colon2(p, (yyvsp[-2].nd), (yyvsp[0].id)); } -#line 7464 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9372 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 229: -#line 2385 "mrbgems/mruby-compiler/core/parse.y" + case 306: /* primary: tCOLON3 "constant" */ +#line 3285 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "<", (yyvsp[0].nd)); + (yyval.nd) = new_colon3(p, (yyvsp[0].id)); } -#line 7472 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9380 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 230: -#line 2389 "mrbgems/mruby-compiler/core/parse.y" + case 307: /* primary: "[" aref_args ']' */ +#line 3289 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "<=", (yyvsp[0].nd)); + (yyval.nd) = new_array(p, (yyvsp[-1].nd)); } -#line 7480 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9388 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 231: -#line 2393 "mrbgems/mruby-compiler/core/parse.y" + case 308: /* primary: tLBRACE assoc_list '}' */ +#line 3293 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "==", (yyvsp[0].nd)); + (yyval.nd) = new_hash(p, (yyvsp[-1].nd)); } -#line 7488 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9396 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 232: -#line 2397 "mrbgems/mruby-compiler/core/parse.y" + case 309: /* primary: "'return'" */ +#line 3297 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "===", (yyvsp[0].nd)); + (yyval.nd) = new_return(p, 0); } -#line 7496 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9404 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 233: -#line 2401 "mrbgems/mruby-compiler/core/parse.y" + case 310: /* primary: "'yield'" opt_paren_args */ +#line 3301 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "!=", (yyvsp[0].nd)); + (yyval.nd) = new_yield(p, (yyvsp[0].nd)); } -#line 7504 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9412 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 234: -#line 2405 "mrbgems/mruby-compiler/core/parse.y" + case 311: /* primary: "'not'" '(' expr rparen */ +#line 3305 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "=~", (yyvsp[0].nd)); + (yyval.nd) = call_uni_op(p, cond((yyvsp[-1].nd)), "!"); } -#line 7512 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9420 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 235: -#line 2409 "mrbgems/mruby-compiler/core/parse.y" + case 312: /* primary: "'not'" '(' rparen */ +#line 3309 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "!~", (yyvsp[0].nd)); + (yyval.nd) = call_uni_op(p, new_nil(p), "!"); } -#line 7520 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9428 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 236: -#line 2413 "mrbgems/mruby-compiler/core/parse.y" + case 313: /* primary: operation brace_block */ +#line 3313 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_uni_op(p, cond((yyvsp[0].nd)), "!"); + (yyval.nd) = new_fcall(p, (yyvsp[-1].id), new_callargs(p, 0, 0, (yyvsp[0].nd))); } -#line 7528 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9436 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 237: -#line 2417 "mrbgems/mruby-compiler/core/parse.y" + case 315: /* primary: method_call brace_block */ +#line 3318 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_uni_op(p, cond((yyvsp[0].nd)), "~"); + call_with_block(p, (yyvsp[-1].nd), (yyvsp[0].nd)); + (yyval.nd) = (yyvsp[-1].nd); } -#line 7536 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9445 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 238: -#line 2421 "mrbgems/mruby-compiler/core/parse.y" + case 316: /* @14: %empty */ +#line 3323 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), "<<", (yyvsp[0].nd)); + local_nest(p); + nvars_nest(p); + (yyval.num) = p->lpar_beg; + p->lpar_beg = ++p->paren_nest; } -#line 7544 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9456 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 239: -#line 2425 "mrbgems/mruby-compiler/core/parse.y" + case 317: /* @15: %empty */ +#line 3330 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_bin_op(p, (yyvsp[-2].nd), ">>", (yyvsp[0].nd)); + (yyval.stack) = p->cmdarg_stack; + p->cmdarg_stack = 0; } -#line 7552 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9465 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 240: -#line 2429 "mrbgems/mruby-compiler/core/parse.y" + case 318: /* primary: "->" @14 f_larglist @15 lambda_body */ +#line 3335 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_and(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + p->lpar_beg = (yyvsp[-3].num); + (yyval.nd) = new_lambda(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + local_unnest(p); + nvars_unnest(p); + p->cmdarg_stack = (yyvsp[-1].stack); + CMDARG_LEXPOP(); } -#line 7560 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9478 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 241: -#line 2433 "mrbgems/mruby-compiler/core/parse.y" + case 319: /* primary: "'if'" expr_value then compstmt if_tail "'end'" */ +#line 3347 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_or(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + (yyval.nd) = new_if(p, cond((yyvsp[-4].nd)), (yyvsp[-2].nd), (yyvsp[-1].nd)); + SET_LINENO((yyval.nd), (yyvsp[-5].num)); } -#line 7568 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9487 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 242: -#line 2437 "mrbgems/mruby-compiler/core/parse.y" + case 320: /* primary: "'unless'" expr_value then compstmt opt_else "'end'" */ +#line 3355 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_if(p, cond((yyvsp[-5].nd)), (yyvsp[-3].nd), (yyvsp[0].nd)); + (yyval.nd) = new_if(p, cond((yyvsp[-4].nd)), (yyvsp[-1].nd), (yyvsp[-2].nd)); + SET_LINENO((yyval.nd), (yyvsp[-5].num)); } -#line 7576 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9496 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 243: -#line 2441 "mrbgems/mruby-compiler/core/parse.y" - { - (yyval.nd) = new_if(p, cond((yyvsp[-5].nd)), (yyvsp[-3].nd), (yyvsp[0].nd)); - } -#line 7584 "mrbgems/mruby-compiler/core/y.tab.c" + case 321: /* $@16: %empty */ +#line 3359 "mrbgems/mruby-compiler/core/parse.y" + {COND_PUSH(1);} +#line 9502 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 244: -#line 2445 "mrbgems/mruby-compiler/core/parse.y" - { - (yyval.nd) = (yyvsp[-3].nd); - endless_method_name(p, (yyvsp[-3].nd)); - void_expr_error(p, (yyvsp[0].nd)); - defn_setup(p, (yyval.nd), (yyvsp[-2].nd), (yyvsp[0].nd)); - nvars_unnest(p); - p->in_def--; - } -#line 7597 "mrbgems/mruby-compiler/core/y.tab.c" + case 322: /* $@17: %empty */ +#line 3359 "mrbgems/mruby-compiler/core/parse.y" + {COND_POP();} +#line 9508 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 245: -#line 2454 "mrbgems/mruby-compiler/core/parse.y" + case 323: /* primary: "'while'" $@16 expr_value do $@17 compstmt "'end'" */ +#line 3362 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[-5].nd); - endless_method_name(p, (yyvsp[-5].nd)); - void_expr_error(p, (yyvsp[-2].nd)); - defn_setup(p, (yyval.nd), (yyvsp[-4].nd), new_mod_rescue(p, (yyvsp[-2].nd), (yyvsp[0].nd))); - nvars_unnest(p); - p->in_def--; + (yyval.nd) = new_while(p, cond((yyvsp[-4].nd)), (yyvsp[-1].nd)); + SET_LINENO((yyval.nd), (yyvsp[-6].num)); } -#line 7610 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9517 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 246: -#line 2463 "mrbgems/mruby-compiler/core/parse.y" - { - (yyval.nd) = (yyvsp[-3].nd); - void_expr_error(p, (yyvsp[0].nd)); - defs_setup(p, (yyval.nd), (yyvsp[-2].nd), (yyvsp[0].nd)); - nvars_unnest(p); - p->in_def--; - p->in_single--; - } -#line 7623 "mrbgems/mruby-compiler/core/y.tab.c" + case 324: /* $@18: %empty */ +#line 3366 "mrbgems/mruby-compiler/core/parse.y" + {COND_PUSH(1);} +#line 9523 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 247: -#line 2472 "mrbgems/mruby-compiler/core/parse.y" - { - (yyval.nd) = (yyvsp[-5].nd); - void_expr_error(p, (yyvsp[-2].nd)); - defs_setup(p, (yyval.nd), (yyvsp[-4].nd), new_mod_rescue(p, (yyvsp[-2].nd), (yyvsp[0].nd))); - nvars_unnest(p); - p->in_def--; - p->in_single--; - } -#line 7636 "mrbgems/mruby-compiler/core/y.tab.c" + case 325: /* $@19: %empty */ +#line 3366 "mrbgems/mruby-compiler/core/parse.y" + {COND_POP();} +#line 9529 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 248: -#line 2481 "mrbgems/mruby-compiler/core/parse.y" + case 326: /* primary: "'until'" $@18 expr_value do $@19 compstmt "'end'" */ +#line 3369 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[0].nd); + (yyval.nd) = new_until(p, cond((yyvsp[-4].nd)), (yyvsp[-1].nd)); + SET_LINENO((yyval.nd), (yyvsp[-6].num)); } -#line 7644 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9538 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 250: -#line 2488 "mrbgems/mruby-compiler/core/parse.y" + case 327: /* primary: "'case'" expr_value opt_terms case_body "'end'" */ +#line 3376 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[-1].nd); - NODE_LINENO((yyval.nd), (yyvsp[-1].nd)); + (yyval.nd) = new_case(p, (yyvsp[-3].nd), (yyvsp[-1].nd)); } -#line 7653 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9546 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 251: -#line 2493 "mrbgems/mruby-compiler/core/parse.y" + case 328: /* primary: "'case'" opt_terms case_body "'end'" */ +#line 3380 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = push((yyvsp[-3].nd), new_hash(p, (yyvsp[-1].nd))); + (yyval.nd) = new_case(p, 0, (yyvsp[-1].nd)); } -#line 7661 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9554 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 252: -#line 2497 "mrbgems/mruby-compiler/core/parse.y" + case 329: /* primary: "'case'" expr_value opt_terms "'in'" p_expr then compstmt in_clauses "'end'" */ +#line 3388 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = cons(new_kw_hash(p, (yyvsp[-1].nd)), 0); - NODE_LINENO((yyval.nd), (yyvsp[-1].nd)); + node *in_clause = new_in(p, (yyvsp[-4].nd), NULL, (yyvsp[-2].nd), FALSE); + (yyval.nd) = new_case_match(p, (yyvsp[-7].nd), cons(in_clause, (yyvsp[-1].nd))); } -#line 7670 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9563 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 253: -#line 2504 "mrbgems/mruby-compiler/core/parse.y" + case 330: /* primary: "'case'" expr_value opt_terms "'in'" p_expr "'if' modifier" expr_value then compstmt in_clauses "'end'" */ +#line 3397 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[0].nd); + node *in_clause = new_in(p, (yyvsp[-6].nd), (yyvsp[-4].nd), (yyvsp[-2].nd), FALSE); + (yyval.nd) = new_case_match(p, (yyvsp[-9].nd), cons(in_clause, (yyvsp[-1].nd))); } -#line 7678 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9572 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 254: -#line 2508 "mrbgems/mruby-compiler/core/parse.y" + case 331: /* primary: "'case'" expr_value opt_terms "'in'" p_expr "'unless' modifier" expr_value then compstmt in_clauses "'end'" */ +#line 3406 "mrbgems/mruby-compiler/core/parse.y" { - void_expr_error(p, (yyvsp[-2].nd)); - (yyval.nd) = new_mod_rescue(p, (yyvsp[-2].nd), (yyvsp[0].nd)); + node *in_clause = new_in(p, (yyvsp[-6].nd), (yyvsp[-4].nd), (yyvsp[-2].nd), TRUE); + (yyval.nd) = new_case_match(p, (yyvsp[-9].nd), cons(in_clause, (yyvsp[-1].nd))); } -#line 7687 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9581 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 255: -#line 2515 "mrbgems/mruby-compiler/core/parse.y" - { - (yyval.nd) = (yyvsp[-1].nd); - } -#line 7695 "mrbgems/mruby-compiler/core/y.tab.c" + case 332: /* $@20: %empty */ +#line 3411 "mrbgems/mruby-compiler/core/parse.y" + {COND_PUSH(1);} +#line 9587 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 333: /* $@21: %empty */ +#line 3413 "mrbgems/mruby-compiler/core/parse.y" + {COND_POP();} +#line 9593 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 256: -#line 2519 "mrbgems/mruby-compiler/core/parse.y" + case 334: /* primary: "'for'" for_var "'in'" $@20 expr_value do $@21 compstmt "'end'" */ +#line 3416 "mrbgems/mruby-compiler/core/parse.y" { - mrb_sym r = intern_op(mul); - mrb_sym k = intern_op(pow); - mrb_sym b = intern_op(and); - (yyval.nd) = new_callargs(p, push((yyvsp[-3].nd), new_splat(p, new_lvar(p, r))), - new_kw_hash(p, list1(cons(new_kw_rest_args(p, 0), new_lvar(p, k)))), - new_block_arg(p, new_lvar(p, b))); + (yyval.nd) = new_for(p, (yyvsp[-7].nd), (yyvsp[-4].nd), (yyvsp[-1].nd)); + SET_LINENO((yyval.nd), (yyvsp[-8].num)); } -#line 7708 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9602 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 257: -#line 2528 "mrbgems/mruby-compiler/core/parse.y" + case 335: /* @22: %empty */ +#line 3422 "mrbgems/mruby-compiler/core/parse.y" { - mrb_sym r = intern_op(mul); - mrb_sym k = intern_op(pow); - mrb_sym b = intern_op(and); - if (local_var_p(p, r) && local_var_p(p, k) && local_var_p(p, b)) { - (yyval.nd) = new_callargs(p, list1(new_splat(p, new_lvar(p, r))), - new_kw_hash(p, list1(cons(new_kw_rest_args(p, 0), new_lvar(p, k)))), - new_block_arg(p, new_lvar(p, b))); - } - else { - yyerror(p, "unexpected argument forwarding ..."); - (yyval.nd) = 0; - } + if (p->in_def || p->in_single) + yyerror(&(yylsp[-2]), p, "class definition in method body"); + (yyval.nd) = local_switch(p); + nvars_block(p); } -#line 7727 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9613 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 262: -#line 2551 "mrbgems/mruby-compiler/core/parse.y" + case 336: /* primary: "'class'" cpath superclass @22 bodystmt "'end'" */ +#line 3430 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_callargs(p,(yyvsp[-1].nd),0,0); - NODE_LINENO((yyval.nd), (yyvsp[-1].nd)); + (yyval.nd) = new_class(p, (yyvsp[-4].nd), (yyvsp[-3].nd), (yyvsp[-1].nd)); + SET_LINENO((yyval.nd), (yyvsp[-5].num)); + local_resume(p, (yyvsp[-2].nd)); + nvars_unnest(p); } -#line 7736 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9624 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 263: -#line 2556 "mrbgems/mruby-compiler/core/parse.y" + case 337: /* @23: %empty */ +#line 3438 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_callargs(p,(yyvsp[-3].nd),new_kw_hash(p,(yyvsp[-1].nd)),0); - NODE_LINENO((yyval.nd), (yyvsp[-3].nd)); + (yyval.num) = p->in_def; + p->in_def = 0; } -#line 7745 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9633 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 264: -#line 2561 "mrbgems/mruby-compiler/core/parse.y" + case 338: /* @24: %empty */ +#line 3443 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_callargs(p,0,new_kw_hash(p,(yyvsp[-1].nd)),0); - NODE_LINENO((yyval.nd), (yyvsp[-1].nd)); + (yyval.nd) = cons(local_switch(p), int_to_node(p->in_single)); + nvars_block(p); + p->in_single = 0; } -#line 7754 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9643 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 265: -#line 2568 "mrbgems/mruby-compiler/core/parse.y" + case 339: /* primary: "'class'" "<<" expr @23 term @24 bodystmt "'end'" */ +#line 3450 "mrbgems/mruby-compiler/core/parse.y" { - void_expr_error(p, (yyvsp[0].nd)); - (yyval.nd) = new_callargs(p, list1((yyvsp[0].nd)), 0, 0); - NODE_LINENO((yyval.nd), (yyvsp[0].nd)); + (yyval.nd) = new_sclass(p, (yyvsp[-5].nd), (yyvsp[-1].nd)); + SET_LINENO((yyval.nd), (yyvsp[-7].num)); + local_resume(p, (yyvsp[-2].nd)->car); + nvars_unnest(p); + p->in_def = (yyvsp[-4].num); + p->in_single = node_to_int((yyvsp[-2].nd)->cdr); } -#line 7764 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9656 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 266: -#line 2574 "mrbgems/mruby-compiler/core/parse.y" + case 340: /* @25: %empty */ +#line 3460 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_callargs(p, (yyvsp[-1].nd), 0, (yyvsp[0].nd)); - NODE_LINENO((yyval.nd), (yyvsp[-1].nd)); + if (p->in_def || p->in_single) + yyerror(&(yylsp[-1]), p, "module definition in method body"); + (yyval.nd) = local_switch(p); + nvars_block(p); } -#line 7773 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9667 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 267: -#line 2579 "mrbgems/mruby-compiler/core/parse.y" + case 341: /* primary: "'module'" cpath @25 bodystmt "'end'" */ +#line 3468 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_callargs(p, 0, new_kw_hash(p, (yyvsp[-1].nd)), (yyvsp[0].nd)); - NODE_LINENO((yyval.nd), (yyvsp[-1].nd)); + (yyval.nd) = new_module(p, (yyvsp[-3].nd), (yyvsp[-1].nd)); + SET_LINENO((yyval.nd), (yyvsp[-4].num)); + local_resume(p, (yyvsp[-2].nd)); + nvars_unnest(p); } -#line 7782 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9678 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 268: -#line 2584 "mrbgems/mruby-compiler/core/parse.y" + case 342: /* primary: defn_head f_arglist bodystmt "'end'" */ +#line 3478 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_callargs(p, (yyvsp[-3].nd), new_kw_hash(p, (yyvsp[-1].nd)), (yyvsp[0].nd)); - NODE_LINENO((yyval.nd), (yyvsp[-3].nd)); + (yyval.nd) = (yyvsp[-3].nd); + defn_setup(p, (yyval.nd), (yyvsp[-2].nd), (yyvsp[-1].nd)); + nvars_unnest(p); + p->in_def--; } -#line 7791 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9689 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 269: -#line 2589 "mrbgems/mruby-compiler/core/parse.y" + case 343: /* primary: defs_head f_arglist bodystmt "'end'" */ +#line 3488 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_callargs(p, 0, 0, (yyvsp[0].nd)); - NODE_LINENO((yyval.nd), (yyvsp[0].nd)); + (yyval.nd) = (yyvsp[-3].nd); + defn_setup(p, (yyval.nd), (yyvsp[-2].nd), (yyvsp[-1].nd)); + nvars_unnest(p); + p->in_def--; + p->in_single--; } -#line 7800 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9701 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 270: -#line 2595 "mrbgems/mruby-compiler/core/parse.y" - { - (yyval.stack) = p->cmdarg_stack; - CMDARG_PUSH(1); + case 344: /* primary: "'break'" */ +#line 3496 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_break(p, 0); } -#line 7809 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9709 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 271: -#line 2600 "mrbgems/mruby-compiler/core/parse.y" + case 345: /* primary: "'next'" */ +#line 3500 "mrbgems/mruby-compiler/core/parse.y" { - p->cmdarg_stack = (yyvsp[-1].stack); - (yyval.nd) = (yyvsp[0].nd); + (yyval.nd) = new_next(p, 0); } -#line 7818 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9717 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 272: -#line 2607 "mrbgems/mruby-compiler/core/parse.y" + case 346: /* primary: "'redo'" */ +#line 3504 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_block_arg(p, (yyvsp[0].nd)); + (yyval.nd) = new_redo(p); } -#line 7826 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9725 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 273: -#line 2611 "mrbgems/mruby-compiler/core/parse.y" + case 347: /* primary: "'retry'" */ +#line 3508 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_block_arg(p, 0); + (yyval.nd) = new_retry(p); } -#line 7834 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9733 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 274: -#line 2617 "mrbgems/mruby-compiler/core/parse.y" + case 348: /* primary_value: primary */ +#line 3514 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = (yyvsp[0].nd); + if (!(yyval.nd)) (yyval.nd) = new_nil(p); } -#line 7842 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9742 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 275: -#line 2621 "mrbgems/mruby-compiler/core/parse.y" + case 355: /* if_tail: "'elsif'" expr_value then compstmt if_tail */ +#line 3533 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = 0; + (yyval.nd) = new_if(p, cond((yyvsp[-3].nd)), (yyvsp[-1].nd), (yyvsp[0].nd)); } -#line 7850 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9750 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 277: -#line 2630 "mrbgems/mruby-compiler/core/parse.y" + case 357: /* opt_else: "'else'" compstmt */ +#line 3540 "mrbgems/mruby-compiler/core/parse.y" { - void_expr_error(p, (yyvsp[0].nd)); - (yyval.nd) = list1((yyvsp[0].nd)); - NODE_LINENO((yyval.nd), (yyvsp[0].nd)); + (yyval.nd) = (yyvsp[0].nd); } -#line 7860 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9758 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 278: -#line 2636 "mrbgems/mruby-compiler/core/parse.y" + case 358: /* for_var: lhs */ +#line 3546 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list1(new_splat(p, new_lvar(p, intern_op(mul)))); + (yyval.nd) = list1(list1((yyvsp[0].nd))); } -#line 7868 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9766 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 279: -#line 2640 "mrbgems/mruby-compiler/core/parse.y" + case 360: /* f_margs: f_arg */ +#line 3553 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list1(new_splat(p, (yyvsp[0].nd))); - NODE_LINENO((yyval.nd), (yyvsp[0].nd)); + (yyval.nd) = list3((yyvsp[0].nd),0,0); } -#line 7877 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9774 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 280: -#line 2645 "mrbgems/mruby-compiler/core/parse.y" + case 361: /* f_margs: f_arg ',' "*" f_norm_arg */ +#line 3557 "mrbgems/mruby-compiler/core/parse.y" { - void_expr_error(p, (yyvsp[0].nd)); - (yyval.nd) = push((yyvsp[-2].nd), (yyvsp[0].nd)); + (yyval.nd) = list3((yyvsp[-3].nd), new_lvar(p, (yyvsp[0].id)), 0); } -#line 7886 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9782 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 281: -#line 2650 "mrbgems/mruby-compiler/core/parse.y" + case 362: /* f_margs: f_arg ',' "*" f_norm_arg ',' f_arg */ +#line 3561 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = push((yyvsp[-3].nd), new_splat(p, (yyvsp[0].nd))); + (yyval.nd) = list3((yyvsp[-5].nd), new_lvar(p, (yyvsp[-2].id)), (yyvsp[0].nd)); } -#line 7894 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9790 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 282: -#line 2656 "mrbgems/mruby-compiler/core/parse.y" + case 363: /* f_margs: f_arg ',' "*" */ +#line 3565 "mrbgems/mruby-compiler/core/parse.y" { - void_expr_error(p, (yyvsp[0].nd)); - (yyval.nd) = push((yyvsp[-2].nd), (yyvsp[0].nd)); + local_add_f(p, intern_op(mul)); + (yyval.nd) = list3((yyvsp[-2].nd), int_to_node(-1), 0); } -#line 7903 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9799 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 283: -#line 2661 "mrbgems/mruby-compiler/core/parse.y" + case 364: /* f_margs: f_arg ',' "*" ',' f_arg */ +#line 3570 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = push((yyvsp[-3].nd), new_splat(p, (yyvsp[0].nd))); + (yyval.nd) = list3((yyvsp[-4].nd), int_to_node(-1), (yyvsp[0].nd)); } -#line 7911 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9807 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 284: -#line 2665 "mrbgems/mruby-compiler/core/parse.y" + case 365: /* f_margs: "*" f_norm_arg */ +#line 3574 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list1(new_splat(p, (yyvsp[0].nd))); + (yyval.nd) = list3(0, new_lvar(p, (yyvsp[0].id)), 0); } -#line 7919 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9815 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 292: -#line 2678 "mrbgems/mruby-compiler/core/parse.y" + case 366: /* f_margs: "*" f_norm_arg ',' f_arg */ +#line 3578 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_nvar(p, (yyvsp[0].num)); + (yyval.nd) = list3(0, new_lvar(p, (yyvsp[-2].id)), (yyvsp[0].nd)); } -#line 7927 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9823 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 293: -#line 2682 "mrbgems/mruby-compiler/core/parse.y" + case 367: /* f_margs: "*" */ +#line 3582 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_fcall(p, (yyvsp[0].id), 0); + local_add_f(p, intern_op(mul)); + (yyval.nd) = list3(0, int_to_node(-1), 0); } -#line 7935 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9832 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 294: -#line 2686 "mrbgems/mruby-compiler/core/parse.y" + case 368: /* $@26: %empty */ +#line 3587 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.stack) = p->cmdarg_stack; - p->cmdarg_stack = 0; + local_add_f(p, intern_op(mul)); } -#line 7944 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9840 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 295: -#line 2692 "mrbgems/mruby-compiler/core/parse.y" + case 369: /* f_margs: "*" ',' $@26 f_arg */ +#line 3591 "mrbgems/mruby-compiler/core/parse.y" { - p->cmdarg_stack = (yyvsp[-2].stack); - (yyval.nd) = (yyvsp[-1].nd); + (yyval.nd) = list3(0, int_to_node(-1), (yyvsp[0].nd)); } -#line 7953 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9848 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 296: -#line 2697 "mrbgems/mruby-compiler/core/parse.y" + case 370: /* block_args_tail: f_block_kwarg ',' f_kwrest opt_f_block_arg */ +#line 3597 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.stack) = p->cmdarg_stack; - p->cmdarg_stack = 0; + (yyval.nd) = new_args_tail(p, (yyvsp[-3].nd), (yyvsp[-1].id), (yyvsp[0].id)); } -#line 7962 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9856 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 297: -#line 2701 "mrbgems/mruby-compiler/core/parse.y" - {p->lstate = EXPR_ENDARG;} -#line 7968 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 298: -#line 2702 "mrbgems/mruby-compiler/core/parse.y" + case 371: /* block_args_tail: f_block_kwarg opt_f_block_arg */ +#line 3601 "mrbgems/mruby-compiler/core/parse.y" { - p->cmdarg_stack = (yyvsp[-3].stack); - (yyval.nd) = (yyvsp[-2].nd); + (yyval.nd) = new_args_tail(p, (yyvsp[-1].nd), 0, (yyvsp[0].id)); } -#line 7977 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 299: -#line 2706 "mrbgems/mruby-compiler/core/parse.y" - {p->lstate = EXPR_ENDARG;} -#line 7983 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9864 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 300: -#line 2707 "mrbgems/mruby-compiler/core/parse.y" + case 372: /* block_args_tail: f_kwrest opt_f_block_arg */ +#line 3605 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_nil(p); + (yyval.nd) = new_args_tail(p, 0, (yyvsp[-1].id), (yyvsp[0].id)); } -#line 7991 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9872 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 301: -#line 2711 "mrbgems/mruby-compiler/core/parse.y" + case 373: /* block_args_tail: f_block_arg */ +#line 3609 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[-1].nd); + (yyval.nd) = new_args_tail(p, 0, 0, (yyvsp[0].id)); } -#line 7999 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9880 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 302: -#line 2715 "mrbgems/mruby-compiler/core/parse.y" + case 374: /* opt_block_args_tail: ',' block_args_tail */ +#line 3615 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_colon2(p, (yyvsp[-2].nd), (yyvsp[0].id)); + (yyval.nd) = (yyvsp[0].nd); } -#line 8007 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9888 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 303: -#line 2719 "mrbgems/mruby-compiler/core/parse.y" + case 375: /* opt_block_args_tail: %empty */ +#line 3619 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_colon3(p, (yyvsp[0].id)); + (yyval.nd) = new_args_tail(p, 0, 0, 0); } -#line 8015 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9896 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 304: -#line 2723 "mrbgems/mruby-compiler/core/parse.y" + case 376: /* block_param: f_arg ',' f_block_optarg ',' f_rest_arg opt_block_args_tail */ +#line 3625 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_array(p, (yyvsp[-1].nd)); - NODE_LINENO((yyval.nd), (yyvsp[-1].nd)); + (yyval.nd) = new_args(p, (yyvsp[-5].nd), (yyvsp[-3].nd), (yyvsp[-1].id), 0, (yyvsp[0].nd)); } -#line 8024 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9904 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 305: -#line 2728 "mrbgems/mruby-compiler/core/parse.y" + case 377: /* block_param: f_arg ',' f_block_optarg ',' f_rest_arg ',' f_arg opt_block_args_tail */ +#line 3629 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_hash(p, (yyvsp[-1].nd)); - NODE_LINENO((yyval.nd), (yyvsp[-1].nd)); + (yyval.nd) = new_args(p, (yyvsp[-7].nd), (yyvsp[-5].nd), (yyvsp[-3].id), (yyvsp[-1].nd), (yyvsp[0].nd)); } -#line 8033 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9912 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 306: -#line 2733 "mrbgems/mruby-compiler/core/parse.y" + case 378: /* block_param: f_arg ',' f_block_optarg opt_block_args_tail */ +#line 3633 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_return(p, 0); + (yyval.nd) = new_args(p, (yyvsp[-3].nd), (yyvsp[-1].nd), 0, 0, (yyvsp[0].nd)); } -#line 8041 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9920 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 307: -#line 2737 "mrbgems/mruby-compiler/core/parse.y" + case 379: /* block_param: f_arg ',' f_block_optarg ',' f_arg opt_block_args_tail */ +#line 3637 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_yield(p, (yyvsp[0].nd)); + (yyval.nd) = new_args(p, (yyvsp[-5].nd), (yyvsp[-3].nd), 0, (yyvsp[-1].nd), (yyvsp[0].nd)); } -#line 8049 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9928 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 308: -#line 2741 "mrbgems/mruby-compiler/core/parse.y" + case 380: /* block_param: f_arg ',' f_rest_arg opt_block_args_tail */ +#line 3641 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_uni_op(p, cond((yyvsp[-1].nd)), "!"); + (yyval.nd) = new_args(p, (yyvsp[-3].nd), 0, (yyvsp[-1].id), 0, (yyvsp[0].nd)); } -#line 8057 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9936 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 309: -#line 2745 "mrbgems/mruby-compiler/core/parse.y" + case 381: /* block_param: f_arg ',' opt_block_args_tail */ +#line 3645 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = call_uni_op(p, new_nil(p), "!"); + (yyval.nd) = new_args(p, (yyvsp[-2].nd), 0, 0, 0, (yyvsp[0].nd)); } -#line 8065 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9944 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 310: -#line 2749 "mrbgems/mruby-compiler/core/parse.y" + case 382: /* block_param: f_arg ',' f_rest_arg ',' f_arg opt_block_args_tail */ +#line 3649 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_fcall(p, (yyvsp[-1].id), new_callargs(p, 0, 0, (yyvsp[0].nd))); + (yyval.nd) = new_args(p, (yyvsp[-5].nd), 0, (yyvsp[-3].id), (yyvsp[-1].nd), (yyvsp[0].nd)); } -#line 8073 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9952 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 312: -#line 2754 "mrbgems/mruby-compiler/core/parse.y" + case 383: /* block_param: f_arg opt_block_args_tail */ +#line 3653 "mrbgems/mruby-compiler/core/parse.y" { - call_with_block(p, (yyvsp[-1].nd), (yyvsp[0].nd)); - (yyval.nd) = (yyvsp[-1].nd); + (yyval.nd) = new_args(p, (yyvsp[-1].nd), 0, 0, 0, (yyvsp[0].nd)); } -#line 8082 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9960 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 313: -#line 2759 "mrbgems/mruby-compiler/core/parse.y" + case 384: /* block_param: f_block_optarg ',' f_rest_arg opt_block_args_tail */ +#line 3657 "mrbgems/mruby-compiler/core/parse.y" { - local_nest(p); - nvars_nest(p); - (yyval.num) = p->lpar_beg; - p->lpar_beg = ++p->paren_nest; + (yyval.nd) = new_args(p, 0, (yyvsp[-3].nd), (yyvsp[-1].id), 0, (yyvsp[0].nd)); } -#line 8093 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9968 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 314: -#line 2766 "mrbgems/mruby-compiler/core/parse.y" + case 385: /* block_param: f_block_optarg ',' f_rest_arg ',' f_arg opt_block_args_tail */ +#line 3661 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.stack) = p->cmdarg_stack; - p->cmdarg_stack = 0; + (yyval.nd) = new_args(p, 0, (yyvsp[-5].nd), (yyvsp[-3].id), (yyvsp[-1].nd), (yyvsp[0].nd)); } -#line 8102 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9976 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 315: -#line 2771 "mrbgems/mruby-compiler/core/parse.y" + case 386: /* block_param: f_block_optarg opt_block_args_tail */ +#line 3665 "mrbgems/mruby-compiler/core/parse.y" { - p->lpar_beg = (yyvsp[-3].num); - (yyval.nd) = new_lambda(p, (yyvsp[-2].nd), (yyvsp[0].nd)); - local_unnest(p); - nvars_unnest(p); - p->cmdarg_stack = (yyvsp[-1].stack); - CMDARG_LEXPOP(); + (yyval.nd) = new_args(p, 0, (yyvsp[-1].nd), 0, 0, (yyvsp[0].nd)); } -#line 8115 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9984 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 316: -#line 2783 "mrbgems/mruby-compiler/core/parse.y" + case 387: /* block_param: f_block_optarg ',' f_arg opt_block_args_tail */ +#line 3669 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_if(p, cond((yyvsp[-4].nd)), (yyvsp[-2].nd), (yyvsp[-1].nd)); - SET_LINENO((yyval.nd), (yyvsp[-5].num)); + (yyval.nd) = new_args(p, 0, (yyvsp[-3].nd), 0, (yyvsp[-1].nd), (yyvsp[0].nd)); } -#line 8124 "mrbgems/mruby-compiler/core/y.tab.c" +#line 9992 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 317: -#line 2791 "mrbgems/mruby-compiler/core/parse.y" + case 388: /* block_param: f_rest_arg opt_block_args_tail */ +#line 3673 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_unless(p, cond((yyvsp[-4].nd)), (yyvsp[-2].nd), (yyvsp[-1].nd)); - SET_LINENO((yyval.nd), (yyvsp[-5].num)); + (yyval.nd) = new_args(p, 0, 0, (yyvsp[-1].id), 0, (yyvsp[0].nd)); } -#line 8133 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10000 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 318: -#line 2795 "mrbgems/mruby-compiler/core/parse.y" - {COND_PUSH(1);} -#line 8139 "mrbgems/mruby-compiler/core/y.tab.c" + case 389: /* block_param: f_rest_arg ',' f_arg opt_block_args_tail */ +#line 3677 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_args(p, 0, 0, (yyvsp[-3].id), (yyvsp[-1].nd), (yyvsp[0].nd)); + } +#line 10008 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 319: -#line 2795 "mrbgems/mruby-compiler/core/parse.y" - {COND_POP();} -#line 8145 "mrbgems/mruby-compiler/core/y.tab.c" + case 390: /* block_param: block_args_tail */ +#line 3681 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_args(p, 0, 0, 0, 0, (yyvsp[0].nd)); + } +#line 10016 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 320: -#line 2798 "mrbgems/mruby-compiler/core/parse.y" + case 391: /* opt_block_param: none */ +#line 3687 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_while(p, cond((yyvsp[-4].nd)), (yyvsp[-1].nd)); - SET_LINENO((yyval.nd), (yyvsp[-6].num)); + local_add_blk(p); + (yyval.nd) = 0; } -#line 8154 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10025 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 321: -#line 2802 "mrbgems/mruby-compiler/core/parse.y" - {COND_PUSH(1);} -#line 8160 "mrbgems/mruby-compiler/core/y.tab.c" + case 392: /* opt_block_param: block_param_def */ +#line 3692 "mrbgems/mruby-compiler/core/parse.y" + { + p->cmd_start = TRUE; + (yyval.nd) = (yyvsp[0].nd); + } +#line 10034 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 322: -#line 2802 "mrbgems/mruby-compiler/core/parse.y" - {COND_POP();} -#line 8166 "mrbgems/mruby-compiler/core/y.tab.c" + case 393: /* $@27: %empty */ +#line 3698 "mrbgems/mruby-compiler/core/parse.y" + {local_add_blk(p);} +#line 10040 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 323: -#line 2805 "mrbgems/mruby-compiler/core/parse.y" + case 394: /* block_param_def: '|' $@27 opt_bv_decl '|' */ +#line 3699 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_until(p, cond((yyvsp[-4].nd)), (yyvsp[-1].nd)); - SET_LINENO((yyval.nd), (yyvsp[-6].num)); + (yyval.nd) = 0; } -#line 8175 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10048 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 324: -#line 2812 "mrbgems/mruby-compiler/core/parse.y" + case 395: /* block_param_def: "||" */ +#line 3703 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_case(p, (yyvsp[-3].nd), (yyvsp[-1].nd)); + local_add_blk(p); + (yyval.nd) = 0; } -#line 8183 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10057 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 325: -#line 2816 "mrbgems/mruby-compiler/core/parse.y" + case 396: /* block_param_def: '|' block_param opt_bv_decl '|' */ +#line 3708 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_case(p, 0, (yyvsp[-1].nd)); + (yyval.nd) = (yyvsp[-2].nd); } -#line 8191 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 326: -#line 2820 "mrbgems/mruby-compiler/core/parse.y" - {COND_PUSH(1);} -#line 8197 "mrbgems/mruby-compiler/core/y.tab.c" - break; - - case 327: -#line 2822 "mrbgems/mruby-compiler/core/parse.y" - {COND_POP();} -#line 8203 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10065 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 328: -#line 2825 "mrbgems/mruby-compiler/core/parse.y" + case 397: /* opt_bv_decl: opt_nl */ +#line 3714 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_for(p, (yyvsp[-7].nd), (yyvsp[-4].nd), (yyvsp[-1].nd)); - SET_LINENO((yyval.nd), (yyvsp[-8].num)); + (yyval.nd) = 0; } -#line 8212 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10073 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 329: -#line 2831 "mrbgems/mruby-compiler/core/parse.y" + case 398: /* opt_bv_decl: opt_nl ';' bv_decls opt_nl */ +#line 3718 "mrbgems/mruby-compiler/core/parse.y" { - if (p->in_def || p->in_single) - yyerror(p, "class definition in method body"); - (yyval.nd) = local_switch(p); - nvars_block(p); + (yyval.nd) = 0; } -#line 8223 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10081 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 330: -#line 2839 "mrbgems/mruby-compiler/core/parse.y" + case 401: /* bvar: "local variable or method" */ +#line 3728 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_class(p, (yyvsp[-4].nd), (yyvsp[-3].nd), (yyvsp[-1].nd)); - SET_LINENO((yyval.nd), (yyvsp[-5].num)); - local_resume(p, (yyvsp[-2].nd)); - nvars_unnest(p); + local_add_f(p, (yyvsp[0].id)); + new_bv(p, (yyvsp[0].id)); } -#line 8234 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10090 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 331: -#line 2847 "mrbgems/mruby-compiler/core/parse.y" + case 403: /* f_larglist: '(' f_args opt_bv_decl ')' */ +#line 3736 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.num) = p->in_def; - p->in_def = 0; + (yyval.nd) = (yyvsp[-2].nd); } -#line 8243 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10098 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 332: -#line 2852 "mrbgems/mruby-compiler/core/parse.y" + case 404: /* f_larglist: f_args */ +#line 3740 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = cons(local_switch(p), nint(p->in_single)); - nvars_block(p); - p->in_single = 0; + (yyval.nd) = (yyvsp[0].nd); } -#line 8253 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10106 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 333: -#line 2859 "mrbgems/mruby-compiler/core/parse.y" + case 405: /* lambda_body: tLAMBEG compstmt '}' */ +#line 3746 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_sclass(p, (yyvsp[-5].nd), (yyvsp[-1].nd)); - SET_LINENO((yyval.nd), (yyvsp[-7].num)); - local_resume(p, (yyvsp[-2].nd)->car); - nvars_unnest(p); - p->in_def = (yyvsp[-4].num); - p->in_single = intn((yyvsp[-2].nd)->cdr); + (yyval.nd) = (yyvsp[-1].nd); } -#line 8266 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10114 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 334: -#line 2869 "mrbgems/mruby-compiler/core/parse.y" + case 406: /* lambda_body: "'do' for lambda" bodystmt "'end'" */ +#line 3750 "mrbgems/mruby-compiler/core/parse.y" { - if (p->in_def || p->in_single) - yyerror(p, "module definition in method body"); - (yyval.nd) = local_switch(p); - nvars_block(p); + (yyval.nd) = (yyvsp[-1].nd); } -#line 8277 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10122 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 335: -#line 2877 "mrbgems/mruby-compiler/core/parse.y" + case 407: /* @28: %empty */ +#line 3756 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_module(p, (yyvsp[-3].nd), (yyvsp[-1].nd)); - SET_LINENO((yyval.nd), (yyvsp[-4].num)); - local_resume(p, (yyvsp[-2].nd)); - nvars_unnest(p); + local_nest(p); + nvars_nest(p); + (yyval.num) = p->lineno; } -#line 8288 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10132 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 336: -#line 2887 "mrbgems/mruby-compiler/core/parse.y" + case 408: /* do_block: "'do' for block" @28 opt_block_param bodystmt "'end'" */ +#line 3764 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[-3].nd); - defn_setup(p, (yyval.nd), (yyvsp[-2].nd), (yyvsp[-1].nd)); + (yyval.nd) = new_block(p,(yyvsp[-2].nd),(yyvsp[-1].nd)); + SET_LINENO((yyval.nd), (yyvsp[-3].num)); + local_unnest(p); nvars_unnest(p); - p->in_def--; } -#line 8299 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10143 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 337: -#line 2897 "mrbgems/mruby-compiler/core/parse.y" + case 409: /* block_call: command do_block */ +#line 3773 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[-3].nd); - defs_setup(p, (yyval.nd), (yyvsp[-2].nd), (yyvsp[-1].nd)); - nvars_unnest(p); - p->in_def--; - p->in_single--; + call_with_block(p, (yyvsp[-1].nd), (yyvsp[0].nd)); + (yyval.nd) = (yyvsp[-1].nd); } -#line 8311 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10152 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 338: -#line 2905 "mrbgems/mruby-compiler/core/parse.y" + case 410: /* block_call: block_call call_op2 operation2 opt_paren_args */ +#line 3778 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_break(p, 0); + (yyval.nd) = new_call(p, (yyvsp[-3].nd), (yyvsp[-1].id), (yyvsp[0].nd), (yyvsp[-2].num)); } -#line 8319 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10160 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 339: -#line 2909 "mrbgems/mruby-compiler/core/parse.y" + case 411: /* block_call: block_call call_op2 operation2 opt_paren_args brace_block */ +#line 3782 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_next(p, 0); + (yyval.nd) = new_call(p, (yyvsp[-4].nd), (yyvsp[-2].id), (yyvsp[-1].nd), (yyvsp[-3].num)); + call_with_block(p, (yyval.nd), (yyvsp[0].nd)); } -#line 8327 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10169 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 340: -#line 2913 "mrbgems/mruby-compiler/core/parse.y" + case 412: /* block_call: block_call call_op2 operation2 command_args do_block */ +#line 3787 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_redo(p); + (yyval.nd) = new_call(p, (yyvsp[-4].nd), (yyvsp[-2].id), (yyvsp[-1].nd), (yyvsp[-3].num)); + call_with_block(p, (yyval.nd), (yyvsp[0].nd)); } -#line 8335 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10178 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 341: -#line 2917 "mrbgems/mruby-compiler/core/parse.y" + case 413: /* method_call: operation paren_args */ +#line 3794 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_retry(p); + (yyval.nd) = new_fcall(p, (yyvsp[-1].id), (yyvsp[0].nd)); } -#line 8343 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10186 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 342: -#line 2923 "mrbgems/mruby-compiler/core/parse.y" + case 414: /* method_call: primary_value call_op operation2 opt_paren_args */ +#line 3798 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[0].nd); - if (!(yyval.nd)) (yyval.nd) = new_nil(p); + (yyval.nd) = new_call(p, (yyvsp[-3].nd), (yyvsp[-1].id), (yyvsp[0].nd), (yyvsp[-2].num)); } -#line 8352 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10194 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 349: -#line 2942 "mrbgems/mruby-compiler/core/parse.y" + case 415: /* method_call: primary_value "::" operation2 paren_args */ +#line 3802 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_if(p, cond((yyvsp[-3].nd)), (yyvsp[-1].nd), (yyvsp[0].nd)); + (yyval.nd) = new_call(p, (yyvsp[-3].nd), (yyvsp[-1].id), (yyvsp[0].nd), tCOLON2); } -#line 8360 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10202 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 351: -#line 2949 "mrbgems/mruby-compiler/core/parse.y" + case 416: /* method_call: primary_value "::" operation3 */ +#line 3806 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[0].nd); + (yyval.nd) = new_call(p, (yyvsp[-2].nd), (yyvsp[0].id), 0, tCOLON2); } -#line 8368 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10210 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 352: -#line 2955 "mrbgems/mruby-compiler/core/parse.y" + case 417: /* method_call: primary_value call_op paren_args */ +#line 3810 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list1(list1((yyvsp[0].nd))); + (yyval.nd) = new_call(p, (yyvsp[-2].nd), MRB_SYM(call), (yyvsp[0].nd), (yyvsp[-1].num)); } -#line 8376 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10218 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 354: -#line 2962 "mrbgems/mruby-compiler/core/parse.y" + case 418: /* method_call: primary_value "::" paren_args */ +#line 3814 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list3((yyvsp[0].nd),0,0); + (yyval.nd) = new_call(p, (yyvsp[-2].nd), MRB_SYM(call), (yyvsp[0].nd), tCOLON2); } -#line 8384 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10226 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 355: -#line 2966 "mrbgems/mruby-compiler/core/parse.y" + case 419: /* method_call: "'super'" paren_args */ +#line 3818 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list3((yyvsp[-3].nd), new_arg(p, (yyvsp[0].id)), 0); + (yyval.nd) = new_super(p, (yyvsp[0].nd)); } -#line 8392 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10234 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 356: -#line 2970 "mrbgems/mruby-compiler/core/parse.y" + case 420: /* method_call: "'super'" */ +#line 3822 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list3((yyvsp[-5].nd), new_arg(p, (yyvsp[-2].id)), (yyvsp[0].nd)); + (yyval.nd) = new_zsuper(p); } -#line 8400 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10242 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 357: -#line 2974 "mrbgems/mruby-compiler/core/parse.y" + case 421: /* method_call: primary_value '[' opt_call_args ']' */ +#line 3826 "mrbgems/mruby-compiler/core/parse.y" { - local_add_f(p, intern_op(mul)); - (yyval.nd) = list3((yyvsp[-2].nd), nint(-1), 0); + (yyval.nd) = new_call(p, (yyvsp[-3].nd), intern_op(aref), (yyvsp[-1].nd), '.'); } -#line 8409 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10250 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 358: -#line 2979 "mrbgems/mruby-compiler/core/parse.y" + case 422: /* @29: %empty */ +#line 3832 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list3((yyvsp[-4].nd), nint(-1), (yyvsp[0].nd)); + local_nest(p); + nvars_nest(p); + (yyval.num) = p->lineno; } -#line 8417 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10260 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 359: -#line 2983 "mrbgems/mruby-compiler/core/parse.y" + case 423: /* brace_block: '{' @29 opt_block_param compstmt '}' */ +#line 3839 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list3(0, new_arg(p, (yyvsp[0].id)), 0); + (yyval.nd) = new_block(p,(yyvsp[-2].nd),(yyvsp[-1].nd)); + SET_LINENO((yyval.nd), (yyvsp[-3].num)); + local_unnest(p); + nvars_unnest(p); } -#line 8425 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10271 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 360: -#line 2987 "mrbgems/mruby-compiler/core/parse.y" + case 424: /* @30: %empty */ +#line 3846 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list3(0, new_arg(p, (yyvsp[-2].id)), (yyvsp[0].nd)); + local_nest(p); + nvars_nest(p); + (yyval.num) = p->lineno; } -#line 8433 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10281 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 361: -#line 2991 "mrbgems/mruby-compiler/core/parse.y" + case 425: /* brace_block: "'do'" @30 opt_block_param bodystmt "'end'" */ +#line 3853 "mrbgems/mruby-compiler/core/parse.y" { - local_add_f(p, intern_op(mul)); - (yyval.nd) = list3(0, nint(-1), 0); + (yyval.nd) = new_block(p,(yyvsp[-2].nd),(yyvsp[-1].nd)); + SET_LINENO((yyval.nd), (yyvsp[-3].num)); + local_unnest(p); + nvars_unnest(p); } -#line 8442 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10292 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 362: -#line 2996 "mrbgems/mruby-compiler/core/parse.y" + case 426: /* case_body: "'when'" args then compstmt cases */ +#line 3864 "mrbgems/mruby-compiler/core/parse.y" { - local_add_f(p, intern_op(mul)); + (yyval.nd) = cons(cons((yyvsp[-3].nd), (yyvsp[-1].nd)), (yyvsp[0].nd)); } -#line 8450 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10300 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 363: -#line 3000 "mrbgems/mruby-compiler/core/parse.y" + case 427: /* cases: opt_else */ +#line 3870 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = list3(0, nint(-1), (yyvsp[0].nd)); + if ((yyvsp[0].nd)) { + (yyval.nd) = cons(cons(0, (yyvsp[0].nd)), 0); + } + else { + (yyval.nd) = 0; + } } -#line 8458 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10313 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 364: -#line 3006 "mrbgems/mruby-compiler/core/parse.y" + case 429: /* in_clauses: opt_else */ +#line 3884 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_args_tail(p, (yyvsp[-3].nd), (yyvsp[-1].nd), (yyvsp[0].id)); + (yyval.nd) = (yyvsp[0].nd) ? list1(new_in(p, NULL, NULL, (yyvsp[0].nd), FALSE)) : 0; } -#line 8466 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10321 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 365: -#line 3010 "mrbgems/mruby-compiler/core/parse.y" - { - (yyval.nd) = new_args_tail(p, (yyvsp[-1].nd), 0, (yyvsp[0].id)); - } -#line 8474 "mrbgems/mruby-compiler/core/y.tab.c" + case 430: /* $@31: %empty */ +#line 3887 "mrbgems/mruby-compiler/core/parse.y" + {p->in_kwarg--;} +#line 10327 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 366: -#line 3014 "mrbgems/mruby-compiler/core/parse.y" + case 431: /* in_clauses: "'in'" p_expr $@31 then compstmt in_clauses */ +#line 3888 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_args_tail(p, 0, (yyvsp[-1].nd), (yyvsp[0].id)); + node *in_clause = new_in(p, (yyvsp[-4].nd), NULL, (yyvsp[-1].nd), FALSE); + (yyval.nd) = cons(in_clause, (yyvsp[0].nd)); } -#line 8482 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10336 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 367: -#line 3018 "mrbgems/mruby-compiler/core/parse.y" + case 432: /* $@32: %empty */ +#line 3892 "mrbgems/mruby-compiler/core/parse.y" + {p->in_kwarg--;} +#line 10342 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 433: /* in_clauses: "'in'" p_expr $@32 "'if' modifier" expr_value then compstmt in_clauses */ +#line 3893 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_args_tail(p, 0, 0, (yyvsp[0].id)); + node *in_clause = new_in(p, (yyvsp[-6].nd), (yyvsp[-3].nd), (yyvsp[-1].nd), FALSE); + (yyval.nd) = cons(in_clause, (yyvsp[0].nd)); } -#line 8490 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10351 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 368: -#line 3024 "mrbgems/mruby-compiler/core/parse.y" + case 434: /* $@33: %empty */ +#line 3897 "mrbgems/mruby-compiler/core/parse.y" + {p->in_kwarg--;} +#line 10357 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 435: /* in_clauses: "'in'" p_expr $@33 "'unless' modifier" expr_value then compstmt in_clauses */ +#line 3898 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[0].nd); + node *in_clause = new_in(p, (yyvsp[-6].nd), (yyvsp[-3].nd), (yyvsp[-1].nd), TRUE); + (yyval.nd) = cons(in_clause, (yyvsp[0].nd)); } -#line 8498 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10366 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 369: -#line 3028 "mrbgems/mruby-compiler/core/parse.y" + case 437: /* p_expr: p_args_head p_as */ +#line 3909 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_args_tail(p, 0, 0, 0); + (yyval.nd) = new_pat_array(p, push((yyvsp[-1].nd), (yyvsp[0].nd)), 0, 0); } -#line 8506 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10374 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 370: -#line 3034 "mrbgems/mruby-compiler/core/parse.y" + case 438: /* p_expr: p_args_head p_rest */ +#line 3913 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_args(p, (yyvsp[-5].nd), (yyvsp[-3].nd), (yyvsp[-1].id), 0, (yyvsp[0].nd)); + (yyval.nd) = new_pat_array(p, (yyvsp[-1].nd), (yyvsp[0].nd), 0); } -#line 8514 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10382 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 371: -#line 3038 "mrbgems/mruby-compiler/core/parse.y" + case 439: /* p_expr: p_args_head p_rest ',' p_args_post */ +#line 3917 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_args(p, (yyvsp[-7].nd), (yyvsp[-5].nd), (yyvsp[-3].id), (yyvsp[-1].nd), (yyvsp[0].nd)); + (yyval.nd) = new_pat_array(p, (yyvsp[-3].nd), (yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 8522 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10390 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 372: -#line 3042 "mrbgems/mruby-compiler/core/parse.y" + case 440: /* p_expr: p_rest */ +#line 3921 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_args(p, (yyvsp[-3].nd), (yyvsp[-1].nd), 0, 0, (yyvsp[0].nd)); + (yyval.nd) = new_pat_array(p, 0, (yyvsp[0].nd), 0); } -#line 8530 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10398 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 373: -#line 3046 "mrbgems/mruby-compiler/core/parse.y" + case 441: /* p_expr: p_rest ',' p_args_post */ +#line 3925 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_args(p, (yyvsp[-5].nd), (yyvsp[-3].nd), 0, (yyvsp[-1].nd), (yyvsp[0].nd)); + (yyval.nd) = new_pat_array(p, 0, (yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 8538 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10406 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 374: -#line 3050 "mrbgems/mruby-compiler/core/parse.y" + case 442: /* p_expr: p_hash_elems */ +#line 3929 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_args(p, (yyvsp[-3].nd), 0, (yyvsp[-1].id), 0, (yyvsp[0].nd)); + /* Brace-less hash pattern: in a:, b: x */ + (yyval.nd) = new_pat_hash(p, (yyvsp[0].nd), 0); } -#line 8546 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10415 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 375: -#line 3054 "mrbgems/mruby-compiler/core/parse.y" + case 443: /* p_expr: p_hash_elems ',' p_kwrest */ +#line 3934 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_args(p, (yyvsp[-2].nd), 0, 0, 0, (yyvsp[0].nd)); + /* Brace-less hash pattern with kwrest: in a:, **rest */ + (yyval.nd) = new_pat_hash(p, (yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 8554 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10424 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 376: -#line 3058 "mrbgems/mruby-compiler/core/parse.y" + case 444: /* p_expr: p_kwrest */ +#line 3939 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_args(p, (yyvsp[-5].nd), 0, (yyvsp[-3].id), (yyvsp[-1].nd), (yyvsp[0].nd)); + /* Brace-less kwrest only: in **rest */ + (yyval.nd) = new_pat_hash(p, 0, (yyvsp[0].nd)); } -#line 8562 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10433 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 377: -#line 3062 "mrbgems/mruby-compiler/core/parse.y" + case 445: /* p_args_head: p_as ',' */ +#line 3947 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_args(p, (yyvsp[-1].nd), 0, 0, 0, (yyvsp[0].nd)); + (yyval.nd) = list1((yyvsp[-1].nd)); } -#line 8570 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10441 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 378: -#line 3066 "mrbgems/mruby-compiler/core/parse.y" + case 446: /* p_args_head: p_args_head p_as ',' */ +#line 3951 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_args(p, 0, (yyvsp[-3].nd), (yyvsp[-1].id), 0, (yyvsp[0].nd)); + (yyval.nd) = push((yyvsp[-2].nd), (yyvsp[-1].nd)); } -#line 8578 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10449 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 379: -#line 3070 "mrbgems/mruby-compiler/core/parse.y" + case 447: /* p_args_post: p_as */ +#line 3958 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_args(p, 0, (yyvsp[-5].nd), (yyvsp[-3].id), (yyvsp[-1].nd), (yyvsp[0].nd)); + (yyval.nd) = list1((yyvsp[0].nd)); } -#line 8586 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10457 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 380: -#line 3074 "mrbgems/mruby-compiler/core/parse.y" + case 448: /* p_args_post: p_args_post ',' p_as */ +#line 3962 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_args(p, 0, (yyvsp[-1].nd), 0, 0, (yyvsp[0].nd)); + (yyval.nd) = push((yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 8594 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10465 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 381: -#line 3078 "mrbgems/mruby-compiler/core/parse.y" + case 450: /* p_as: p_alt "=>" "local variable or method" */ +#line 3969 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_args(p, 0, (yyvsp[-3].nd), 0, (yyvsp[-1].nd), (yyvsp[0].nd)); + (yyval.nd) = new_pat_as(p, (yyvsp[-2].nd), (yyvsp[0].id)); } -#line 8602 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10473 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 382: -#line 3082 "mrbgems/mruby-compiler/core/parse.y" + case 452: /* p_alt: p_alt '|' p_value */ +#line 3976 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_args(p, 0, 0, (yyvsp[-1].id), 0, (yyvsp[0].nd)); + (yyval.nd) = new_pat_alt(p, (yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 8610 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10481 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 383: -#line 3086 "mrbgems/mruby-compiler/core/parse.y" + case 454: /* p_value: numeric */ +#line 3983 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_args(p, 0, 0, (yyvsp[-3].id), (yyvsp[-1].nd), (yyvsp[0].nd)); + (yyval.nd) = new_pat_value(p, (yyvsp[0].nd)); } -#line 8618 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10489 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 384: -#line 3090 "mrbgems/mruby-compiler/core/parse.y" + case 455: /* p_value: symbol */ +#line 3987 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_args(p, 0, 0, 0, 0, (yyvsp[0].nd)); + (yyval.nd) = new_pat_value(p, (yyvsp[0].nd)); } -#line 8626 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10497 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 385: -#line 3096 "mrbgems/mruby-compiler/core/parse.y" + case 456: /* p_value: string */ +#line 3991 "mrbgems/mruby-compiler/core/parse.y" { - local_add_blk(p, 0); - (yyval.nd) = 0; + (yyval.nd) = new_pat_value(p, new_str(p, (yyvsp[0].nd))); } -#line 8635 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10505 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 386: -#line 3101 "mrbgems/mruby-compiler/core/parse.y" - { - p->cmd_start = TRUE; - (yyval.nd) = (yyvsp[0].nd); + case 457: /* p_value: "'nil'" */ +#line 3995 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_pat_value(p, new_nil(p)); } -#line 8644 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10513 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 387: -#line 3107 "mrbgems/mruby-compiler/core/parse.y" - {local_add_blk(p, 0);} -#line 8650 "mrbgems/mruby-compiler/core/y.tab.c" + case 458: /* p_value: "'true'" */ +#line 3999 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_pat_value(p, new_true(p)); + } +#line 10521 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 388: -#line 3108 "mrbgems/mruby-compiler/core/parse.y" + case 459: /* p_value: "'false'" */ +#line 4003 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = 0; + (yyval.nd) = new_pat_value(p, new_false(p)); } -#line 8658 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10529 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 389: -#line 3112 "mrbgems/mruby-compiler/core/parse.y" + case 460: /* p_value: p_const */ +#line 4007 "mrbgems/mruby-compiler/core/parse.y" { - local_add_blk(p, 0); - (yyval.nd) = 0; + (yyval.nd) = new_pat_value(p, (yyvsp[0].nd)); } -#line 8667 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10537 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 390: -#line 3117 "mrbgems/mruby-compiler/core/parse.y" + case 463: /* p_value: '^' "local variable or method" */ +#line 4013 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[-2].nd); + (yyval.nd) = new_pat_pin(p, (yyvsp[0].id)); } -#line 8675 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10545 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 391: -#line 3124 "mrbgems/mruby-compiler/core/parse.y" + case 464: /* p_array: "[" p_array_body ']' */ +#line 4020 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = 0; + (yyval.nd) = (yyvsp[-1].nd); } -#line 8683 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10553 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 392: -#line 3128 "mrbgems/mruby-compiler/core/parse.y" + case 465: /* p_array: "[" ']' */ +#line 4024 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = 0; + (yyval.nd) = new_pat_array(p, 0, 0, 0); } -#line 8691 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10561 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 395: -#line 3138 "mrbgems/mruby-compiler/core/parse.y" + case 466: /* p_array_body: p_array_elems */ +#line 4031 "mrbgems/mruby-compiler/core/parse.y" { - local_add_f(p, (yyvsp[0].id)); - new_bv(p, (yyvsp[0].id)); + /* Just pre elements, no rest */ + (yyval.nd) = new_pat_array(p, (yyvsp[0].nd), 0, 0); } -#line 8700 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10570 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 397: -#line 3146 "mrbgems/mruby-compiler/core/parse.y" + case 467: /* p_array_body: p_array_elems ',' p_rest */ +#line 4036 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[-2].nd); + /* Pre elements + rest, no post */ + (yyval.nd) = new_pat_array(p, (yyvsp[-2].nd), (yyvsp[0].nd), 0); } -#line 8708 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10579 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 398: -#line 3150 "mrbgems/mruby-compiler/core/parse.y" + case 468: /* p_array_body: p_array_elems ',' p_rest ',' p_array_elems */ +#line 4041 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[0].nd); + /* Pre + rest + post */ + (yyval.nd) = new_pat_array(p, (yyvsp[-4].nd), (yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 8716 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10588 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 399: -#line 3156 "mrbgems/mruby-compiler/core/parse.y" + case 469: /* p_array_body: p_rest */ +#line 4046 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[-1].nd); + /* Just rest, no pre or post */ + (yyval.nd) = new_pat_array(p, 0, (yyvsp[0].nd), 0); } -#line 8724 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10597 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 400: -#line 3160 "mrbgems/mruby-compiler/core/parse.y" + case 470: /* p_array_body: p_rest ',' p_array_elems */ +#line 4051 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[-1].nd); + /* Rest + post, no pre */ + (yyval.nd) = new_pat_array(p, 0, (yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 8732 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10606 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 401: -#line 3166 "mrbgems/mruby-compiler/core/parse.y" + case 471: /* p_array_body: p_rest ',' p_array_elems ',' p_rest */ +#line 4056 "mrbgems/mruby-compiler/core/parse.y" { - local_nest(p); - nvars_nest(p); + /* Find pattern: [*pre, elems, *post] */ + (yyval.nd) = new_pat_find(p, (yyvsp[-4].nd), (yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 8741 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10615 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 402: -#line 3173 "mrbgems/mruby-compiler/core/parse.y" + case 472: /* p_array_elems: p_as */ +#line 4064 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_block(p,(yyvsp[-2].nd),(yyvsp[-1].nd)); - local_unnest(p); - nvars_unnest(p); + (yyval.nd) = list1((yyvsp[0].nd)); } -#line 8751 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10623 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 403: -#line 3181 "mrbgems/mruby-compiler/core/parse.y" + case 473: /* p_array_elems: p_array_elems ',' p_as */ +#line 4068 "mrbgems/mruby-compiler/core/parse.y" { - if (typen((yyvsp[-1].nd)->car) == NODE_YIELD) { - yyerror(p, "block given to yield"); - } - else { - call_with_block(p, (yyvsp[-1].nd), (yyvsp[0].nd)); - } - (yyval.nd) = (yyvsp[-1].nd); + (yyval.nd) = push((yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 8765 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10631 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 404: -#line 3191 "mrbgems/mruby-compiler/core/parse.y" + case 474: /* p_rest: "*" "local variable or method" */ +#line 4075 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_call(p, (yyvsp[-3].nd), (yyvsp[-1].id), (yyvsp[0].nd), (yyvsp[-2].num)); + (yyval.nd) = new_pat_var(p, (yyvsp[0].id)); } -#line 8773 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10639 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 405: -#line 3195 "mrbgems/mruby-compiler/core/parse.y" + case 475: /* p_rest: "*" */ +#line 4079 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_call(p, (yyvsp[-4].nd), (yyvsp[-2].id), (yyvsp[-1].nd), (yyvsp[-3].num)); - call_with_block(p, (yyval.nd), (yyvsp[0].nd)); + /* Anonymous rest pattern */ + (yyval.nd) = (node*)-1; } -#line 8782 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10648 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 406: -#line 3200 "mrbgems/mruby-compiler/core/parse.y" + case 476: /* p_const: "constant" */ +#line 4087 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_call(p, (yyvsp[-4].nd), (yyvsp[-2].id), (yyvsp[-1].nd), (yyvsp[-3].num)); - call_with_block(p, (yyval.nd), (yyvsp[0].nd)); + (yyval.nd) = new_const(p, (yyvsp[0].id)); } -#line 8791 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10656 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 407: -#line 3207 "mrbgems/mruby-compiler/core/parse.y" + case 477: /* p_const: p_const "::" "constant" */ +#line 4091 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_fcall(p, (yyvsp[-1].id), (yyvsp[0].nd)); + (yyval.nd) = new_colon2(p, (yyvsp[-2].nd), (yyvsp[0].id)); } -#line 8799 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10664 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 408: -#line 3211 "mrbgems/mruby-compiler/core/parse.y" + case 478: /* p_const: tCOLON3 "constant" */ +#line 4095 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_call(p, (yyvsp[-3].nd), (yyvsp[-1].id), (yyvsp[0].nd), (yyvsp[-2].num)); + (yyval.nd) = new_colon3(p, (yyvsp[0].id)); } -#line 8807 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10672 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 409: -#line 3215 "mrbgems/mruby-compiler/core/parse.y" + case 479: /* p_hash: tLBRACE p_hash_body '}' */ +#line 4102 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_call(p, (yyvsp[-3].nd), (yyvsp[-1].id), (yyvsp[0].nd), tCOLON2); + (yyval.nd) = (yyvsp[-1].nd); } -#line 8815 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10680 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 410: -#line 3219 "mrbgems/mruby-compiler/core/parse.y" + case 480: /* p_hash: tLBRACE '}' */ +#line 4106 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_call(p, (yyvsp[-2].nd), (yyvsp[0].id), 0, tCOLON2); + (yyval.nd) = new_pat_hash(p, 0, 0); } -#line 8823 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10688 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 411: -#line 3223 "mrbgems/mruby-compiler/core/parse.y" + case 481: /* p_hash_body: p_hash_elems */ +#line 4113 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_call(p, (yyvsp[-2].nd), MRB_SYM_2(p->mrb, call), (yyvsp[0].nd), (yyvsp[-1].num)); + (yyval.nd) = new_pat_hash(p, (yyvsp[0].nd), 0); } -#line 8831 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10696 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 412: -#line 3227 "mrbgems/mruby-compiler/core/parse.y" + case 482: /* p_hash_body: p_hash_elems ',' p_kwrest */ +#line 4117 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_call(p, (yyvsp[-2].nd), MRB_SYM_2(p->mrb, call), (yyvsp[0].nd), tCOLON2); + (yyval.nd) = new_pat_hash(p, (yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 8839 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10704 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 413: -#line 3231 "mrbgems/mruby-compiler/core/parse.y" + case 483: /* p_hash_body: p_kwrest */ +#line 4121 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_super(p, (yyvsp[0].nd)); + (yyval.nd) = new_pat_hash(p, 0, (yyvsp[0].nd)); } -#line 8847 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10712 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 414: -#line 3235 "mrbgems/mruby-compiler/core/parse.y" + case 484: /* p_hash_elems: p_hash_elem */ +#line 4128 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_zsuper(p); + (yyval.nd) = list1((yyvsp[0].nd)); } -#line 8855 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10720 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 415: -#line 3239 "mrbgems/mruby-compiler/core/parse.y" + case 485: /* p_hash_elems: p_hash_elems ',' p_hash_elem */ +#line 4132 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_call(p, (yyvsp[-3].nd), intern_op(aref), (yyvsp[-1].nd), '.'); + (yyval.nd) = push((yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 8863 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10728 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 416: -#line 3245 "mrbgems/mruby-compiler/core/parse.y" + case 486: /* p_hash_elem: "local variable or method" "label" p_as */ +#line 4141 "mrbgems/mruby-compiler/core/parse.y" { - local_nest(p); - nvars_nest(p); - (yyval.num) = p->lineno; + /* {key: pattern} */ + (yyval.nd) = cons(new_sym(p, (yyvsp[-2].id)), (yyvsp[0].nd)); } -#line 8873 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10737 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 417: -#line 3252 "mrbgems/mruby-compiler/core/parse.y" + case 487: /* p_hash_elem: "local variable or method" "label" */ +#line 4146 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_block(p,(yyvsp[-2].nd),(yyvsp[-1].nd)); - SET_LINENO((yyval.nd), (yyvsp[-3].num)); - local_unnest(p); - nvars_unnest(p); + /* {key:} shorthand - binds to variable with same name */ + (yyval.nd) = cons(new_sym(p, (yyvsp[-1].id)), new_pat_var(p, (yyvsp[-1].id))); } -#line 8884 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10746 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 418: -#line 3259 "mrbgems/mruby-compiler/core/parse.y" + case 488: /* p_kwrest: "**" "local variable or method" */ +#line 4154 "mrbgems/mruby-compiler/core/parse.y" { - local_nest(p); - nvars_nest(p); - (yyval.num) = p->lineno; + (yyval.nd) = new_pat_var(p, (yyvsp[0].id)); } -#line 8894 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10754 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 419: -#line 3266 "mrbgems/mruby-compiler/core/parse.y" + case 489: /* p_kwrest: "**" "'nil'" */ +#line 4158 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_block(p,(yyvsp[-2].nd),(yyvsp[-1].nd)); - SET_LINENO((yyval.nd), (yyvsp[-3].num)); - local_unnest(p); - nvars_unnest(p); + /* **nil - exact match, no extra keys allowed */ + (yyval.nd) = (node*)-1; } -#line 8905 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10763 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 420: -#line 3277 "mrbgems/mruby-compiler/core/parse.y" + case 490: /* p_kwrest: "**" */ +#line 4163 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = cons(cons((yyvsp[-3].nd), (yyvsp[-1].nd)), (yyvsp[0].nd)); + /* ** - anonymous rest, discards extra keys */ + (yyval.nd) = (node*)-2; } -#line 8913 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10772 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 421: -#line 3283 "mrbgems/mruby-compiler/core/parse.y" + case 491: /* p_var: "local variable or method" */ +#line 4170 "mrbgems/mruby-compiler/core/parse.y" { - if ((yyvsp[0].nd)) { - (yyval.nd) = cons(cons(0, (yyvsp[0].nd)), 0); - } - else { - (yyval.nd) = 0; - } + (yyval.nd) = new_pat_var(p, (yyvsp[0].id)); } -#line 8926 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10780 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 423: -#line 3297 "mrbgems/mruby-compiler/core/parse.y" + case 492: /* opt_rescue: "'rescue'" exc_list exc_var then compstmt opt_rescue */ +#line 4178 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = list1(list3((yyvsp[-4].nd), (yyvsp[-3].nd), (yyvsp[-1].nd))); if ((yyvsp[0].nd)) (yyval.nd) = append((yyval.nd), (yyvsp[0].nd)); } -#line 8935 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10789 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 425: -#line 3305 "mrbgems/mruby-compiler/core/parse.y" + case 494: /* exc_list: arg */ +#line 4186 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = list1((yyvsp[0].nd)); } -#line 8943 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10797 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 428: -#line 3313 "mrbgems/mruby-compiler/core/parse.y" + case 497: /* exc_var: "=>" lhs */ +#line 4194 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = (yyvsp[0].nd); } -#line 8951 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10805 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 430: -#line 3320 "mrbgems/mruby-compiler/core/parse.y" + case 499: /* opt_ensure: "'ensure'" compstmt */ +#line 4201 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = (yyvsp[0].nd); } -#line 8959 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10813 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 437: -#line 3334 "mrbgems/mruby-compiler/core/parse.y" + case 506: /* string: string string_fragment */ +#line 4215 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = concat_string(p, (yyvsp[-1].nd), (yyvsp[0].nd)); + (yyval.nd) = append((yyvsp[-1].nd), (yyvsp[0].nd)); } -#line 8967 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10821 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 440: -#line 3342 "mrbgems/mruby-compiler/core/parse.y" + case 507: /* string_fragment: "character literal" */ +#line 4221 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[0].nd); + /* tCHAR is (len . str), wrap as cons list */ + (yyval.nd) = list1((yyvsp[0].nd)); } -#line 8975 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10830 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 441: -#line 3346 "mrbgems/mruby-compiler/core/parse.y" + case 508: /* string_fragment: tSTRING */ +#line 4226 "mrbgems/mruby-compiler/core/parse.y" { - node *n = (yyvsp[-1].nd); - if (intn((yyvsp[0].nd)->cdr->cdr) > 0) { - n = push(n, (yyvsp[0].nd)); - } - else { - cons_free((yyvsp[0].nd)); - } - (yyval.nd) = new_dstr(p, n); + /* tSTRING is (len . str), wrap as cons list */ + (yyval.nd) = list1((yyvsp[0].nd)); + } +#line 10839 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 509: /* string_fragment: "string literal" tSTRING */ +#line 4231 "mrbgems/mruby-compiler/core/parse.y" + { + /* $2 is (len . str), wrap as cons list */ + (yyval.nd) = list1((yyvsp[0].nd)); + } +#line 10848 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 510: /* string_fragment: "string literal" string_rep tSTRING */ +#line 4236 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = push((yyvsp[-1].nd), (yyvsp[0].nd)); } -#line 8990 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10856 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 443: -#line 3360 "mrbgems/mruby-compiler/core/parse.y" + case 512: /* string_rep: string_rep string_interp */ +#line 4243 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = append((yyvsp[-1].nd), (yyvsp[0].nd)); } -#line 8998 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10864 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 444: -#line 3366 "mrbgems/mruby-compiler/core/parse.y" + case 513: /* string_interp: tSTRING_MID */ +#line 4249 "mrbgems/mruby-compiler/core/parse.y" { + /* $1 is already in (len . str) format */ (yyval.nd) = list1((yyvsp[0].nd)); } -#line 9006 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10873 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 445: -#line 3370 "mrbgems/mruby-compiler/core/parse.y" + case 514: /* @34: %empty */ +#line 4254 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = push_strterm(p); } -#line 9014 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10881 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 446: -#line 3375 "mrbgems/mruby-compiler/core/parse.y" + case 515: /* string_interp: tSTRING_PART @34 compstmt '}' */ +#line 4259 "mrbgems/mruby-compiler/core/parse.y" { pop_strterm(p,(yyvsp[-2].nd)); - (yyval.nd) = list2((yyvsp[-3].nd), (yyvsp[-1].nd)); + /* $1 is already in (len . str) format, create (-1 . node) for expression */ + node *expr_elem = cons(int_to_node(-1), (yyvsp[-1].nd)); + (yyval.nd) = list2((yyvsp[-3].nd), expr_elem); } -#line 9023 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10892 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 447: -#line 3380 "mrbgems/mruby-compiler/core/parse.y" + case 516: /* string_interp: tLITERAL_DELIM */ +#line 4266 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = list1(new_literal_delim(p)); } -#line 9031 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10900 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 448: -#line 3384 "mrbgems/mruby-compiler/core/parse.y" + case 517: /* string_interp: tHD_LITERAL_DELIM heredoc_bodies */ +#line 4270 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = list1(new_literal_delim(p)); } -#line 9039 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10908 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 449: -#line 3390 "mrbgems/mruby-compiler/core/parse.y" + case 518: /* xstring: tXSTRING_BEG tXSTRING */ +#line 4276 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[0].nd); + (yyval.nd) = cons((yyvsp[0].nd), (node*)NULL); } -#line 9047 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10916 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 450: -#line 3394 "mrbgems/mruby-compiler/core/parse.y" + case 519: /* xstring: tXSTRING_BEG string_rep tXSTRING */ +#line 4280 "mrbgems/mruby-compiler/core/parse.y" { - node *n = (yyvsp[-1].nd); - if (intn((yyvsp[0].nd)->cdr->cdr) > 0) { - n = push(n, (yyvsp[0].nd)); - } - else { - cons_free((yyvsp[0].nd)); - } - (yyval.nd) = new_dxstr(p, n); + (yyval.nd) = push((yyvsp[-1].nd), (yyvsp[0].nd)); } -#line 9062 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10924 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 451: -#line 3407 "mrbgems/mruby-compiler/core/parse.y" + case 520: /* regexp: tREGEXP_BEG tREGEXP */ +#line 4286 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = (yyvsp[0].nd); + node *data = (yyvsp[0].nd); /* ((len . pattern) . (flags . encoding)) */ + const char *flags = (const char*)data->cdr->car; + const char *encoding = (const char*)data->cdr->cdr; + /* Use data->car directly as pattern_list: (len . pattern) */ + node *pattern_list = cons(data->car, (node*)NULL); + (yyval.nd) = new_regx(p, pattern_list, flags, encoding); } -#line 9070 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10937 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 452: -#line 3411 "mrbgems/mruby-compiler/core/parse.y" + case 521: /* regexp: tREGEXP_BEG string_rep tREGEXP */ +#line 4295 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_dregx(p, (yyvsp[-1].nd), (yyvsp[0].nd)); + node *data = (yyvsp[0].nd); /* ((len . pattern) . (flags . encoding)) */ + const char *flags = (const char*)data->cdr->car; + const char *encoding = (const char*)data->cdr->cdr; + /* Append the pattern from $3->car to the string list $2 */ + node *complete_list = push((yyvsp[-1].nd), data->car); + (yyval.nd) = new_regx(p, complete_list, flags, encoding); } -#line 9078 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10950 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 456: -#line 3424 "mrbgems/mruby-compiler/core/parse.y" + case 525: /* heredoc_body: tHEREDOC_END */ +#line 4313 "mrbgems/mruby-compiler/core/parse.y" { parser_heredoc_info *info = parsing_heredoc_info(p); - info->doc = push(info->doc, new_str(p, "", 0)); + info->doc = push(info->doc, new_str_empty(p)); heredoc_end(p); } -#line 9088 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10960 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 457: -#line 3430 "mrbgems/mruby-compiler/core/parse.y" + case 526: /* heredoc_body: heredoc_string_rep tHEREDOC_END */ +#line 4319 "mrbgems/mruby-compiler/core/parse.y" { heredoc_end(p); } -#line 9096 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10968 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 460: -#line 3440 "mrbgems/mruby-compiler/core/parse.y" + case 529: /* heredoc_string_interp: tHD_STRING_MID */ +#line 4329 "mrbgems/mruby-compiler/core/parse.y" { parser_heredoc_info *info = parsing_heredoc_info(p); info->doc = push(info->doc, (yyvsp[0].nd)); heredoc_treat_nextline(p); } -#line 9106 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10978 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 461: -#line 3446 "mrbgems/mruby-compiler/core/parse.y" + case 530: /* @35: %empty */ +#line 4335 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = push_strterm(p); } -#line 9114 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10986 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 462: -#line 3451 "mrbgems/mruby-compiler/core/parse.y" + case 531: /* heredoc_string_interp: tHD_STRING_PART @35 compstmt '}' */ +#line 4340 "mrbgems/mruby-compiler/core/parse.y" { pop_strterm(p, (yyvsp[-2].nd)); parser_heredoc_info *info = parsing_heredoc_info(p); - info->doc = push(push(info->doc, (yyvsp[-3].nd)), (yyvsp[-1].nd)); + /* $1 is already in (len . str) format, create (-1 . node) for expression */ + node *expr_elem = cons(int_to_node(-1), (yyvsp[-1].nd)); + info->doc = push(push(info->doc, (yyvsp[-3].nd)), expr_elem); } -#line 9124 "mrbgems/mruby-compiler/core/y.tab.c" +#line 10998 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 463: -#line 3459 "mrbgems/mruby-compiler/core/parse.y" + case 532: /* words: tWORDS_BEG tSTRING */ +#line 4350 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_words(p, list1((yyvsp[0].nd))); } -#line 9132 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11006 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 464: -#line 3463 "mrbgems/mruby-compiler/core/parse.y" + case 533: /* words: tWORDS_BEG string_rep tSTRING */ +#line 4354 "mrbgems/mruby-compiler/core/parse.y" { node *n = (yyvsp[-1].nd); - if (intn((yyvsp[0].nd)->cdr->cdr) > 0) { - n = push(n, (yyvsp[0].nd)); - } - else { - cons_free((yyvsp[0].nd)); - } + n = push(n, (yyvsp[0].nd)); (yyval.nd) = new_words(p, n); } -#line 9147 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11016 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 465: -#line 3477 "mrbgems/mruby-compiler/core/parse.y" + case 534: /* symbol: basic_symbol */ +#line 4362 "mrbgems/mruby-compiler/core/parse.y" { - p->lstate = EXPR_ENDARG; (yyval.nd) = new_sym(p, (yyvsp[0].id)); } -#line 9156 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11024 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 466: -#line 3482 "mrbgems/mruby-compiler/core/parse.y" + case 535: /* symbol: "symbol" "string literal" string_rep tSTRING */ +#line 4366 "mrbgems/mruby-compiler/core/parse.y" { node *n = (yyvsp[-1].nd); p->lstate = EXPR_ENDARG; - if (intn((yyvsp[0].nd)->cdr->cdr) > 0) { + if (node_to_int((yyvsp[0].nd)->car) > 0) { n = push(n, (yyvsp[0].nd)); } else { cons_free((yyvsp[0].nd)); } - (yyval.nd) = new_dsym(p, new_dstr(p, n)); + (yyval.nd) = new_dsym(p, n); } -#line 9172 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11040 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 467: -#line 3496 "mrbgems/mruby-compiler/core/parse.y" + case 536: /* symbol: "symbol" "numbered parameter" */ +#line 4378 "mrbgems/mruby-compiler/core/parse.y" + { + mrb_sym sym = intern_numparam((yyvsp[0].num)); + (yyval.nd) = new_sym(p, sym); + } +#line 11049 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 537: /* basic_symbol: "symbol" sym */ +#line 4385 "mrbgems/mruby-compiler/core/parse.y" { + p->lstate = EXPR_END; (yyval.id) = (yyvsp[0].id); } -#line 9180 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11058 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 472: -#line 3506 "mrbgems/mruby-compiler/core/parse.y" + case 542: /* sym: tSTRING */ +#line 4396 "mrbgems/mruby-compiler/core/parse.y" { (yyval.id) = new_strsym(p, (yyvsp[0].nd)); } -#line 9188 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11066 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 473: -#line 3510 "mrbgems/mruby-compiler/core/parse.y" + case 543: /* sym: "string literal" tSTRING */ +#line 4400 "mrbgems/mruby-compiler/core/parse.y" { (yyval.id) = new_strsym(p, (yyvsp[0].nd)); } -#line 9196 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11074 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 474: -#line 3516 "mrbgems/mruby-compiler/core/parse.y" + case 544: /* symbols: tSYMBOLS_BEG tSTRING */ +#line 4406 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_symbols(p, list1((yyvsp[0].nd))); } -#line 9204 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11082 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 475: -#line 3520 "mrbgems/mruby-compiler/core/parse.y" + case 545: /* symbols: tSYMBOLS_BEG string_rep tSTRING */ +#line 4410 "mrbgems/mruby-compiler/core/parse.y" { node *n = (yyvsp[-1].nd); - if (intn((yyvsp[0].nd)->cdr->cdr) > 0) { - n = push(n, (yyvsp[0].nd)); - } + n = push(n, (yyvsp[0].nd)); (yyval.nd) = new_symbols(p, n); } -#line 9216 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11092 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 478: -#line 3532 "mrbgems/mruby-compiler/core/parse.y" + case 548: /* numeric: tUMINUS_NUM "integer literal" */ +#line 4420 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_negate(p, (yyvsp[0].nd)); } -#line 9224 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11100 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 479: -#line 3536 "mrbgems/mruby-compiler/core/parse.y" + case 549: /* numeric: tUMINUS_NUM "float literal" */ +#line 4424 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_negate(p, (yyvsp[0].nd)); } -#line 9232 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11108 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 480: -#line 3542 "mrbgems/mruby-compiler/core/parse.y" + case 550: /* variable: "local variable or method" */ +#line 4430 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_lvar(p, (yyvsp[0].id)); } -#line 9240 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11116 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 481: -#line 3546 "mrbgems/mruby-compiler/core/parse.y" + case 551: /* variable: "instance variable" */ +#line 4434 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_ivar(p, (yyvsp[0].id)); } -#line 9248 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11124 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 482: -#line 3550 "mrbgems/mruby-compiler/core/parse.y" + case 552: /* variable: "global variable" */ +#line 4438 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_gvar(p, (yyvsp[0].id)); } -#line 9256 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11132 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 483: -#line 3554 "mrbgems/mruby-compiler/core/parse.y" + case 553: /* variable: "class variable" */ +#line 4442 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_cvar(p, (yyvsp[0].id)); } -#line 9264 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11140 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 484: -#line 3558 "mrbgems/mruby-compiler/core/parse.y" + case 554: /* variable: "constant" */ +#line 4446 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_const(p, (yyvsp[0].id)); } -#line 9272 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11148 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 485: -#line 3564 "mrbgems/mruby-compiler/core/parse.y" + case 555: /* var_lhs: variable */ +#line 4452 "mrbgems/mruby-compiler/core/parse.y" { assignable(p, (yyvsp[0].nd)); } -#line 9280 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11156 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 486: -#line 3568 "mrbgems/mruby-compiler/core/parse.y" + case 556: /* var_lhs: "numbered parameter" */ +#line 4456 "mrbgems/mruby-compiler/core/parse.y" { - yyerror(p, "can't assign to numbered parameter"); + yyerror(&(yylsp[0]), p, "can't assign to numbered parameter"); } -#line 9288 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11164 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 487: -#line 3574 "mrbgems/mruby-compiler/core/parse.y" + case 557: /* var_ref: variable */ +#line 4462 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = var_reference(p, (yyvsp[0].nd)); } -#line 9296 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11172 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 488: -#line 3578 "mrbgems/mruby-compiler/core/parse.y" + case 558: /* var_ref: "numbered parameter" */ +#line 4466 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_nvar(p, (yyvsp[0].num)); + } +#line 11180 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 559: /* var_ref: "'nil'" */ +#line 4470 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_nil(p); } -#line 9304 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11188 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 489: -#line 3582 "mrbgems/mruby-compiler/core/parse.y" + case 560: /* var_ref: "'self'" */ +#line 4474 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_self(p); } -#line 9312 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11196 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 490: -#line 3586 "mrbgems/mruby-compiler/core/parse.y" + case 561: /* var_ref: "'true'" */ +#line 4478 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_true(p); } -#line 9320 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11204 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 491: -#line 3590 "mrbgems/mruby-compiler/core/parse.y" + case 562: /* var_ref: "'false'" */ +#line 4482 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_false(p); } -#line 9328 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11212 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 492: -#line 3594 "mrbgems/mruby-compiler/core/parse.y" + case 563: /* var_ref: "'__FILE__'" */ +#line 4486 "mrbgems/mruby-compiler/core/parse.y" { const char *fn = mrb_sym_name_len(p->mrb, p->filename_sym, NULL); if (!fn) { fn = "(null)"; } - (yyval.nd) = new_str(p, fn, strlen(fn)); + (yyval.nd) = new_str(p, cons(cons(int_to_node(strlen(fn)), (node*)fn), (node*)NULL)); } -#line 9340 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11224 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 493: -#line 3602 "mrbgems/mruby-compiler/core/parse.y" + case 564: /* var_ref: "'__LINE__'" */ +#line 4494 "mrbgems/mruby-compiler/core/parse.y" { char buf[16]; dump_int(p->lineno, buf); (yyval.nd) = new_int(p, buf, 10, 0); } -#line 9351 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11235 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 494: -#line 3609 "mrbgems/mruby-compiler/core/parse.y" + case 565: /* var_ref: "'__ENCODING__'" */ +#line 4501 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_fcall(p, MRB_SYM_2(p->mrb, __ENCODING__), 0); + (yyval.nd) = new_fcall(p, MRB_SYM(__ENCODING__), 0); } -#line 9359 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11243 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 497: -#line 3619 "mrbgems/mruby-compiler/core/parse.y" + case 568: /* superclass: %empty */ +#line 4511 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = 0; } -#line 9367 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11251 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 498: -#line 3623 "mrbgems/mruby-compiler/core/parse.y" + case 569: /* $@36: %empty */ +#line 4515 "mrbgems/mruby-compiler/core/parse.y" { p->lstate = EXPR_BEG; p->cmd_start = TRUE; } -#line 9376 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11260 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 499: -#line 3628 "mrbgems/mruby-compiler/core/parse.y" + case 570: /* superclass: '<' $@36 expr_value term */ +#line 4520 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = (yyvsp[-1].nd); } -#line 9384 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11268 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 502: -#line 3644 "mrbgems/mruby-compiler/core/parse.y" + case 573: /* f_arglist_paren: '(' f_args rparen */ +#line 4536 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = (yyvsp[-1].nd); p->lstate = EXPR_BEG; p->cmd_start = TRUE; } -#line 9394 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11278 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 503: -#line 3650 "mrbgems/mruby-compiler/core/parse.y" + case 574: /* f_arglist_paren: '(' f_arg ',' tBDOT3 rparen */ +#line 4542 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_args_dots(p, (yyvsp[-3].nd)); } -#line 9402 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11286 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 504: -#line 3654 "mrbgems/mruby-compiler/core/parse.y" + case 575: /* f_arglist_paren: '(' tBDOT3 rparen */ +#line 4546 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_args_dots(p, 0); } -#line 9410 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11294 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 506: -#line 3661 "mrbgems/mruby-compiler/core/parse.y" + case 577: /* f_arglist: f_args term */ +#line 4553 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = (yyvsp[-1].nd); } -#line 9418 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11302 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 507: -#line 3665 "mrbgems/mruby-compiler/core/parse.y" + case 578: /* f_arglist: f_arg ',' tBDOT3 term */ +#line 4557 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_args_dots(p, (yyvsp[-3].nd)); } -#line 9426 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11310 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 508: -#line 3669 "mrbgems/mruby-compiler/core/parse.y" + case 579: /* f_arglist: "..." term */ +#line 4561 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_args_dots(p, 0); } -#line 9434 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11318 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 509: -#line 3675 "mrbgems/mruby-compiler/core/parse.y" + case 580: /* f_label: "local variable or method" "label" */ +#line 4567 "mrbgems/mruby-compiler/core/parse.y" { + (yyval.id) = (yyvsp[-1].id); local_nest(p); + p->lstate = EXPR_MID; /* make newlines significant after label */ } -#line 9442 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11328 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 510: -#line 3679 "mrbgems/mruby-compiler/core/parse.y" + case 581: /* f_label: "numbered parameter" "label" */ +#line 4573 "mrbgems/mruby-compiler/core/parse.y" { + (yyval.id) = intern_numparam((yyvsp[-1].num)); local_nest(p); + p->lstate = EXPR_MID; /* make newlines significant after label */ } -#line 9450 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11338 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 511: -#line 3685 "mrbgems/mruby-compiler/core/parse.y" + case 582: /* f_kw: f_label arg */ +#line 4581 "mrbgems/mruby-compiler/core/parse.y" { void_expr_error(p, (yyvsp[0].nd)); (yyval.nd) = new_kw_arg(p, (yyvsp[-1].id), cons((yyvsp[0].nd), locals_node(p))); local_unnest(p); } -#line 9460 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11348 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 512: -#line 3691 "mrbgems/mruby-compiler/core/parse.y" + case 583: /* f_kw: f_label */ +#line 4587 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_kw_arg(p, (yyvsp[0].id), 0); local_unnest(p); } -#line 9469 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11357 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 513: -#line 3698 "mrbgems/mruby-compiler/core/parse.y" + case 584: /* f_block_kw: f_label primary_value */ +#line 4594 "mrbgems/mruby-compiler/core/parse.y" { void_expr_error(p, (yyvsp[0].nd)); (yyval.nd) = new_kw_arg(p, (yyvsp[-1].id), cons((yyvsp[0].nd), locals_node(p))); local_unnest(p); } -#line 9479 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11367 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 514: -#line 3704 "mrbgems/mruby-compiler/core/parse.y" + case 585: /* f_block_kw: f_label */ +#line 4600 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_kw_arg(p, (yyvsp[0].id), 0); local_unnest(p); } -#line 9488 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11376 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 515: -#line 3711 "mrbgems/mruby-compiler/core/parse.y" + case 586: /* f_block_kwarg: f_block_kw */ +#line 4607 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = list1((yyvsp[0].nd)); } -#line 9496 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11384 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 516: -#line 3715 "mrbgems/mruby-compiler/core/parse.y" + case 587: /* f_block_kwarg: f_block_kwarg ',' f_block_kw */ +#line 4611 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = push((yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 9504 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11392 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 517: -#line 3721 "mrbgems/mruby-compiler/core/parse.y" + case 588: /* f_kwarg: f_kw */ +#line 4617 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = list1((yyvsp[0].nd)); } -#line 9512 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11400 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 518: -#line 3725 "mrbgems/mruby-compiler/core/parse.y" + case 589: /* f_kwarg: f_kwarg ',' f_kw */ +#line 4621 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = push((yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 9520 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11408 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 521: -#line 3735 "mrbgems/mruby-compiler/core/parse.y" + case 592: /* f_kwrest: kwrest_mark "local variable or method" */ +#line 4631 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_kw_rest_args(p, (yyvsp[0].id)); + (yyval.id) = (yyvsp[0].id); } -#line 9528 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11416 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 522: -#line 3739 "mrbgems/mruby-compiler/core/parse.y" + case 593: /* f_kwrest: kwrest_mark */ +#line 4635 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_kw_rest_args(p, 0); + (yyval.id) = intern_op(pow); } -#line 9536 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11424 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 523: -#line 3745 "mrbgems/mruby-compiler/core/parse.y" + case 594: /* args_tail: f_kwarg ',' f_kwrest opt_f_block_arg */ +#line 4641 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_args_tail(p, (yyvsp[-3].nd), (yyvsp[-1].nd), (yyvsp[0].id)); + (yyval.nd) = new_args_tail(p, (yyvsp[-3].nd), (yyvsp[-1].id), (yyvsp[0].id)); } -#line 9544 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11432 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 524: -#line 3749 "mrbgems/mruby-compiler/core/parse.y" + case 595: /* args_tail: f_kwarg opt_f_block_arg */ +#line 4645 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_args_tail(p, (yyvsp[-1].nd), 0, (yyvsp[0].id)); } -#line 9552 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11440 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 525: -#line 3753 "mrbgems/mruby-compiler/core/parse.y" + case 596: /* args_tail: f_kwrest opt_f_block_arg */ +#line 4649 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_args_tail(p, 0, (yyvsp[-1].nd), (yyvsp[0].id)); + (yyval.nd) = new_args_tail(p, 0, (yyvsp[-1].id), (yyvsp[0].id)); } -#line 9560 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11448 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 526: -#line 3757 "mrbgems/mruby-compiler/core/parse.y" + case 597: /* args_tail: f_block_arg */ +#line 4653 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_args_tail(p, 0, 0, (yyvsp[0].id)); } -#line 9568 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11456 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 527: -#line 3763 "mrbgems/mruby-compiler/core/parse.y" + case 598: /* opt_args_tail: ',' args_tail */ +#line 4659 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = (yyvsp[0].nd); } -#line 9576 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11464 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 528: -#line 3767 "mrbgems/mruby-compiler/core/parse.y" + case 599: /* opt_args_tail: ',' */ +#line 4663 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_args_tail(p, 0, 0, 0); } -#line 9584 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11472 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 529: -#line 3773 "mrbgems/mruby-compiler/core/parse.y" + case 600: /* opt_args_tail: %empty */ +#line 4667 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.nd) = new_args_tail(p, 0, 0, 0); + } +#line 11480 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 601: /* f_args: f_arg ',' f_optarg ',' f_rest_arg opt_args_tail */ +#line 4673 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_args(p, (yyvsp[-5].nd), (yyvsp[-3].nd), (yyvsp[-1].id), 0, (yyvsp[0].nd)); } -#line 9592 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11488 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 530: -#line 3777 "mrbgems/mruby-compiler/core/parse.y" + case 602: /* f_args: f_arg ',' f_optarg ',' f_rest_arg ',' f_arg opt_args_tail */ +#line 4677 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_args(p, (yyvsp[-7].nd), (yyvsp[-5].nd), (yyvsp[-3].id), (yyvsp[-1].nd), (yyvsp[0].nd)); } -#line 9600 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11496 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 531: -#line 3781 "mrbgems/mruby-compiler/core/parse.y" + case 603: /* f_args: f_arg ',' f_optarg opt_args_tail */ +#line 4681 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_args(p, (yyvsp[-3].nd), (yyvsp[-1].nd), 0, 0, (yyvsp[0].nd)); } -#line 9608 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11504 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 532: -#line 3785 "mrbgems/mruby-compiler/core/parse.y" + case 604: /* f_args: f_arg ',' f_optarg ',' f_arg opt_args_tail */ +#line 4685 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_args(p, (yyvsp[-5].nd), (yyvsp[-3].nd), 0, (yyvsp[-1].nd), (yyvsp[0].nd)); } -#line 9616 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11512 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 533: -#line 3789 "mrbgems/mruby-compiler/core/parse.y" + case 605: /* f_args: f_arg ',' f_rest_arg opt_args_tail */ +#line 4689 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_args(p, (yyvsp[-3].nd), 0, (yyvsp[-1].id), 0, (yyvsp[0].nd)); } -#line 9624 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11520 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 534: -#line 3793 "mrbgems/mruby-compiler/core/parse.y" + case 606: /* f_args: f_arg ',' f_rest_arg ',' f_arg opt_args_tail */ +#line 4693 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_args(p, (yyvsp[-5].nd), 0, (yyvsp[-3].id), (yyvsp[-1].nd), (yyvsp[0].nd)); } -#line 9632 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11528 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 535: -#line 3797 "mrbgems/mruby-compiler/core/parse.y" + case 607: /* f_args: f_arg opt_args_tail */ +#line 4697 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_args(p, (yyvsp[-1].nd), 0, 0, 0, (yyvsp[0].nd)); } -#line 9640 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11536 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 536: -#line 3801 "mrbgems/mruby-compiler/core/parse.y" + case 608: /* f_args: f_optarg ',' f_rest_arg opt_args_tail */ +#line 4701 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_args(p, 0, (yyvsp[-3].nd), (yyvsp[-1].id), 0, (yyvsp[0].nd)); } -#line 9648 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11544 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 537: -#line 3805 "mrbgems/mruby-compiler/core/parse.y" + case 609: /* f_args: f_optarg ',' f_rest_arg ',' f_arg opt_args_tail */ +#line 4705 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_args(p, 0, (yyvsp[-5].nd), (yyvsp[-3].id), (yyvsp[-1].nd), (yyvsp[0].nd)); } -#line 9656 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11552 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 538: -#line 3809 "mrbgems/mruby-compiler/core/parse.y" + case 610: /* f_args: f_optarg opt_args_tail */ +#line 4709 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_args(p, 0, (yyvsp[-1].nd), 0, 0, (yyvsp[0].nd)); } -#line 9664 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11560 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 539: -#line 3813 "mrbgems/mruby-compiler/core/parse.y" + case 611: /* f_args: f_optarg ',' f_arg opt_args_tail */ +#line 4713 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_args(p, 0, (yyvsp[-3].nd), 0, (yyvsp[-1].nd), (yyvsp[0].nd)); } -#line 9672 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11568 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 540: -#line 3817 "mrbgems/mruby-compiler/core/parse.y" + case 612: /* f_args: f_rest_arg opt_args_tail */ +#line 4717 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_args(p, 0, 0, (yyvsp[-1].id), 0, (yyvsp[0].nd)); } -#line 9680 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11576 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 541: -#line 3821 "mrbgems/mruby-compiler/core/parse.y" + case 613: /* f_args: f_rest_arg ',' f_arg opt_args_tail */ +#line 4721 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_args(p, 0, 0, (yyvsp[-3].id), (yyvsp[-1].nd), (yyvsp[0].nd)); } -#line 9688 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11584 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 542: -#line 3825 "mrbgems/mruby-compiler/core/parse.y" + case 614: /* f_args: args_tail */ +#line 4725 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = new_args(p, 0, 0, 0, 0, (yyvsp[0].nd)); } -#line 9696 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11592 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 543: -#line 3829 "mrbgems/mruby-compiler/core/parse.y" + case 615: /* f_args: %empty */ +#line 4729 "mrbgems/mruby-compiler/core/parse.y" { local_add_f(p, 0); (yyval.nd) = new_args(p, 0, 0, 0, 0, 0); } -#line 9705 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11601 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 544: -#line 3836 "mrbgems/mruby-compiler/core/parse.y" + case 616: /* f_bad_arg: "constant" */ +#line 4736 "mrbgems/mruby-compiler/core/parse.y" { - yyerror(p, "formal argument cannot be a constant"); + yyerror(&(yylsp[0]), p, "formal argument cannot be a constant"); (yyval.nd) = 0; } -#line 9714 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11610 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 545: -#line 3841 "mrbgems/mruby-compiler/core/parse.y" + case 617: /* f_bad_arg: "instance variable" */ +#line 4741 "mrbgems/mruby-compiler/core/parse.y" { - yyerror(p, "formal argument cannot be an instance variable"); + yyerror(&(yylsp[0]), p, "formal argument cannot be an instance variable"); (yyval.nd) = 0; } -#line 9723 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11619 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 546: -#line 3846 "mrbgems/mruby-compiler/core/parse.y" + case 618: /* f_bad_arg: "global variable" */ +#line 4746 "mrbgems/mruby-compiler/core/parse.y" { - yyerror(p, "formal argument cannot be a global variable"); + yyerror(&(yylsp[0]), p, "formal argument cannot be a global variable"); (yyval.nd) = 0; } -#line 9732 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11628 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 547: -#line 3851 "mrbgems/mruby-compiler/core/parse.y" + case 619: /* f_bad_arg: "class variable" */ +#line 4751 "mrbgems/mruby-compiler/core/parse.y" { - yyerror(p, "formal argument cannot be a class variable"); + yyerror(&(yylsp[0]), p, "formal argument cannot be a class variable"); (yyval.nd) = 0; } -#line 9741 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11637 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 548: -#line 3856 "mrbgems/mruby-compiler/core/parse.y" + case 620: /* f_bad_arg: "numbered parameter" */ +#line 4756 "mrbgems/mruby-compiler/core/parse.y" { - yyerror(p, "formal argument cannot be a numbered parameter"); + yyerror(&(yylsp[0]), p, "formal argument cannot be a numbered parameter"); (yyval.nd) = 0; } -#line 9750 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11646 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 549: -#line 3863 "mrbgems/mruby-compiler/core/parse.y" + case 621: /* f_norm_arg: f_bad_arg */ +#line 4763 "mrbgems/mruby-compiler/core/parse.y" { (yyval.id) = 0; } -#line 9758 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11654 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 550: -#line 3867 "mrbgems/mruby-compiler/core/parse.y" + case 622: /* f_norm_arg: "local variable or method" */ +#line 4767 "mrbgems/mruby-compiler/core/parse.y" { local_add_f(p, (yyvsp[0].id)); (yyval.id) = (yyvsp[0].id); } -#line 9767 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11663 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 551: -#line 3874 "mrbgems/mruby-compiler/core/parse.y" + case 623: /* f_arg_item: f_norm_arg */ +#line 4774 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_arg(p, (yyvsp[0].id)); + (yyval.nd) = new_lvar(p, (yyvsp[0].id)); } -#line 9775 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11671 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 552: -#line 3878 "mrbgems/mruby-compiler/core/parse.y" + case 624: /* @37: %empty */ +#line 4778 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = local_switch(p); } -#line 9783 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11679 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 553: -#line 3882 "mrbgems/mruby-compiler/core/parse.y" + case 625: /* f_arg_item: tLPAREN @37 f_margs rparen */ +#line 4782 "mrbgems/mruby-compiler/core/parse.y" { - (yyval.nd) = new_masgn_param(p, (yyvsp[-1].nd), p->locals->car); + (yyval.nd) = new_marg(p, (yyvsp[-1].nd)); local_resume(p, (yyvsp[-2].nd)); local_add_f(p, 0); } -#line 9793 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11689 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 554: -#line 3890 "mrbgems/mruby-compiler/core/parse.y" + case 626: /* f_arg: f_arg_item */ +#line 4790 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = list1((yyvsp[0].nd)); } -#line 9801 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11697 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 555: -#line 3894 "mrbgems/mruby-compiler/core/parse.y" + case 627: /* f_arg: f_arg ',' f_arg_item */ +#line 4794 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = push((yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 9809 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11705 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 556: -#line 3900 "mrbgems/mruby-compiler/core/parse.y" + case 628: /* f_opt_asgn: "local variable or method" '=' */ +#line 4800 "mrbgems/mruby-compiler/core/parse.y" { local_add_f(p, (yyvsp[-1].id)); local_nest(p); (yyval.id) = (yyvsp[-1].id); } -#line 9819 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11715 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 557: -#line 3908 "mrbgems/mruby-compiler/core/parse.y" + case 629: /* f_opt: f_opt_asgn arg */ +#line 4808 "mrbgems/mruby-compiler/core/parse.y" { void_expr_error(p, (yyvsp[0].nd)); - (yyval.nd) = cons(nsym((yyvsp[-1].id)), cons((yyvsp[0].nd), locals_node(p))); + (yyval.nd) = cons(sym_to_node((yyvsp[-1].id)), cons((yyvsp[0].nd), locals_node(p))); local_unnest(p); } -#line 9829 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11725 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 558: -#line 3916 "mrbgems/mruby-compiler/core/parse.y" + case 630: /* f_block_opt: f_opt_asgn primary_value */ +#line 4816 "mrbgems/mruby-compiler/core/parse.y" { void_expr_error(p, (yyvsp[0].nd)); - (yyval.nd) = cons(nsym((yyvsp[-1].id)), cons((yyvsp[0].nd), locals_node(p))); + (yyval.nd) = cons(sym_to_node((yyvsp[-1].id)), cons((yyvsp[0].nd), locals_node(p))); local_unnest(p); } -#line 9839 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11735 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 559: -#line 3924 "mrbgems/mruby-compiler/core/parse.y" + case 631: /* f_block_optarg: f_block_opt */ +#line 4824 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = list1((yyvsp[0].nd)); } -#line 9847 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11743 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 560: -#line 3928 "mrbgems/mruby-compiler/core/parse.y" + case 632: /* f_block_optarg: f_block_optarg ',' f_block_opt */ +#line 4828 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = push((yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 9855 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11751 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 561: -#line 3934 "mrbgems/mruby-compiler/core/parse.y" + case 633: /* f_optarg: f_opt */ +#line 4834 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = list1((yyvsp[0].nd)); } -#line 9863 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11759 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 562: -#line 3938 "mrbgems/mruby-compiler/core/parse.y" + case 634: /* f_optarg: f_optarg ',' f_opt */ +#line 4838 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = push((yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 9871 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11767 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 565: -#line 3948 "mrbgems/mruby-compiler/core/parse.y" + case 637: /* f_rest_arg: restarg_mark "local variable or method" */ +#line 4848 "mrbgems/mruby-compiler/core/parse.y" { local_add_f(p, (yyvsp[0].id)); (yyval.id) = (yyvsp[0].id); } -#line 9880 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11776 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 566: -#line 3953 "mrbgems/mruby-compiler/core/parse.y" + case 638: /* f_rest_arg: restarg_mark */ +#line 4853 "mrbgems/mruby-compiler/core/parse.y" { (yyval.id) = intern_op(mul); local_add_f(p, (yyval.id)); } -#line 9889 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11785 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 569: -#line 3964 "mrbgems/mruby-compiler/core/parse.y" + case 641: /* f_block_arg: blkarg_mark "local variable or method" */ +#line 4864 "mrbgems/mruby-compiler/core/parse.y" { (yyval.id) = (yyvsp[0].id); } -#line 9897 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11793 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 642: /* f_block_arg: blkarg_mark "'nil'" */ +#line 4868 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.id) = MRB_SYM(nil); + } +#line 11801 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 570: -#line 3968 "mrbgems/mruby-compiler/core/parse.y" + case 643: /* f_block_arg: blkarg_mark */ +#line 4872 "mrbgems/mruby-compiler/core/parse.y" { (yyval.id) = intern_op(and); } -#line 9905 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11809 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 571: -#line 3974 "mrbgems/mruby-compiler/core/parse.y" + case 644: /* opt_f_block_arg: ',' f_block_arg */ +#line 4878 "mrbgems/mruby-compiler/core/parse.y" { (yyval.id) = (yyvsp[0].id); } -#line 9913 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11817 "mrbgems/mruby-compiler/core/y.tab.c" + break; + + case 645: /* opt_f_block_arg: ',' */ +#line 4882 "mrbgems/mruby-compiler/core/parse.y" + { + (yyval.id) = 0; + } +#line 11825 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 572: -#line 3978 "mrbgems/mruby-compiler/core/parse.y" + case 646: /* opt_f_block_arg: none */ +#line 4886 "mrbgems/mruby-compiler/core/parse.y" { (yyval.id) = 0; } -#line 9921 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11833 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 573: -#line 3984 "mrbgems/mruby-compiler/core/parse.y" + case 647: /* singleton: var_ref */ +#line 4892 "mrbgems/mruby-compiler/core/parse.y" { + prohibit_literals(p, (yyvsp[0].nd)); (yyval.nd) = (yyvsp[0].nd); if (!(yyval.nd)) (yyval.nd) = new_nil(p); } -#line 9930 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11843 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 574: -#line 3988 "mrbgems/mruby-compiler/core/parse.y" + case 648: /* $@38: %empty */ +#line 4897 "mrbgems/mruby-compiler/core/parse.y" {p->lstate = EXPR_BEG;} -#line 9936 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11849 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 575: -#line 3989 "mrbgems/mruby-compiler/core/parse.y" + case 649: /* singleton: '(' $@38 expr rparen */ +#line 4898 "mrbgems/mruby-compiler/core/parse.y" { - if ((yyvsp[-1].nd) == 0) { - yyerror(p, "can't define singleton method for ()."); - } - else { - switch (typen((yyvsp[-1].nd)->car)) { - case NODE_STR: - case NODE_DSTR: - case NODE_XSTR: - case NODE_DXSTR: - case NODE_DREGX: - case NODE_MATCH: - case NODE_FLOAT: - case NODE_ARRAY: - case NODE_HEREDOC: - yyerror(p, "can't define singleton method for literals"); - default: - break; - } - } + prohibit_literals(p, (yyvsp[-1].nd)); (yyval.nd) = (yyvsp[-1].nd); } -#line 9963 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11858 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 577: -#line 4015 "mrbgems/mruby-compiler/core/parse.y" + case 651: /* assoc_list: assocs trailer */ +#line 4906 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = (yyvsp[-1].nd); } -#line 9971 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11866 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 578: -#line 4021 "mrbgems/mruby-compiler/core/parse.y" + case 652: /* assocs: assoc */ +#line 4912 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = list1((yyvsp[0].nd)); - NODE_LINENO((yyval.nd), (yyvsp[0].nd)); } -#line 9980 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11874 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 579: -#line 4026 "mrbgems/mruby-compiler/core/parse.y" + case 653: /* assocs: assocs comma assoc */ +#line 4916 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = push((yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 9988 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11882 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 580: -#line 4032 "mrbgems/mruby-compiler/core/parse.y" + case 654: /* assoc: arg "=>" arg */ +#line 4922 "mrbgems/mruby-compiler/core/parse.y" { void_expr_error(p, (yyvsp[-2].nd)); void_expr_error(p, (yyvsp[0].nd)); (yyval.nd) = cons((yyvsp[-2].nd), (yyvsp[0].nd)); } -#line 9998 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11892 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 581: -#line 4038 "mrbgems/mruby-compiler/core/parse.y" + case 655: /* assoc: "local variable or method" "label" arg */ +#line 4928 "mrbgems/mruby-compiler/core/parse.y" { void_expr_error(p, (yyvsp[0].nd)); (yyval.nd) = cons(new_sym(p, (yyvsp[-2].id)), (yyvsp[0].nd)); } -#line 10007 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11901 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 582: -#line 4043 "mrbgems/mruby-compiler/core/parse.y" + case 656: /* assoc: "local variable or method" "label" */ +#line 4933 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = cons(new_sym(p, (yyvsp[-1].id)), label_reference(p, (yyvsp[-1].id))); } -#line 10015 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11909 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 583: -#line 4047 "mrbgems/mruby-compiler/core/parse.y" + case 657: /* assoc: "numbered parameter" "label" */ +#line 4937 "mrbgems/mruby-compiler/core/parse.y" { mrb_sym sym = intern_numparam((yyvsp[-1].num)); (yyval.nd) = cons(new_sym(p, sym), label_reference(p, sym)); } -#line 10024 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11918 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 584: -#line 4052 "mrbgems/mruby-compiler/core/parse.y" + case 658: /* assoc: "numbered parameter" "label" arg */ +#line 4942 "mrbgems/mruby-compiler/core/parse.y" { void_expr_error(p, (yyvsp[0].nd)); (yyval.nd) = cons(new_sym(p, intern_numparam((yyvsp[-2].num))), (yyvsp[0].nd)); } -#line 10033 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11927 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 585: -#line 4057 "mrbgems/mruby-compiler/core/parse.y" + case 659: /* assoc: string_fragment "label" arg */ +#line 4947 "mrbgems/mruby-compiler/core/parse.y" { void_expr_error(p, (yyvsp[0].nd)); - if (typen((yyvsp[-2].nd)->car) == NODE_DSTR) { + if ((yyvsp[-2].nd)->cdr) { + /* Multiple fragments - create dynamic symbol */ + (yyval.nd) = cons(new_dsym(p, (yyvsp[-2].nd)), (yyvsp[0].nd)); + } + else if (node_to_int((yyvsp[-2].nd)->car->car) < 0) { + /* Single fragment but it's an expression (-1 . node) - create dynamic symbol */ (yyval.nd) = cons(new_dsym(p, (yyvsp[-2].nd)), (yyvsp[0].nd)); } else { - (yyval.nd) = cons(new_sym(p, new_strsym(p, (yyvsp[-2].nd))), (yyvsp[0].nd)); + /* Single string fragment - create simple symbol */ + (yyval.nd) = cons(new_sym(p, new_strsym(p, (yyvsp[-2].nd)->car)), (yyvsp[0].nd)); } } -#line 10047 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11947 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 586: -#line 4067 "mrbgems/mruby-compiler/core/parse.y" + case 660: /* assoc: "**" arg */ +#line 4963 "mrbgems/mruby-compiler/core/parse.y" { void_expr_error(p, (yyvsp[0].nd)); (yyval.nd) = cons(new_kw_rest_args(p, 0), (yyvsp[0].nd)); } -#line 10056 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11956 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 587: -#line 4072 "mrbgems/mruby-compiler/core/parse.y" + case 661: /* assoc: "**" */ +#line 4968 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = cons(new_kw_rest_args(p, 0), new_lvar(p, intern_op(pow))); } -#line 10064 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11964 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 600: -#line 4098 "mrbgems/mruby-compiler/core/parse.y" + case 674: /* call_op: '.' */ +#line 4994 "mrbgems/mruby-compiler/core/parse.y" { (yyval.num) = '.'; } -#line 10072 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11972 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 601: -#line 4102 "mrbgems/mruby-compiler/core/parse.y" + case 675: /* call_op: "&." */ +#line 4998 "mrbgems/mruby-compiler/core/parse.y" { (yyval.num) = 0; } -#line 10080 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11980 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 603: -#line 4109 "mrbgems/mruby-compiler/core/parse.y" + case 677: /* call_op2: "::" */ +#line 5005 "mrbgems/mruby-compiler/core/parse.y" { (yyval.num) = tCOLON2; } -#line 10088 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11988 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 612: -#line 4130 "mrbgems/mruby-compiler/core/parse.y" + case 686: /* term: ';' */ +#line 5026 "mrbgems/mruby-compiler/core/parse.y" {yyerrok;} -#line 10094 "mrbgems/mruby-compiler/core/y.tab.c" +#line 11994 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 614: -#line 4135 "mrbgems/mruby-compiler/core/parse.y" + case 688: /* nl: '\n' */ +#line 5031 "mrbgems/mruby-compiler/core/parse.y" { p->lineno += (yyvsp[0].num); p->column = 0; } -#line 10103 "mrbgems/mruby-compiler/core/y.tab.c" +#line 12003 "mrbgems/mruby-compiler/core/y.tab.c" break; - case 618: -#line 4147 "mrbgems/mruby-compiler/core/parse.y" + case 692: /* none: %empty */ +#line 5043 "mrbgems/mruby-compiler/core/parse.y" { (yyval.nd) = 0; } -#line 10111 "mrbgems/mruby-compiler/core/y.tab.c" +#line 12011 "mrbgems/mruby-compiler/core/y.tab.c" break; -#line 10115 "mrbgems/mruby-compiler/core/y.tab.c" +#line 12015 "mrbgems/mruby-compiler/core/y.tab.c" default: break; } @@ -10126,13 +12026,14 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); case of YYERROR or YYBACKUP, subsequent parser actions might lead to an incorrect destructor call or verbose syntax error message before the lookahead is translated. */ - YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); + YY_SYMBOL_PRINT ("-> $$ =", YY_CAST (yysymbol_kind_t, yyr1[yyn]), &yyval, &yyloc, p); YYPOPSTACK (yylen); + yylen = 0; - YY_STACK_PRINT (yyss, yyssp); *++yyvsp = yyval; + *++yylsp = yyloc; /* Now 'shift' the result of the reduction. Determine what state that goes to, based on the state we popped back to and the rule @@ -10154,50 +12055,45 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); yyerrlab: /* Make sure we have latest lookahead translation. See comments at user semantic actions for why this is necessary. */ - yytoken = yychar == YYEMPTY ? YYEMPTY : YYTRANSLATE (yychar); - + yytoken = yychar == YYEMPTY ? YYSYMBOL_YYEMPTY : YYTRANSLATE (yychar); /* If not already recovering from an error, report this error. */ if (!yyerrstatus) { ++yynerrs; -#if ! YYERROR_VERBOSE - yyerror (p, YY_("syntax error")); -#else -# define YYSYNTAX_ERROR yysyntax_error (&yymsg_alloc, &yymsg, \ - yyssp, yytoken) { + yypcontext_t yyctx + = {yyssp, yytoken, &yylloc}; char const *yymsgp = YY_("syntax error"); int yysyntax_error_status; - yysyntax_error_status = YYSYNTAX_ERROR; + yysyntax_error_status = yysyntax_error (&yymsg_alloc, &yymsg, &yyctx, p); if (yysyntax_error_status == 0) yymsgp = yymsg; - else if (yysyntax_error_status == 1) + else if (yysyntax_error_status == -1) { if (yymsg != yymsgbuf) YYSTACK_FREE (yymsg); - yymsg = YY_CAST (char *, YYSTACK_ALLOC (YY_CAST (YYSIZE_T, yymsg_alloc))); - if (!yymsg) + yymsg = YY_CAST (char *, + YYSTACK_ALLOC (YY_CAST (YYSIZE_T, yymsg_alloc))); + if (yymsg) { - yymsg = yymsgbuf; - yymsg_alloc = sizeof yymsgbuf; - yysyntax_error_status = 2; + yysyntax_error_status + = yysyntax_error (&yymsg_alloc, &yymsg, &yyctx, p); + yymsgp = yymsg; } else { - yysyntax_error_status = YYSYNTAX_ERROR; - yymsgp = yymsg; + yymsg = yymsgbuf; + yymsg_alloc = sizeof yymsgbuf; + yysyntax_error_status = YYENOMEM; } } - yyerror (p, yymsgp); - if (yysyntax_error_status == 2) - goto yyexhaustedlab; + yyerror (&yylloc, p, yymsgp); + if (yysyntax_error_status == YYENOMEM) + YYNOMEM; } -# undef YYSYNTAX_ERROR -#endif } - - + yyerror_range[1] = yylloc; if (yyerrstatus == 3) { /* If just tried and failed to reuse lookahead token after an @@ -10212,7 +12108,7 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); else { yydestruct ("Error: discarding", - yytoken, &yylval, p); + yytoken, &yylval, &yylloc, p); yychar = YYEMPTY; } } @@ -10230,12 +12126,14 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); label yyerrorlab therefore never appears in user code. */ if (0) YYERROR; + ++yynerrs; /* Do not reclaim the symbols of the rule whose action triggered this YYERROR. */ YYPOPSTACK (yylen); + yylen = 0; - YY_STACK_PRINT (yyss, yyssp); + YY_STACK_PRINT (yyss, yyssp, p); yystate = *yyssp; goto yyerrlab1; @@ -10246,13 +12144,14 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); yyerrlab1: yyerrstatus = 3; /* Each real token shifted decrements this. */ + /* Pop stack until we find a state that shifts the error token. */ for (;;) { yyn = yypact[yystate]; if (!yypact_value_is_default (yyn)) { - yyn += YYTERROR; - if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) + yyn += YYSYMBOL_YYerror; + if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYSYMBOL_YYerror) { yyn = yytable[yyn]; if (0 < yyn) @@ -10264,21 +12163,26 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); if (yyssp == yyss) YYABORT; - + yyerror_range[1] = *yylsp; yydestruct ("Error: popping", - yystos[yystate], yyvsp, p); + YY_ACCESSING_SYMBOL (yystate), yyvsp, yylsp, p); YYPOPSTACK (1); + yystate = *yyssp; - YY_STACK_PRINT (yyss, yyssp); + YY_STACK_PRINT (yyss, yyssp, p); } YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN *++yyvsp = yylval; YY_IGNORE_MAYBE_UNINITIALIZED_END + yyerror_range[2] = yylloc; + ++yylsp; + YYLLOC_DEFAULT (*yylsp, yyerror_range, 2); /* Shift the error token. */ - YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp); + YY_SYMBOL_PRINT ("Shifting", YY_ACCESSING_SYMBOL (yyn), yyvsp, yylsp, p); + yystate = yyn; goto yynewstate; @@ -10289,7 +12193,7 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); `-------------------------------------*/ yyacceptlab: yyresult = 0; - goto yyreturn; + goto yyreturnlab; /*-----------------------------------. @@ -10297,58 +12201,55 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); `-----------------------------------*/ yyabortlab: yyresult = 1; - goto yyreturn; + goto yyreturnlab; -#if !defined yyoverflow || YYERROR_VERBOSE -/*-------------------------------------------------. -| yyexhaustedlab -- memory exhaustion comes here. | -`-------------------------------------------------*/ +/*-----------------------------------------------------------. +| yyexhaustedlab -- YYNOMEM (memory exhaustion) comes here. | +`-----------------------------------------------------------*/ yyexhaustedlab: - yyerror (p, YY_("memory exhausted")); + yyerror (&yylloc, p, YY_("memory exhausted")); yyresult = 2; - /* Fall through. */ -#endif + goto yyreturnlab; -/*-----------------------------------------------------. -| yyreturn -- parsing is finished, return the result. | -`-----------------------------------------------------*/ -yyreturn: +/*----------------------------------------------------------. +| yyreturnlab -- parsing is finished, clean up and return. | +`----------------------------------------------------------*/ +yyreturnlab: if (yychar != YYEMPTY) { /* Make sure we have latest lookahead translation. See comments at user semantic actions for why this is necessary. */ yytoken = YYTRANSLATE (yychar); yydestruct ("Cleanup: discarding lookahead", - yytoken, &yylval, p); + yytoken, &yylval, &yylloc, p); } /* Do not reclaim the symbols of the rule whose action triggered this YYABORT or YYACCEPT. */ YYPOPSTACK (yylen); - YY_STACK_PRINT (yyss, yyssp); + YY_STACK_PRINT (yyss, yyssp, p); while (yyssp != yyss) { yydestruct ("Cleanup: popping", - yystos[+*yyssp], yyvsp, p); + YY_ACCESSING_SYMBOL (+*yyssp), yyvsp, yylsp, p); YYPOPSTACK (1); } #ifndef yyoverflow if (yyss != yyssa) YYSTACK_FREE (yyss); #endif -#if YYERROR_VERBOSE if (yymsg != yymsgbuf) YYSTACK_FREE (yymsg); -#endif return yyresult; } -#line 4151 "mrbgems/mruby-compiler/core/parse.y" + +#line 5047 "mrbgems/mruby-compiler/core/parse.y" #define pylval (*((YYSTYPE*)(p->ylval))) static void -yyerror(parser_state *p, const char *s) +yyerror(void *lp, parser_state *p, const char *s) { char* c; size_t n; @@ -10383,7 +12284,7 @@ yyerror_c(parser_state *p, const char *msg, char c) strncpy(buf, msg, sizeof(buf) - 2); buf[sizeof(buf) - 2] = '\0'; strncat(buf, &c, 1); - yyerror(p, buf); + yyerror(NULL, p, buf); } static void @@ -10431,52 +12332,72 @@ backref_error(parser_state *p, node *n) { int c; - c = intn(n->car); + c = node_to_int(n->car); if (c == NODE_NTH_REF) { - yyerror_c(p, "can't set variable $", (char)intn(n->cdr)+'0'); + yyerror_c(p, "can't set variable $", (char)node_to_int(n->cdr)+'0'); } else if (c == NODE_BACK_REF) { - yyerror_c(p, "can't set variable $", (char)intn(n->cdr)); + yyerror_c(p, "can't set variable $", (char)node_to_int(n->cdr)); } else { - mrb_bug(p->mrb, "Internal error in backref_error() : n=>car == %d", c); + yyerror(NULL, p, "Internal error in backref_error()"); } } static void void_expr_error(parser_state *p, node *n) { - int c; - if (n == NULL) return; - c = intn(n->car); - switch (c) { - case NODE_BREAK: - case NODE_RETURN: - case NODE_NEXT: - case NODE_REDO: - case NODE_RETRY: - yyerror(p, "void value expression"); - break; - case NODE_AND: - case NODE_OR: - if (n->cdr) { - void_expr_error(p, n->cdr->car); - void_expr_error(p, n->cdr->cdr); - } - break; - case NODE_BEGIN: - if (n->cdr) { - while (n->cdr) { - n = n->cdr; + + /* Check if this is a variable-sized node first */ + struct mrb_ast_var_header *header = (struct mrb_ast_var_header*)n; + if (header) { + /* Handle variable-sized nodes */ + switch ((enum node_type)header->node_type) { + case NODE_BREAK: + case NODE_RETURN: + case NODE_NEXT: + case NODE_REDO: + case NODE_RETRY: + yyerror(NULL, p, "void value expression"); + return; + case NODE_AND: + case NODE_OR: + { + struct mrb_ast_and_node *and_n = (struct mrb_ast_and_node*)n; + void_expr_error(p, (node*)and_n->left); + void_expr_error(p, (node*)and_n->right); + } + return; + case NODE_STMTS: + { + struct mrb_ast_stmts_node *stmts = (struct mrb_ast_stmts_node*)n; + node *last = stmts->stmts; + if (last) { + /* Find the last statement in the cons list */ + while (last->cdr) { + last = last->cdr; + } + void_expr_error(p, last->car); + } + } + return; + case NODE_BEGIN: + { + struct mrb_ast_begin_node *begin_n = (struct mrb_ast_begin_node*)n; + if (begin_n->body) { + void_expr_error(p, (node*)begin_n->body); + } } - void_expr_error(p, n->car); + return; + default: + /* Other variable-sized nodes are OK */ + return; } - break; - default: - break; } + + /* Should not reach here - all nodes should be variable-sized now */ } static void pushback(parser_state *p, int c); @@ -10510,7 +12431,7 @@ nextc(parser_state *p) if (p->pb) { node *tmp; - c = intn(p->pb->car); + c = node_to_int(p->pb->car); tmp = p->pb; p->pb = p->pb->cdr; cons_free(tmp); @@ -10546,7 +12467,7 @@ pushback(parser_state *p, int c) if (c >= 0) { p->column--; } - p->pb = cons(nint(c), p->pb); + p->pb = cons(int_to_node(c), p->pb); } static void @@ -10571,7 +12492,7 @@ peekc_n(parser_state *p, int n) c0 = nextc(p); if (c0 == -1) return c0; /* do not skip partial EOF */ if (c0 >= 0) --p->column; - list = push(list, nint(c0)); + list = push(list, int_to_node(c0)); } while(n--); if (p->pb) { p->pb = append(list, p->pb); @@ -10638,19 +12559,18 @@ skips(parser_state *p, const char *s) } return TRUE; } - else{ + else { s--; } } return FALSE; } - static int newtok(parser_state *p) { if (p->tokbuf != p->buf) { - mrb_free(p->mrb, p->tokbuf); + mrbc_free(p->tokbuf); p->tokbuf = p->buf; p->tsiz = MRB_PARSER_TOKBUF_SIZE; } @@ -10671,30 +12591,8 @@ tokadd(parser_state *p, int32_t c) len = 1; } else { - /* Unicode character */ - c = -c; - if (c < 0x80) { - utf8[0] = (char)c; - len = 1; - } - else if (c < 0x800) { - utf8[0] = (char)(0xC0 | (c >> 6)); - utf8[1] = (char)(0x80 | (c & 0x3F)); - len = 2; - } - else if (c < 0x10000) { - utf8[0] = (char)(0xE0 | (c >> 12) ); - utf8[1] = (char)(0x80 | ((c >> 6) & 0x3F)); - utf8[2] = (char)(0x80 | ( c & 0x3F)); - len = 3; - } - else { - utf8[0] = (char)(0xF0 | (c >> 18) ); - utf8[1] = (char)(0x80 | ((c >> 12) & 0x3F)); - utf8[2] = (char)(0x80 | ((c >> 6) & 0x3F)); - utf8[3] = (char)(0x80 | ( c & 0x3F)); - len = 4; - } + /* Unicode character (negative c indicates codepoint) */ + len = (int)mrb_utf8_to_buf(utf8, (uint32_t)(-c)); } if (p->tidx+len >= p->tsiz) { if (p->tsiz >= MRB_PARSER_TOKBUF_MAX) { @@ -10703,11 +12601,11 @@ tokadd(parser_state *p, int32_t c) } p->tsiz *= 2; if (p->tokbuf == p->buf) { - p->tokbuf = (char*)mrb_malloc(p->mrb, p->tsiz); + p->tokbuf = (char*)mrbc_malloc(p->tsiz); memcpy(p->tokbuf, p->buf, MRB_PARSER_TOKBUF_SIZE); } else { - p->tokbuf = (char*)mrb_realloc(p->mrb, p->tokbuf, p->tsiz); + p->tokbuf = (char*)mrbc_realloc(p->tokbuf, p->tsiz); } } for (i = 0; i < len; i++) { @@ -10726,7 +12624,7 @@ tokfix(parser_state *p) { if (p->tidx >= MRB_PARSER_TOKBUF_MAX) { p->tidx = MRB_PARSER_TOKBUF_MAX-1; - yyerror(p, "string too long (truncated)"); + yyerror(NULL, p, "string too long (truncated)"); } p->tokbuf[p->tidx] = '\0'; } @@ -10747,7 +12645,7 @@ toklen(parser_state *p) #define IS_END() (p->lstate == EXPR_END || p->lstate == EXPR_ENDARG || p->lstate == EXPR_ENDFN) #define IS_BEG() (p->lstate == EXPR_BEG || p->lstate == EXPR_MID || p->lstate == EXPR_VALUE || p->lstate == EXPR_CLASS) #define IS_SPCARG(c) (IS_ARG() && space_seen && !ISSPACE(c)) -#define IS_LABEL_POSSIBLE() ((p->lstate == EXPR_BEG && !cmd_state) || IS_ARG()) +#define IS_LABEL_POSSIBLE() ((p->lstate == EXPR_BEG && !cmd_state) || IS_ARG() || p->lstate == EXPR_VALUE) #define IS_LABEL_SUFFIX(n) (peek_n(p, ':',(n)) && !peek_n(p, ':', (n)+1)) static int32_t @@ -10797,7 +12695,7 @@ read_escape_unicode(parser_state *p, int limit) buf[0] = nextc(p); if (buf[0] < 0) { eof: - yyerror(p, "invalid escape character syntax"); + yyerror(NULL, p, "invalid escape character syntax"); return -1; } if (ISXDIGIT(buf[0])) { @@ -10816,7 +12714,7 @@ read_escape_unicode(parser_state *p, int limit) } hex = scan_hex(p, buf, i, &i); if (i == 0 || hex > 0x10FFFF || (hex & 0xFFFFF800) == 0xD800) { - yyerror(p, "invalid Unicode code point"); + yyerror(NULL, p, "invalid Unicode code point"); return -1; } return hex; @@ -10886,7 +12784,7 @@ read_escape(parser_state *p) } } if (i == 0) { - yyerror(p, "invalid hex escape"); + yyerror(NULL, p, "invalid hex escape"); return -1; } return scan_hex(p, buf, i, &i); @@ -10914,7 +12812,7 @@ read_escape(parser_state *p) case 'M': if ((c = nextc(p)) != '-') { - yyerror(p, "Invalid escape character syntax"); + yyerror(NULL, p, "Invalid escape character syntax"); pushback(p, c); return '\0'; } @@ -10928,7 +12826,7 @@ read_escape(parser_state *p) case 'C': if ((c = nextc(p)) != '-') { - yyerror(p, "Invalid escape character syntax"); + yyerror(NULL, p, "Invalid escape character syntax"); pushback(p, c); return '\0'; } @@ -10944,7 +12842,7 @@ read_escape(parser_state *p) eof: case -1: case -2: /* end of a file */ - yyerror(p, "Invalid escape character syntax"); + yyerror(NULL, p, "Invalid escape character syntax"); return '\0'; default: @@ -10987,8 +12885,8 @@ heredoc_remove_indent(parser_state *p, parser_heredoc_info *hinfo) while (indented) { n = indented->car; pair = n->car; - str = (char*)pair->car; - len = (size_t)pair->cdr; + len = (size_t)pair->car; + str = (char*)pair->cdr; escaped = n->cdr->car; nspaces = n->cdr->cdr; if (escaped) { @@ -11011,13 +12909,14 @@ heredoc_remove_indent(parser_state *p, parser_heredoc_info *hinfo) } if (newlen < len) newstr[newlen] = '\0'; - pair->car = (node*)newstr; - pair->cdr = (node*)newlen; - } else { + pair->car = (node*)newlen; + pair->cdr = (node*)newstr; + } + else { spaces = (size_t)nspaces->car; heredoc_count_indent(hinfo, str, len, spaces, &offset); - pair->car = (node*)(str + offset); - pair->cdr = (node*)(len - offset); + pair->car = (node*)(len - offset); + pair->cdr = (node*)(str + offset); } indented = indented->cdr; } @@ -11072,8 +12971,8 @@ parse_string(parser_state *p) int len = toklen(p); if (hinfo->allow_indent) { while (ISSPACE(*s) && len > 0) { - ++s; - --len; + s++; + len--; } } if (hinfo->term_len > 0 && len-1 == hinfo->term_len && strncmp(s, hinfo->term, len-1) == 0) { @@ -11087,20 +12986,20 @@ parse_string(parser_state *p) const char s2[] = "\" anywhere before EOF"; if (sizeof(s1)+sizeof(s2)+strlen(hinfo->term)+1 >= sizeof(buf)) { - yyerror(p, "can't find heredoc delimiter anywhere before EOF"); - } else { + yyerror(NULL, p, "can't find heredoc delimiter anywhere before EOF"); + } + else { strcpy(buf, s1); strcat(buf, hinfo->term); strcat(buf, s2); - yyerror(p, buf); + yyerror(NULL, p, buf); } return 0; } - node *nd = new_str(p, tok(p), toklen(p)); - pylval.nd = nd; + pylval.nd = new_str_tok(p); if (unindent && head) { - nspaces = push(nspaces, nint(spaces)); - heredoc_push_indented(p, hinfo, nd->cdr, escaped, nspaces, empty && line_head); + nspaces = push(nspaces, int_to_node(spaces)); + heredoc_push_indented(p, hinfo, pylval.nd, escaped, nspaces, empty && line_head); } return tHD_STRING_MID; } @@ -11108,12 +13007,12 @@ parse_string(parser_state *p) if (c == '\t') spaces += 8; else if (ISSPACE(c)) - ++spaces; + spaces++; else empty = FALSE; } if (c < 0) { - yyerror(p, "unterminated string meets end of file"); + yyerror(NULL, p, "unterminated string meets end of file"); return 0; } else if (c == beg) { @@ -11134,8 +13033,8 @@ parse_string(parser_state *p) p->lineno++; p->column = 0; if (unindent) { - nspaces = push(nspaces, nint(spaces)); - escaped = push(escaped, nint(pos)); + nspaces = push(nspaces, int_to_node(spaces)); + escaped = push(escaped, int_to_node(pos)); pos--; empty = TRUE; spaces = 0; @@ -11189,12 +13088,11 @@ parse_string(parser_state *p) tokfix(p); p->lstate = EXPR_BEG; p->cmd_start = TRUE; - node *nd = new_str(p, tok(p), toklen(p)); - pylval.nd = nd; + pylval.nd = new_str_tok(p); if (hinfo) { if (unindent && head) { - nspaces = push(nspaces, nint(spaces)); - heredoc_push_indented(p, hinfo, nd->cdr, escaped, nspaces, FALSE); + nspaces = push(nspaces, int_to_node(spaces)); + heredoc_push_indented(p, hinfo, pylval.nd, escaped, nspaces, FALSE); } hinfo->line_head = FALSE; return tHD_STRING_PART; @@ -11224,7 +13122,7 @@ parse_string(parser_state *p) else { pushback(p, c); tokfix(p); - pylval.nd = new_str(p, tok(p), toklen(p)); + pylval.nd = new_str_tok(p); return tSTRING_MID; } } @@ -11236,18 +13134,19 @@ parse_string(parser_state *p) } tokfix(p); - p->lstate = EXPR_ENDARG; + p->lstate = EXPR_END; end_strterm(p); if (type & STR_FUNC_XQUOTE) { - pylval.nd = new_xstr(p, tok(p), toklen(p)); + pylval.nd = new_str_tok(p); return tXSTRING; } if (type & STR_FUNC_REGEXP) { int f = 0; int re_opt; - char *s = strndup(tok(p), toklen(p)); + int pattern_len = toklen(p); + char *s = strndup(tok(p), pattern_len); char flags[3]; char *flag = flags; char enc = '\0'; @@ -11277,7 +13176,7 @@ parse_string(parser_state *p) } strcat(msg, " - "); strncat(msg, tok(p), sizeof(msg) - strlen(msg) - 1); - yyerror(p, msg); + yyerror(NULL, p, msg); } if (f != 0) { if (f & 1) *flag++ = 'i'; @@ -11298,11 +13197,11 @@ parse_string(parser_state *p) else { encp = NULL; } - pylval.nd = new_regx(p, s, dup, encp); + pylval.nd = cons(cons(int_to_node(pattern_len), (node*)s), cons((node*)dup, (node*)encp)); return tREGEXP; } - pylval.nd = new_str(p, tok(p), toklen(p)); + pylval.nd = new_str_tok(p); return tSTRING; } @@ -11316,7 +13215,7 @@ number_literal_suffix(parser_state *p) int mask = NUM_SUFFIX_R|NUM_SUFFIX_I; while ((c = nextc(p)) != -1) { - list = push(list, nint(c)); + list = push(list, int_to_node(c)); if ((mask & NUM_SUFFIX_I) && c == 'i') { result |= (mask & NUM_SUFFIX_I); @@ -11382,7 +13281,7 @@ heredoc_identifier(parser_state *p) tokadd(p, c); } if (c < 0) { - yyerror(p, "unterminated here document identifier"); + yyerror(NULL, p, "unterminated here document identifier"); return 0; } } @@ -11403,8 +13302,7 @@ heredoc_identifier(parser_state *p) pushback(p, c); } tokfix(p); - newnode = new_heredoc(p); - info = (parser_heredoc_info*)newnode->cdr; + newnode = new_heredoc(p, &info); info->term = strndup(tok(p), toklen(p)); info->term_len = toklen(p); if (! quote) @@ -11442,6 +13340,11 @@ parser_yylex(parser_state *p) enum mrb_lex_state_enum last_state; int token_column; + /* Early termination if too many errors - prevents DoS from malformed input */ + if (p->nerr > 10) { + return 0; /* EOF */ + } + if (p->lex_strterm) { if (is_strterm_type(p, STR_FUNC_HEREDOC)) { if (p->parsing_heredoc != NULL) @@ -11611,7 +13514,7 @@ parser_yylex(parser_state *p) if (c < 0 || ISSPACE(c)) { do { if (!skips(p, end)) { - yyerror(p, "embedded document meets end of file"); + yyerror(NULL, p, "embedded document meets end of file"); return 0; } c = nextc(p); @@ -11736,7 +13639,7 @@ parser_yylex(parser_state *p) } c = nextc(p); if (c < 0) { - yyerror(p, "incomplete character syntax"); + yyerror(NULL, p, "incomplete character syntax"); return 0; } if (ISSPACE(c)) { @@ -11771,7 +13674,7 @@ parser_yylex(parser_state *p) strcpy(buf, "invalid character syntax; use ?\\"); strncat(buf, cc, 2); - yyerror(p, buf); + yyerror(NULL, p, buf); } } ternary: @@ -11796,8 +13699,8 @@ parser_yylex(parser_state *p) tokadd(p, c); } tokfix(p); - pylval.nd = new_str(p, tok(p), toklen(p)); - p->lstate = EXPR_ENDARG; + pylval.nd = new_str_tok(p); + p->lstate = EXPR_END; return tCHAR; case '&': @@ -11937,7 +13840,7 @@ parser_yylex(parser_state *p) pushback(p, c); p->lstate = EXPR_BEG; if (c >= 0 && ISDIGIT(c)) { - yyerror(p, "no . floating literal anymore; put 0 before dot"); + yyerror(NULL, p, "no . floating literal anymore; put 0 before dot"); } p->lstate = EXPR_DOT; return '.'; @@ -11951,14 +13854,17 @@ parser_yylex(parser_state *p) int suffix = 0; is_float = seen_point = seen_e = nondigit = 0; - p->lstate = EXPR_ENDARG; + p->lstate = EXPR_END; newtok(p); - if (c == '-' || c == '+') { + if (c == '-') { tokadd(p, c); c = nextc(p); } + else if (c == '+') { + c = nextc(p); + } if (c == '0') { -#define no_digits() do {yyerror(p,"numeric literal without digits"); return 0;} while (0) +#define no_digits() do {yyerror(NULL, p,"numeric literal without digits"); return 0;} while (0) int start = toklen(p); c = nextc(p); if (c == 'x' || c == 'X') { @@ -12077,7 +13983,7 @@ parser_yylex(parser_state *p) } if (c > '7' && c <= '9') { invalid_octal: - yyerror(p, "Invalid octal digit"); + yyerror(NULL, p, "Invalid octal digit"); } else if (c == '.' || c == 'e' || c == 'E') { tokadd(p, '0'); @@ -12163,17 +14069,10 @@ parser_yylex(parser_state *p) return tINTEGER; #else double d; - char *endp; - errno = 0; - d = mrb_float_read(tok(p), &endp); - if (d == 0 && endp == tok(p)) { + if (!mrb_read_float(tok(p), NULL, &d)) { yywarning_s(p, "corrupted float value", tok(p)); } - else if (errno == ERANGE) { - yywarning_s(p, "float out of range", tok(p)); - errno = 0; - } suffix = number_literal_suffix(p); if (seen_e && (suffix & NUM_SUFFIX_R)) { pushback(p, 'r'); @@ -12213,7 +14112,8 @@ parser_yylex(parser_state *p) } if (!space_seen && IS_END()) { pushback(p, c); - p->lstate = EXPR_BEG; + /* In pattern matching context, use EXPR_ARG so newlines are significant */ + p->lstate = p->in_kwarg ? EXPR_ARG : EXPR_BEG; return tLABEL_TAG; } if (IS_END() || ISSPACE(c) || c == '#') { @@ -12370,12 +14270,12 @@ parser_yylex(parser_state *p) else { term = nextc(p); if (ISALNUM(term)) { - yyerror(p, "unknown type of %string"); + yyerror(NULL, p, "unknown type of %string"); return 0; } } if (c < 0 || term < 0) { - yyerror(p, "unterminated quoted string meets end of file"); + yyerror(NULL, p, "unterminated quoted string meets end of file"); return 0; } paren = term; @@ -12423,7 +14323,7 @@ parser_yylex(parser_state *p) return tSYMBOLS_BEG; default: - yyerror(p, "unknown type of %string"); + yyerror(NULL, p, "unknown type of %string"); return 0; } } @@ -12449,7 +14349,7 @@ parser_yylex(parser_state *p) token_column = newtok(p); c = nextc(p); if (c < 0) { - yyerror(p, "incomplete global variable syntax"); + yyerror(NULL, p, "incomplete global variable syntax"); return 0; } switch (c) { @@ -12518,8 +14418,8 @@ parser_yylex(parser_state *p) if (last_state == EXPR_FNAME) goto gvar; tokfix(p); { - mrb_int n = mrb_int_read(tok(p), NULL, NULL); - if (n > INT32_MAX) { + mrb_int n; + if (!mrb_read_int(tok(p), NULL, NULL, &n)) { yywarning(p, "capture group index too big; always nil"); return keyword_nil; } @@ -12548,10 +14448,10 @@ parser_yylex(parser_state *p) } if (c < 0) { if (p->tidx == 1) { - yyerror(p, "incomplete instance variable syntax"); + yyerror(NULL, p, "incomplete instance variable syntax"); } else { - yyerror(p, "incomplete class variable syntax"); + yyerror(NULL, p, "incomplete class variable syntax"); } return 0; } @@ -12584,7 +14484,7 @@ parser_yylex(parser_state *p) buf[sizeof(s)-1] = hexdigits[(c & 0xf0) >> 4]; buf[sizeof(s)] = hexdigits[(c & 0x0f)]; buf[sizeof(s)+1] = 0; - yyerror(p, buf); + yyerror(NULL, p, buf); goto retry; } @@ -12631,31 +14531,13 @@ parser_yylex(parser_state *p) break; case '_': - if (p->lstate != EXPR_FNAME && toklen(p) == 2 && ISDIGIT(tok(p)[1]) && p->nvars) { + if (toklen(p) == 2 && ISDIGIT(tok(p)[1]) && p->nvars) { int n = tok(p)[1] - '0'; int nvar; if (n > 0) { - node *nvars = p->nvars->cdr; - - while (nvars) { - nvar = intn(nvars->car); - if (nvar == -2) break; /* top of the scope */ - if (nvar > 0) { - yywarning(p, "numbered parameter used in outer block"); - break; - } - nvars->car = nint(-1); - nvars = nvars->cdr; - } - nvar = intn(p->nvars->car); + nvar = node_to_int(p->nvars->car); if (nvar != -2) { /* numbered parameters never appear on toplevel */ - if (nvar == -1) { - yywarning(p, "numbered parameter used in inner block"); - } - else { - p->nvars->car = nint(nvar > n ? nvar : n); - } pylval.num = n; p->lstate = EXPR_END; return tNUMPARAM; @@ -12733,6 +14615,10 @@ parser_yylex(parser_state *p) return keyword_do_block; return keyword_do; } + if (kw->id[0] == keyword_in) { + /* Set in_kwarg for pattern matching context */ + p->in_kwarg++; + } if (state == EXPR_BEG || state == EXPR_VALUE || state == EXPR_CLASS) return kw->id[0]; else { @@ -12771,14 +14657,14 @@ parser_yylex(parser_state *p) } static int -yylex(void *lval, parser_state *p) +yylex(void *lval, void *lp, parser_state *p) { p->ylval = lval; return parser_yylex(p); } static void -parser_init_cxt(parser_state *p, mrbc_context *cxt) +parser_init_cxt(parser_state *p, mrb_ccontext *cxt) { if (!cxt) return; if (cxt->filename) mrb_parser_set_filename(p, cxt->filename); @@ -12794,6 +14680,7 @@ parser_init_cxt(parser_state *p, mrbc_context *cxt) p->capture_errors = cxt->capture_errors; p->no_optimize = cxt->no_optimize; p->no_ext_ops = cxt->no_ext_ops; + p->no_return_value = cxt->no_return_value; p->upper = cxt->upper; if (cxt->partial_hook) { p->cxt = cxt; @@ -12801,31 +14688,33 @@ parser_init_cxt(parser_state *p, mrbc_context *cxt) } static void -parser_update_cxt(parser_state *p, mrbc_context *cxt) +parser_update_cxt(parser_state *p, mrb_ccontext *cxt) { node *n, *n0; int i = 0; if (!cxt) return; if (!p->tree) return; - if (intn(p->tree->car) != NODE_SCOPE) return; - n0 = n = p->tree->cdr->car; + if (!node_type_p(p->tree, NODE_SCOPE)) return; + + /* Extract locals from variable-sized NODE_SCOPE */ + struct mrb_ast_scope_node *scope = scope_node(p->tree); + n0 = n = scope->locals; while (n) { i++; n = n->cdr; } - cxt->syms = (mrb_sym*)mrb_realloc(p->mrb, cxt->syms, i*sizeof(mrb_sym)); + cxt->syms = (mrb_sym*)mrbc_realloc(cxt->syms, i*sizeof(mrb_sym)); cxt->slen = i; for (i=0, n=n0; n; i++,n=n->cdr) { - cxt->syms[i] = sym(n->car); + cxt->syms[i] = node_to_sym(n->car); } } -void mrb_codedump_all(mrb_state*, struct RProc*); -void mrb_parser_dump(mrb_state *mrb, node *tree, int offset); +static void dump_node(mrb_state *mrb, node *tree, int offset); MRB_API void -mrb_parser_parse(parser_state *p, mrbc_context *c) +mrb_parser_parse(parser_state *p, mrb_ccontext *c) { struct mrb_jmpbuf buf1; struct mrb_jmpbuf *prev = p->mrb->jmp; @@ -12848,13 +14737,13 @@ mrb_parser_parse(parser_state *p, mrbc_context *c) } parser_update_cxt(p, c); if (c && c->dump_result) { - mrb_parser_dump(p->mrb, p->tree, 0); + dump_node(p->mrb, p->tree, 0); } } MRB_CATCH(p->mrb->jmp) { p->nerr++; if (p->mrb->exc == NULL) { - yyerror(p, "memory allocation error"); + yyerror(NULL, p, "memory allocation error"); p->nerr++; p->tree = 0; } @@ -12866,13 +14755,13 @@ mrb_parser_parse(parser_state *p, mrbc_context *c) MRB_API parser_state* mrb_parser_new(mrb_state *mrb) { - mrb_pool *pool; + mempool *pool; parser_state *p; static const parser_state parser_state_zero = { 0 }; - pool = mrb_pool_open(mrb); + pool = mempool_open(); if (!pool) return NULL; - p = (parser_state*)mrb_pool_alloc(pool, sizeof(parser_state)); + p = (parser_state*)mempool_alloc(pool, sizeof(parser_state)); if (!p) return NULL; *p = parser_state_zero; @@ -12908,35 +14797,39 @@ mrb_parser_new(mrb_state *mrb) MRB_API void mrb_parser_free(parser_state *p) { if (p->tokbuf != p->buf) { - mrb_free(p->mrb, p->tokbuf); + mrbc_free(p->tokbuf); } - mrb_pool_close(p->pool); + mempool_close(p->pool); } -MRB_API mrbc_context* -mrbc_context_new(mrb_state *mrb) +MRB_API mrb_ccontext* +mrb_ccontext_new(mrb_state *mrb) { - return (mrbc_context*)mrb_calloc(mrb, 1, sizeof(mrbc_context)); + static const mrb_ccontext cc_zero = { 0 }; + mrb_ccontext *cc = (mrb_ccontext*)mrbc_malloc(sizeof(mrb_ccontext)); + *cc = cc_zero; + return cc; } MRB_API void -mrbc_context_free(mrb_state *mrb, mrbc_context *cxt) +mrb_ccontext_free(mrb_state *mrb, mrb_ccontext *cxt) { - mrb_free(mrb, cxt->filename); - mrb_free(mrb, cxt->syms); - mrb_free(mrb, cxt); + mrbc_free(cxt->filename); + mrbc_free(cxt->syms); + mrbc_free(cxt); } MRB_API const char* -mrbc_filename(mrb_state *mrb, mrbc_context *c, const char *s) +mrb_ccontext_filename(mrb_state *mrb, mrb_ccontext *c, const char *s) { if (s) { size_t len = strlen(s); - char *p = (char*)mrb_malloc(mrb, len + 1); + char *p = (char*)mrbc_malloc(len + 1); + if (p == NULL) return NULL; memcpy(p, s, len + 1); if (c->filename) { - mrb_free(mrb, c->filename); + mrbc_free(c->filename); } c->filename = p; } @@ -12944,20 +14837,21 @@ mrbc_filename(mrb_state *mrb, mrbc_context *c, const char *s) } MRB_API void -mrbc_partial_hook(mrb_state *mrb, mrbc_context *c, int (*func)(struct mrb_parser_state*), void *data) +mrb_ccontext_partial_hook(mrb_ccontext *c, int (*func)(struct mrb_parser_state*), void *data) { c->partial_hook = func; c->partial_data = data; } MRB_API void -mrbc_cleanup_local_variables(mrb_state *mrb, mrbc_context *c) +mrb_ccontext_cleanup_local_variables(mrb_ccontext *c) { if (c->syms) { - mrb_free(mrb, c->syms); + mrbc_free(c->syms); c->syms = NULL; c->slen = 0; } + c->keep_lv = FALSE; } MRB_API void @@ -12969,9 +14863,13 @@ mrb_parser_set_filename(struct mrb_parser_state *p, const char *f) sym = mrb_intern_cstr(p->mrb, f); p->filename_sym = sym; + /* Save current lineno so that AST nodes produced from a bison lookahead + across the file boundary (in partial_hook) can recover the correct + line in init_var_header instead of recording lineno=0. */ + p->prev_file_lineno = p->lineno; p->lineno = (p->filename_table_length > 0)? 0 : 1; - for (i = 0; i < p->filename_table_length; ++i) { + for (i = 0; i < p->filename_table_length; i++) { if (p->filename_table[i] == sym) { p->current_filename_index = i; return; @@ -12979,7 +14877,7 @@ mrb_parser_set_filename(struct mrb_parser_state *p, const char *f) } if (p->filename_table_length == UINT16_MAX) { - yyerror(p, "too many files to compile"); + yyerror(NULL, p, "too many files to compile"); return; } p->current_filename_index = p->filename_table_length++; @@ -13002,7 +14900,7 @@ mrb_parser_get_filename(struct mrb_parser_state* p, uint16_t idx) { #ifndef MRB_NO_STDIO static struct mrb_parser_state * -mrb_parse_file_continue(mrb_state *mrb, FILE *f, const void *prebuf, size_t prebufsize, mrbc_context *c) +mrb_parse_file_continue(mrb_state *mrb, FILE *f, const void *prebuf, size_t prebufsize, mrb_ccontext *c) { parser_state *p; @@ -13022,14 +14920,14 @@ mrb_parse_file_continue(mrb_state *mrb, FILE *f, const void *prebuf, size_t preb } MRB_API parser_state* -mrb_parse_file(mrb_state *mrb, FILE *f, mrbc_context *c) +mrb_parse_file(mrb_state *mrb, FILE *f, mrb_ccontext *c) { return mrb_parse_file_continue(mrb, f, NULL, 0, c); } #endif MRB_API parser_state* -mrb_parse_nstring(mrb_state *mrb, const char *s, size_t len, mrbc_context *c) +mrb_parse_nstring(mrb_state *mrb, const char *s, size_t len, mrb_ccontext *c) { parser_state *p; @@ -13043,13 +14941,13 @@ mrb_parse_nstring(mrb_state *mrb, const char *s, size_t len, mrbc_context *c) } MRB_API parser_state* -mrb_parse_string(mrb_state *mrb, const char *s, mrbc_context *c) +mrb_parse_string(mrb_state *mrb, const char *s, mrb_ccontext *c) { return mrb_parse_nstring(mrb, s, strlen(s), c); } MRB_API mrb_value -mrb_load_exec(mrb_state *mrb, struct mrb_parser_state *p, mrbc_context *c) +mrb_load_exec(mrb_state *mrb, struct mrb_parser_state *p, mrb_ccontext *c) { struct RClass *target = mrb->object_class; struct RProc *proc; @@ -13112,7 +15010,7 @@ mrb_load_exec(mrb_state *mrb, struct mrb_parser_state *p, mrbc_context *c) #ifndef MRB_NO_STDIO MRB_API mrb_value -mrb_load_file_cxt(mrb_state *mrb, FILE *f, mrbc_context *c) +mrb_load_file_cxt(mrb_state *mrb, FILE *f, mrb_ccontext *c) { return mrb_load_exec(mrb, mrb_parse_file(mrb, f, c), c); } @@ -13132,7 +15030,7 @@ mrb_load_file(mrb_state *mrb, FILE *f) * - `NUL` is included in the first 64 bytes of the file */ MRB_API mrb_value -mrb_load_detect_file_cxt(mrb_state *mrb, FILE *fp, mrbc_context *c) +mrb_load_detect_file_cxt(mrb_state *mrb, FILE *fp, mrb_ccontext *c) { union { char b[DETECT_SIZE]; @@ -13151,15 +15049,10 @@ mrb_load_detect_file_cxt(mrb_state *mrb, FILE *fp, mrbc_context *c) return mrb_load_exec(mrb, mrb_parse_file_continue(mrb, fp, leading.b, bufsize, c), c); } else { - mrb_int binsize; - uint8_t *bin; - mrb_value bin_obj = mrb_nil_value(); /* temporary string object */ - mrb_value result; - - binsize = bin_to_uint32(leading.h.binary_size); - bin_obj = mrb_str_new(mrb, NULL, binsize); - bin = (uint8_t*)RSTRING_PTR(bin_obj); - if ((size_t)binsize > bufsize) { + mrb_int binsize = bin_to_uint32(leading.h.binary_size); + mrb_value bin_obj = mrb_str_new(mrb, NULL, binsize); + uint8_t *bin = (uint8_t*)RSTRING_PTR(bin_obj); + if ((size_t)binsize > bufsize) { memcpy(bin, leading.b, bufsize); if (fread(bin + bufsize, binsize - bufsize, 1, fp) == 0) { binsize = bufsize; @@ -13167,7 +15060,7 @@ mrb_load_detect_file_cxt(mrb_state *mrb, FILE *fp, mrbc_context *c) } } - result = mrb_load_irep_buf_cxt(mrb, bin, binsize, c); + mrb_value result = mrb_load_irep_buf_cxt(mrb, bin, binsize, c); if (mrb_string_p(bin_obj)) mrb_str_resize(mrb, bin_obj, 0); return result; } @@ -13175,7 +15068,7 @@ mrb_load_detect_file_cxt(mrb_state *mrb, FILE *fp, mrbc_context *c) #endif MRB_API mrb_value -mrb_load_nstring_cxt(mrb_state *mrb, const char *s, size_t len, mrbc_context *c) +mrb_load_nstring_cxt(mrb_state *mrb, const char *s, size_t len, mrb_ccontext *c) { return mrb_load_exec(mrb, mrb_parse_nstring(mrb, s, len, c), c); } @@ -13187,7 +15080,7 @@ mrb_load_nstring(mrb_state *mrb, const char *s, size_t len) } MRB_API mrb_value -mrb_load_string_cxt(mrb_state *mrb, const char *s, mrbc_context *c) +mrb_load_string_cxt(mrb_state *mrb, const char *s, mrb_ccontext *c) { return mrb_load_nstring_cxt(mrb, s, strlen(s), c); } @@ -13201,9 +15094,9 @@ mrb_load_string(mrb_state *mrb, const char *s) #ifndef MRB_NO_STDIO static void -dump_prefix(node *tree, int offset) +dump_prefix(int offset, uint16_t lineno) { - printf("%05d ", tree->lineno); + printf("%05d ", lineno); while (offset--) { putc(' ', stdout); putc(' ', stdout); @@ -13214,56 +15107,60 @@ static void dump_recur(mrb_state *mrb, node *tree, int offset) { while (tree) { - mrb_parser_dump(mrb, tree->car, offset); + dump_node(mrb, tree->car, offset); tree = tree->cdr; } } static void -dump_args(mrb_state *mrb, node *n, int offset) +dump_locals(mrb_state *mrb, node *tree, int offset, uint16_t lineno) { - if (n->car) { - dump_prefix(n, offset+1); - printf("mandatory args:\n"); - dump_recur(mrb, n->car, offset+2); - } - n = n->cdr; - if (n->car) { - dump_prefix(n, offset+1); - printf("optional args:\n"); - { - node *n2 = n->car; + if (!tree || (!tree->car && !tree->cdr)) return; - while (n2) { - dump_prefix(n2, offset+2); - printf("%s=\n", mrb_sym_name(mrb, sym(n2->car->car))); - mrb_parser_dump(mrb, n2->car->cdr, offset+3); - n2 = n2->cdr; + dump_prefix(offset, lineno); + printf("locals:\n"); + dump_prefix(offset+1, lineno); + while (tree) { + if (tree->car) { + mrb_sym sym = node_to_sym(tree->car); + if (sym != 0) { + const char *name = mrb_sym_name(mrb, sym); + if (name && strlen(name) > 0 && name[0] != '!' && name[0] != '@' && name[0] != '$') { + printf(" %s", mrb_sym_dump(mrb, sym)); + } + else { + printf(" (invalid symbol: %s)", name ? name : "(null)"); + } + } + else { + printf(" (anonymous)"); } } + tree = tree->cdr; } - n = n->cdr; - if (n->car) { - mrb_sym rest = sym(n->car); + printf("\n"); +} - dump_prefix(n, offset+1); - if (rest == MRB_OPSYM(mul)) - printf("rest=*\n"); - else - printf("rest=*%s\n", mrb_sym_name(mrb, rest)); +static void +dump_cpath(mrb_state *mrb, node *tree, int offset, uint16_t lineno) +{ + dump_prefix(offset, lineno); + printf("cpath: "); + if (!tree) { + printf("(null)\n"); } - n = n->cdr; - if (n->car) { - dump_prefix(n, offset+1); - printf("post mandatory args:\n"); - dump_recur(mrb, n->car, offset+2); + else if (node_to_int(tree->car) == 0) { + printf("(null)\n"); } - - n = n->cdr; - if (n) { - mrb_assert(intn(n->car) == NODE_ARGS_TAIL); - mrb_parser_dump(mrb, n, offset); + else if (node_to_int(tree->car) == 1) { + printf("Object\n"); + } + else { + printf("\n"); + dump_node(mrb, tree->car, offset+1); } + dump_prefix(offset, lineno); + printf("name: %s\n", mrb_sym_dump(mrb, node_to_sym(tree->cdr))); } /* @@ -13276,436 +15173,664 @@ static const char* str_dump(mrb_state *mrb, const char *str, int len) { int ai = mrb_gc_arena_save(mrb); - mrb_value s; -# if INT_MAX > MRB_INT_MAX / 4 - /* check maximum length with "\xNN" character */ - if (len > MRB_INT_MAX / 4) { - len = MRB_INT_MAX / 4; - } -# endif - s = mrb_str_new(mrb, str, (mrb_int)len); + mrb_value s = mrb_str_new(mrb, str, (mrb_int)len); s = mrb_str_dump(mrb, s); mrb_gc_arena_restore(mrb, ai); return RSTRING_PTR(s); } + +static void +dump_str(mrb_state *mrb, node *n, int offset, uint16_t lineno) +{ + while (n) { + dump_prefix(offset, lineno); + int len = node_to_int(n->car->car); + if (len >= 0) { + printf("str: %s\n", str_dump(mrb, (char*)n->car->cdr, len)); + } + else { + printf("interpolation:\n"); + dump_node(mrb, n->car->cdr, offset+1); + } + n = n->cdr; + } +} + +static void +dump_args(mrb_state *mrb, struct mrb_ast_args *args, int offset, uint16_t lineno) +{ + if (args->mandatory_args) { + dump_prefix(offset, lineno); + printf("mandatory args:\n"); + dump_recur(mrb, args->mandatory_args, offset+1); + } + if (args->optional_args) { + dump_prefix(offset, lineno); + printf("optional args:\n"); + { + node *n = args->optional_args; + while (n) { + dump_prefix(offset+1, lineno); + printf("%s=\n", mrb_sym_name(mrb, node_to_sym(n->car->car))); + dump_node(mrb, n->car->cdr, offset+2); + n = n->cdr; + } + } + } + if (args->rest_arg) { + mrb_sym rest = args->rest_arg; + + dump_prefix(offset, lineno); + if (rest == MRB_OPSYM(mul)) + printf("rest=*\n"); + else + printf("rest=*%s\n", mrb_sym_name(mrb, rest)); + } + if (args->post_mandatory_args) { + dump_prefix(offset, lineno); + printf("post mandatory args:\n"); + dump_recur(mrb, args->post_mandatory_args, offset+1); + } + if (args->keyword_args) { + dump_prefix(offset, lineno); + printf("keyword args:\n"); + { + node *n = args->keyword_args; + while (n) { + dump_prefix(offset+1, lineno); + printf("%s:\n", mrb_sym_name(mrb, node_to_sym(n->car->car))); + dump_node(mrb, n->car->cdr, offset+2); + n = n->cdr; + } + } + } + if (args->kwrest_arg) { + mrb_sym rest = args->kwrest_arg; + + dump_prefix(offset, lineno); + if (rest == MRB_OPSYM(pow)) + printf("kwrest=**\n"); + else + printf("kwrest=**%s\n", mrb_sym_name(mrb, rest)); + } + if (args->block_arg) { + mrb_sym blk = args->block_arg; + + dump_prefix(offset, lineno); + if (blk == MRB_OPSYM(and)) + printf("blk=&\n"); + else if (blk == MRB_SYM(nil)) + printf("blk=&nil\n"); + else + printf("blk=&%s\n", mrb_sym_name(mrb, blk)); + } +} + +static void +dump_callargs(mrb_state *mrb, node *n, int offset, uint16_t lineno) +{ + if (!n) return; + + struct mrb_ast_callargs *args = (struct mrb_ast_callargs*)n; + if (args->regular_args) { + dump_prefix(offset+1, lineno); + printf("args:\n"); + dump_recur(mrb, args->regular_args, offset+2); + } + if (args->keyword_args) { + dump_prefix(offset+1, lineno); + printf("kw_args:\n"); + node *kw = args->keyword_args; + while (kw) { + dump_prefix(offset+2, lineno); + printf("key:\n"); + if (node_to_sym(kw->car->car) == MRB_OPSYM(pow)) { + dump_prefix(offset+3, lineno); + printf("**:\n"); + } + else { + dump_node(mrb, kw->car->car, offset+3); + } + dump_prefix(offset+2, lineno); + printf("value:\n"); + dump_node(mrb, kw->car->cdr, offset+3); + kw = kw->cdr; + } + } + if (args->block_arg) { + dump_prefix(offset+1, lineno); + printf("block:\n"); + dump_node(mrb, args->block_arg, offset+2); + } +} + #endif void -mrb_parser_dump(mrb_state *mrb, node *tree, int offset) +dump_node(mrb_state *mrb, node *tree, int offset) { #ifndef MRB_NO_STDIO - int nodetype; + enum node_type nodetype; + uint16_t lineno = 0; if (!tree) return; - again: - dump_prefix(tree, offset); - nodetype = intn(tree->car); - tree = tree->cdr; + + /* Extract line number from variable-sized node header */ + if (node_type(tree) != NODE_LAST) { + lineno = ((struct mrb_ast_var_header*)tree)->lineno; + } + + dump_prefix(offset, lineno); + + /* All nodes are now variable-sized nodes with headers */ + nodetype = node_type(tree); + switch (nodetype) { + /* Variable-sized node cases */ + case NODE_SCOPE: + printf("NODE_SCOPE:\n"); + if (scope_node(tree)->locals) { + dump_locals(mrb, scope_node(tree)->locals, offset+1, lineno); + } + if (scope_node(tree)->body) { + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, scope_node(tree)->body, offset+2); + } + break; + + case NODE_INT: + printf("NODE_INT: %d\n", int_node(tree)->value); + break; + + case NODE_BIGINT: + printf("NODE_BIGINT: %s (base %d)\n", bigint_node(tree)->string, bigint_node(tree)->base); + break; + + case NODE_FLOAT: + printf("NODE_FLOAT: %s\n", float_node(tree)->value); + break; + + case NODE_STR: + printf("NODE_STR:\n"); + dump_str(mrb, str_node(tree)->list, offset+1, lineno); + break; + + case NODE_XSTR: + printf("NODE_XSTR:\n"); + dump_str(mrb, xstr_node(tree)->list, offset+1, lineno); + break; + + case NODE_SYM: + printf("NODE_SYM: %s\n", mrb_sym_dump(mrb, sym_node(tree)->symbol)); + break; + + case NODE_DSYM: + printf("NODE_DSYM:\n"); + dump_str(mrb, str_node(tree)->list, offset+1, lineno); + break; + + case NODE_LVAR: + printf("NODE_LVAR: %s\n", mrb_sym_dump(mrb, var_node(tree)->symbol)); + break; + + case NODE_GVAR: + printf("NODE_GVAR: %s\n", mrb_sym_dump(mrb, var_node(tree)->symbol)); + break; + + case NODE_IVAR: + printf("NODE_IVAR: %s\n", mrb_sym_dump(mrb, var_node(tree)->symbol)); + break; + + case NODE_CVAR: + printf("NODE_CVAR: %s\n", mrb_sym_dump(mrb, var_node(tree)->symbol)); + break; + + case NODE_NVAR: + printf("NODE_NVAR: %d\n", nvar_node(tree)->num); + break; + + case NODE_CONST: + printf("NODE_CONST: %s\n", mrb_sym_dump(mrb, var_node(tree)->symbol)); + break; + + case NODE_CALL: + printf("NODE_CALL: %s\n", mrb_sym_dump(mrb, call_node(tree)->method_name)); + if (call_node(tree)->receiver) { + dump_prefix(offset+1, lineno); + printf("receiver:\n"); + dump_node(mrb, call_node(tree)->receiver, offset+2); + } + if (call_node(tree)->args) { + dump_callargs(mrb, call_node(tree)->args, offset, lineno); + } + break; + + case NODE_ARRAY: + printf("NODE_ARRAY:\n"); + if (array_node(tree)->elements) { + dump_recur(mrb, array_node(tree)->elements, offset+1); + } + break; + + case NODE_TRUE: + printf("NODE_TRUE\n"); + break; + + case NODE_FALSE: + printf("NODE_FALSE\n"); + break; + + case NODE_NIL: + printf("NODE_NIL\n"); + break; + + case NODE_SELF: + printf("NODE_SELF\n"); + break; + + case NODE_IF: + printf("NODE_IF:\n"); + if (if_node(tree)->condition) { + dump_prefix(offset+1, lineno); + printf("cond:\n"); + dump_node(mrb, if_node(tree)->condition, offset+2); + } + if (if_node(tree)->then_body) { + dump_prefix(offset+1, lineno); + printf("then:\n"); + dump_node(mrb, if_node(tree)->then_body, offset+2); + } + if (if_node(tree)->else_body) { + dump_prefix(offset+1, lineno); + printf("else:\n"); + dump_node(mrb, if_node(tree)->else_body, offset+2); + } + break; + + case NODE_DEF: + printf("NODE_DEF: %s\n", mrb_sym_dump(mrb, def_node(tree)->name)); + if (def_node(tree)->args) { + dump_args(mrb, sdef_node(tree)->args, offset+1, lineno); + } + if (def_node(tree)->locals) { + dump_locals(mrb, def_node(tree)->locals, offset+1, lineno); + } + if (def_node(tree)->body) { + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, def_node(tree)->body, offset+2); + } + break; + + case NODE_ASGN: + printf("NODE_ASGN:\n"); + if (asgn_node(tree)->lhs) { + dump_prefix(offset+1, lineno); + printf("lhs:\n"); + dump_node(mrb, asgn_node(tree)->lhs, offset+2); + } + if (asgn_node(tree)->rhs) { + dump_prefix(offset+1, lineno); + printf("rhs:\n"); + dump_node(mrb, asgn_node(tree)->rhs, offset+2); + } + break; + + case NODE_MASGN: + case NODE_MARG: + printf("%s:\n", node_type(tree) == NODE_MASGN ? "NODE_MASGN" : "NODE_MARG"); + /* Handle pre-splat variables */ + if (masgn_node(tree)->pre) { + dump_prefix(offset+1, lineno); + printf("pre:\n"); + dump_recur(mrb, masgn_node(tree)->pre, offset+2); + } + /* Handle splat variable (can be -1 sentinel for anonymous splat) */ + if (masgn_node(tree)->rest) { + if ((intptr_t)masgn_node(tree)->rest == -1) { + dump_prefix(offset+1, lineno); + printf("rest: *\n"); + } + else { + dump_prefix(offset+1, lineno); + printf("rest:\n"); + dump_node(mrb, masgn_node(tree)->rest, offset+2); + } + } + /* Handle post-splat variables */ + if (masgn_node(tree)->post) { + dump_prefix(offset+1, lineno); + printf("post:\n"); + dump_recur(mrb, masgn_node(tree)->post, offset+2); + } + if (masgn_node(tree)->rhs) { + dump_prefix(offset+1, lineno); + printf("rhs:\n"); + dump_node(mrb, masgn_node(tree)->rhs, offset+2); + } + break; + + case NODE_RETURN: + printf("NODE_RETURN:\n"); + if (return_node(tree)->args) { + dump_node(mrb, return_node(tree)->args, offset); + } + break; + + case NODE_BREAK: + printf("NODE_BREAK:\n"); + if (break_node(tree)->value) { + dump_prefix(offset+1, lineno); + printf("value:\n"); + dump_node(mrb, break_node(tree)->value, offset+2); + } + break; + + case NODE_NEXT: + printf("NODE_NEXT:\n"); + if (next_node(tree)->value) { + dump_prefix(offset+1, lineno); + printf("value:\n"); + dump_node(mrb, next_node(tree)->value, offset+2); + } + break; + + case NODE_NEGATE: + printf("NODE_NEGATE:\n"); + if (negate_node(tree)->operand) { + dump_prefix(offset+1, lineno); + printf("operand:\n"); + dump_node(mrb, negate_node(tree)->operand, offset+2); + } + break; + + case NODE_STMTS: + printf("NODE_STMTS:\n"); + if (stmts_node(tree)->stmts) { + dump_recur(mrb, stmts_node(tree)->stmts, offset+1); + } + break; + case NODE_BEGIN: printf("NODE_BEGIN:\n"); - dump_recur(mrb, tree, offset+1); + if (begin_node(tree)->body) { + dump_node(mrb, begin_node(tree)->body, offset+1); + } break; case NODE_RESCUE: printf("NODE_RESCUE:\n"); - if (tree->car) { - dump_prefix(tree, offset+1); + if (rescue_node(tree)->body) { + dump_prefix(offset+1, lineno); printf("body:\n"); - mrb_parser_dump(mrb, tree->car, offset+2); + dump_node(mrb, rescue_node(tree)->body, offset+2); } - tree = tree->cdr; - if (tree->car) { - node *n2 = tree->car; - - dump_prefix(n2, offset+1); + if (rescue_node(tree)->rescue_clauses) { + node *n2 = rescue_node(tree)->rescue_clauses; + dump_prefix(offset+1, lineno); printf("rescue:\n"); while (n2) { node *n3 = n2->car; if (n3->car) { - dump_prefix(n2, offset+2); + dump_prefix(offset+2, lineno); printf("handle classes:\n"); dump_recur(mrb, n3->car, offset+3); } if (n3->cdr->car) { - dump_prefix(n3, offset+2); + dump_prefix(offset+2, lineno); printf("exc_var:\n"); - mrb_parser_dump(mrb, n3->cdr->car, offset+3); + dump_node(mrb, n3->cdr->car, offset+3); } if (n3->cdr->cdr->car) { - dump_prefix(n3, offset+2); + dump_prefix(offset+2, lineno); printf("rescue body:\n"); - mrb_parser_dump(mrb, n3->cdr->cdr->car, offset+3); + dump_node(mrb, n3->cdr->cdr->car, offset+3); } n2 = n2->cdr; } } - tree = tree->cdr; - if (tree->car) { - dump_prefix(tree, offset+1); + if (rescue_node(tree)->else_clause) { + dump_prefix(offset+1, lineno); printf("else:\n"); - mrb_parser_dump(mrb, tree->car, offset+2); + dump_node(mrb, rescue_node(tree)->else_clause, offset+2); } break; case NODE_ENSURE: printf("NODE_ENSURE:\n"); - dump_prefix(tree, offset+1); - printf("body:\n"); - mrb_parser_dump(mrb, tree->car, offset+2); - dump_prefix(tree, offset+1); - printf("ensure:\n"); - mrb_parser_dump(mrb, tree->cdr->cdr, offset+2); + if (ensure_node(tree)->body) { + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, ensure_node(tree)->body, offset+2); + } + if (ensure_node(tree)->ensure_clause) { + dump_prefix(offset+1, lineno); + printf("ensure:\n"); + dump_node(mrb, ensure_node(tree)->ensure_clause, offset+2); + } break; case NODE_LAMBDA: printf("NODE_LAMBDA:\n"); - dump_prefix(tree, offset); goto block; case NODE_BLOCK: - block: printf("NODE_BLOCK:\n"); - tree = tree->cdr; - if (tree->car) { - dump_args(mrb, tree->car, offset+1); + block: + if (block_node(tree)->locals) { + dump_locals(mrb, block_node(tree)->locals, offset+1, lineno); } - dump_prefix(tree, offset+1); - printf("body:\n"); - mrb_parser_dump(mrb, tree->cdr->car, offset+2); - break; - - case NODE_IF: - printf("NODE_IF:\n"); - dump_prefix(tree, offset+1); - printf("cond:\n"); - mrb_parser_dump(mrb, tree->car, offset+2); - dump_prefix(tree, offset+1); - printf("then:\n"); - mrb_parser_dump(mrb, tree->cdr->car, offset+2); - if (tree->cdr->cdr->car) { - dump_prefix(tree, offset+1); - printf("else:\n"); - mrb_parser_dump(mrb, tree->cdr->cdr->car, offset+2); + if (block_node(tree)->args) { + dump_args(mrb, block_node(tree)->args, offset+1, lineno); } + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, block_node(tree)->body, offset+2); break; case NODE_AND: printf("NODE_AND:\n"); - mrb_parser_dump(mrb, tree->car, offset+1); - mrb_parser_dump(mrb, tree->cdr, offset+1); + dump_node(mrb, and_node(tree)->left, offset+1); + dump_node(mrb, and_node(tree)->right, offset+1); break; case NODE_OR: printf("NODE_OR:\n"); - mrb_parser_dump(mrb, tree->car, offset+1); - mrb_parser_dump(mrb, tree->cdr, offset+1); + dump_node(mrb, or_node(tree)->left, offset+1); + dump_node(mrb, or_node(tree)->right, offset+1); break; - case NODE_CASE: - printf("NODE_CASE:\n"); - if (tree->car) { - mrb_parser_dump(mrb, tree->car, offset+1); - } - tree = tree->cdr; - while (tree) { - dump_prefix(tree, offset+1); - printf("case:\n"); - dump_recur(mrb, tree->car->car, offset+2); - dump_prefix(tree, offset+1); - printf("body:\n"); - mrb_parser_dump(mrb, tree->car->cdr, offset+2); - tree = tree->cdr; + case NODE_CASE: + printf("NODE_CASE:\n"); + if (case_node(tree)->value) { + dump_prefix(offset+1, lineno); + printf("value:\n"); + dump_node(mrb, case_node(tree)->value, offset+2); + } + if (case_node(tree)->body) { + node *when_node = case_node(tree)->body; + while (when_node) { + dump_prefix(offset+1, lineno); + printf("when:\n"); + node *when_clause = when_node->car; + if (when_clause && when_clause->car) { + dump_prefix(offset+2, lineno); + printf("cond:\n"); + dump_recur(mrb, when_clause->car, offset+3); + } + if (when_clause && when_clause->cdr) { + dump_prefix(offset+2, lineno); + printf("body:\n"); + dump_node(mrb, when_clause->cdr, offset+3); + } + when_node = when_node->cdr; + } } break; case NODE_WHILE: printf("NODE_WHILE:\n"); - dump_prefix(tree, offset+1); - printf("cond:\n"); - mrb_parser_dump(mrb, tree->car, offset+2); - dump_prefix(tree, offset+1); - printf("body:\n"); - mrb_parser_dump(mrb, tree->cdr, offset+2); - break; - + goto dump_loop_node; case NODE_UNTIL: printf("NODE_UNTIL:\n"); - dump_prefix(tree, offset+1); + goto dump_loop_node; + case NODE_WHILE_MOD: + printf("NODE_WHILE_MOD:\n"); + goto dump_loop_node; + case NODE_UNTIL_MOD: + printf("NODE_UNTIL_MOD:\n"); + + dump_loop_node: + dump_prefix(offset+1, lineno); printf("cond:\n"); - mrb_parser_dump(mrb, tree->car, offset+2); - dump_prefix(tree, offset+1); + dump_node(mrb, while_node(tree)->condition, offset+2); + dump_prefix(offset+1, lineno); printf("body:\n"); - mrb_parser_dump(mrb, tree->cdr, offset+2); + dump_node(mrb, while_node(tree)->body, offset+2); break; case NODE_FOR: printf("NODE_FOR:\n"); - dump_prefix(tree, offset+1); - printf("var:\n"); - { - node *n2 = tree->car; - - if (n2->car) { - dump_prefix(n2, offset+2); - printf("pre:\n"); - dump_recur(mrb, n2->car, offset+3); - } - n2 = n2->cdr; - if (n2) { - if (n2->car) { - dump_prefix(n2, offset+2); - printf("rest:\n"); - mrb_parser_dump(mrb, n2->car, offset+3); - } - n2 = n2->cdr; - if (n2) { - if (n2->car) { - dump_prefix(n2, offset+2); - printf("post:\n"); - dump_recur(mrb, n2->car, offset+3); - } - } - } - } - tree = tree->cdr; - dump_prefix(tree, offset+1); - printf("in:\n"); - mrb_parser_dump(mrb, tree->car, offset+2); - tree = tree->cdr; - dump_prefix(tree, offset+1); - printf("do:\n"); - mrb_parser_dump(mrb, tree->car, offset+2); - break; - - case NODE_SCOPE: - printf("NODE_SCOPE:\n"); - { - node *n2 = tree->car; - mrb_bool first_lval = TRUE; - - if (n2 && (n2->car || n2->cdr)) { - dump_prefix(n2, offset+1); - printf("local variables:\n"); - dump_prefix(n2, offset+2); - while (n2) { - if (n2->car) { - if (!first_lval) printf(", "); - printf("%s", mrb_sym_name(mrb, sym(n2->car))); - first_lval = FALSE; + if (for_node(tree)->var) { + dump_prefix(offset+1, lineno); + printf("var:\n"); + /* FOR_NODE_VAR structure: + * var_list->car: cons-list of pre-splat variables + * var_list->cdr->car: splat varnode (not a cons-list) + * var_list->cdr->cdr->car: cons-list of post-splat variables */ + node *var_list = for_node(tree)->var; + if (var_list) { + dump_recur(mrb, var_list->car, offset+2); + if (var_list && var_list->cdr) { + /* Second element is a varnode, not a cons-list */ + dump_prefix(offset+1, lineno); + printf("splat var:\n"); + dump_node(mrb, var_list->cdr->car, offset+2); + if (var_list->cdr->cdr) { + /* Third element is a cons-list of post-splat variables */ + dump_prefix(offset+1, lineno); + printf("post var:\n"); + dump_recur(mrb, var_list->cdr->cdr->car, offset+2); } - n2 = n2->cdr; } - printf("\n"); } } - tree = tree->cdr; - offset++; - goto again; - - case NODE_FCALL: - case NODE_CALL: - case NODE_SCALL: - switch (nodetype) { - case NODE_FCALL: - printf("NODE_FCALL:\n"); break; - case NODE_CALL: - printf("NODE_CALL(.):\n"); break; - case NODE_SCALL: - printf("NODE_SCALL(&.):\n"); break; - default: - break; + if (for_node(tree)->iterable) { + dump_prefix(offset+1, lineno); + printf("iterable:\n"); + dump_node(mrb, for_node(tree)->iterable, offset+2); } - mrb_parser_dump(mrb, tree->car, offset+1); - dump_prefix(tree, offset+1); - printf("method='%s' (%d)\n", - mrb_sym_dump(mrb, sym(tree->cdr->car)), - intn(tree->cdr->car)); - tree = tree->cdr->cdr->car; - if (tree) { - dump_prefix(tree, offset+1); - printf("args:\n"); - dump_recur(mrb, tree->car, offset+2); - if (tree->cdr) { - if (tree->cdr->car) { - dump_prefix(tree, offset+1); - printf("kwargs:\n"); - mrb_parser_dump(mrb, tree->cdr->car, offset+2); - } - if (tree->cdr->cdr) { - dump_prefix(tree, offset+1); - printf("block:\n"); - mrb_parser_dump(mrb, tree->cdr->cdr, offset+2); - } - } + if (for_node(tree)->body) { + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, for_node(tree)->body, offset+2); } break; case NODE_DOT2: printf("NODE_DOT2:\n"); - mrb_parser_dump(mrb, tree->car, offset+1); - mrb_parser_dump(mrb, tree->cdr, offset+1); + { + if (dot2_node(tree)->left) { + dump_prefix(offset+1, lineno); + printf("left:\n"); + dump_node(mrb, dot2_node(tree)->left, offset+2); + } + if (dot2_node(tree)->right) { + dump_prefix(offset+1, lineno); + printf("right:\n"); + dump_node(mrb, dot2_node(tree)->right, offset+2); + } + } break; case NODE_DOT3: printf("NODE_DOT3:\n"); - mrb_parser_dump(mrb, tree->car, offset+1); - mrb_parser_dump(mrb, tree->cdr, offset+1); + { + if (dot3_node(tree)->left) { + dump_prefix(offset+1, lineno); + printf("left:\n"); + dump_node(mrb, dot3_node(tree)->left, offset+2); + } + if (dot3_node(tree)->right) { + dump_prefix(offset+1, lineno); + printf("right:\n"); + dump_node(mrb, dot3_node(tree)->right, offset+2); + } + } break; case NODE_COLON2: printf("NODE_COLON2:\n"); - mrb_parser_dump(mrb, tree->car, offset+1); - dump_prefix(tree, offset+1); - printf("::%s\n", mrb_sym_name(mrb, sym(tree->cdr))); + if (colon2_node(tree)->base) { + dump_prefix(offset+1, lineno); + printf("base:\n"); + dump_node(mrb, colon2_node(tree)->base, offset+2); + } + dump_prefix(offset+1, lineno); + printf("name: %s\n", mrb_sym_name(mrb, colon2_node(tree)->name)); break; case NODE_COLON3: - printf("NODE_COLON3: ::%s\n", mrb_sym_name(mrb, sym(tree))); - break; - - case NODE_ARRAY: - printf("NODE_ARRAY:\n"); - dump_recur(mrb, tree, offset+1); + printf("NODE_COLON3: ::%s\n", mrb_sym_name(mrb, colon3_node(tree)->name)); break; case NODE_HASH: printf("NODE_HASH:\n"); - while (tree) { - dump_prefix(tree, offset+1); - printf("key:\n"); - mrb_parser_dump(mrb, tree->car->car, offset+2); - dump_prefix(tree, offset+1); - printf("value:\n"); - mrb_parser_dump(mrb, tree->car->cdr, offset+2); - tree = tree->cdr; - } - break; - - case NODE_KW_HASH: - printf("NODE_KW_HASH:\n"); - while (tree) { - dump_prefix(tree, offset+1); - printf("key:\n"); - mrb_parser_dump(mrb, tree->car->car, offset+2); - dump_prefix(tree, offset+1); - printf("value:\n"); - mrb_parser_dump(mrb, tree->car->cdr, offset+2); - tree = tree->cdr; - } - break; - - case NODE_SPLAT: - printf("NODE_SPLAT:\n"); - mrb_parser_dump(mrb, tree, offset+1); - break; - - case NODE_ASGN: - printf("NODE_ASGN:\n"); - dump_prefix(tree, offset+1); - printf("lhs:\n"); - mrb_parser_dump(mrb, tree->car, offset+2); - dump_prefix(tree, offset+1); - printf("rhs:\n"); - mrb_parser_dump(mrb, tree->cdr, offset+2); - break; - - case NODE_MASGN: - printf("NODE_MASGN:\n"); - dump_prefix(tree, offset+1); - printf("mlhs:\n"); { - node *n2 = tree->car; - - if (n2->car) { - dump_prefix(tree, offset+2); - printf("pre:\n"); - dump_recur(mrb, n2->car, offset+3); - } - n2 = n2->cdr; - if (n2) { - if (n2->car) { - dump_prefix(n2, offset+2); - printf("rest:\n"); - if (n2->car == nint(-1)) { - dump_prefix(n2, offset+2); - printf("(empty)\n"); - } - else { - mrb_parser_dump(mrb, n2->car, offset+3); - } + node *pairs = hash_node(tree)->pairs; + while (pairs) { + dump_prefix(offset+1, lineno); + printf("key:\n"); + if (node_to_sym(pairs->car->car) == MRB_OPSYM(pow)) { + dump_prefix(offset+2, lineno); + printf("**\n"); } - n2 = n2->cdr; - if (n2 && n2->car) { - dump_prefix(n2, offset+2); - printf("post:\n"); - dump_recur(mrb, n2->car, offset+3); + else { + dump_node(mrb, pairs->car->car, offset+2); } + dump_prefix(offset+1, lineno); + printf("value:\n"); + dump_node(mrb, pairs->car->cdr, offset+2); + pairs = pairs->cdr; } } - dump_prefix(tree, offset+1); - printf("rhs:\n"); - mrb_parser_dump(mrb, tree->cdr, offset+2); + break; + + case NODE_SPLAT: + printf("NODE_SPLAT:\n"); + dump_node(mrb, splat_node(tree)->value, offset+1); break; case NODE_OP_ASGN: printf("NODE_OP_ASGN:\n"); - dump_prefix(tree, offset+1); + dump_prefix(offset+1, lineno); printf("lhs:\n"); - mrb_parser_dump(mrb, tree->car, offset+2); - tree = tree->cdr; - dump_prefix(tree, offset+1); - printf("op='%s' (%d)\n", mrb_sym_name(mrb, sym(tree->car)), intn(tree->car)); - tree = tree->cdr; - mrb_parser_dump(mrb, tree->car, offset+1); + dump_node(mrb, op_asgn_node(tree)->lhs, offset+2); + dump_prefix(offset+1, lineno); + printf("op='%s' (%d)\n", mrb_sym_name(mrb, op_asgn_node(tree)->op), (int)op_asgn_node(tree)->op); + dump_node(mrb, op_asgn_node(tree)->rhs, offset+1); break; case NODE_SUPER: printf("NODE_SUPER:\n"); - if (tree) { - dump_prefix(tree, offset+1); - printf("args:\n"); - dump_recur(mrb, tree->car, offset+2); - if (tree->cdr) { - dump_prefix(tree, offset+1); - printf("block:\n"); - mrb_parser_dump(mrb, tree->cdr, offset+2); - } + if (super_node(tree)->args) { + dump_callargs(mrb, super_node(tree)->args, offset, lineno); } break; case NODE_ZSUPER: printf("NODE_ZSUPER:\n"); - if (tree) { - dump_prefix(tree, offset+1); - printf("args:\n"); - dump_recur(mrb, tree->car, offset+2); - if (tree->cdr) { - dump_prefix(tree, offset+1); - printf("block:\n"); - mrb_parser_dump(mrb, tree->cdr, offset+2); - } + if (super_node(tree)->args) { + dump_callargs(mrb, super_node(tree)->args, offset, lineno); } break; - case NODE_RETURN: - printf("NODE_RETURN:\n"); - mrb_parser_dump(mrb, tree, offset+1); - break; - case NODE_YIELD: printf("NODE_YIELD:\n"); - dump_recur(mrb, tree, offset+1); - break; - - case NODE_BREAK: - printf("NODE_BREAK:\n"); - mrb_parser_dump(mrb, tree, offset+1); - break; - - case NODE_NEXT: - printf("NODE_NEXT:\n"); - mrb_parser_dump(mrb, tree, offset+1); + if (yield_node(tree)->args) { + dump_callargs(mrb, yield_node(tree)->args, offset, lineno); + } break; case NODE_REDO: @@ -13716,151 +15841,82 @@ mrb_parser_dump(mrb_state *mrb, node *tree, int offset) printf("NODE_RETRY\n"); break; - case NODE_LVAR: - printf("NODE_LVAR %s\n", mrb_sym_name(mrb, sym(tree))); - break; - - case NODE_GVAR: - printf("NODE_GVAR %s\n", mrb_sym_name(mrb, sym(tree))); - break; - - case NODE_IVAR: - printf("NODE_IVAR %s\n", mrb_sym_name(mrb, sym(tree))); - break; - - case NODE_CVAR: - printf("NODE_CVAR %s\n", mrb_sym_name(mrb, sym(tree))); - break; - - case NODE_NVAR: - printf("NODE_NVAR %d\n", intn(tree)); - break; - - case NODE_CONST: - printf("NODE_CONST %s\n", mrb_sym_name(mrb, sym(tree))); - break; - - case NODE_MATCH: - printf("NODE_MATCH:\n"); - dump_prefix(tree, offset + 1); - printf("lhs:\n"); - mrb_parser_dump(mrb, tree->car, offset + 2); - dump_prefix(tree, offset + 1); - printf("rhs:\n"); - mrb_parser_dump(mrb, tree->cdr, offset + 2); - break; - case NODE_BACK_REF: - printf("NODE_BACK_REF: $%c\n", intn(tree)); + printf("NODE_BACK_REF: $%c\n", node_to_int(tree)); break; case NODE_NTH_REF: - printf("NODE_NTH_REF: $%d\n", intn(tree)); - break; - - case NODE_ARG: - printf("NODE_ARG %s\n", mrb_sym_name(mrb, sym(tree))); + printf("NODE_NTH_REF: $%d\n", node_to_int(tree)); break; case NODE_BLOCK_ARG: printf("NODE_BLOCK_ARG:\n"); - mrb_parser_dump(mrb, tree, offset+1); - break; - - case NODE_INT: - printf("NODE_INT %s base %d\n", (char*)tree->car, intn(tree->cdr->car)); - break; - - case NODE_FLOAT: - printf("NODE_FLOAT %s\n", (char*)tree); - break; - - case NODE_NEGATE: - printf("NODE_NEGATE:\n"); - mrb_parser_dump(mrb, tree, offset+1); - break; - - case NODE_STR: - printf("NODE_STR %s len %d\n", str_dump(mrb, (char*)tree->car, intn(tree->cdr)), intn(tree->cdr)); - break; - - case NODE_DSTR: - printf("NODE_DSTR:\n"); - dump_recur(mrb, tree, offset+1); - break; - - case NODE_XSTR: - printf("NODE_XSTR %s len %d\n", str_dump(mrb, (char*)tree->car, intn(tree->cdr)), intn(tree->cdr)); - break; - - case NODE_DXSTR: - printf("NODE_DXSTR:\n"); - dump_recur(mrb, tree, offset+1); + dump_node(mrb, block_arg_node(tree)->value, offset+1); break; case NODE_REGX: - printf("NODE_REGX /%s/%s\n", (char*)tree->car, (char*)tree->cdr); - break; - - case NODE_DREGX: - printf("NODE_DREGX:\n"); - dump_recur(mrb, tree->car, offset+1); - dump_prefix(tree, offset); - printf("tail: %s\n", (char*)tree->cdr->cdr->car); - if (tree->cdr->cdr->cdr->car) { - dump_prefix(tree, offset); - printf("opt: %s\n", (char*)tree->cdr->cdr->cdr->car); + printf("NODE_REGX:\n"); + if (regx_node(tree)->list) { + dump_str(mrb, regx_node(tree)->list, offset+1, lineno); } - if (tree->cdr->cdr->cdr->cdr) { - dump_prefix(tree, offset); - printf("enc: %s\n", (char*)tree->cdr->cdr->cdr->cdr); + if (regx_node(tree)->flags) { + dump_prefix(offset+1, lineno); + printf("flags: %s\n", regx_node(tree)->flags); + } + if (regx_node(tree)->encoding) { + dump_prefix(offset+1, lineno); + printf("encoding: %s\n", regx_node(tree)->encoding); } - break; - - case NODE_SYM: - printf("NODE_SYM :%s (%d)\n", mrb_sym_dump(mrb, sym(tree)), - intn(tree)); - break; - - case NODE_DSYM: - printf("NODE_DSYM:\n"); - mrb_parser_dump(mrb, tree, offset+1); break; case NODE_WORDS: printf("NODE_WORDS:\n"); - dump_recur(mrb, tree, offset+1); + if (words_node(tree)->args) { + node *list = words_node(tree)->args; + while (list && list->car) { + node *item = list->car; + if (item->car == 0 && item->cdr == 0) { + /* Skip separator (0 . 0) */ + } + else if (item->car && item->cdr) { + /* String item: (len . str) */ + dump_prefix(offset+1, lineno); + int len = node_to_int(item->car); + if (len >= 0 && len < 1000 && item->cdr) { + printf("word: \"%.*s\"\n", len, (char*)item->cdr); + } + } + list = list->cdr; + } + } break; case NODE_SYMBOLS: printf("NODE_SYMBOLS:\n"); - dump_recur(mrb, tree, offset+1); - break; - - case NODE_LITERAL_DELIM: - printf("NODE_LITERAL_DELIM\n"); - break; - - case NODE_SELF: - printf("NODE_SELF\n"); - break; - - case NODE_NIL: - printf("NODE_NIL\n"); - break; - - case NODE_TRUE: - printf("NODE_TRUE\n"); - break; - - case NODE_FALSE: - printf("NODE_FALSE\n"); + if (symbols_node(tree)->args) { + node *list = symbols_node(tree)->args; + while (list && list->car) { + node *item = list->car; + if (item->car == 0 && item->cdr == 0) { + /* Skip separator (0 . 0) */ + } + else if (item->car && item->cdr) { + /* String item: (len . str) */ + dump_prefix(offset+1, lineno); + int len = node_to_int(item->car); + if (len >= 0 && len < 1000 && item->cdr) { + printf("symbol: \"%.*s\"\n", len, (char*)item->cdr); + } + } + list = list->cdr; + } + } break; case NODE_ALIAS: printf("NODE_ALIAS %s %s:\n", - mrb_sym_dump(mrb, sym(tree->car)), - mrb_sym_dump(mrb, sym(tree->cdr))); + mrb_sym_dump(mrb, node_to_sym(tree->car)), + mrb_sym_dump(mrb, node_to_sym(tree->cdr))); break; case NODE_UNDEF: @@ -13868,7 +15924,7 @@ mrb_parser_dump(mrb_state *mrb, node *tree, int offset) { node *t = tree; while (t) { - printf(" %s", mrb_sym_dump(mrb, sym(t->car))); + printf(" %s", mrb_sym_dump(mrb, node_to_sym(t->car))); t = t->cdr; } } @@ -13877,153 +15933,238 @@ mrb_parser_dump(mrb_state *mrb, node *tree, int offset) case NODE_CLASS: printf("NODE_CLASS:\n"); - if (tree->car->car == nint(0)) { - dump_prefix(tree, offset+1); - printf(":%s\n", mrb_sym_name(mrb, sym(tree->car->cdr))); - } - else if (tree->car->car == nint(1)) { - dump_prefix(tree, offset+1); - printf("::%s\n", mrb_sym_name(mrb, sym(tree->car->cdr))); - } - else { - mrb_parser_dump(mrb, tree->car->car, offset+1); - dump_prefix(tree, offset+1); - printf("::%s\n", mrb_sym_name(mrb, sym(tree->car->cdr))); + if (class_node(tree)->name) { + dump_cpath(mrb, module_node(tree)->name, offset+1, lineno); } - if (tree->cdr->car) { - dump_prefix(tree, offset+1); + if (class_node(tree)->superclass) { + dump_prefix(offset+1, lineno); printf("super:\n"); - mrb_parser_dump(mrb, tree->cdr->car, offset+2); + dump_node(mrb, class_node(tree)->superclass, offset+2); + } + if (class_node(tree)->body) { + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, class_node(tree)->body->cdr, offset+2); } - dump_prefix(tree, offset+1); - printf("body:\n"); - mrb_parser_dump(mrb, tree->cdr->cdr->car->cdr, offset+2); break; case NODE_MODULE: printf("NODE_MODULE:\n"); - if (tree->car->car == nint(0)) { - dump_prefix(tree, offset+1); - printf(":%s\n", mrb_sym_name(mrb, sym(tree->car->cdr))); - } - else if (tree->car->car == nint(1)) { - dump_prefix(tree, offset+1); - printf("::%s\n", mrb_sym_name(mrb, sym(tree->car->cdr))); + if (module_node(tree)->name) { + dump_cpath(mrb, module_node(tree)->name, offset+1, lineno); } - else { - mrb_parser_dump(mrb, tree->car->car, offset+1); - dump_prefix(tree, offset+1); - printf("::%s\n", mrb_sym_name(mrb, sym(tree->car->cdr))); + if (module_node(tree)->body) { + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, module_node(tree)->body->cdr, offset+2); } - dump_prefix(tree, offset+1); - printf("body:\n"); - mrb_parser_dump(mrb, tree->cdr->car->cdr, offset+2); break; case NODE_SCLASS: printf("NODE_SCLASS:\n"); - mrb_parser_dump(mrb, tree->car, offset+1); - dump_prefix(tree, offset+1); - printf("body:\n"); - mrb_parser_dump(mrb, tree->cdr->car->cdr, offset+2); - break; - - case NODE_DEF: - printf("NODE_DEF:\n"); - dump_prefix(tree, offset+1); - printf("%s\n", mrb_sym_dump(mrb, sym(tree->car))); - tree = tree->cdr; - { - node *n2 = tree->car; - mrb_bool first_lval = TRUE; - - if (n2 && (n2->car || n2->cdr)) { - dump_prefix(n2, offset+1); - printf("local variables:\n"); - dump_prefix(n2, offset+2); - while (n2) { - if (n2->car) { - if (!first_lval) printf(", "); - printf("%s", mrb_sym_name(mrb, sym(n2->car))); - first_lval = FALSE; - } - n2 = n2->cdr; - } - printf("\n"); - } + if (sclass_node(tree)->obj) { + dump_prefix(offset+1, lineno); + printf("obj:\n"); + dump_node(mrb, sclass_node(tree)->obj, offset+2); } - tree = tree->cdr; - if (tree->car) { - dump_args(mrb, tree->car, offset); + if (sclass_node(tree)->body) { + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, sclass_node(tree)->body->cdr, offset+2); } - mrb_parser_dump(mrb, tree->cdr->car, offset+1); break; case NODE_SDEF: - printf("NODE_SDEF:\n"); - mrb_parser_dump(mrb, tree->car, offset+1); - tree = tree->cdr; - dump_prefix(tree, offset+1); - printf(":%s\n", mrb_sym_dump(mrb, sym(tree->car))); - tree = tree->cdr->cdr; - if (tree->car) { - dump_args(mrb, tree->car, offset+1); + printf("NODE_SDEF: %s\n", mrb_sym_dump(mrb, def_node(tree)->name)); + if (sdef_node(tree)->obj) { + dump_prefix(offset+1, lineno); + printf("recv:\n"); + dump_node(mrb, sdef_node(tree)->obj, offset+2); + } + if (sdef_node(tree)->args) { + dump_args(mrb, sdef_node(tree)->args, offset+1, lineno); + } + if (sdef_node(tree)->locals) { + dump_locals(mrb, sdef_node(tree)->locals, offset+1, lineno); + } + if (sdef_node(tree)->body) { + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, sdef_node(tree)->body, offset+2); } - tree = tree->cdr; - mrb_parser_dump(mrb, tree->car, offset+1); break; case NODE_POSTEXE: printf("NODE_POSTEXE:\n"); - mrb_parser_dump(mrb, tree, offset+1); + dump_node(mrb, tree, offset+1); break; case NODE_HEREDOC: - printf("NODE_HEREDOC (<<%s):\n", ((parser_heredoc_info*)tree)->term); - dump_recur(mrb, ((parser_heredoc_info*)tree)->doc, offset+1); + printf("NODE_HEREDOC:\n"); + if (heredoc_node(tree)->info.term) { + dump_prefix(offset+1, lineno); + printf("terminator: \"%s\"\n", heredoc_node(tree)->info.term); + } + if (heredoc_node(tree)->info.doc) { + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_str(mrb, heredoc_node(tree)->info.doc, offset+2, lineno); + } + if (heredoc_node(tree)->info.allow_indent) { + dump_prefix(offset+1, lineno); + printf("allow_indent: true\n"); + } + if (heredoc_node(tree)->info.remove_indent) { + dump_prefix(offset+1, lineno); + printf("remove_indent: true\n"); + } break; - case NODE_ARGS_TAIL: - printf("NODE_ARGS_TAIL:\n"); - { - node *kws = tree->car; - - while (kws) { - mrb_parser_dump(mrb, kws->car, offset+1); - kws = kws->cdr; + case NODE_CASE_MATCH: + printf("NODE_CASE_MATCH:\n"); + if (case_match_node(tree)->value) { + dump_prefix(offset+1, lineno); + printf("value:\n"); + dump_node(mrb, case_match_node(tree)->value, offset+2); + } + if (case_match_node(tree)->in_clauses) { + node *in_clause = case_match_node(tree)->in_clauses; + while (in_clause) { + dump_node(mrb, in_clause->car, offset+1); + in_clause = in_clause->cdr; } } - tree = tree->cdr; - if (tree->car) { - mrb_assert(intn(tree->car->car) == NODE_KW_REST_ARGS); - mrb_parser_dump(mrb, tree->car, offset+1); + break; + + case NODE_IN: + printf("NODE_IN:\n"); + if (in_node(tree)->pattern) { + dump_prefix(offset+1, lineno); + printf("pattern:\n"); + dump_node(mrb, in_node(tree)->pattern, offset+2); } - tree = tree->cdr; - if (tree->car) { - dump_prefix(tree, offset+1); - printf("block='%s'\n", mrb_sym_name(mrb, sym(tree->car))); + if (in_node(tree)->guard) { + dump_prefix(offset+1, lineno); + printf("guard (%s):\n", in_node(tree)->guard_is_unless ? "unless" : "if"); + dump_node(mrb, in_node(tree)->guard, offset+2); + } + if (in_node(tree)->body) { + dump_prefix(offset+1, lineno); + printf("body:\n"); + dump_node(mrb, in_node(tree)->body, offset+2); } break; - case NODE_KW_ARG: - printf("NODE_KW_ARG %s:\n", mrb_sym_name(mrb, sym(tree->car))); - mrb_parser_dump(mrb, tree->cdr->car, offset + 1); + case NODE_PAT_VALUE: + printf("NODE_PAT_VALUE:\n"); + if (pat_value_node(tree)->value) { + dump_node(mrb, pat_value_node(tree)->value, offset+1); + } break; - case NODE_KW_REST_ARGS: - if (tree) - printf("NODE_KW_REST_ARGS %s\n", mrb_sym_name(mrb, sym(tree))); - else - printf("NODE_KW_REST_ARGS\n"); + case NODE_PAT_VAR: + if (pat_var_node(tree)->name) { + printf("NODE_PAT_VAR: %s\n", mrb_sym_dump(mrb, pat_var_node(tree)->name)); + } + else { + printf("NODE_PAT_VAR: _ (wildcard)\n"); + } + break; + + case NODE_PAT_PIN: + printf("NODE_PAT_PIN: ^%s\n", mrb_sym_dump(mrb, pat_pin_node(tree)->name)); + break; + + case NODE_PAT_AS: + printf("NODE_PAT_AS: => %s\n", mrb_sym_dump(mrb, pat_as_node(tree)->name)); + if (pat_as_node(tree)->pattern) { + dump_prefix(offset+1, lineno); + printf("pattern:\n"); + dump_node(mrb, pat_as_node(tree)->pattern, offset+2); + } + break; + + case NODE_PAT_ALT: + printf("NODE_PAT_ALT:\n"); + if (pat_alt_node(tree)->left) { + dump_prefix(offset+1, lineno); + printf("left:\n"); + dump_node(mrb, pat_alt_node(tree)->left, offset+2); + } + if (pat_alt_node(tree)->right) { + dump_prefix(offset+1, lineno); + printf("right:\n"); + dump_node(mrb, pat_alt_node(tree)->right, offset+2); + } + break; + + case NODE_PAT_ARRAY: + printf("NODE_PAT_ARRAY:\n"); + if (pat_array_node(tree)->pre) { + dump_prefix(offset+1, lineno); + printf("pre:\n"); + dump_recur(mrb, pat_array_node(tree)->pre, offset+2); + } + if (pat_array_node(tree)->rest) { + dump_prefix(offset+1, lineno); + if (pat_array_node(tree)->rest == (node*)-1) { + printf("rest: * (anonymous)\n"); + } + else { + printf("rest:\n"); + dump_node(mrb, pat_array_node(tree)->rest, offset+2); + } + } + if (pat_array_node(tree)->post) { + dump_prefix(offset+1, lineno); + printf("post:\n"); + dump_recur(mrb, pat_array_node(tree)->post, offset+2); + } + break; + + case NODE_PAT_HASH: + printf("NODE_PAT_HASH:\n"); + if (pat_hash_node(tree)->pairs) { + dump_prefix(offset+1, lineno); + printf("pairs:\n"); + dump_recur(mrb, pat_hash_node(tree)->pairs, offset+2); + } + if (pat_hash_node(tree)->rest) { + dump_prefix(offset+1, lineno); + if (pat_hash_node(tree)->rest == (node*)-1) { + printf("rest: **nil\n"); + } + else { + printf("rest:\n"); + dump_node(mrb, pat_hash_node(tree)->rest, offset+2); + } + } + break; + + case NODE_MATCH_PAT: + printf("NODE_MATCH_PAT%s:\n", match_pat_node(tree)->raise_on_fail ? " (=>)" : " (in)"); + dump_prefix(offset+1, lineno); + printf("value:\n"); + dump_node(mrb, match_pat_node(tree)->value, offset+2); + dump_prefix(offset+1, lineno); + printf("pattern:\n"); + dump_node(mrb, match_pat_node(tree)->pattern, offset+2); break; default: - printf("node type: %d (0x%x)\n", nodetype, (unsigned)nodetype); + /* Fallback: unknown node type - skip like codegen.c does */ + printf("unknown node type %d (0x%x)\n", nodetype, (unsigned)nodetype); break; } #endif } +void +mrb_parser_dump(mrb_state *mrb, node *tree, int offset) +{ + dump_node(mrb, tree, offset); +} + typedef mrb_bool mrb_parser_foreach_top_variable_func(mrb_state *mrb, mrb_sym sym, void *user); void mrb_parser_foreach_top_variable(mrb_state *mrb, struct mrb_parser_state *p, mrb_parser_foreach_top_variable_func *func, void *user); @@ -14031,11 +16172,15 @@ void mrb_parser_foreach_top_variable(mrb_state *mrb, struct mrb_parser_state *p, mrb_parser_foreach_top_variable_func *func, void *user) { const mrb_ast_node *n = p->tree; - if ((intptr_t)n->car == NODE_SCOPE) { - n = n->cdr->car; + if (node_type_p((node*)n, NODE_SCOPE)) { + /* Extract locals from variable-sized NODE_SCOPE */ + struct mrb_ast_scope_node *scope = scope_node(n); + n = scope->locals; for (; n; n = n->cdr) { - mrb_sym sym = sym(n->car); - if (sym && !func(mrb, sym, user)) break; + mrb_sym sym = node_to_sym(n->car); + if (sym != 0) { + if (!func(mrb, sym, user)) break; + } } } } diff --git a/mrbgems/mruby-compiler/mrbgem.rake b/mrbgems/mruby-compiler/mrbgem.rake index 0f49edea3a..bd83c8bc4e 100644 --- a/mrbgems/mruby-compiler/mrbgem.rake +++ b/mrbgems/mruby-compiler/mrbgem.rake @@ -3,21 +3,23 @@ MRuby::Gem::Specification.new 'mruby-compiler' do |spec| spec.author = 'mruby developers' spec.summary = 'mruby compiler library' - objs = %w[codegen y.tab].map do |name| - src = "#{dir}/core/#{name}.c" - if build.cxx_exception_enabled? - build.compile_as_cxx(src) - else - objfile(src.pathmap("#{build_dir}/core/%n")) + spec.build_settings do + objs = %w[codegen y.tab].map do |name| + src = "#{dir}/core/#{name}.c" + if build.cxx_exception_enabled? + build.compile_as_cxx(src) + else + objfile(src.pathmap("#{build_dir}/core/%n")) + end end + build.libmruby_core_objs << objs end - build.libmruby_core_objs << objs end -if MRuby::Build.current.name == "host" - dir = __dir__ - lex_def = "#{dir}/core/lex.def" +dir = __dir__ +lex_def = "#{dir}/core/lex.def" +unless Rake::Task.task_defined?(lex_def) # Parser file "#{dir}/core/y.tab.c" => ["#{dir}/core/parse.y", lex_def] do |t| MRuby.targets["host"].yacc.run t.name, t.prerequisites.first diff --git a/mrbgems/mruby-complex/README.md b/mrbgems/mruby-complex/README.md new file mode 100644 index 0000000000..da29ce66ec --- /dev/null +++ b/mrbgems/mruby-complex/README.md @@ -0,0 +1,55 @@ +# mruby-complex + +mruby-complex is an mrbgem that provides a `Complex` class for mruby, allowing for the representation and manipulation of complex numbers. It is designed to be compatible with the Complex class in standard Ruby. + +## Functionality + +The `Complex` class provided by this mrbgem supports: + +- Creation of complex numbers from real and imaginary parts, or using the `i` suffix for numbers. +- Basic arithmetic operations: addition (`+`), subtraction (`-`), multiplication (`*`), division (`/`). +- Methods to access the real and imaginary parts (`real`, `imaginary` or `imag`). +- Calculation of absolute value (`abs`, `magnitude`), argument/angle (`arg`, `angle`, `phase`), and square of the absolute value (`abs2`). +- Complex conjugate (`conjugate`, `conj`). +- Conversion to polar coordinates (`polar`). +- Conversion to other numeric types where appropriate (`to_f`, `to_i`, `to_r`, `to_c`). +- Inspection and string representation (`inspect`, `to_s`). + +## Usage Examples + +Here are some basic examples of how to use the `Complex` class: + +```ruby +# Creating complex numbers +c1 = Complex(1, 2) # => (1+2i) +c2 = 3 + 4i # => (3+4i) +c3 = Complex.polar(5, Math::PI/2) # => (0.0+5.0i) (approximately) + +# Arithmetic operations +puts c1 + c2 # => (4+6i) +puts c1 - c2 # => (-2-2i) +puts c1 * c2 # => (-5+10i) +puts c1 / c2 # => (0.44+0.08i) + +# Accessing parts +puts c1.real # => 1 +puts c1.imaginary # => 2 + +# Other methods +puts c2.abs # => 5.0 +puts c2.arg # => 0.9272952180016122 (radians) +puts c2.conjugate # => (3-4i) +puts c1.polar # => [2.23606797749979, 1.1071487177940904] + +# Numerics can be converted to Complex +puts 5.to_c # => (5+0i) +puts 2.3.to_c # => (2.3+0i) + +# The `i` method on numerics creates a complex number with zero real part +puts 5i # => (0+5i) +puts 2.3i # => (0+2.3i) +``` + +## Note + +This mrbgem aims to provide functionality similar to the `Complex` class found in the standard Ruby library. Refer to the Ruby documentation for `Complex` for more detailed information on the behavior of various methods. diff --git a/mrbgems/mruby-complex/mrblib/complex.rb b/mrbgems/mruby-complex/mrblib/complex.rb index 4c36179cb8..08173275b9 100644 --- a/mrbgems/mruby-complex/mrblib/complex.rb +++ b/mrbgems/mruby-complex/mrblib/complex.rb @@ -1,65 +1,222 @@ class Complex < Numeric + # + # call-seq: + # Complex.polar(abs [, arg]) -> complex + # + # Returns a complex number in terms of its polar coordinates. + # abs is the absolute value (magnitude) and arg is the argument (angle). + # + # Complex.polar(3, 0) #=> (3+0i) + # Complex.polar(3, Math::PI/2) #=> (1.836909530733566e-16+3.0i) + # Complex.polar(3, Math::PI) #=> (-3.0+3.673819061467132e-16i) + # def self.polar(abs, arg = 0) Complex(abs * Math.cos(arg), abs * Math.sin(arg)) end + # + # call-seq: + # cmp.inspect -> string + # + # Returns the value as a string for inspection. + # + # Complex(2).inspect #=> "(2+0i)" + # Complex(-8, 6).inspect #=> "(-8+6i)" + # Complex(1, 2).inspect #=> "(1+2i)" + # def inspect "(#{to_s})" end + # + # call-seq: + # cmp.to_s -> string + # + # Returns the value as a string. + # + # Complex(2).to_s #=> "2+0i" + # Complex(-8, 6).to_s #=> "-8+6i" + # Complex(1, -2).to_s #=> "1-2i" + # def to_s "#{real}#{'+' unless imaginary < 0}#{imaginary}#{'*' unless imaginary.finite?}i" end + # + # call-seq: + # +cmp -> cmp + # + # Returns self. + # + # +Complex(1, 2) #=> (1+2i) + # def +@ self end + # + # call-seq: + # -cmp -> complex + # + # Returns the negation of self. + # + # -Complex(1, 2) #=> (-1-2i) + # -Complex(-1, 2) #=> (1-2i) + # def -@ Complex(-real, -imaginary) end + # + # call-seq: + # cmp <=> numeric -> -1, 0, +1, or nil + # + # Returns -1, 0, or +1 depending on whether cmp is less than, equal to, + # or greater than numeric. This is the basis for the tests in the Comparable module. + # Returns nil if the two values are incomparable. + # + # Complex(2, 3) <=> Complex(2, 3) #=> 0 + # Complex(5) <=> 5 #=> 0 + # Complex(2, 3) <=> 1 #=> 1 + # + def <=>(other) + return nil unless other.kind_of?(Numeric) + self.to_f <=> other.to_f + rescue + nil + end + + # + # call-seq: + # cmp.abs -> real + # cmp.magnitude -> real + # + # Returns the absolute part of its polar form. + # + # Complex(-1).abs #=> 1.0 + # Complex(3.0, -4.0).abs #=> 5.0 + # def abs - Math.hypot imaginary, real + Math.hypot(imaginary, real) end alias_method :magnitude, :abs + # + # call-seq: + # cmp.abs2 -> real + # + # Returns square of the absolute value. + # + # Complex(-1).abs2 #=> 1 + # Complex(3.0, -4.0).abs2 #=> 25.0 + # def abs2 real * real + imaginary * imaginary end + # + # call-seq: + # cmp.arg -> float + # cmp.angle -> float + # cmp.phase -> float + # + # Returns the angle part of its polar form. + # + # Complex.polar(3, Math::PI/2).arg #=> 1.5707963267948966 + # def arg - Math.atan2 imaginary, real + Math.atan2(imaginary, real) end alias_method :angle, :arg alias_method :phase, :arg + # + # call-seq: + # cmp.conjugate -> complex + # cmp.conj -> complex + # + # Returns the complex conjugate. + # + # Complex(1, 2).conjugate #=> (1-2i) + # def conjugate Complex(real, -imaginary) end alias_method :conj, :conjugate + # + # call-seq: + # cmp.fdiv(numeric) -> complex + # + # Performs division as each part is a float, even if the parts are not floats. + # + # Complex(11, 22).fdiv(3) #=> (3.6666666666666665+7.333333333333333i) + # def fdiv(numeric) Complex(real / numeric, imaginary / numeric) end + # + # call-seq: + # cmp.polar -> array + # + # Returns an array; [cmp.abs, cmp.arg]. + # + # Complex(1, 2).polar #=> [2.23606797749979, 1.1071487177940904] + # def polar [abs, arg] end + # + # call-seq: + # cmp.real? -> false + # + # Returns false. + # + # Complex(1).real? #=> false + # def real? false end + # + # call-seq: + # cmp.rectangular -> array + # cmp.rect -> array + # + # Returns an array; [cmp.real, cmp.imag]. + # + # Complex(1, 2).rectangular #=> [1, 2] + # def rectangular [real, imaginary] end alias_method :rect, :rectangular + # + # call-seq: + # cmp.to_c -> cmp + # + # Returns self. + # + # Complex(2).to_c #=> (2+0i) + # Complex(-8, 6).to_c #=> (-8+6i) + # def to_c self end + # + # call-seq: + # cmp.to_r -> rational + # + # Returns the value as a rational if possible (the imaginary part should be exactly zero). + # + # Complex(1, 0).to_r #=> (1/1) + # Complex(1, 0.0).to_r #=> (1/1) + # Complex(1, 2).to_r #=> RangeError + # def to_r raise RangeError.new "can't convert #{to_s} into Rational" unless imaginary.zero? Rational(real, 1) @@ -68,9 +225,35 @@ def to_r alias_method :imag, :imaginary Numeric.class_eval do + # + # call-seq: + # num.i -> complex + # + # Returns the Complex object created from this number and i (0+num*i). + # + # -42.i #=> (0-42i) + # 2.0.i #=> (0+2.0i) + # def i Complex(0, self) end end undef i end + +class Numeric + # + # call-seq: + # num.to_c -> complex + # + # Returns the value as a complex. + # + # 1.to_c #=> (1+0i) + # -1.to_c #=> (-1+0i) + # 1.0.to_c #=> (1.0+0i) + # 3.14159.to_c #=> (3.14159+0i) + # + def to_c + Complex(self, 0) + end +end diff --git a/mrbgems/mruby-complex/src/complex.c b/mrbgems/mruby-complex/src/complex.c index 63b28af3f3..4eba1fe168 100644 --- a/mrbgems/mruby-complex/src/complex.c +++ b/mrbgems/mruby-complex/src/complex.c @@ -2,7 +2,7 @@ #include #include #include -#include +#include #ifdef MRB_NO_FLOAT # error Complex conflicts with 'MRB_NO_FLOAT' configuration @@ -78,7 +78,7 @@ mrb_complex_new(mrb_state *mrb, mrb_float real, mrb_float imaginary) struct RBasic *comp = complex_alloc(mrb, c, &p); p->real = real; p->imaginary = imaginary; - MRB_SET_FROZEN_FLAG(comp); + comp->frozen = 1; return mrb_obj_value(comp); } @@ -94,6 +94,15 @@ mrb_complex_copy(mrb_state *mrb, mrb_value x, mrb_value y) p1->imaginary = p2->imaginary; } +/* + * call-seq: + * complex.real -> float + * + * Returns the real part of the complex number. + * + * Complex(3, 4).real #=> 3.0 + * Complex(-1).real #=> -1.0 + */ static mrb_value complex_real(mrb_state *mrb, mrb_value self) { @@ -101,6 +110,16 @@ complex_real(mrb_state *mrb, mrb_value self) return mrb_float_value(mrb, p->real); } +/* + * call-seq: + * complex.imaginary -> float + * complex.imag -> float + * + * Returns the imaginary part of the complex number. + * + * Complex(3, 4).imaginary #=> 4.0 + * Complex(5).imag #=> 0.0 + */ static mrb_value complex_imaginary(mrb_state *mrb, mrb_value self) { @@ -108,6 +127,19 @@ complex_imaginary(mrb_state *mrb, mrb_value self) return mrb_float_value(mrb, p->imaginary); } +/* + * call-seq: + * Complex.rectangular(real, imag = 0) -> complex + * Complex.rect(real, imag = 0) -> complex + * Complex(real, imag = 0) -> complex + * + * Returns a complex number with the given real and imaginary parts. + * The imaginary part defaults to 0 if not specified. + * + * Complex.rectangular(1, 2) #=> (1+2i) + * Complex.rect(3) #=> (3+0i) + * Complex(1, -1) #=> (1-1i) + */ static mrb_value complex_s_rect(mrb_state *mrb, mrb_value self) { @@ -117,6 +149,16 @@ complex_s_rect(mrb_state *mrb, mrb_value self) return complex_new(mrb, real, imaginary); } +/* + * call-seq: + * complex.to_f -> float + * + * Returns the real part of the complex number as a float. + * Raises RangeError if the imaginary part is not zero. + * + * Complex(3, 0).to_f #=> 3.0 + * Complex(3, 4).to_f #=> RangeError: can't convert (3+4i) into Float + */ mrb_value mrb_complex_to_f(mrb_state *mrb, mrb_value self) { @@ -129,6 +171,16 @@ mrb_complex_to_f(mrb_state *mrb, mrb_value self) return mrb_float_value(mrb, p->real); } +/* + * call-seq: + * complex.to_i -> integer + * + * Returns the real part of the complex number as an integer. + * Raises RangeError if the imaginary part is not zero. + * + * Complex(3, 0).to_i #=> 3 + * Complex(3, 4).to_i #=> RangeError: can't convert (3+4i) into Integer + */ mrb_value mrb_complex_to_i(mrb_state *mrb, mrb_value self) { @@ -149,12 +201,6 @@ mrb_complex_to_i(mrb_state *mrb, mrb_value self) return mrb_int_value(mrb, (mrb_int)p->real); } -static mrb_value -complex_to_c(mrb_state *mrb, mrb_value self) -{ - return self; -} - mrb_bool mrb_complex_eq(mrb_state *mrb, mrb_value x, mrb_value y) { @@ -182,6 +228,17 @@ mrb_complex_eq(mrb_state *mrb, mrb_value x, mrb_value y) } } +/* + * call-seq: + * complex == object -> true or false + * + * Returns true if complex equals object. Two complex numbers are equal + * if their real and imaginary parts are equal. + * + * Complex(1, 2) == Complex(1, 2) #=> true + * Complex(1, 2) == Complex(2, 1) #=> false + * Complex(1, 0) == 1 #=> true + */ static mrb_value complex_eq(mrb_state *mrb, mrb_value x) { @@ -189,26 +246,54 @@ complex_eq(mrb_state *mrb, mrb_value x) return mrb_bool_value(mrb_complex_eq(mrb, x, y)); } -mrb_value -mrb_complex_add(mrb_state *mrb, mrb_value x, mrb_value y) +static mrb_value +complex_op(mrb_state *mrb, mrb_value x, mrb_value y, char op) { struct mrb_complex *p1 = complex_ptr(mrb, x); + mrb_float r, i; switch (mrb_type(y)) { - case MRB_TT_COMPLEX: - { - struct mrb_complex *p2 = complex_ptr(mrb, y); - return mrb_complex_new(mrb, p1->real+p2->real, p1->imaginary+p2->imaginary); - } + case MRB_TT_COMPLEX: { + struct mrb_complex *p2 = complex_ptr(mrb, y); + r = p2->real; + i = p2->imaginary; + break; + } + default: { + r = mrb_as_float(mrb, y); + i = 0; + break; + } + } - default: - { - mrb_float z = mrb_as_float(mrb, y); - return mrb_complex_new(mrb, p1->real+z, p1->imaginary); - } + switch (op) { + case '+': + return mrb_complex_new(mrb, p1->real + r, p1->imaginary + i); + case '-': + return mrb_complex_new(mrb, p1->real - r, p1->imaginary - i); + case '*': + return mrb_complex_new(mrb, p1->real * r - p1->imaginary * i, p1->real * i + p1->imaginary * r); } + return mrb_nil_value(); /* should not happen */ } +mrb_value +mrb_complex_add(mrb_state *mrb, mrb_value x, mrb_value y) +{ + return complex_op(mrb, x, y, '+'); +} + +/* + * call-seq: + * complex + numeric -> complex + * + * Returns the sum of complex and numeric. If numeric is a complex number, + * adds both real and imaginary parts. If numeric is real, adds only to + * the real part. + * + * Complex(1, 2) + Complex(3, 4) #=> (4+6i) + * Complex(1, 2) + 3 #=> (4+2i) + */ static mrb_value complex_add(mrb_state *mrb, mrb_value x) { @@ -219,23 +304,20 @@ complex_add(mrb_state *mrb, mrb_value x) mrb_value mrb_complex_sub(mrb_state *mrb, mrb_value x, mrb_value y) { - struct mrb_complex *p1 = complex_ptr(mrb, x); - - switch (mrb_type(y)) { - case MRB_TT_COMPLEX: - { - struct mrb_complex *p2 = complex_ptr(mrb, y); - return mrb_complex_new(mrb, p1->real-p2->real, p1->imaginary-p2->imaginary); - } - - default: - { - mrb_float z = mrb_as_float(mrb, y); - return mrb_complex_new(mrb, p1->real-z, p1->imaginary); - } - } + return complex_op(mrb, x, y, '-'); } +/* + * call-seq: + * complex - numeric -> complex + * + * Returns the difference of complex and numeric. If numeric is a complex number, + * subtracts both real and imaginary parts. If numeric is real, subtracts only + * from the real part. + * + * Complex(5, 6) - Complex(1, 2) #=> (4+4i) + * Complex(5, 6) - 2 #=> (3+6i) + */ static mrb_value complex_sub(mrb_state *mrb, mrb_value x) { @@ -246,24 +328,19 @@ complex_sub(mrb_state *mrb, mrb_value x) mrb_value mrb_complex_mul(mrb_state *mrb, mrb_value x, mrb_value y) { - struct mrb_complex *p1 = complex_ptr(mrb, x); - - switch (mrb_type(y)) { - case MRB_TT_COMPLEX: - { - struct mrb_complex *p2 = complex_ptr(mrb, y); - return mrb_complex_new(mrb, p1->real*p2->real - p1->imaginary*p2->imaginary, - p1->real*p2->imaginary + p2->real*p1->imaginary); - } - - default: - { - mrb_float z = mrb_as_float(mrb, y); - return mrb_complex_new(mrb, p1->real*z, p1->imaginary*z); - } - } + return complex_op(mrb, x, y, '*'); } +/* + * call-seq: + * complex * numeric -> complex + * + * Returns the product of complex and numeric. Uses the standard complex + * multiplication formula: (a+bi) * (c+di) = (ac-bd) + (ad+bc)i + * + * Complex(1, 2) * Complex(3, 4) #=> (-5+10i) + * Complex(1, 2) * 3 #=> (3+6i) + */ static mrb_value complex_mul(mrb_state *mrb, mrb_value x) { @@ -284,12 +361,15 @@ add_pair(struct float_pair *s, struct float_pair const *a, { if (b->s == 0.0F) { *s = *a; - } else if (a->s == 0.0F) { + } + else if (a->s == 0.0F) { *s = *b; - } else if (a->x >= b->x) { + } + else if (a->x >= b->x) { s->s = a->s + F(ldexp)(b->s, b->x - a->x); s->x = a->x; - } else { + } + else { s->s = F(ldexp)(a->s, a->x - b->x) + b->s; s->x = b->x; } @@ -315,6 +395,7 @@ mrb_value mrb_complex_div(mrb_state *mrb, mrb_value self, mrb_value rhs) { struct mrb_complex *a, *b; + mrb_float r, den; a = complex_ptr(mrb, self); if (mrb_type(rhs) != MRB_TT_COMPLEX) { @@ -322,49 +403,76 @@ mrb_complex_div(mrb_state *mrb, mrb_value self, mrb_value rhs) mrb_int_zerodiv(mrb); } mrb_float f = mrb_as_float(mrb, rhs); + if (f == 0.0) { + mrb_int_zerodiv(mrb); + } return complex_new(mrb, mrb_div_float(a->real, f), mrb_div_float(a->imaginary, f)); } - struct float_pair ar, ai, br, bi; - struct float_pair br2, bi2; - struct float_pair div; - struct float_pair ar_br, ai_bi; - struct float_pair ai_br, ar_bi; - struct float_pair zr, zi; - b = complex_ptr(mrb, rhs); + if (b->real == 0 && b->imaginary == 0) { + mrb_int_zerodiv(mrb); + } - /* Split floating-point components into significand and exponent */ - ar.s = F(frexp)(a->real, &ar.x); - ai.s = F(frexp)(a->imaginary, &ai.x); - br.s = F(frexp)(b->real, &br.x); - bi.s = F(frexp)(b->imaginary, &bi.x); - - /* Perform arithmetic on (significand, exponent) pairs to produce - the result: */ - - /* the divisor */ - mul_pair(&br2, &br, &br); - mul_pair(&bi2, &bi, &bi); - add_pair(&div, &br2, &bi2); - - /* real component */ - mul_pair(&ar_br, &ar, &br); - mul_pair(&ai_bi, &ai, &bi); - add_pair(&zr, &ar_br, &ai_bi); - div_pair(&zr, &zr, &div); - - /* imaginary component */ - mul_pair(&ai_br, &ai, &br); - mul_pair(&ar_bi, &ar, &bi); - ar_bi.s = -ar_bi.s; - add_pair(&zi, &ai_br, &ar_bi); - div_pair(&zi, &zi, &div); - - /* assemble the result */ - return complex_new(mrb, F(ldexp)(zr.s, zr.x), F(ldexp)(zi.s, zi.x)); + mrb_float br = b->real; + mrb_float bi = b->imaginary; + + if (F(fabs)(br) < DBL_MIN * F(fabs)(bi) && F(fabs)(bi) < DBL_MIN * F(fabs)(br)) { + /* Fallback to frexp/ldexp for extreme values */ + struct float_pair ar_p, ai_p, br_p, bi_p; + struct float_pair br2_p, bi2_p; + struct float_pair div_p; + struct float_pair ar_br_p, ai_bi_p; + struct float_pair ai_br_p, ar_bi_p; + struct float_pair zr_p, zi_p; + + ar_p.s = F(frexp)(a->real, &ar_p.x); + ai_p.s = F(frexp)(a->imaginary, &ai_p.x); + br_p.s = F(frexp)(br, &br_p.x); + bi_p.s = F(frexp)(bi, &bi_p.x); + + mul_pair(&br2_p, &br_p, &br_p); + mul_pair(&bi2_p, &bi_p, &bi_p); + add_pair(&div_p, &br2_p, &bi2_p); + + mul_pair(&ar_br_p, &ar_p, &br_p); + mul_pair(&ai_bi_p, &ai_p, &bi_p); + add_pair(&zr_p, &ar_br_p, &ai_bi_p); + div_pair(&zr_p, &zr_p, &div_p); + + mul_pair(&ai_br_p, &ai_p, &br_p); + mul_pair(&ar_bi_p, &ar_p, &bi_p); + ar_bi_p.s = -ar_bi_p.s; + add_pair(&zi_p, &ai_br_p, &ar_bi_p); + div_pair(&zi_p, &zi_p, &div_p); + + return complex_new(mrb, F(ldexp)(zr_p.s, zr_p.x), F(ldexp)(zi_p.s, zi_p.x)); + } + else { + if (F(fabs)(br) > F(fabs)(bi)) { + r = bi / br; + den = br + r * bi; + return complex_new(mrb, (a->real + a->imaginary * r) / den, (a->imaginary - a->real * r) / den); + } + else { + r = br / bi; + den = bi + r * br; + return complex_new(mrb, (a->real * r + a->imaginary) / den, (a->imaginary * r - a->real) / den); + } + } } +/* + * call-seq: + * complex / numeric -> complex + * complex.quo(numeric) -> complex + * + * Returns the quotient of complex divided by numeric. Uses the standard + * complex division formula by multiplying by the conjugate. + * + * Complex(10, 5) / Complex(2, 1) #=> (5+0i) + * Complex(6, 4) / 2 #=> (3+2i) + */ static mrb_value complex_div(mrb_state *mrb, mrb_value x) { @@ -372,6 +480,15 @@ complex_div(mrb_state *mrb, mrb_value x) return mrb_complex_div(mrb, x, y); } +/* + * call-seq: + * complex.hash -> integer + * + * Returns a hash value for the complex number. Two complex numbers with + * the same real and imaginary parts will have the same hash value. + * + * Complex(1, 2).hash == Complex(1, 2).hash #=> true + */ static mrb_value complex_hash(mrb_state *mrb, mrb_value cpx) { @@ -381,29 +498,96 @@ complex_hash(mrb_state *mrb, mrb_value cpx) return mrb_int_value(mrb, hash); } +/* + * call-seq: + * nil.to_c -> complex + * + * Returns Complex(0, 0). + * + * nil.to_c #=> (0+0i) + */ +static mrb_value +nil_to_c(mrb_state *mrb, mrb_value self) +{ + return complex_new(mrb, 0, 0); +} + +/* + * call-seq: + * cmp ** numeric -> complex + * + * Returns the result of raising cmp to the power of numeric. + * + * Complex(1, 2) ** 2 #=> (-3+4i) + * Complex(1, 2) ** Complex(1, 0) #=> (1+2i) + */ +static mrb_value +complex_pow(mrb_state *mrb, mrb_value self) +{ + mrb_value other = mrb_get_arg1(mrb); + struct mrb_complex *c_self = complex_ptr(mrb, self); + mrb_float self_real = c_self->real; + mrb_float self_imaginary = c_self->imaginary; + + if (mrb_type(other) == MRB_TT_COMPLEX) { + struct mrb_complex *c_other = complex_ptr(mrb, other); + mrb_float x = c_other->real; + mrb_float y = c_other->imaginary; + + mrb_float log_abs_self = F(log)(F(hypot)(self_real, self_imaginary)); + mrb_float arg_self = F(atan2)(self_imaginary, self_real); + + mrb_float a = x * log_abs_self - y * arg_self; + mrb_float b = x * arg_self + y * log_abs_self; + + mrb_float exp_a = F(exp)(a); + return mrb_complex_new(mrb, exp_a * F(cos)(b), exp_a * F(sin)(b)); + } + else { + mrb_float other_float = mrb_as_float(mrb, other); + + mrb_float abs_self = F(hypot)(self_real, self_imaginary); + mrb_float arg_self = F(atan2)(self_imaginary, self_real); + + mrb_float pow_abs_self = F(pow)(abs_self, other_float); + mrb_float new_arg = arg_self * other_float; + + return mrb_complex_new(mrb, pow_abs_self * F(cos)(new_arg), pow_abs_self * F(sin)(new_arg)); + } +} + +/* ---------------------------*/ +static const mrb_mt_entry complex_rom_entries[] = { + MRB_MT_ENTRY(complex_real, MRB_SYM(real), MRB_ARGS_NONE()), + MRB_MT_ENTRY(complex_imaginary, MRB_SYM(imaginary), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_complex_to_f, MRB_SYM(to_f), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_complex_to_i, MRB_SYM(to_i), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_obj_itself, MRB_SYM(to_c), MRB_ARGS_NONE()), + MRB_MT_ENTRY(complex_add, MRB_OPSYM(add), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(complex_sub, MRB_OPSYM(sub), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(complex_mul, MRB_OPSYM(mul), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(complex_div, MRB_OPSYM(div), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(complex_div, MRB_SYM(quo), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(complex_eq, MRB_OPSYM(eq), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(complex_hash, MRB_SYM(hash), MRB_ARGS_NONE()), + MRB_MT_ENTRY(complex_pow, MRB_OPSYM(pow), MRB_ARGS_REQ(1)), +}; + void mrb_mruby_complex_gem_init(mrb_state *mrb) { struct RClass *comp; comp = mrb_define_class_id(mrb, MRB_SYM(Complex), mrb_class_get_id(mrb, MRB_SYM(Numeric))); MRB_SET_INSTANCE_TT(comp, MRB_TT_COMPLEX); + MRB_UNDEF_ALLOCATOR(comp); + + mrb_undef_class_method_id(mrb, comp, MRB_SYM(new)); + mrb_define_class_method_id(mrb, comp, MRB_SYM(rectangular), complex_s_rect, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); + mrb_define_class_method_id(mrb, comp, MRB_SYM(rect), complex_s_rect, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); - mrb_undef_class_method(mrb, comp, "new"); - mrb_define_class_method(mrb, comp, "rectangular", complex_s_rect, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); - mrb_define_class_method(mrb, comp, "rect", complex_s_rect, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); - mrb_define_method(mrb, mrb->kernel_module, "Complex", complex_s_rect, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); - mrb_define_method(mrb, comp, "real", complex_real, MRB_ARGS_NONE()); - mrb_define_method(mrb, comp, "imaginary", complex_imaginary, MRB_ARGS_NONE()); - mrb_define_method(mrb, comp, "to_f", mrb_complex_to_f, MRB_ARGS_NONE()); - mrb_define_method(mrb, comp, "to_i", mrb_complex_to_i, MRB_ARGS_NONE()); - mrb_define_method(mrb, comp, "to_c", complex_to_c, MRB_ARGS_NONE()); - mrb_define_method(mrb, comp, "+", complex_add, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, comp, "-", complex_sub, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, comp, "*", complex_mul, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, comp, "/", complex_div, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, comp, "quo", complex_div, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, comp, "==", complex_eq, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, comp, "hash", complex_hash, MRB_ARGS_NONE()); + MRB_MT_INIT_ROM(mrb, comp, complex_rom_entries); + mrb_define_method_id(mrb, mrb->nil_class, MRB_SYM(to_c), nil_to_c, MRB_ARGS_NONE()); + mrb_define_private_method_id(mrb, mrb->kernel_module, MRB_SYM(Complex), complex_s_rect, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); } void diff --git a/mrbgems/mruby-complex/test/complex.rb b/mrbgems/mruby-complex/test/complex.rb index 85ee6e9988..7478593547 100644 --- a/mrbgems/mruby-complex/test/complex.rb +++ b/mrbgems/mruby-complex/test/complex.rb @@ -76,6 +76,8 @@ def assert_complex(real, exp) one = 1e200 end assert_complex Complex(ten, ten) / Complex(one, one), Complex(10.0, 0.0) + assert_raise(ZeroDivisionError) { Complex(1,1) / 0 } + assert_raise(ZeroDivisionError) { Complex(1,1) / Complex(0,0) } end assert 'Complex#==' do @@ -161,3 +163,12 @@ def assert_complex(real, exp) assert_predicate(Complex(2,3), :frozen?) assert_predicate(4+5i, :frozen?) end + +assert 'Complex#**' do + assert_complex Complex(2, 3) ** 2, Complex(-5, 12) + assert_complex Complex(2, 3) ** 0, Complex(1, 0) + assert_complex Complex(2, 3) ** 1, Complex(2, 3) + assert_complex Complex(2, 3) ** Complex(1, 0), Complex(2, 3) + assert_complex Complex(0, 1) ** 2, Complex(-1, 0) + assert_complex Complex(0, 1) ** Complex(0, 1), Complex(Math::E ** (-Math::PI / 2), 0) +end diff --git a/mrbgems/mruby-data/README.md b/mrbgems/mruby-data/README.md new file mode 100644 index 0000000000..f835f31e4a --- /dev/null +++ b/mrbgems/mruby-data/README.md @@ -0,0 +1,126 @@ +# mruby-data + +The `mruby-data` mrbgem provides a way to define simple classes in mruby that act as data structures, similar to Ruby's `Struct` class. It allows you to bundle a number of attributes together and access them using accessor methods. + +## Purpose + +`mruby-data` is useful when you need to create lightweight objects that primarily serve to hold and provide access to a set of data attributes without the need to write a full class definition manually. + +## Functionality + +### Defining a Data Class + +You can define a new data class using the `Data.define` class method. It accepts one or more symbols as arguments, which will become the attribute names for the instances of the generated class. + +```ruby +# Defines a new class 'Point' with attributes :x and :y +Point = Data.define(:x, :y) + +# Defines a new class 'Customer' with attributes :name, :address, and :zip +Customer = Data.define(:name, :address, :zip) +``` + +### Creating Instances + +Once a data class is defined, you can create instances of it using the `new` method, providing values for each attribute in the order they were defined. + +```ruby +# Create an instance of Point +point1 = Point.new(10, 20) + +# Create an instance of Customer +customer1 = Customer.new("John Doe", "123 Main St", 12345) +``` + +Alternatively, you can pass keyword arguments (available if mruby is compiled with `MRB_KW_ARGS`): + +```ruby +customer2 = Customer.new(name: "Jane Doe", address: "456 Oak Ave", zip: 67890) +``` + +### Accessing Attributes + +Instances of data classes have accessor methods for each defined attribute. + +```ruby +puts point1.x # Output: 10 +puts point1.y # Output: 20 + +puts customer1.name # Output: "John Doe" +``` + +### Instance Methods + +Instances of classes created by `Data.define` have several useful methods: + +- **`members`**: Returns an array of symbols representing the names of the attributes. + + ```ruby + p customer1.members # Output: [:name, :address, :zip] + ``` + +- **`== (other)`**: Returns `true` if `other` is an instance of the same data class and all attribute values are equal. + + ```ruby + point2 = Point.new(10, 20) + point3 = Point.new(0, 0) + + puts point1 == point2 # Output: true + puts point1 == point3 # Output: false + ``` + +- **`eql?(other)`**: Similar to `==`, checks if `other` is an instance of the same data class and all attribute values are `eql?`. + +- **`to_h`**: Converts the data instance into a hash where keys are the attribute symbols and values are their corresponding values. + + ```ruby + p customer1.to_h + # Output: {:name=>"John Doe", :address=>"123 Main St", :zip=>12345} + ``` + +- **`to_s` / `inspect`**: Returns a string representation of the data instance. + + ```ruby + puts customer1 # Output: # + p customer1 # Output: # + ``` + +## Freezing + +By default, instances of data classes are frozen after initialization, meaning their attributes cannot be modified after creation. + +```ruby +point = Point.new(1, 2) +# The following would raise an error as the object is frozen: +# point.x = 100 +``` + +## Example Usage + +```ruby +# Define a class for 2D vectors +Vector2D = Data.define(:x, :y) + +# Create some vector instances +v1 = Vector2D.new(3, 4) +v2 = Vector2D.new(1, 5) + +# Access attributes +puts "Vector v1: (#{v1.x}, #{v1.y})" +puts "Vector v2: (#{v2.x}, #{v2.y})" + +# Use instance methods +puts "v1 members: #{v1.members}" +puts "v1 as hash: #{v1.to_h}" + +# Comparison +v3 = Vector2D.new(3, 4) +puts "v1 == v2: #{v1 == v2}" # false +puts "v1 == v3: #{v1 == v3}" # true +``` + +This mrbgem simplifies the creation of simple value objects in mruby. + +``` + +``` diff --git a/mrbgems/mruby-data/mrbgem.rake b/mrbgems/mruby-data/mrbgem.rake new file mode 100644 index 0000000000..ea8979202e --- /dev/null +++ b/mrbgems/mruby-data/mrbgem.rake @@ -0,0 +1,5 @@ +MRuby::Gem::Specification.new('mruby-data') do |spec| + spec.license = 'MIT' + spec.author = 'mruby developers' + spec.summary = 'standard Data class' +end diff --git a/mrbgems/mruby-data/src/data.c b/mrbgems/mruby-data/src/data.c new file mode 100644 index 0000000000..49fa2d6d18 --- /dev/null +++ b/mrbgems/mruby-data/src/data.c @@ -0,0 +1,549 @@ +/* +** data.c - Data class +** +** See Copyright Notice in mruby.h +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define RDATA_LEN(st) RARRAY_LEN(st) +#define RDATA_PTR(st) RARRAY_PTR(st) + +#define data_p(o) (mrb_type(o) == MRB_TT_STRUCT) + +static struct RClass * +data_class(mrb_state *mrb) +{ + return mrb_class_get_id(mrb, MRB_SYM(Data)); +} + +static void +data_corrupted(mrb_state *mrb) +{ + mrb_raise(mrb, E_TYPE_ERROR, "corrupted data"); +} + +static mrb_value +data_s_members(mrb_state *mrb, struct RClass *c) +{ + struct RClass* sclass = data_class(mrb); + + for (;;) { + mrb_value mem = mrb_iv_get(mrb, mrb_obj_value(c), MRB_SYM(__members__)); + + if (!mrb_nil_p(mem)) { + if (!mrb_array_p(mem)) { + data_corrupted(mrb); + } + return mem; + } + c = c->super; + if (c == sclass || c == 0) { + mrb_raise(mrb, E_TYPE_ERROR, "uninitialized data"); + } + } +} + +static mrb_value +data_members(mrb_state *mrb, mrb_value obj) +{ + if (!data_p(obj) || RDATA_LEN(obj) == 0) { + data_corrupted(mrb); + } + + mrb_value members = data_s_members(mrb, mrb_obj_class(mrb, obj)); + if (RDATA_LEN(obj) != RARRAY_LEN(members)) { + mrb_raisef(mrb, E_TYPE_ERROR, + "data size differs (%i required %i given)", + RARRAY_LEN(members), RDATA_LEN(obj)); + } + return members; +} + +/* + * call-seq: + * DataClass.members -> array + * + * Returns an array of symbols representing the names of the data + * structure members for this class. + * + * Customer = Data.define(:name, :address, :zip) + * Customer.members #=> [:name, :address, :zip] + */ +static mrb_value +mrb_data_s_members(mrb_state *mrb, mrb_value klass) +{ + mrb_value members = data_s_members(mrb, mrb_class_ptr(klass)); + return mrb_ary_dup(mrb, members); +} + +/* + * call-seq: + * data.members -> array + * + * Returns an array of strings representing the names of the instance + * variables. + * + * Customer = Data.define(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * joe.members #=> [:name, :address, :zip] + */ + +static mrb_value +mrb_data_members(mrb_state *mrb, mrb_value obj) +{ + return mrb_data_s_members(mrb, mrb_obj_value(mrb_obj_class(mrb, obj))); +} + +static mrb_value +data_ref(mrb_state *mrb, mrb_value obj, mrb_int i) +{ + mrb_int len = RDATA_LEN(obj); + mrb_value *ptr = RDATA_PTR(obj); + + if (!ptr || len <= i) + return mrb_nil_value(); + return ptr[i]; +} + +static mrb_value +mrb_data_ref(mrb_state *mrb, mrb_value obj) +{ + mrb_int argc = mrb_get_argc(mrb); + if (argc != 0) { + mrb_argnum_error(mrb, argc, 0, 0); + } + mrb_int i = mrb_integer(mrb_proc_cfunc_env_get(mrb, 0)); + return data_ref(mrb, obj, i); +} + +static mrb_value +data_ref_0(mrb_state *mrb, mrb_value obj) +{ + return data_ref(mrb, obj, 0); +} + +static mrb_value +data_ref_1(mrb_state *mrb, mrb_value obj) +{ + return data_ref(mrb, obj, 1); +} + +static mrb_value +data_ref_2(mrb_state *mrb, mrb_value obj) +{ + return data_ref(mrb, obj, 2); +} + +static mrb_value +data_ref_3(mrb_state *mrb, mrb_value obj) +{ + return data_ref(mrb, obj, 3); +} + +#define DATA_DIRECT_REF_MAX 4 + +static mrb_func_t aref[DATA_DIRECT_REF_MAX] = { + data_ref_0, + data_ref_1, + data_ref_2, + data_ref_3, +}; + +static void +make_data_define_accessors(mrb_state *mrb, mrb_value members, struct RClass *c) +{ + const mrb_value *ptr_members = RARRAY_PTR(members); + mrb_int len = RARRAY_LEN(members); + int ai = mrb_gc_arena_save(mrb); + + for (mrb_int i=0; i data_instance + * DataClass.new(**kwargs) -> data_instance + * + * Creates a new instance of the data structure. Arguments can be provided + * either positionally (in the order the members were defined) or as + * keyword arguments using member names. + * + * Customer = Data.define(:name, :address, :zip) + * Customer.new("Joe", "123 Main St", 12345) + * Customer.new(name: "Joe", address: "123 Main St", zip: 12345) + */ +static mrb_value +mrb_data_new(mrb_state *mrb, mrb_value self) +{ + struct RClass *c = mrb_class_ptr(self); + mrb_value members = data_s_members(mrb, c); + mrb_value *vals; + + mrb_int n = RARRAY_LEN(members); + mrb_value *mems = RARRAY_PTR(members); + if (mrb->c->ci->nk > 0) { + mrb_value tmp = mrb_str_new(mrb, NULL, sizeof(mrb_sym)*n); + mrb_sym *knames = (mrb_sym*)RSTRING_PTR(tmp); + mrb_value m = mrb_ary_new_capa(mrb, n); + vals = RARRAY_PTR(m); + for (mrb_int i=0; ibasic.c->super = c->c; */ + make_data_define_accessors(mrb, members, c); + + mrb_value data = mrb_obj_value(c); + mrb_iv_set(mrb, data, MRB_SYM(__members__), members); + return data; +} + +/* + * call-seq: + * DataClass.define(arg, ...) -> obj + * + * `Data::define` returns a new `Class` object, + * which can then be used to create specific instances of the new + * data structure. The number of actual parameters must be + * equal to the number of attributes defined for this class. + * Passing too many or too less parameters will raise an + * `ArgumentError`. + * + * The remaining methods listed in this section (class and instance) + * are defined for this generated class. + * + * # Create a structure named by its constant + * Customer = Data.define(:name, :address) #=> Customer + * Customer.new("Dave", "123 Main") #=> # + */ +static mrb_value +mrb_data_s_def(mrb_state *mrb, mrb_value klass) +{ + const mrb_value *argv; + mrb_int argc; + mrb_value b; + + mrb_get_args(mrb, "*&", &argv, &argc, &b); + mrb_value rest = mrb_ary_new_from_values(mrb, argc, argv); + for (mrb_int i=0; i data + * + * Initializes the data structure with values from a hash. + * This is typically called internally when creating data instances + * with custom initialize methods. + * + * # Usually called internally, not directly by user code + */ +static mrb_value +mrb_data_initialize(mrb_state *mrb, mrb_value self) +{ + mrb_value members = data_members(mrb, self); + + mrb_int n = RARRAY_LEN(members); + mrb_value hash; + mrb_get_args(mrb, "H", &hash); + if (mrb_hash_size(mrb, hash) != n) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments"); + } + mrb_ary_resize(mrb, self, n); + + mrb_value *mems = RARRAY_PTR(members); + for (mrb_int i = 0; i < n; i++) { + if (!mrb_hash_key_p(mrb, hash, mems[i])) { + mrb_raisef(mrb, E_ARGUMENT_ERROR, "undefined data member %v", mems[i]); + } + mrb_ary_set(mrb, self, i, mrb_hash_get(mrb, hash, mems[i])); + } + mrb_obj_freeze(mrb, self); + return self; +} + +/* + * call-seq: + * data.initialize_copy(other_data) -> data + * + * Initializes this data structure as a copy of another data structure. + * This is a private method used internally by dup and clone. + * + * customer = Customer.new("Joe", "123 Main St", 12345) + * copy = customer.dup # calls initialize_copy internally + */ +static mrb_value +mrb_data_init_copy(mrb_state *mrb, mrb_value copy) +{ + mrb_value s = mrb_get_arg1(mrb); + + if (mrb_obj_equal(mrb, copy, s)) return copy; + if (!mrb_obj_is_instance_of(mrb, s, mrb_obj_class(mrb, copy))) { + mrb_raise(mrb, E_TYPE_ERROR, "wrong argument class"); + } + if (!data_p(s)) { + data_corrupted(mrb); + } + mrb_ary_replace(mrb, copy, s); + mrb_obj_freeze(mrb, copy); + return copy; +} + +/* + * call-seq: + * data == other_data -> true or false + * + * Equality---Returns `true` if *other_data* is + * equal to this one: they must be of the same class as generated by + * `Data::define`, and all values of must be equal + * (according to `Object#==`). + * + * Customer = Data.define(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * joe2 = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * jane = Customer.new("Jane Doe", "456 Elm, Anytown NC", 12345) + * joe == joe2 #=> true + * joe == jane #=> false + */ + +static mrb_value +mrb_data_equal(mrb_state *mrb, mrb_value s) +{ + mrb_value s2 = mrb_get_arg1(mrb); + + if (mrb_obj_equal(mrb, s, s2)) { + return mrb_true_value(); + } + if (mrb_obj_class(mrb, s) != mrb_obj_class(mrb, s2)) { + return mrb_false_value(); + } + if (RDATA_LEN(s) != RDATA_LEN(s2)) { + return mrb_false_value(); + } + mrb_value *ptr = RDATA_PTR(s); + mrb_value *ptr2 = RDATA_PTR(s2); + mrb_int len = RDATA_LEN(s); + int ai = mrb_gc_arena_save(mrb); + for (mrb_int i=0; i true or false + * + * Two structures are equal if they are the same object, or if all their + * fields are equal (using `Object#eql?`). + */ +static mrb_value +mrb_data_eql(mrb_state *mrb, mrb_value s) +{ + mrb_value s2 = mrb_get_arg1(mrb); + + if (mrb_obj_equal(mrb, s, s2)) { + return mrb_true_value(); + } + if (mrb_obj_class(mrb, s) != mrb_obj_class(mrb, s2)) { + return mrb_false_value(); + } + if (RDATA_LEN(s) != RDATA_LEN(s2)) { + return mrb_false_value(); + } + mrb_value *ptr = RDATA_PTR(s); + mrb_value *ptr2 = RDATA_PTR(s2); + mrb_int len = RDATA_LEN(s); + for (mrb_int i=0; i hash + * + * Create a hash from member names and values. + */ +static mrb_value +mrb_data_to_h(mrb_state *mrb, mrb_value self) +{ + mrb_value members = data_members(mrb, self); + mrb_value *mems = RARRAY_PTR(members); + + mrb_value ret = mrb_hash_new_capa(mrb, RARRAY_LEN(members)); + mrb_int len = RARRAY_LEN(members); + for (mrb_int i=0; i string + * data.inspect -> string + * + * Returns a string representation of Data + */ +static mrb_value +mrb_data_to_s(mrb_state *mrb, mrb_value self) +{ + mrb_value members = data_members(mrb, self); + mrb_int mlen = RARRAY_LEN(members); + mrb_value *mems = RARRAY_PTR(members); + mrb_value ret = mrb_str_new_lit(mrb, "#0) mrb_str_cat_lit(mrb, ret, ", "); + mrb_str_cat(mrb, ret, name, len); + mrb_str_cat_lit(mrb, ret, "="); + mrb_str_cat_str(mrb, ret, mrb_inspect(mrb, RARRAY_PTR(self)[i])); + mrb_gc_arena_restore(mrb, ai); + } + mrb_str_cat_lit(mrb, ret, ">"); + + return ret; +} + +/* + * A `Data` is a convenient way to bundle a number of + * attributes together, using accessor methods, without having to write + * an explicit class. + * + * The `Data` class is a generator of specific classes, + * each one of which is defined to hold a set of variables and their + * accessors. In these examples, we'll call the generated class + * "*Customer*Class," and we'll show an example instance of that + * class as "*Customer*Inst." + * + * In the descriptions that follow, the parameter *symbol* refers + * to a symbol (such as `:name`). + */ +void +mrb_mruby_data_gem_init(mrb_state* mrb) +{ + struct RClass *d = mrb_define_class_id(mrb, MRB_SYM(Data), mrb->object_class); + MRB_SET_INSTANCE_TT(d, MRB_TT_STRUCT); + MRB_UNDEF_ALLOCATOR(d); + + mrb_undef_class_method_id(mrb, d, MRB_SYM(new)); + mrb_define_class_method_id(mrb, d, MRB_SYM(define), mrb_data_s_def, MRB_ARGS_ANY()); + + mrb_define_method_id(mrb, d, MRB_OPSYM(eq), mrb_data_equal, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, d, MRB_SYM(members), mrb_data_members, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, d, MRB_SYM(initialize), mrb_data_initialize, MRB_ARGS_ANY()); + mrb_define_private_method_id(mrb, d, MRB_SYM(initialize_copy), mrb_data_init_copy, MRB_ARGS_ANY()); + mrb_define_method_id(mrb, d, MRB_SYM_Q(eql), mrb_data_eql, MRB_ARGS_REQ(1)); + + mrb_define_method_id(mrb, d, MRB_SYM(to_h), mrb_data_to_h, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, d, MRB_SYM(to_s), mrb_data_to_s, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, d, MRB_SYM(inspect), mrb_data_to_s, MRB_ARGS_NONE()); +} + +void +mrb_mruby_data_gem_final(mrb_state* mrb) +{ +} diff --git a/mrbgems/mruby-data/test/data.rb b/mrbgems/mruby-data/test/data.rb new file mode 100644 index 0000000000..e54be06c99 --- /dev/null +++ b/mrbgems/mruby-data/test/data.rb @@ -0,0 +1,76 @@ +## +# Struct ISO Test + +assert('Data') do + assert_equal Class, Data.class +end + +assert('Data.define') do + c = Data.define(:m1, :m2) + assert_equal Data, c.superclass + assert_equal [:m1, :m2], c.members +end + +assert('Data#==') do + c = Data.define(:m1, :m2) + cc1 = c.new(1,2) + cc2 = c.new(1,2) + assert_true cc1 == cc2 +end + +assert('Data#members') do + c = Data.define(:m1, :m2) + assert_equal [:m1, :m2], c.new(1,2).members +end + +assert('wrong struct arg count') do + c = Data.define(:m1) + assert_raise ArgumentError do + cc = c.new(1,2,3) + end +end + +assert('data dup') do + c = Data.define(:m1, :m2, :m3, :m4, :m5) + cc = c.new(1,2,3,4,5) + assert_nothing_raised { + assert_equal(cc, cc.dup) + } +end + +assert('Data inspect') do + c = Data.define(:m1, :m2, :m3, :m4, :m5) + cc = c.new(1,2,3,4,5) + assert_equal "#", cc.inspect +end + +assert('Data#to_h') do + s = Data.define(:white, :red, :green).new('ruuko', 'yuzuki', 'hitoe') + assert_equal({:white => 'ruuko', :red => 'yuzuki', :green => 'hitoe'}) { s.to_h } +end + +assert("Data.define does not allow array") do + assert_raise(TypeError) do + Data.define("Test", [:a]) + end +end + +assert("Data.define generates subclass of Data") do + begin + original_struct = Data + Data = String + assert_equal original_struct, original_struct.define(:foo).superclass + ensure + Data = original_struct + end +end + +assert 'Data#freeze' do + c = Data.define(:m) + + o = c.new(:test) + assert_equal :test, o.m + assert_nothing_raised { + o.freeze + } +end diff --git a/mrbgems/mruby-dir/README.md b/mrbgems/mruby-dir/README.md new file mode 100644 index 0000000000..7cd9fcc3a4 --- /dev/null +++ b/mrbgems/mruby-dir/README.md @@ -0,0 +1,53 @@ +# mruby-dir + +Dir class for mruby. Supported methods are: + +`.chdir` +`.delete` +`.entries` +`.exist?` +`.foreach` +`.getwd` +`.mkdir` +`.open` +`#close` +`#each` +`#read` +`#rewind` +`#seek` +`#tell` + +## License + +Copyright (c) 2012 Internet Initiative Japan Inc. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +### On Windows platforms, you must agree on additional license too: + +Copyright Kevlin Henney, 1997, 2003, 2012. All rights reserved. + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose is hereby granted without fee, provided +that this copyright and permissions notice appear in all copies and +derivatives. + +This software is supplied "as is" without express or implied warranty. + +But that said, if there are any problems please get in touch. diff --git a/mrbgems/mruby-dir/include/dir_hal.h b/mrbgems/mruby-dir/include/dir_hal.h new file mode 100644 index 0000000000..3f8414c9f4 --- /dev/null +++ b/mrbgems/mruby-dir/include/dir_hal.h @@ -0,0 +1,79 @@ +/* +** dir_hal.h - Directory HAL interface for mruby +** +** See Copyright Notice in mruby.h +** +** Hardware Abstraction Layer for directory operations. +** Provides platform-independent interface for filesystem directory operations. +*/ + +#ifndef MRUBY_DIR_HAL_H +#define MRUBY_DIR_HAL_H + +#include + +/* + * Platform-independent directory handle + * Each HAL implementation defines this structure internally + */ +typedef struct mrb_dir_handle mrb_dir_handle; + +/* + * Directory Operations + */ + +/* Open directory for reading. Returns handle or NULL on error (sets errno). */ +mrb_dir_handle* mrb_hal_dir_open(mrb_state *mrb, const char *path); + +/* Close directory handle. Returns 0 on success, -1 on error. */ +int mrb_hal_dir_close(mrb_state *mrb, mrb_dir_handle *dir); + +/* Read next entry from directory. Returns name or NULL at end/error. */ +const char* mrb_hal_dir_read(mrb_state *mrb, mrb_dir_handle *dir); + +/* Rewind directory to beginning */ +void mrb_hal_dir_rewind(mrb_state *mrb, mrb_dir_handle *dir); + +/* + * Optional Operations (may not be available on all platforms) + */ + +/* Seek to position in directory. Returns -1 if unsupported (sets errno to ENOSYS). */ +int mrb_hal_dir_seek(mrb_state *mrb, mrb_dir_handle *dir, long pos); + +/* Get current position in directory. Returns -1 if unsupported (sets errno to ENOSYS). */ +long mrb_hal_dir_tell(mrb_state *mrb, mrb_dir_handle *dir); + +/* + * Filesystem Operations + */ + +/* Create directory with mode (mode may be ignored on some platforms). Returns 0 on success, -1 on error. */ +int mrb_hal_dir_mkdir(mrb_state *mrb, const char *path, int mode); + +/* Remove empty directory. Returns 0 on success, -1 on error. */ +int mrb_hal_dir_rmdir(mrb_state *mrb, const char *path); + +/* Change current working directory. Returns 0 on success, -1 on error. */ +int mrb_hal_dir_chdir(mrb_state *mrb, const char *path); + +/* Get current working directory. Returns 0 on success, -1 on error. */ +int mrb_hal_dir_getcwd(mrb_state *mrb, char *buf, size_t size); + +/* Change root directory (privileged operation). Returns -1 if unsupported (sets errno to ENOSYS). */ +int mrb_hal_dir_chroot(mrb_state *mrb, const char *path); + +/* Check if path is a directory. Returns 1 if directory, 0 if not. */ +int mrb_hal_dir_is_directory(mrb_state *mrb, const char *path); + +/* + * HAL Initialization/Finalization + */ + +/* Initialize HAL (called once at gem initialization) */ +void mrb_hal_dir_init(mrb_state *mrb); + +/* Cleanup HAL (called once at gem finalization) */ +void mrb_hal_dir_final(mrb_state *mrb); + +#endif /* MRUBY_DIR_HAL_H */ diff --git a/mrbgems/mruby-dir/mrbgem.rake b/mrbgems/mruby-dir/mrbgem.rake new file mode 100644 index 0000000000..ed05e14d1e --- /dev/null +++ b/mrbgems/mruby-dir/mrbgem.rake @@ -0,0 +1,4 @@ +MRuby::Gem::Specification.new('mruby-dir') do |spec| + spec.license = 'MIT and MIT-like license' + spec.authors = ['Internet Initiative Japan Inc.', 'Kevlin Henney'] +end diff --git a/mrbgems/mruby-dir/mrblib/dir.rb b/mrbgems/mruby-dir/mrblib/dir.rb new file mode 100644 index 0000000000..71dd929f13 --- /dev/null +++ b/mrbgems/mruby-dir/mrblib/dir.rb @@ -0,0 +1,124 @@ +class Dir + include Enumerable + + # + # call-seq: + # dir.each { |filename| block } -> dir + # dir.each -> enumerator + # + # Calls the block once for each entry in this directory, passing the filename + # of each entry as a parameter to the block. + # + # d = Dir.new("testdir") + # d.each { |x| puts "Got #{x}" } + # + def each(&block) + return to_enum(:each) unless block + while s = self.read + block.call(s) + end + self + end + + # + # call-seq: + # dir.each_child { |filename| block } -> dir + # dir.each_child -> enumerator + # + # Calls the block once for each entry in this directory except for "." and "..", + # passing the filename of each entry as a parameter to the block. + # + # d = Dir.new("testdir") + # d.each_child { |x| puts "Got #{x}" } + # + def each_child(&block) + return to_enum(:each_child) unless block + while s = self.read + block.call(s) unless s == "." || s == ".." + end + self + end + + alias pos tell + alias pos= seek + + class << self + + + # + # call-seq: + # Dir.foreach(dirname) { |filename| block } -> nil + # Dir.foreach(dirname) -> enumerator + # + # Calls the block once for each entry in the named directory, passing + # the filename of each entry as a parameter to the block. + # + # Dir.foreach("testdir") { |x| puts "Got #{x}" } + # + def foreach(path, &block) + return to_enum(:foreach, path) unless block + self.open(path) do |d| + d.each(&block) + end + end + + # + # call-seq: + # Dir.open(string) -> aDir + # Dir.open(string) { |aDir| block } -> obj + # + # With no block, open is a synonym for Dir.new. If a block is present, it is + # passed aDir as a parameter. The directory is closed at the end of the block, + # and Dir.open returns the value of the block. + # + # Dir.open("testdir") { |d| d.each { |x| puts "Got #{x}" } } + # + def open(path, &block) + if block + d = self.new(path) + begin + block.call(d) + ensure + begin + d.close + rescue IOError + end + end + else + self.new(path) + end + end + + # + # call-seq: + # Dir.chdir(string) -> 0 + # Dir.chdir(string) { |path| block } -> obj + # + # Changes the current working directory of the calling process to the given + # string. When called with a block, changes to the directory, executes the + # block, then changes back to the original directory. Returns the value of + # the block. + # + # Dir.chdir("/var/spool/mail") + # Dir.chdir("/tmp") { Dir.pwd } #=> "/tmp" + # + def chdir(path, &block) + if block + wd = self.getwd + begin + self._chdir(path) + block.call(path) + ensure + self._chdir(wd) + end + else + self._chdir(path) + end + end + + alias exists? exist? + alias pwd getwd + alias rmdir delete + alias unlink delete + end +end diff --git a/mrbgems/mruby-dir/ports/posix/dir_hal.c b/mrbgems/mruby-dir/ports/posix/dir_hal.c new file mode 100644 index 0000000000..898cef32a7 --- /dev/null +++ b/mrbgems/mruby-dir/ports/posix/dir_hal.c @@ -0,0 +1,175 @@ +/* +** dir_hal.c - POSIX HAL implementation for mruby-dir +** +** See Copyright Notice in mruby.h +** +** POSIX implementation for directory operations using standard POSIX APIs. +** Supported platforms: Linux, macOS, BSD, Unix +*/ + +#include +#include "dir_hal.h" + +#include +#include +#include +#include +#include + +#ifdef HAVE_SYS_PARAM_H +#include +#endif + +/* On POSIX, mrb_dir_handle wraps DIR */ +struct mrb_dir_handle { + DIR *dir; +}; + +/* + * Directory Operations + */ + +mrb_dir_handle* +mrb_hal_dir_open(mrb_state *mrb, const char *path) +{ + DIR *dir = opendir(path); + if (dir == NULL) { + return NULL; + } + + mrb_dir_handle *handle = (mrb_dir_handle*)mrb_malloc(mrb, sizeof(mrb_dir_handle)); + handle->dir = dir; + return handle; +} + +int +mrb_hal_dir_close(mrb_state *mrb, mrb_dir_handle *handle) +{ + int result = closedir(handle->dir); + mrb_free(mrb, handle); + return result; +} + +const char* +mrb_hal_dir_read(mrb_state *mrb, mrb_dir_handle *handle) +{ + (void)mrb; + struct dirent *dp = readdir(handle->dir); + return dp ? dp->d_name : NULL; +} + +void +mrb_hal_dir_rewind(mrb_state *mrb, mrb_dir_handle *handle) +{ + (void)mrb; + rewinddir(handle->dir); +} + +/* + * Optional Operations + */ + +int +mrb_hal_dir_seek(mrb_state *mrb, mrb_dir_handle *handle, long pos) +{ +#if defined(__ANDROID__) + /* Android doesn't have reliable seekdir */ + (void)mrb; (void)handle; (void)pos; + errno = ENOSYS; + return -1; +#else + (void)mrb; + seekdir(handle->dir, pos); + return 0; +#endif +} + +long +mrb_hal_dir_tell(mrb_state *mrb, mrb_dir_handle *handle) +{ +#if defined(__ANDROID__) + /* Android doesn't have reliable telldir */ + (void)mrb; (void)handle; + errno = ENOSYS; + return -1; +#else + (void)mrb; + return telldir(handle->dir); +#endif +} + +/* + * Filesystem Operations + */ + +int +mrb_hal_dir_mkdir(mrb_state *mrb, const char *path, int mode) +{ + (void)mrb; + return mkdir(path, (mode_t)mode); +} + +int +mrb_hal_dir_rmdir(mrb_state *mrb, const char *path) +{ + (void)mrb; + return rmdir(path); +} + +int +mrb_hal_dir_chdir(mrb_state *mrb, const char *path) +{ + (void)mrb; + return chdir(path); +} + +int +mrb_hal_dir_getcwd(mrb_state *mrb, char *buf, size_t size) +{ + (void)mrb; + return getcwd(buf, size) ? 0 : -1; +} + +int +mrb_hal_dir_chroot(mrb_state *mrb, const char *path) +{ +#if defined(__ANDROID__) || defined(__MSDOS__) + /* Not available on these platforms */ + (void)mrb; (void)path; + errno = ENOSYS; + return -1; +#else + (void)mrb; + return chroot(path); +#endif +} + +int +mrb_hal_dir_is_directory(mrb_state *mrb, const char *path) +{ + struct stat sb; + (void)mrb; + + if (stat(path, &sb) == 0 && S_ISDIR(sb.st_mode)) { + return 1; + } + return 0; +} + +/* + * HAL Initialization/Finalization + */ + +void +mrb_hal_dir_init(mrb_state *mrb) +{ + (void)mrb; + /* No initialization needed for POSIX */ +} + +void +mrb_hal_dir_final(mrb_state *mrb) +{ + (void)mrb; + /* No cleanup needed for POSIX */ +} diff --git a/mrbgems/mruby-dir/ports/win/dir_hal.c b/mrbgems/mruby-dir/ports/win/dir_hal.c new file mode 100644 index 0000000000..8bc3b1be26 --- /dev/null +++ b/mrbgems/mruby-dir/ports/win/dir_hal.c @@ -0,0 +1,216 @@ +/* +** dir_hal.c - Windows HAL implementation for mruby-dir +** +** See Copyright Notice in mruby.h +** +** Windows implementation for directory operations using _findfirst/_findnext APIs. +** Provides POSIX-compatible interface on Windows. +** +** Based on dirent.c by Kevlin Henney (kevlin@acm.org, kevlin@curbralan.com) +** Original implementation: Created March 1997. Updated June 2003 and July 2012. +** See end of file for Kevlin Henney's copyright notice. +*/ + +#include +#include "dir_hal.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Windows directory handle implementation */ +struct mrb_dir_handle { + intptr_t handle; /* _findfirst/_findnext handle */ + struct _finddata_t info; /* Current entry info */ + char *pattern; /* Search pattern with wildcard */ + int first; /* Flag: haven't read first entry yet */ +}; + +/* + * Directory Operations + */ + +mrb_dir_handle* +mrb_hal_dir_open(mrb_state *mrb, const char *path) +{ + mrb_dir_handle *handle; + size_t len = strlen(path); + const char *suffix; + + /* Add wildcard suffix if needed */ + suffix = (len > 0 && (path[len-1] == '/' || path[len-1] == '\\')) ? "*" : "/*"; + + handle = (mrb_dir_handle*)mrb_malloc(mrb, sizeof(mrb_dir_handle)); + handle->pattern = (char*)mrb_malloc(mrb, len + strlen(suffix) + 1); + strcpy(handle->pattern, path); + strcat(handle->pattern, suffix); + + handle->handle = _findfirst(handle->pattern, &handle->info); + if (handle->handle == -1) { + mrb_free(mrb, handle->pattern); + mrb_free(mrb, handle); + return NULL; + } + + handle->first = 1; + return handle; +} + +int +mrb_hal_dir_close(mrb_state *mrb, mrb_dir_handle *handle) +{ + int result = -1; + + if (handle->handle != -1) { + result = _findclose(handle->handle); + } + + mrb_free(mrb, handle->pattern); + mrb_free(mrb, handle); + + if (result == -1) { + /* Map all errors to EBADF */ + errno = EBADF; + } + + return result; +} + +const char* +mrb_hal_dir_read(mrb_state *mrb, mrb_dir_handle *handle) +{ + (void)mrb; + + if (handle->handle == -1) { + errno = EBADF; + return NULL; + } + + /* First call returns the result from _findfirst */ + if (handle->first) { + handle->first = 0; + return handle->info.name; + } + + /* Subsequent calls use _findnext */ + if (_findnext(handle->handle, &handle->info) == -1) { + return NULL; + } + + return handle->info.name; +} + +void +mrb_hal_dir_rewind(mrb_state *mrb, mrb_dir_handle *handle) +{ + (void)mrb; + + if (handle->handle == -1) { + errno = EBADF; + return; + } + + /* Close and reopen to rewind */ + _findclose(handle->handle); + handle->handle = _findfirst(handle->pattern, &handle->info); + handle->first = 1; +} + +/* + * Optional Operations + */ + +int +mrb_hal_dir_seek(mrb_state *mrb, mrb_dir_handle *handle, long pos) +{ + /* Not supported on Windows */ + (void)mrb; (void)handle; (void)pos; + errno = ENOSYS; + return -1; +} + +long +mrb_hal_dir_tell(mrb_state *mrb, mrb_dir_handle *handle) +{ + /* Not supported on Windows */ + (void)mrb; (void)handle; + errno = ENOSYS; + return -1; +} + +/* + * Filesystem Operations + */ + +int +mrb_hal_dir_mkdir(mrb_state *mrb, const char *path, int mode) +{ + /* Windows _mkdir ignores mode parameter */ + (void)mrb; (void)mode; + return _mkdir(path); +} + +int +mrb_hal_dir_rmdir(mrb_state *mrb, const char *path) +{ + (void)mrb; + return _rmdir(path); +} + +int +mrb_hal_dir_chdir(mrb_state *mrb, const char *path) +{ + (void)mrb; + return _chdir(path); +} + +int +mrb_hal_dir_getcwd(mrb_state *mrb, char *buf, size_t size) +{ + (void)mrb; + return _getcwd(buf, (int)size) ? 0 : -1; +} + +int +mrb_hal_dir_chroot(mrb_state *mrb, const char *path) +{ + /* Not available on Windows */ + (void)mrb; (void)path; + errno = ENOSYS; + return -1; +} + +int +mrb_hal_dir_is_directory(mrb_state *mrb, const char *path) +{ + struct _stat sb; + (void)mrb; + + if (_stat(path, &sb) == 0 && (sb.st_mode & _S_IFDIR)) { + return 1; + } + return 0; +} + +/* + * HAL Initialization/Finalization + */ + +void +mrb_hal_dir_init(mrb_state *mrb) +{ + (void)mrb; + /* No initialization needed for Windows */ +} + +void +mrb_hal_dir_final(mrb_state *mrb) +{ + (void)mrb; + /* No cleanup needed for Windows */ +} diff --git a/mrbgems/mruby-dir/src/dir.c b/mrbgems/mruby-dir/src/dir.c new file mode 100644 index 0000000000..eb12eb593a --- /dev/null +++ b/mrbgems/mruby-dir/src/dir.c @@ -0,0 +1,492 @@ +/* +** dir.c - Dir +** +** See Copyright Notice in mruby.h +*/ + +#include +#include +#include +#include +#include +#include +#include "dir_hal.h" + +#include +#include + +#define E_IO_ERROR mrb_exc_get_id(mrb, MRB_SYM(IOError)) + +struct mrb_dir { + mrb_dir_handle *handle; +}; + +static void +mrb_dir_free(mrb_state *mrb, void *ptr) +{ + struct mrb_dir *mdir = (struct mrb_dir*)ptr; + + if (mdir->handle) { + mrb_hal_dir_close(mrb, mdir->handle); + mdir->handle = NULL; + } + mrb_free(mrb, mdir); +} + +static struct mrb_data_type mrb_dir_type = { "DIR", mrb_dir_free }; + +/* + * call-seq: + * dir.close -> nil + * + * Closes the directory stream. Any further attempts to access dir will + * raise an IOError. + * + * d = Dir.new("testdir") + * d.close #=> nil + */ +static mrb_value +mrb_dir_close(mrb_state *mrb, mrb_value self) +{ + struct mrb_dir *mdir; + mdir = (struct mrb_dir*)mrb_get_datatype(mrb, self, &mrb_dir_type); + if (!mdir) return mrb_nil_value(); + if (!mdir->handle) { + mrb_raise(mrb, E_IO_ERROR, "closed directory"); + } + if (mrb_hal_dir_close(mrb, mdir->handle) == -1) { + mrb_sys_fail(mrb, "closedir"); + } + mdir->handle = NULL; + return mrb_nil_value(); +} + +/* + * call-seq: + * Dir.new(string) -> aDir + * + * Returns a new directory object for the named directory. + * + * d = Dir.new("testdir") + */ +static mrb_value +mrb_dir_init(mrb_state *mrb, mrb_value self) +{ + mrb_dir_handle *handle; + struct mrb_dir *mdir; + const char *path; + + mdir = (struct mrb_dir*)DATA_PTR(self); + if (mdir) { + mrb_dir_free(mrb, mdir); + } + DATA_TYPE(self) = &mrb_dir_type; + DATA_PTR(self) = NULL; + + mdir = (struct mrb_dir*)mrb_malloc(mrb, sizeof(*mdir)); + mdir->handle = NULL; + DATA_PTR(self) = mdir; + + mrb_get_args(mrb, "z", &path); + if ((handle = mrb_hal_dir_open(mrb, path)) == NULL) { + mrb_sys_fail(mrb, path); + } + mdir->handle = handle; + return self; +} + +/* + * call-seq: + * Dir.delete(string) -> 0 + * + * Deletes the named directory. Raises a subclass of SystemCallError if the + * directory isn't empty. + * + * Dir.delete("testdir") + */ +static mrb_value +mrb_dir_delete(mrb_state *mrb, mrb_value klass) +{ + const char *path; + + mrb_get_args(mrb, "z", &path); + if (mrb_hal_dir_rmdir(mrb, path) == -1) { + mrb_sys_fail(mrb, path); + } + return mrb_fixnum_value(0); +} + +/* + * call-seq: + * Dir.exist?(file_name) -> true or false + * + * Returns true if the named file is a directory, false otherwise. + * + * Dir.exist?(".") #=> true + * Dir.exist?("foo") #=> false + */ +static mrb_value +mrb_dir_existp(mrb_state *mrb, mrb_value klass) +{ + const char *path; + + mrb_get_args(mrb, "z", &path); + if (mrb_hal_dir_is_directory(mrb, path)) { + return mrb_true_value(); + } + else { + return mrb_false_value(); + } +} + +/* + * call-seq: + * Dir.getwd -> string + * Dir.pwd -> string + * + * Returns the path to the current working directory of this process as a string. + * + * Dir.getwd #=> "/usr/local" + */ +static mrb_value +mrb_dir_getwd(mrb_state *mrb, mrb_value klass) +{ + mrb_int size = 64; + + mrb_value path = mrb_str_buf_new(mrb, size); + while (mrb_hal_dir_getcwd(mrb, RSTRING_PTR(path), (size_t)size) == -1) { + int e = errno; + if (e != ERANGE) { + mrb_sys_fail(mrb, "getcwd(2)"); + } + size *= 2; + mrb_str_resize(mrb, path, size); + } + mrb_str_resize(mrb, path, strlen(RSTRING_PTR(path))); + return path; +} + +/* + * call-seq: + * Dir.mkdir(string [, integer]) -> 0 + * + * Makes a new directory named by string, with permissions specified by the + * optional parameter integer. The permissions may be modified by the value + * of File.umask, and are ignored on NT. Raises a SystemCallError if the + * directory cannot be created. + * + * Dir.mkdir("testdir") #=> 0 + * Dir.mkdir("testdir", 0755) #=> 0 + */ +static mrb_value +mrb_dir_mkdir(mrb_state *mrb, mrb_value klass) +{ + mrb_int mode; + const char *path; + + mode = 0777; + mrb_get_args(mrb, "z|i", &path, &mode); + if (mrb_hal_dir_mkdir(mrb, path, (int)mode) == -1) { + mrb_sys_fail(mrb, path); + } + return mrb_fixnum_value(0); +} + +/* Helper for Dir.chdir - internal method to change directory */ +static mrb_value +mrb_dir_chdir(mrb_state *mrb, mrb_value klass) +{ + const char *path; + + mrb_get_args(mrb, "z", &path); + if (mrb_hal_dir_chdir(mrb, path) == -1) { + mrb_sys_fail(mrb, path); + } + return mrb_fixnum_value(0); +} + +/* + * call-seq: + * Dir.chroot(string) -> 0 + * + * Changes this process's idea of the file system root. Only a privileged + * process may make this call. Not available on all platforms. + * + * Dir.chroot("/production/secure/root") + */ +static mrb_value +mrb_dir_chroot(mrb_state *mrb, mrb_value self) +{ + const char *path; + int res; + + mrb_get_args(mrb, "z", &path); + res = mrb_hal_dir_chroot(mrb, path); + if (res == -1) { + if (errno == ENOSYS) { + mrb_raise(mrb, E_NOTIMP_ERROR, "chroot() unreliable on your system"); + } + mrb_sys_fail(mrb, path); + } + + return mrb_fixnum_value(res); +} + +static mrb_bool +skip_name_p(const char *name) +{ + if (name[0] != '.') return FALSE; + if (name[1] == '\0') return TRUE; + if (name[1] != '.') return FALSE; + if (name[2] == '\0') return TRUE; + return FALSE; +} + +/* + * call-seq: + * Dir.empty?(path_name) -> true or false + * + * Returns true if the named directory is empty, false otherwise. + * + * Dir.empty?(".") #=> false + * Dir.empty?("/tmp") #=> false + */ +static mrb_value +mrb_dir_empty(mrb_state *mrb, mrb_value self) +{ + mrb_dir_handle *handle; + const char *name; + const char *path; + mrb_value result = mrb_true_value(); + + mrb_get_args(mrb, "z", &path); + if ((handle = mrb_hal_dir_open(mrb, path)) == NULL) { + mrb_sys_fail(mrb, path); + } + while ((name = mrb_hal_dir_read(mrb, handle)) != NULL) { + if (!skip_name_p(name)) { + result = mrb_false_value(); + break; + } + } + mrb_hal_dir_close(mrb, handle); + return result; +} + +/* + * call-seq: + * dir.read -> string or nil + * + * Reads the next entry from dir and returns it as a string. Returns nil + * at the end of the stream. + * + * d = Dir.new("testdir") + * d.read #=> "." + * d.read #=> ".." + * d.read #=> "config.h" + */ +static mrb_value +mrb_dir_read(mrb_state *mrb, mrb_value self) +{ + struct mrb_dir *mdir; + const char *name; + + mdir = (struct mrb_dir*)mrb_get_datatype(mrb, self, &mrb_dir_type); + if (!mdir) return mrb_nil_value(); + if (!mdir->handle) { + mrb_raise(mrb, E_IO_ERROR, "closed directory"); + } + name = mrb_hal_dir_read(mrb, mdir->handle); + if (name != NULL) { + return mrb_str_new_cstr(mrb, name); + } + else { + return mrb_nil_value(); + } +} + +/* + * call-seq: + * dir.rewind -> dir + * + * Repositions dir to the beginning of the stream. + * + * d = Dir.new("testdir") + * d.read #=> "." + * d.rewind #=> # + * d.read #=> "." + */ +static mrb_value +mrb_dir_rewind(mrb_state *mrb, mrb_value self) +{ + struct mrb_dir *mdir; + + mdir = (struct mrb_dir*)mrb_get_datatype(mrb, self, &mrb_dir_type); + if (!mdir) return mrb_nil_value(); + if (!mdir->handle) { + mrb_raise(mrb, E_IO_ERROR, "closed directory"); + } + mrb_hal_dir_rewind(mrb, mdir->handle); + return self; +} + +/* + * call-seq: + * dir.seek(integer) -> dir + * + * Seeks to a particular location in dir. integer must be a value returned + * by Dir#tell. + * + * d = Dir.new("testdir") + * pos = d.tell #=> 0 + * d.read #=> "." + * d.seek(pos) #=> # + * d.read #=> "." + */ +static mrb_value +mrb_dir_seek(mrb_state *mrb, mrb_value self) +{ + struct mrb_dir *mdir; + mrb_int pos; + + mdir = (struct mrb_dir*)mrb_get_datatype(mrb, self, &mrb_dir_type); + if (!mdir) return mrb_nil_value(); + if (!mdir->handle) { + mrb_raise(mrb, E_IO_ERROR, "closed directory"); + } + mrb_get_args(mrb, "i", &pos); + if (mrb_hal_dir_seek(mrb, mdir->handle, (long)pos) == -1) { + if (errno == ENOSYS) { + mrb_raise(mrb, E_NOTIMP_ERROR, "dirseek() unreliable on your system"); + } + } + return self; +} + +/* + * call-seq: + * dir.tell -> integer + * dir.pos -> integer + * + * Returns the current position in dir. + * + * d = Dir.new("testdir") + * d.tell #=> 0 + * d.read #=> "." + * d.tell #=> 1 + */ +static mrb_value +mrb_dir_tell(mrb_state *mrb, mrb_value self) +{ + struct mrb_dir *mdir; + long pos; + + mdir = (struct mrb_dir*)mrb_get_datatype(mrb, self, &mrb_dir_type); + if (!mdir) return mrb_nil_value(); + if (!mdir->handle) { + mrb_raise(mrb, E_IO_ERROR, "closed directory"); + } + pos = mrb_hal_dir_tell(mrb, mdir->handle); + if (pos == -1) { + if (errno == ENOSYS) { + mrb_raise(mrb, E_NOTIMP_ERROR, "dirtell() unreliable on your system"); + } + } + return mrb_fixnum_value((mrb_int)pos); +} + +/* + * call-seq: + * Dir.entries(dirname) -> array + * + * Returns an array containing all of the filenames in the given directory. + * Will raise a SystemCallError if the named directory doesn't exist. + */ +static mrb_value +mrb_dir_entries(mrb_state *mrb, mrb_value klass) +{ + const char *path; + + mrb_get_args(mrb, "z", &path); + + mrb_dir_handle *handle = mrb_hal_dir_open(mrb, path); + if (handle == NULL) { + mrb_sys_fail(mrb, path); + } + + mrb_value ary = mrb_ary_new(mrb); + const char *name; + while ((name = mrb_hal_dir_read(mrb, handle)) != NULL) { + mrb_ary_push(mrb, ary, mrb_str_new_cstr(mrb, name)); + } + + mrb_hal_dir_close(mrb, handle); + return ary; +} + +/* + * call-seq: + * Dir.children(dirname) -> array + * + * Returns an array containing all of the filenames except for "." and ".." + * in the given directory. Will raise a SystemCallError if the named + * directory doesn't exist. + */ +static mrb_value +mrb_dir_children(mrb_state *mrb, mrb_value klass) +{ + const char *path; + + mrb_get_args(mrb, "z", &path); + + mrb_dir_handle *handle = mrb_hal_dir_open(mrb, path); + if (handle == NULL) { + mrb_sys_fail(mrb, path); + } + + mrb_value ary = mrb_ary_new(mrb); + const char *name; + while ((name = mrb_hal_dir_read(mrb, handle)) != NULL) { + if (!skip_name_p(name)) { + mrb_ary_push(mrb, ary, mrb_str_new_cstr(mrb, name)); + } + } + + mrb_hal_dir_close(mrb, handle); + return ary; +} + +void +mrb_mruby_dir_gem_init(mrb_state *mrb) +{ + struct RClass *d; + + mrb_hal_dir_init(mrb); + + d = mrb_define_class_id(mrb, MRB_SYM(Dir), mrb->object_class); + MRB_SET_INSTANCE_TT(d, MRB_TT_DATA); + mrb_define_class_method_id(mrb, d, MRB_SYM(delete), mrb_dir_delete, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, d, MRB_SYM_Q(exist), mrb_dir_existp, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, d, MRB_SYM(getwd), mrb_dir_getwd, MRB_ARGS_NONE()); + mrb_define_class_method_id(mrb, d, MRB_SYM(mkdir), mrb_dir_mkdir, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); + mrb_define_class_method_id(mrb, d, MRB_SYM(_chdir), mrb_dir_chdir, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, d, MRB_SYM(chroot), mrb_dir_chroot, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, d, MRB_SYM_Q(empty), mrb_dir_empty, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, d, MRB_SYM(entries), mrb_dir_entries, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, d, MRB_SYM(children), mrb_dir_children, MRB_ARGS_REQ(1)); + + mrb_define_method_id(mrb, d, MRB_SYM(close), mrb_dir_close, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, d, MRB_SYM(initialize), mrb_dir_init, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, d, MRB_SYM(read), mrb_dir_read, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, d, MRB_SYM(rewind), mrb_dir_rewind, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, d, MRB_SYM(seek), mrb_dir_seek, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, d, MRB_SYM(tell), mrb_dir_tell, MRB_ARGS_NONE()); + + mrb_define_class_id(mrb, MRB_SYM(IOError), E_STANDARD_ERROR); +} + +void +mrb_mruby_dir_gem_final(mrb_state *mrb) +{ + mrb_hal_dir_final(mrb); +} diff --git a/mrbgems/mruby-dir/test/dir.rb b/mrbgems/mruby-dir/test/dir.rb new file mode 100644 index 0000000000..9ddc9661b4 --- /dev/null +++ b/mrbgems/mruby-dir/test/dir.rb @@ -0,0 +1,138 @@ +assert('Dir') do + assert_equal(Class, Dir.class) +end + +assert('DirTest.setup') do + assert_nothing_raised{DirTest.setup} +end + +assert('Dir.chdir') do + assert_equal 0, Dir.chdir(DirTest.sandbox) +end + +assert('Dir.entries') do + a = Dir.entries(DirTest.sandbox) + assert_true a.include?("a") + assert_true a.include?("b") +end + +assert('Dir.exist?') do + assert_true Dir.exist?(DirTest.sandbox) + assert_false Dir.exist?(DirTest.sandbox + "/nosuchdir") +end + +assert('Dir.foreach') do + a = [] + Dir.foreach(DirTest.sandbox) { |s| a << s } + assert_true a.include?("a") + assert_true a.include?("b") +end + +assert('Dir.getwd') do + s = Dir.getwd + assert_true s.kind_of? String +end + +assert('Dir.mkdir') do + m1 = DirTest.sandbox + "/mkdir1" + m2 = DirTest.sandbox + "/mkdir2" + assert_equal 0, Dir.mkdir(m1) + assert_equal 0, Dir.mkdir(m2, 0765) +end + +assert('Dir.delete') do + s = DirTest.sandbox + "/delete" + Dir.mkdir(s) + assert_true Dir.exist?(s) + + Dir.delete(s) + assert_false Dir.exist?(s) +end + +assert('Dir.open') do + a = [] + Dir.open(DirTest.sandbox) { |d| + d.each_child { |s| a << s } + } + assert_true a.include?("a") + assert_true a.include?("b") +end + +assert('Dir#initialize and Dir#close') do + d = Dir.new(".") + assert_true d.instance_of?(Dir) + assert_nil d.close +end + +assert('Dir#close') do + d = Dir.new(".") + assert_nothing_raised{d.close} +end + +assert('Dir#each_child') do + a = [] + d = Dir.open(DirTest.sandbox) + d.each_child { |s| a << s } + d.close + assert_true a.include?("a") + assert_true a.include?("b") +end + +assert('Dir#read') do + a = [] + d = Dir.open(DirTest.sandbox) + while s = d.read + a << s + end + d.close + assert_true a.include?("a") + assert_true a.include?("b") +end + +assert('Dir#rewind') do + d = Dir.open(DirTest.sandbox) + while d.read; end + + assert_equal d, d.rewind + + a = [] + while s = d.read + a << s + end + d.close + assert_true a.include? "a" + assert_true a.include? "b" +end + +# Note: behaviors of seekdir(3) and telldir(3) are so platform-dependent +# that we cannot write portable tests here. + +assert('Dir#tell') do + n = nil + begin + Dir.open(DirTest.sandbox) { |d| + n = d.tell + } + assert_true n.is_a? Integer + rescue NotImplementedError => e + skip e.message + end +end + +assert('Dir#seek') do + d1 = Dir.open(DirTest.sandbox) + d1.read + begin + n = d1.tell + d1.read + d2 = d1.seek(n) + assert_equal d1, d2 + rescue NotImplementedError => e + skip e.message + end +end + +assert('DirTest.teardown') do + GC.start + assert_nothing_raised{DirTest.teardown} +end diff --git a/mrbgems/mruby-dir/test/dirtest.c b/mrbgems/mruby-dir/test/dirtest.c new file mode 100644 index 0000000000..e7e8564ff8 --- /dev/null +++ b/mrbgems/mruby-dir/test/dirtest.c @@ -0,0 +1,125 @@ +#include +#include + +#include +#include +#include + +#include +#include +#include +#include "dir_hal.h" + +#if defined(_WIN32) +#include +#include +#else +#include +#endif + +static void +make_dir(mrb_state *mrb, const char *name, const char *up) +{ + if (mrb_hal_dir_mkdir(mrb, name, 0) == -1) { + if (mrb_hal_dir_chdir(mrb, "..") == 0) { + mrb_hal_dir_rmdir(mrb, up); + } + mrb_raisef(mrb, E_RUNTIME_ERROR, "mkdir(%s) failed", mrb_str_new_cstr(mrb, name)); + } +} + +mrb_value +mrb_dirtest_setup(mrb_state *mrb, mrb_value klass) +{ + char buf[2048]; + char cwd[1024]; + const char *aname = "a"; + const char *bname = "b"; + + /* save current working directory */ + if (mrb_hal_dir_getcwd(mrb, cwd, sizeof(cwd)) != 0) { + mrb_raise(mrb, E_RUNTIME_ERROR, "getcwd() failed"); + } + mrb_cv_set(mrb, klass, mrb_intern_cstr(mrb, "pwd"), mrb_str_new_cstr(mrb, cwd)); + + /* create sandbox */ +#if defined(_WIN32) + snprintf(buf, sizeof(buf), "%s\\mruby-dir-test.XXXXXX", cwd); + if ((_mktemp(buf) == NULL) || mrb_hal_dir_mkdir(mrb, buf, 0) != 0) { + mrb_raisef(mrb, E_RUNTIME_ERROR, "mkdtemp(%s) failed", buf); + } +#else + snprintf(buf, sizeof(buf), "%s/mruby-dir-test.XXXXXX", P_tmpdir); + if (mkdtemp(buf) == NULL) { + mrb_raisef(mrb, E_RUNTIME_ERROR, "mkdtemp(%s) failed", buf); + } +#endif + mrb_cv_set(mrb, klass, mrb_intern_cstr(mrb, "sandbox"), mrb_str_new_cstr(mrb, buf)); + + /* go to sandbox */ + if (mrb_hal_dir_chdir(mrb, buf) == -1) { + mrb_hal_dir_rmdir(mrb, buf); + mrb_raisef(mrb, E_RUNTIME_ERROR, "chdir(%s) failed", buf); + } + + /* make some directories in the sandbox */ + make_dir(mrb, aname, buf); + make_dir(mrb, bname, buf); + + return mrb_true_value(); +} + +mrb_value +mrb_dirtest_teardown(mrb_state *mrb, mrb_value klass) +{ + mrb_value d, sandbox; + mrb_dir_handle *dirp; + const char *name; + const char *path; + + /* cleanup sandbox */ + sandbox = mrb_cv_get(mrb, klass, mrb_intern_cstr(mrb, "sandbox")); + path = mrb_str_to_cstr(mrb, sandbox); + + dirp = mrb_hal_dir_open(mrb, path); + while ((name = mrb_hal_dir_read(mrb, dirp)) != NULL) { + if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) + continue; + if (mrb_hal_dir_rmdir(mrb, name) == -1) { + mrb_raisef(mrb, E_RUNTIME_ERROR, "rmdir(%s) failed", name); + } + } + mrb_hal_dir_close(mrb, dirp); + + /* back to original pwd */ + d = mrb_cv_get(mrb, klass, mrb_intern_cstr(mrb, "pwd")); + path = mrb_str_to_cstr(mrb, d); + if (mrb_hal_dir_chdir(mrb, path) == -1) { + mrb_raisef(mrb, E_RUNTIME_ERROR, "chdir(%s) failed", path); + } + + /* remove sandbox directory */ + sandbox = mrb_cv_get(mrb, klass, mrb_intern_cstr(mrb, "sandbox")); + path = mrb_str_to_cstr(mrb, sandbox); + if (mrb_hal_dir_rmdir(mrb, path) == -1) { + mrb_raisef(mrb, E_RUNTIME_ERROR, "rmdir(%s) failed", path); + } + + return mrb_true_value(); +} + +mrb_value +mrb_dirtest_sandbox(mrb_state *mrb, mrb_value klass) +{ + return mrb_cv_get(mrb, klass, mrb_intern_cstr(mrb, "sandbox")); +} + +void +mrb_mruby_dir_gem_test(mrb_state *mrb) +{ + struct RClass *c = mrb_define_module(mrb, "DirTest"); + + mrb_define_class_method(mrb, c, "sandbox", mrb_dirtest_sandbox, MRB_ARGS_NONE()); + mrb_define_class_method(mrb, c, "setup", mrb_dirtest_setup, MRB_ARGS_NONE()); + mrb_define_class_method(mrb, c, "teardown", mrb_dirtest_teardown, MRB_ARGS_NONE()); +} diff --git a/mrbgems/mruby-encoding/README.md b/mrbgems/mruby-encoding/README.md new file mode 100644 index 0000000000..b9196f7934 --- /dev/null +++ b/mrbgems/mruby-encoding/README.md @@ -0,0 +1,96 @@ +# mruby-encoding + +This mrbgem provides a lightweight, "poorman's" encoding functionality for mruby. It is designed to offer basic encoding support, primarily focused on UTF-8 and ASCII-8BIT. + +## Summary + +- **License:** MIT +- **Author:** mruby developers +- **Supported Encodings:** + - `Encoding::UTF_8` + - `Encoding::ASCII_8BIT` (aliased as `Encoding::BINARY`) + +## Functionality + +This gem introduces an `Encoding` module and extends the `String` and `Integer` classes with encoding-related methods. + +### `Encoding` Module + +A module (not a class, unlike standard Ruby) that holds encoding constants. + +- `Encoding::UTF_8`: Represents the UTF-8 encoding. +- `Encoding::ASCII_8BIT`: Represents the ASCII-8BIT encoding. +- `Encoding::BINARY`: An alias for `Encoding::ASCII_8BIT`. + +### `String` Methods + +- `string.valid_encoding? -> true or false` + - Returns `true` if the string is correctly encoded (particularly useful for UTF-8 strings). For `ASCII-8BIT` strings, it generally returns `true`. +- `string.encoding -> EncodingConstant` + - Returns the encoding of the string. This will be `Encoding::UTF_8` or `Encoding::BINARY`. +- `string.force_encoding(encoding_name) -> string` + - Changes the string's reported encoding to the specified `encoding_name` (e.g., "UTF-8", "ASCII-8BIT", "BINARY"). + - The actual byte sequence of the string is not changed. + - Raises an `ArgumentError` if an unsupported encoding name is provided. + +### `Integer` Method + +- `integer.chr(encoding_name = Encoding::BINARY) -> String` + - Returns a single-character string represented by the integer. + - If `encoding_name` is "UTF-8", the integer is treated as a Unicode codepoint. + - If `encoding_name` is "ASCII-8BIT" or "BINARY" (the default), the integer is treated as a byte value (0-255). + - Raises a `RangeError` if the integer is out of the valid range for the specified encoding. + - Raises an `ArgumentError` for unknown encoding names. + +## Usage Example + +```ruby +# main.rb +if __ENCODING__ == "UTF-8" + s = "helloあ" + puts s.encoding #=> Encoding::UTF_8 + puts s.valid_encoding? #=> true + + s2 = "\xff".force_encoding("UTF-8") + puts s2.valid_encoding? #=> false + + s3 = "world" + s3.force_encoding("BINARY") + puts s3.encoding #=> Encoding::BINARY + puts s3.valid_encoding? #=> true (ASCII-8BIT strings are generally considered valid) + + puts 65.chr #=> "A" (defaults to ASCII-8BIT) + puts 230.chr("UTF-8") #=> "æ" (if U+00E6 is æ) + # For mruby, this might be different based on actual UTF-8 char mapping + # For example, 12354.chr("UTF-8") might be "あ" +else + s = "hello" + puts s.encoding #=> Encoding::BINARY (or ASCII-8BIT) + + # Attempting to force to UTF-8 in a non-UTF-8 mruby build might be limited + # or behave as ASCII-8BIT depending on mruby's core string handling. +end + +# Force encoding +my_string = "\xE3\x81\x82" # UTF-8 bytes for "あ" +puts my_string.encoding # Might be BINARY by default if not created as UTF-8 literal + +my_string.force_encoding("UTF-8") +puts my_string.encoding #=> Encoding::UTF_8 +puts my_string #=> あ + +invalid_utf8 = "\xff\xfe" +invalid_utf8.force_encoding("UTF-8") +puts invalid_utf8.valid_encoding? #=> false + +# Integer#chr +puts 65.chr # => "A" +puts 65.chr("BINARY") # => "A" + +# When mruby is compiled with MRB_UTF8_STRING +if Object.const_defined?(:MRB_UTF8_STRING) + puts 12354.chr("UTF-8") # => "あ" + # puts 0x110000.chr("UTF-8") #=> RangeError +end + +``` diff --git a/mrbgems/mruby-encoding/mrbgem.rake b/mrbgems/mruby-encoding/mrbgem.rake new file mode 100644 index 0000000000..8d5e41284c --- /dev/null +++ b/mrbgems/mruby-encoding/mrbgem.rake @@ -0,0 +1,8 @@ +MRuby::Gem::Specification.new('mruby-encoding') do |spec| + spec.license = 'MIT' + spec.author = 'mruby developers' + spec.summary = "Poorman's Encoding for mruby" + spec.build.defines << "HAVE_MRUBY_ENCODING_GEM" + spec.build.defines << "MRB_UTF8_STRING" + spec.add_test_dependency 'mruby-string-ext' +end diff --git a/mrbgems/mruby-encoding/src/encoding.c b/mrbgems/mruby-encoding/src/encoding.c new file mode 100644 index 0000000000..ac9d52fcae --- /dev/null +++ b/mrbgems/mruby-encoding/src/encoding.c @@ -0,0 +1,127 @@ +#include +#include +#include +#include + +#define ENC_ASCII_8BIT "ASCII-8BIT" +#define ENC_BINARY "BINARY" +#define ENC_UTF8 "UTF-8" + +/* + * call-seq: + * string.valid_encoding? -> true or false + * + * Returns true for a string which is encoded correctly. + * + */ +static mrb_value +str_valid_enc_p(mrb_state *mrb, mrb_value str) +{ +#define utf8_islead(c) ((unsigned char)((c)&0xc0) != 0x80) + + struct RString *s = mrb_str_ptr(str); + if (RSTR_SINGLE_BYTE_P(s)) return mrb_true_value(); + if (RSTR_BINARY_P(s)) return mrb_true_value(); + + mrb_int byte_len = RSTR_LEN(s); + mrb_int utf8_len = 0; + const char *p = RSTR_PTR(s); + const char *e = p + byte_len; + while (p < e) { + mrb_int len = mrb_utf8len(p, e); + + if (len == 1 && (*p & 0x80)) return mrb_false_value(); + p += len; + utf8_len++; + } + if (byte_len == utf8_len) RSTR_SET_SINGLE_BYTE_FLAG(s); + return mrb_true_value(); +} + +static mrb_value +get_encoding(mrb_state *mrb, mrb_sym enc) +{ + struct RClass *e = mrb_module_get_id(mrb, MRB_SYM(Encoding)); + return mrb_const_get(mrb, mrb_obj_value(e), enc); +} + +/* + * call-seq: + * string.encoding -> encoding + * + * Returns the Encoding object that represents the encoding of the string. + * In mruby, this returns either "UTF-8" or "ASCII-8BIT" (BINARY). + * + * "hello".encoding #=> "UTF-8" + * "\xff\xfe".encoding #=> "ASCII-8BIT" + */ +static mrb_value +str_encoding(mrb_state *mrb, mrb_value self) +{ + struct RString *s = mrb_str_ptr(self); + if (RSTR_BINARY_P(s)) { + return get_encoding(mrb, MRB_SYM(BINARY)); + } + return get_encoding(mrb, MRB_SYM(UTF_8)); +} + +/* + * call-seq: + * string.force_encoding(encoding) -> string + * + * Changes the encoding of the string to the specified encoding. + * This method modifies the string in place and returns self. + * In mruby, only "UTF-8", "ASCII-8BIT", and "BINARY" are supported. + * + * str = "hello" + * str.force_encoding("ASCII-8BIT") #=> "hello" + * str.encoding #=> "ASCII-8BIT" + */ +static mrb_value +str_force_encoding(mrb_state *mrb, mrb_value self) +{ + mrb_value enc; + + mrb_get_args(mrb, "S", &enc); + + struct RString *s = mrb_str_ptr(self); + if (MRB_STR_CASECMP_P(enc, ENC_ASCII_8BIT) || + MRB_STR_CASECMP_P(enc, ENC_BINARY)) { + s->flags |= MRB_STR_BINARY; + } + else if (MRB_STR_CASECMP_P(enc, ENC_UTF8)) { + s->flags &= ~MRB_STR_BINARY; + } + else { + mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown encoding name - %v", enc); + } + return self; +} + +void +mrb_mruby_encoding_gem_init(mrb_state* mrb) +{ + struct RClass *s = mrb->string_class; + + mrb_define_method_id(mrb, s, MRB_SYM_Q(valid_encoding), str_valid_enc_p, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, s, MRB_SYM(encoding), str_encoding, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, s, MRB_SYM(force_encoding), str_force_encoding, MRB_ARGS_REQ(1)); + + /* Poorman's Encoding + * + * Encoding - module instead of class + * encodings - supports only UTF-8 and ASCII-8BIT (and its alias BINARY) + * each Encoding - encoding name string instead of Encoding object + * + */ + struct RClass *e = mrb_define_module_id(mrb, MRB_SYM(Encoding)); + mrb_value b = mrb_str_new_lit_frozen(mrb, ENC_ASCII_8BIT); + mrb_define_const_id(mrb, e, MRB_SYM(ASCII_8BIT), b); + mrb_define_const_id(mrb, e, MRB_SYM(BINARY), b); + mrb_define_const_id(mrb, e, MRB_SYM(UTF_8), mrb_str_new_lit_frozen(mrb, ENC_UTF8)); +} + +void +mrb_mruby_encoding_gem_final(mrb_state* mrb) +{ +} diff --git a/mrbgems/mruby-encoding/test/numeric.rb b/mrbgems/mruby-encoding/test/numeric.rb new file mode 100644 index 0000000000..f79235de09 --- /dev/null +++ b/mrbgems/mruby-encoding/test/numeric.rb @@ -0,0 +1,27 @@ +assert('Integer#chr') do + assert_equal("A", 65.chr) + assert_equal("B", 0x42.chr) + assert_equal("\xab", 171.chr) + assert_raise(RangeError) { -1.chr } + assert_raise(RangeError) { 256.chr } + + assert_equal("A", 65.chr("ASCII-8BIT")) + assert_equal("B", 0x42.chr("BINARY")) + assert_equal("\xab", 171.chr("ascii-8bit")) + assert_raise(RangeError) { -1.chr("binary") } + assert_raise(RangeError) { 256.chr("Ascii-8bit") } + assert_raise(ArgumentError) { 65.chr("ASCII") } + assert_raise(ArgumentError) { 65.chr("ASCII-8BIT", 2) } + assert_raise(TypeError) { 65.chr(:BINARY) } + + if __ENCODING__ == "ASCII-8BIT" + assert_raise(ArgumentError) { 65.chr("UTF-8") } + else + assert_equal("A", 65.chr("UTF-8")) + assert_equal("B", 0x42.chr("UTF-8")) + assert_equal("«", 171.chr("utf-8")) + assert_equal("あ", 12354.chr("Utf-8")) + assert_raise(RangeError) { -1.chr("utf-8") } + assert_raise(RangeError) { 0x110000.chr.chr("UTF-8") } + end +end diff --git a/mrbgems/mruby-encoding/test/string.rb b/mrbgems/mruby-encoding/test/string.rb new file mode 100644 index 0000000000..0c54e6dcf4 --- /dev/null +++ b/mrbgems/mruby-encoding/test/string.rb @@ -0,0 +1,30 @@ +## +# String(Ext) Test + +UTF8STRING = __ENCODING__ == "UTF-8" + +assert('String#valid_encoding?') do + assert_true "hello".valid_encoding? + if UTF8STRING + assert_true "あ".valid_encoding? + assert_false "\xfe".valid_encoding? + assert_false "あ\xfe".valid_encoding? + assert_true "あ\xfe".b.valid_encoding? + else + assert_true "\xfe".valid_encoding? + end +end + +assert('String#encoding') do + if UTF8STRING + a = "あ" + assert_equal Encoding::UTF_8, a.encoding + assert_equal Encoding::BINARY, a.b.encoding + assert_equal a, a.force_encoding(Encoding::BINARY) + assert_equal a, a.force_encoding(Encoding::BINARY) + assert_equal Encoding::BINARY, a.encoding + else + a = "hello" + assert_equal Encoding::BINARY, a.encoding + end +end diff --git a/mrbgems/mruby-enum-chain/README.md b/mrbgems/mruby-enum-chain/README.md new file mode 100644 index 0000000000..f2fd104da9 --- /dev/null +++ b/mrbgems/mruby-enum-chain/README.md @@ -0,0 +1,80 @@ +# mruby-enum-chain + +## Description + +This mrbgem provides the `Enumerator::Chain` class, which allows you to chain multiple enumerable objects together, treating them as a single, continuous enumerable. + +This is useful when you have multiple collections (e.g., arrays, ranges, or other enumerators) and you want to iterate over all of them sequentially without first concatenating them into a single, larger collection. + +## How to Use + +There are three main ways to create an `Enumerator::Chain` instance: + +### 1. Using `Enumerable#chain` + +You can call the `chain` method on any object that includes the `Enumerable` module. + +```ruby +a = [1, 2, 3] +b = (4..6) +c = {foo: 7, bar: 8}.each_key # Enumerator for keys + +chained_enum = a.chain(b, c) +chained_enum.to_a # => [1, 2, 3, 4, 5, 6, :foo, :bar] +``` + +### 2. Using `Enumerator#+` + +You can use the `+` operator on an `Enumerator` instance to chain it with another enumerable object. + +```ruby +enum1 = [1, 2].each +enum2 = %w(a b).each + +chained_enum = enum1 + enum2 +chained_enum.to_a # => [1, 2, "a", "b"] + +# You can chain multiple times +enum3 = (10..11).each +chained_enum_2 = enum1 + enum2 + enum3 +chained_enum_2.to_a # => [1, 2, "a", "b", 10, 11] +``` + +### 3. Using `Enumerator::Chain.new` + +You can directly instantiate `Enumerator::Chain` by passing enumerable objects to its constructor. + +```ruby +arr = [10, 20] +rng = (30..31) + +chained_enum = Enumerator::Chain.new(arr, rng) +chained_enum.to_a # => [10, 20, 30, 31] +``` + +## Key Features + +- **`each(&block)`**: Iterates through each element of the chained enumerables in the order they were added. Returns an enumerator if no block is given. +- **`size`**: Returns the total number of elements in all chained enumerables. If any of the chained enumerables do not respond to `size` (e.g., an infinite enumerator or one with an unknown size), this method will return `nil`. + + ```ruby + ([1, 2].chain([3, 4])).size # => 4 + ([1, 2].chain( (1..Float::INFINITY) )).size # => nil + ``` + +- **`rewind`**: Rewinds all of the chained enumerables that respond to the `rewind` method. This resets the iteration state to the beginning. + + ```ruby + e = [1,2].chain(3..4) + e.next # => 1 + e.next # => 2 + e.next # => 3 + e.rewind + e.next # => 1 + ``` + +- **`+(other)`**: Creates a new `Enumerator::Chain` by appending another enumerable to the current chain. + +## License + +MIT License diff --git a/mrbgems/mruby-enum-chain/mrblib/chain.rb b/mrbgems/mruby-enum-chain/mrblib/chain.rb index 43d0926c8d..d21ee908ee 100644 --- a/mrbgems/mruby-enum-chain/mrblib/chain.rb +++ b/mrbgems/mruby-enum-chain/mrblib/chain.rb @@ -3,12 +3,33 @@ # See Copyright Notice in mruby.h module Enumerable + # + # call-seq: + # enum.chain(*enums) -> enumerator_chain + # + # Returns an Enumerator::Chain object which can enumerate over this + # enumerable and the given enumerables in sequence. + # + # e = (1..3).chain([4, 5]) + # e.to_a #=> [1, 2, 3, 4, 5] + # def chain(*args) Enumerator::Chain.new(self, *args) end end class Enumerator + # + # call-seq: + # enum + other_enum -> enumerator_chain + # + # Returns an Enumerator::Chain object which can enumerate over this + # enumerator and the given enumerator in sequence. + # + # e1 = (1..3).each + # e2 = [4, 5].each + # (e1 + e2).to_a #=> [1, 2, 3, 4, 5] + # def +(other) Chain.new(self, other) end @@ -16,13 +37,36 @@ def +(other) class Chain include Enumerable + # + # call-seq: + # Enumerator::Chain.new(*enums) -> enumerator_chain + # + # Generates a new enumerator which iterates over each one of the + # given enumerable objects in sequence. + # + # e = Enumerator::Chain.new(1..3, [4, 5]) + # e.to_a #=> [1, 2, 3, 4, 5] + # def initialize(*args) @enums = args.freeze @pos = -1 end + # + # call-seq: + # chain.each { |obj| block } -> chain + # chain.each -> enumerator + # + # Iterates over the elements of the first enumerable by calling the + # each method on it with the given block, then proceeds to the next + # enumerable in the chain and continues until the end. + # + # e = Enumerator::Chain.new(1..3, [4, 5]) + # e.each { |x| puts x } + # # prints: 1, 2, 3, 4, 5 + # def each(&block) - return to_enum unless block + return to_enum(:each) unless block i = 0 while i < @enums.size @@ -34,6 +78,16 @@ def each(&block) self end + # + # call-seq: + # chain.size -> integer or nil + # + # Returns the total size of the enumerator chain if all of the + # chained enumerables define size. Otherwise it returns nil. + # + # Enumerator::Chain.new(1..3, [4, 5]).size #=> 5 + # Enumerator::Chain.new(1..3, loop).size #=> nil + # def size @enums.reduce(0) do |a, e| return nil unless e.respond_to?(:size) @@ -41,6 +95,19 @@ def size end end + # + # call-seq: + # chain.rewind -> chain + # + # Rewinds the enumerator chain by calling the rewind method on each + # enumerable that has been iterated, in reverse order. Each enumerable + # that defines a rewind method will be rewound. + # + # e = Enumerator::Chain.new((1..3), [4, 5]) + # e.next #=> 1 + # e.rewind + # e.next #=> 1 + # def rewind while 0 <= @pos && @pos < @enums.size e = @enums[@pos] @@ -51,10 +118,30 @@ def rewind self end + # + # call-seq: + # chain + other_enum -> enumerator_chain + # + # Returns a new Enumerator::Chain object which will enumerate over the + # elements of this chain, followed by the elements of other_enum. + # + # e1 = Enumerator::Chain.new(1..3, [4, 5]) + # e2 = e1 + [6, 7] + # e2.to_a #=> [1, 2, 3, 4, 5, 6, 7] + # def +(other) self.class.new(self, other) end + # + # call-seq: + # chain.inspect -> string + # + # Returns a printable version of the enumerator chain. + # + # Enumerator::Chain.new(1..3, [4, 5]).inspect + # #=> "#" + # def inspect "#<#{self.class}: #{@enums.inspect}>" end diff --git a/mrbgems/mruby-enum-ext/README.md b/mrbgems/mruby-enum-ext/README.md new file mode 100644 index 0000000000..a49b3bc86a --- /dev/null +++ b/mrbgems/mruby-enum-ext/README.md @@ -0,0 +1,487 @@ +# mruby-enum-ext + +This mrbgem extends the `Enumerable` module in mruby with additional useful methods. + +## Added Methods + +Below is a list of methods added to the `Enumerable` module, along with a brief description and example for each. + +--- + +### `drop(n)` + +Drops the first `n` elements from an enumerable and returns the rest of the elements in an array. + +_Example:_ + +```ruby +a = [1, 2, 3, 4, 5, 0] +a.drop(3) #=> [4, 5, 0] +``` + +--- + +### `drop_while { |obj| block }` + +Drops elements up to, but not including, the first element for which the block returns `nil` or `false`. Returns an array containing the remaining elements. If no block is given, an enumerator is returned. + +_Example:_ + +```ruby +a = [1, 2, 3, 4, 5, 0] +a.drop_while {|i| i < 3 } #=> [3, 4, 5, 0] +``` + +--- + +### `take(n)` + +Returns the first `n` elements from an enumerable as an array. + +_Example:_ + +```ruby +a = [1, 2, 3, 4, 5, 0] +a.take(3) #=> [1, 2, 3] +``` + +--- + +### `take_while { |obj| block }` + +Passes elements to the block until the block returns `nil` or `false`, then stops iterating and returns an array of all prior elements. If no block is given, an enumerator is returned. + +_Example:_ + +```ruby +a = [1, 2, 3, 4, 5, 0] +a.take_while {|i| i < 3 } #=> [1, 2] +``` + +--- + +### `each_cons(n) { |a| block }` + +Iterates the given block for each array of consecutive `n` elements. If no block is given, an enumerator is returned. + +_Example:_ + +```ruby +(1..10).each_cons(3) {|a| p a} +# outputs below +# [1, 2, 3] +# [2, 3, 4] +# [3, 4, 5] +# [4, 5, 6] +# [5, 6, 7] +# [6, 7, 8] +# [7, 8, 9] +# [8, 9, 10] +``` + +--- + +### `each_slice(n) { |a| block }` + +Iterates the given block for each slice of `n` elements. If no block is given, an enumerator is returned. + +_Example:_ + +```ruby +(1..10).each_slice(3) {|a| p a} +# outputs below +# [1, 2, 3] +# [4, 5, 6] +# [7, 8, 9] +# [10] +``` + +--- + +### `group_by { |obj| block }` + +Groups the collection by the result of the block. Returns a hash where keys are the evaluated result from the block and values are arrays of elements corresponding to the key. If no block is given, an enumerator is returned. + +_Example:_ + +```ruby +(1..6).group_by {|i| i%3} #=> {0=>[3, 6], 1=>[1, 4], 2=>[2, 5]} +``` + +--- + +### `sort_by { |obj| block }` + +Sorts the enumerable using a set of keys generated by mapping the values in the enumerable through the given block. Returns an array. If no block is given, an enumerator is returned. (Note: `Array#sort_by` and `Array#sort_by!` are also implemented for optimized in-place sorting for arrays). + +_Example:_ + +```ruby +%w(apple pear fig).sort_by { |word| word.length } +#=> ["fig", "pear", "apple"] +``` + +_(Example adapted, original was for `Array#sort_by`)_ + +--- + +### `first` / `first(n)` + +Returns the first element, or the first `n` elements, of the enumerable. If the enumerable is empty, `first` returns `nil`, and `first(n)` returns an empty array. + +_Examples:_ + +```ruby +a = [1, 2, 3, 4, 5] +a.first #=> 1 +a.first(3) #=> [1, 2, 3] +[].first #=> nil +[].first(3) #=> [] +``` + +--- + +### `count` / `count(item)` / `count { |obj| block }` + +Returns the number of items in the enumerable. + +- If an argument `item` is given, counts items equal to `item`. +- If a block is given, counts elements yielding a true value. +- If no argument or block is given, counts all items. + +_Examples:_ + +```ruby +[1, 2, 3, 2].count #=> 4 +[1, 2, 3, 2].count(2) #=> 2 +[1, 2, 3, 4].count { |x| x%2 == 0 } #=> 2 +``` + +--- + +### `flat_map { |obj| block }` (alias: `collect_concat`) + +Returns a new array with the concatenated results of running the block once for every element. If no block is given, an enumerator is returned. + +_Example:_ + +```ruby +[1, 2, 3, 4].flat_map { |e| [e, -e] } #=> [1, -1, 2, -2, 3, -3, 4, -4] +[[1, 2], [3, 4]].flat_map { |e| e + [100] } #=> [1, 2, 100, 3, 4, 100] +``` + +--- + +### `max_by { |obj| block }` + +Returns the object in the enumerable that gives the maximum value from the given block. If no block is given, an enumerator is returned. + +_Example:_ + +```ruby +%w[albatross dog horse].max_by {|x| x.length } #=> "albatross" +``` + +--- + +### `min_by { |obj| block }` + +Returns the object in the enumerable that gives the minimum value from the given block. If no block is given, an enumerator is returned. + +_Example:_ + +```ruby +%w[albatross dog horse].min_by {|x| x.length } #=> "dog" +``` + +--- + +### `minmax` / `minmax { |a, b| block }` + +Returns a two-element array containing the minimum and maximum values. The first form assumes objects implement `Comparable`; the second uses the block to return `a <=> b`. + +_Examples:_ + +```ruby +a = %w(albatross dog horse) +a.minmax #=> ["albatross", "horse"] +a.minmax { |a, b| a.length <=> b.length } #=> ["dog", "albatross"] +``` + +--- + +### `minmax_by { |obj| block }` + +Returns a two-element array containing the objects that correspond to the minimum and maximum values from the given block. If no block is given, an enumerator is returned. + +_Example:_ + +```ruby +%w(albatross dog horse).minmax_by { |x| x.length } #=> ["dog", "albatross"] +``` + +--- + +### `none?` / `none? { |obj| block }` / `none?(pattern)` + +- With a block: Returns `true` if the block never returns `true` for any element. +- With a pattern: Returns `true` if `pattern === element` is never true for any element. +- With no argument or block: Returns `true` if none of the collection members are truthy. + +_Examples:_ + +```ruby +%w(ant bear cat).none? { |word| word.length == 5 } #=> true +%w(ant bear cat).none? { |word| word.length >= 4 } #=> false +%w{ant bear cat}.none?(/d/) #=> true +[1, 3.14, 42].none?(Float) #=> false +[].none? #=> true +[nil, false].none? #=> true +``` + +--- + +### `one?` / `one? { |obj| block }` / `one?(pattern)` + +- With a block: Returns `true` if the block returns `true` exactly once. +- With a pattern: Returns `true` if `pattern === element` is true for exactly one element. +- With no argument or block: Returns `true` if exactly one of the collection members is truthy. + +_Examples:_ + +```ruby +%w(ant bear cat).one? { |word| word.length == 4 } #=> true +%w(ant bear cat).one? { |word| word.length > 4 } #=> false +%w{ant bear cat}.one?(/t/) #=> false # (Note: original example might be specific to mruby version, "ant" and "cat" contain "t") +[nil, true, false].one? #=> true +[nil, true, 99].one?(Integer) #=> true +``` + +--- + +### `all?` / `all? { |obj| block }` / `all?(pattern)` + +- With a block: Returns `true` if the block never returns `false` or `nil` for any element. +- With a pattern: Returns `true` if `pattern === element` is true for every element. +- With no argument or block: Returns `true` if all collection members are truthy. + +_Examples:_ + +```ruby +%w[ant bear cat].all? { |word| word.length >= 3 } #=> true +%w[ant bear cat].all? { |word| word.length >= 4 } #=> false +[1, 2i, 3.14].all?(Numeric) #=> true +[nil, true, 99].all? #=> false +``` + +--- + +### `any?` / `any? { |obj| block }` / `any?(pattern)` + +- With a block: Returns `true` if the block ever returns a truthy value for an element. +- With a pattern: Returns `true` if `pattern === element` is true for any element. +- With no argument or block: Returns `true` if at least one collection member is truthy. + +_Examples:_ + +```ruby +%w[ant bear cat].any? { |word| word.length >= 3 } #=> true +%w[ant bear cat].any?(/d/) #=> false +[nil, true, 99].any?(Integer) #=> true +[nil, true, 99].any? #=> true +[].any? #=> false +``` + +--- + +### `each_with_object(obj) { |(*args), memo_obj| ... }` + +Iterates the given block for each element with an arbitrary object `obj` given, and returns the initially given `obj`. If no block is given, an enumerator is returned. + +_Example:_ + +```ruby +(1..10).each_with_object([]) { |i, a| a << i*2 } +#=> [2, 4, 6, 8, 10, 12, 14, 16, 18, 20] +``` + +--- + +### `reverse_each { |item| block }` + +Builds a temporary array and traverses that array in reverse order, executing the block for each item. If no block is given, an enumerator is returned. + +_Example:_ + +```ruby +(1..3).reverse_each { |v| p v } +# produces: +# 3 +# 2 +# 1 +``` + +--- + +### `cycle(n=nil) { |obj| block }` + +Calls the block for each element repeatedly `n` times, or forever if `n` is `nil`. If `n` is non-positive or the collection is empty, does nothing. Returns `nil` if the loop finishes. If no block is given, an enumerator is returned. + +_Examples:_ + +```ruby +a = ["a", "b", "c"] +# a.cycle { |x| puts x } # print, a, b, c, a, b, c,.. forever. +a.cycle(2) { |x| puts x } # print, a, b, c, a, b, c. +``` + +--- + +### `find_index(value)` / `find_index { |obj| block }` + +Compares each entry with `value` or passes to `block`. Returns the index for the first non-false evaluation. If no object matches, returns `nil`. If no argument or block is given, an enumerator is returned. + +_Examples:_ + +```ruby +(1..100).find_index { |i| i % 5 == 0 and i % 7 == 0 } #=> 34 +(1..100).find_index(50) #=> 49 +``` + +--- + +### `zip(arg, ...) { |arr| block }` + +Takes one element from the enumerable and merges corresponding elements from each `arg`. Generates a sequence of n-element arrays. The length will be `enum#size`. If any argument is shorter, `nil` values are supplied. If a block is given, it's invoked for each output array; otherwise, an array of arrays is returned. + +_Examples:_ + +```ruby +a = [ 4, 5, 6 ] +b = [ 7, 8, 9 ] +a.zip(b) #=> [[4, 7], [5, 8], [6, 9]] +[1, 2].zip(a, b) #=> [[1, 4, 7], [2, 5, 8]] +c = [] +a.zip(b) { |x, y| c << x + y } #=> nil +c #=> [11, 13, 15] +``` + +--- + +### `to_h { |element| block }` + +Returns a hash by interpreting the enumerable as a list of `[key, value]` pairs. If a block is given, it's used to transform elements before they are converted into pairs. + +_Example:_ + +```ruby +%i[hello world].each_with_index.to_h + # => {:hello => 0, :world => 1} + +# With a block (example assumes a suitable block) +# (0..3).to_h {|x| [x, x*x] } # => {0=>0, 1=>1, 2=>4, 3=>9} (Conceptual example) +``` + +_(The `to_h` method in the source is a bit complex with its block handling; the example is simplified.)_ + +--- + +### `uniq { |item| block }` + +Returns a new array by removing duplicate values. If a block is given, it will use the return value of the block for uniqueness comparison. + +_Examples:_ + +```ruby +[1, 2, 2, 3, 1].uniq #=> [1, 2, 3] +["a", "B", "c", "b"].uniq { |s| s.downcase } #=> ["a", "B", "c"] +``` + +--- + +### `filter_map { |obj| block }` + +Returns a new array containing the truthy results of running the block for every element. If no block is given, an enumerator is returned. + +_Example:_ + +```ruby +[-2, -1, 0, 1, 2].filter_map { |x| x*2 if x > 0 } #=> [2, 4] +``` + +--- + +### `grep_v(pattern) { |obj| block }` + +Returns an array of elements that do NOT match the `pattern`. If a block is given, the elements of the result are passed to the block and the block's results are returned. + +_Example:_ + +```ruby +a = ["apple", "banana", "orange", "grape"] +a.grep_v(/an/) #=> ["apple", "orange", "grape"] +a.grep_v(/an/) { |s| s.upcase } #=> ["APPLE", "ORANGE", "GRAPE"] +``` + +--- + +### `tally` + +Tallies the collection. Returns a hash where keys are elements and values are their counts. + +_Example:_ + +```ruby +["a", "b", "c", "b"].tally #=> {"a"=>1, "b"=>2, "c"=>1} +``` + +--- + +### `sum(initial=0) { |obj| block }` + +Returns the sum of elements. If a block is given, each element is processed by the block before addition. `initial` is the starting value for the sum. + +_Examples:_ + +```ruby +[1, 2, 3].sum #=> 6 +[1, 2, 3].sum(10) #=> 16 +[1, 2, 3].sum { |x| x * 2 } #=> 12 +({a:1, b:2, c:3}).sum {|k,v| v} #=> 6 (Example from Ruby docs, assuming similar behavior) +``` + +--- + +### `each_entry(*args) { |obj| block }` + +Calls block once for each element, passing that element as a parameter (converting multiple values from yield to an array). If no block is given, an enumerator is returned. + +_Example:_ + +```ruby +class Foo + include Enumerable + def each + yield 1 + yield 1, 2 + yield + end +end +Foo.new.each_entry{ |o| p o } +# produces: +# 1 +# [1, 2] +# nil +``` + +--- + +**Note on `Array#sort_by` and `Array#sort_by!`** + +The gem also provides optimized `sort_by` and `sort_by!` methods directly on the `Array` class. `sort_by!` sorts the array in place. + +```ruby +arr = ["apple", "pear", "fig"] +arr.sort_by! { |word| word.length } +p arr #=> ["fig", "pear", "apple"] +``` diff --git a/mrbgems/mruby-enum-ext/mrblib/enum.rb b/mrbgems/mruby-enum-ext/mrblib/enum.rb index 3a4416196c..41d85b31bd 100644 --- a/mrbgems/mruby-enum-ext/mrblib/enum.rb +++ b/mrbgems/mruby-enum-ext/mrblib/enum.rb @@ -6,7 +6,7 @@ module Enumerable # call-seq: # enum.drop(n) -> array # - # Drops first n elements from enum, and returns rest elements + # Drops first n elements from *enum*, and returns rest elements # in an array. # # a = [1, 2, 3, 4, 5, 0] @@ -27,7 +27,7 @@ def drop(n) # enum.drop_while -> an_enumerator # # Drops elements up to, but not including, the first element for - # which the block returns +nil+ or +false+ and returns an array + # which the block returns `nil` or `false` and returns an array # containing the remaining elements. # # If no block is given, an enumerator is returned instead. @@ -36,7 +36,7 @@ def drop(n) # a.drop_while {|i| i < 3 } #=> [3, 4, 5, 0] def drop_while(&block) - return to_enum :drop_while unless block + return to_enum(:drop_while) unless block ary, state = [], false self.each do |*val| @@ -50,7 +50,7 @@ def drop_while(&block) # call-seq: # enum.take(n) -> array # - # Returns first n elements from enum. + # Returns first n elements from *enum*. # # a = [1, 2, 3, 4, 5, 0] # a.take(3) #=> [1, 2, 3] @@ -74,7 +74,7 @@ def take(n) # enum.take_while {|arr| block } -> array # enum.take_while -> an_enumerator # - # Passes elements to the block until the block returns +nil+ or +false+, + # Passes elements to the block until the block returns `nil` or `false`, # then stops iterating and returns an array of all prior elements. # # If no block is given, an enumerator is returned instead. @@ -83,7 +83,7 @@ def take(n) # a.take_while {|i| i < 3 } #=> [1, 2] # def take_while(&block) - return to_enum :take_while unless block + return to_enum(:take_while) unless block ary = [] self.each do |*val| @@ -163,13 +163,13 @@ def each_slice(n, &block) # enum.group_by -> an_enumerator # # Returns a hash, which keys are evaluated result from the - # block, and values are arrays of elements in enum + # block, and values are arrays of elements in *enum* # corresponding to the key. # # (1..6).group_by {|i| i%3} #=> {0=>[3, 6], 1=>[1, 4], 2=>[2, 5]} # def group_by(&block) - return to_enum :group_by unless block + return to_enum(:group_by) unless block h = {} self.each do |*val| @@ -185,24 +185,13 @@ def group_by(&block) # enum.sort_by { |obj| block } -> array # enum.sort_by -> an_enumerator # - # Sorts enum using a set of keys generated by mapping the - # values in enum through the given block. + # Sorts *enum* using a set of keys generated by mapping the + # values in *enum* through the given block. # # If no block is given, an enumerator is returned instead. - def sort_by(&block) - return to_enum :sort_by unless block - - ary = [] - orig = [] - self.each_with_index{|e, i| - orig.push(e) - ary.push([block.call(e), i]) - } - if ary.size > 1 - ary.sort! - end - ary.collect{|e,i| orig[i]} + return to_enum(:sort_by) unless block + self.to_a.sort_by(&block) end ## @@ -210,8 +199,8 @@ def sort_by(&block) # enum.first -> obj or nil # enum.first(n) -> an_array # - # Returns the first element, or the first +n+ elements, of the enumerable. - # If the enumerable is empty, the first form returns nil, and the + # Returns the first element, or the first `n` elements, of the enumerable. + # If the enumerable is empty, the first form returns `nil`, and the # second form returns an empty array. def first(*args) case args.length @@ -242,9 +231,9 @@ def first(*args) # enum.count(item) -> int # enum.count { |obj| block } -> int # - # Returns the number of items in +enum+ through enumeration. - # If an argument is given, the number of items in +enum+ that - # are equal to +item+ are counted. If a block is given, it + # Returns the number of items in `enum` through enumeration. + # If an argument is given, the number of items in `enum` that + # are equal to `item` are counted. If a block is given, it # counts the number of elements yielding a true value. def count(v=NONE, &block) count = 0 @@ -253,7 +242,7 @@ def count(v=NONE, &block) count += 1 if block.call(*val) end else - if v == NONE + if NONE.equal?(v) self.each { count += 1 } else self.each do |*val| @@ -272,19 +261,19 @@ def count(v=NONE, &block) # enum.collect_concat -> an_enumerator # # Returns a new array with the concatenated results of running - # block once for every element in enum. + # block once for every element in *enum*. # # If no block is given, an enumerator is returned instead. # # [1, 2, 3, 4].flat_map { |e| [e, -e] } #=> [1, -1, 2, -2, 3, -3, 4, -4] # [[1, 2], [3, 4]].flat_map { |e| e + [100] } #=> [1, 2, 100, 3, 4, 100] def flat_map(&block) - return to_enum :flat_map unless block + return to_enum(:flat_map) unless block ary = [] self.each do |*e| e2 = block.call(*e) - if e2.respond_to? :each + if e2.respond_to?(:each) e2.each {|e3| ary.push(e3) } else ary.push(e2) @@ -299,7 +288,7 @@ def flat_map(&block) # enum.max_by {|obj| block } -> obj # enum.max_by -> an_enumerator # - # Returns the object in enum that gives the maximum + # Returns the object in *enum* that gives the maximum # value from the given block. # # If no block is given, an enumerator is returned instead. @@ -307,7 +296,7 @@ def flat_map(&block) # %w[albatross dog horse].max_by {|x| x.length } #=> "albatross" def max_by(&block) - return to_enum :max_by unless block + return to_enum(:max_by) unless block first = true max = nil @@ -333,7 +322,7 @@ def max_by(&block) # enum.min_by {|obj| block } -> obj # enum.min_by -> an_enumerator # - # Returns the object in enum that gives the minimum + # Returns the object in *enum* that gives the minimum # value from the given block. # # If no block is given, an enumerator is returned instead. @@ -341,7 +330,7 @@ def max_by(&block) # %w[albatross dog horse].min_by {|x| x.length } #=> "dog" def min_by(&block) - return to_enum :min_by unless block + return to_enum(:min_by) unless block first = true min = nil @@ -368,8 +357,8 @@ def min_by(&block) # enum.minmax { |a, b| block } -> [min, max] # # Returns two elements array which contains the minimum and the - # maximum value in the enumerable. The first form assumes all - # objects implement Comparable; the second uses the + # maximum value in the enumerable. The first form assumes all + # objects implement `Comparable`; the second uses the # block to return a <=> b. # # a = %w(albatross dog horse) @@ -407,7 +396,7 @@ def minmax(&block) # enum.minmax_by -> an_enumerator # # Returns a two element array containing the objects in - # enum that correspond to the minimum and maximum values respectively + # *enum* that correspond to the minimum and maximum values respectively # from the given block. # # If no block is given, an enumerator is returned instead. @@ -415,7 +404,7 @@ def minmax(&block) # %w(albatross dog horse).minmax_by { |x| x.length } #=> ["dog", "albatross"] def minmax_by(&block) - return to_enum :minmax_by unless block + return to_enum(:minmax_by) unless block max = nil max_cmp = nil @@ -448,12 +437,12 @@ def minmax_by(&block) # enum.none?(pattern) -> true or false # # Passes each element of the collection to the given block. The method - # returns true if the block never returns true - # for all elements. If the block is not given, none? will return - # true only if none of the collection members is true. + # returns `true` if the block never returns `true` + # for all elements. If the block is not given, `none?` will return + # `true` only if none of the collection members is true. # # If a pattern is supplied instead, the method returns whether - # pattern === element for none of the collection members. + # `pattern === element` for none of the collection members. # # %w(ant bear cat).none? { |word| word.length == 5 } #=> true # %w(ant bear cat).none? { |word| word.length >= 4 } #=> false @@ -464,7 +453,7 @@ def minmax_by(&block) # [nil, true].none? #=> false def none?(pat=NONE, &block) - if pat != NONE + if !NONE.equal?(pat) self.each do |*val| return false if pat === val.__svalue end @@ -486,13 +475,13 @@ def none?(pat=NONE, &block) # enum.one?(pattern) -> true or false # # Passes each element of the collection to the given block. The method - # returns true if the block returns true - # exactly once. If the block is not given, one? will return - # true only if exactly one of the collection members is + # returns `true` if the block returns `true` + # exactly once. If the block is not given, `one?` will return + # `true` only if exactly one of the collection members is # true. # # If a pattern is supplied instead, the method returns whether - # pattern === element for exactly one collection member. + # `pattern === element` for exactly one collection member. # # %w(ant bear cat).one? { |word| word.length == 4 } #=> true # %w(ant bear cat).one? { |word| word.length > 4 } #=> false @@ -505,7 +494,7 @@ def none?(pat=NONE, &block) def one?(pat=NONE, &block) count = 0 - if pat!=NONE + if !NONE.equal?(pat) self.each do |*val| count += 1 if pat === val.__svalue return false if count > 1 @@ -531,14 +520,14 @@ def one?(pat=NONE, &block) # enum.all?(pattern) -> true or false # # Passes each element of the collection to the given block. The method - # returns true if the block never returns - # false or nil. If the block is not given, - # Ruby adds an implicit block of { |obj| obj } which will - # cause #all? to return +true+ when none of the collection members are - # +false+ or +nil+. + # returns `true` if the block never returns + # `false` or `nil`. If the block is not given, + # Ruby adds an implicit block of `{ |obj| obj }` which will + # cause #all? to return `true` when none of the collection members are + # `false` or `nil`. # # If a pattern is supplied instead, the method returns whether - # pattern === element for every collection member. + # `pattern === element` for every collection member. # # %w[ant bear cat].all? { |word| word.length >= 3 } #=> true # %w[ant bear cat].all? { |word| word.length >= 4 } #=> false @@ -547,12 +536,12 @@ def one?(pat=NONE, &block) # [nil, true, 99].all? #=> false # def all?(pat=NONE, &block) - if pat != NONE - self.each{|*val| return false unless pat === val.__svalue} + if !NONE.equal?(pat) + self.each {|*val| return false unless pat === val.__svalue} elsif block - self.each{|*val| return false unless block.call(*val)} + self.each {|*val| return false unless block.call(*val)} else - self.each{|*val| return false unless val.__svalue} + self.each {|*val| return false unless val.__svalue} end true end @@ -563,14 +552,14 @@ def all?(pat=NONE, &block) # enum.any?(pattern) -> true or false # # Passes each element of the collection to the given block. The method - # returns true if the block ever returns a value other - # than false or nil. If the block is not - # given, Ruby adds an implicit block of { |obj| obj } that - # will cause #any? to return +true+ if at least one of the collection - # members is not +false+ or +nil+. + # returns `true` if the block ever returns a value other + # than `false` or `nil`. If the block is not + # given, Ruby adds an implicit block of `{ |obj| obj }` that + # will cause #any? to return `true` if at least one of the collection + # members is not `false` or `nil`. # # If a pattern is supplied instead, the method returns whether - # pattern === element for any collection member. + # `pattern === element` for any collection member. # # %w[ant bear cat].any? { |word| word.length >= 3 } #=> true # %w[ant bear cat].any? { |word| word.length >= 4 } #=> true @@ -580,12 +569,12 @@ def all?(pat=NONE, &block) # [].any? #=> false # def any?(pat=NONE, &block) - if pat != NONE - self.each{|*val| return true if pat === val.__svalue} + if !NONE.equal?(pat) + self.each {|*val| return true if pat === val.__svalue} elsif block - self.each{|*val| return true if block.call(*val)} + self.each {|*val| return true if block.call(*val)} else - self.each{|*val| return true if val.__svalue} + self.each {|*val| return true if val.__svalue} end false end @@ -630,7 +619,7 @@ def each_with_object(obj, &block) # def reverse_each(&block) - return to_enum :reverse_each unless block + return to_enum(:reverse_each) unless block ary = self.to_a i = ary.size - 1 @@ -646,13 +635,13 @@ def reverse_each(&block) # enum.cycle(n=nil) { |obj| block } -> nil # enum.cycle(n=nil) -> an_enumerator # - # Calls block for each element of enum repeatedly _n_ - # times or forever if none or +nil+ is given. If a non-positive - # number is given or the collection is empty, does nothing. Returns - # +nil+ if the loop has finished without getting interrupted. + # Calls *block* for each element of *enum* repeatedly _n_ + # times or forever if none or `nil` is given. If a non-positive + # number is given or the collection is empty, does nothing. Returns + # `nil` if the loop has finished without getting interrupted. # # Enumerable#cycle saves elements in an internal array so changes - # to enum after the first pass have no effect. + # to *enum* after the first pass have no effect. # # If no block is given, an enumerator is returned instead. # @@ -695,10 +684,10 @@ def cycle(nv = nil, &block) # enum.find_index { |obj| block } -> int or nil # enum.find_index -> an_enumerator # - # Compares each entry in enum with value or passes - # to block. Returns the index for the first for which the + # Compares each entry in *enum* with value or passes + # to block. Returns the index for the first for which the # evaluated value is non-false. If no object matches, returns - # nil + # `nil` # # If neither block nor argument is given, an enumerator is returned instead. # @@ -708,7 +697,7 @@ def cycle(nv = nil, &block) # def find_index(val=NONE, &block) - return to_enum(:find_index, val) if !block && val == NONE + return to_enum(:find_index, val) if !block && NONE.equal?(val) idx = 0 if block @@ -730,12 +719,12 @@ def find_index(val=NONE, &block) # enum.zip(arg, ...) -> an_array_of_array # enum.zip(arg, ...) { |arr| block } -> nil # - # Takes one element from enum and merges corresponding - # elements from each args. This generates a sequence of + # Takes one element from *enum* and merges corresponding + # elements from each *args*. This generates a sequence of # n-element arrays, where n is one more than the - # count of arguments. The length of the resulting sequence will be - # enum#size. If the size of any argument is less than - # enum#size, nil values are supplied. If + # count of arguments. The length of the resulting sequence will be + # `enum#size`. If the size of any argument is less than + # `enum#size`, `nil` values are supplied. If # a block is given, it is invoked for each output array, otherwise # an array of arrays is returned. # @@ -784,8 +773,8 @@ def zip(*arg, &block) # call-seq: # enum.to_h -> hash # - # Returns the result of interpreting enum as a list of - # [key, value] pairs. + # Returns the result of interpreting *enum* as a list of + # `[key, value]` pairs. # # %i[hello world].each_with_index.to_h # # => {:hello => 0, :world => 1} @@ -831,8 +820,8 @@ def filter_map(&blk) return to_enum(:filter_map) unless blk ary = [] - self.each do |x| - x = blk.call(x) + self.each do |*x| + x = blk.call(*x) ary.push x if x end ary @@ -840,18 +829,30 @@ def filter_map(&blk) alias filter select + def grep_v(pattern, &block) + ary = [] + self.each {|*val| + sv = val.__svalue + unless pattern === sv + ary.push((block)? block.call(*val): sv) + end + } + ary + end + ## # call-seq: # enum.tally -> a_hash # - # Tallys the collection. Returns a hash where the keys are the + # Tallys the collection. Returns a hash where the keys are the # elements and the values are numbers of elements in the collection # that correspond to the key. # # ["a", "b", "c", "b"].tally #=> {"a"=>1, "b"=>2, "c"=>1} def tally hash = {} - self.each do |x| + self.each do |*x| + x = x.__svalue hash[x] = (hash[x]||0)+1 end hash @@ -870,14 +871,68 @@ def tally def sum(init=0,&block) result=init if block - self.each do |e| - result += block.call(e) + self.each do |*e| + result += block.call(*e) end else - self.each do |e| - result += e + self.each do |*e| + result += e.__svalue end end result end + + ## + # call-seq: + # enum.each_entry { |obj| block } -> enum + # enum.each_entry -> an_enumerator + # + # Calls block once for each element in self, passing that + # element as a parameter, converting multiple values from yield to an + # array. + # + # If no block is given, an enumerator is returned instead. + # + # class Foo + # include Enumerable + # def each + # yield 1 + # yield 1, 2 + # yield + # end + # end + # Foo.new.each_entry{ |o| p o } + # + # produces: + # + # 1 + # [1, 2] + # nil + # + def each_entry(*args, &blk) + return to_enum(:each_entry) unless blk + self.each do |*a| + yield a.__svalue + end + return self + end +end + +class Array + def sort_by(&block) + return to_enum(:sort_by) unless block + + ary = [] + self.each_with_index {|e, i| + ary.push([block.call(e), i]) + } + if ary.size > 1 + ary.sort! + end + ary.collect! {|e,i| self[i]} + end + + def sort_by!(&block) + self.replace(self.sort_by(&block)) + end end diff --git a/mrbgems/mruby-enum-ext/test/enum.rb b/mrbgems/mruby-enum-ext/test/enum.rb index 31181fe1a3..f0441ba15e 100644 --- a/mrbgems/mruby-enum-ext/test/enum.rb +++ b/mrbgems/mruby-enum-ext/test/enum.rb @@ -195,3 +195,29 @@ def each assert("Enumerable#tally") do assert_equal({"a"=>1, "b"=>2, "c"=>1}, ["a", "b", "c", "b"].tally) end + +assert("Enumerable#grep_v") do + a = [1, 2, 3, 4, 5, 0] + assert_equal [1, 5, 0], a.grep_v(2..4) + assert_equal [1, 2, 3, 4, 5, 0], a.grep_v(6..8) + assert_equal [2, 4, 6, 8, 10], a.grep_v(0) {|v| v * 2} +end + +assert("Enumerable#each_entry") do + each_entry_test = Class.new { + include Enumerable + def each + yield 1 + yield 1, 2 + yield + end + } + e = each_entry_test.new + a = [] + e.each_entry {|v| + a.push(v) + } + assert_equal 1, a[0] + assert_equal [1,2], a[1] + assert_equal nil, a[2] +end diff --git a/mrbgems/mruby-enum-lazy/README.md b/mrbgems/mruby-enum-lazy/README.md new file mode 100644 index 0000000000..eff746f0fb --- /dev/null +++ b/mrbgems/mruby-enum-lazy/README.md @@ -0,0 +1,91 @@ +# mruby-enum-lazy + +## Overview + +This mrbgem provides lazy evaluation for Enumerable objects in mruby. It introduces the `Enumerable#lazy` method, which returns an instance of `Enumerator::Lazy`. This allows for more efficient processing of collections, especially large or potentially infinite sequences, by evaluating elements only when they are needed. + +## Functionality + +When you call `.lazy` on an Enumerable object (like an Array or Range), you get back an `Enumerator::Lazy` object. This object behaves much like a regular Enumerator, but with a key difference: methods that transform the collection are deferred until the results are actually required. + +The following methods are implemented to act lazily: + +- `map` / `collect` +- `select` / `find_all` +- `reject` +- `grep` +- `grep_v` +- `drop` +- `drop_while` +- `take` +- `take_while` +- `flat_map` / `collect_concat` +- `zip` +- `uniq` + +To trigger the evaluation of the lazy operations and retrieve all results (if finite), you can use methods like `force` or `to_a`. + +### How it works + +Operations on an `Enumerator::Lazy` object are chained together. The actual computation of each element is postponed until it's requested (e.g., by `force`, `to_a`, or iterating with `each`). This can lead to significant performance improvements by avoiding unnecessary computations and memory allocations, particularly when dealing with large data sets or when only a subset of results is needed. + +## Usage Example + +Here's a simple example demonstrating lazy evaluation: + +```ruby +# Without lazy evaluation +# This would attempt to create an infinite array, which is not feasible. +# (1..Float::INFINITY).map { |x| x * x }.select { |x| x % 2 == 0 }.take(5).to_a + +# With lazy evaluation +p (1..Float::INFINITY).lazy.map { |x| x * x }.select { |x| x % 2 == 0 }.take(5).force +# Output: [4, 16, 36, 64, 100] + +# Another example: +a = [1, 2, 3, 4, 5] +lazy_sequence = a.lazy.map do |x| + puts "mapping #{x}" + x * 10 +end.select do |x| + puts "selecting #{x}" + x > 20 +end + +puts "Applying force..." +result = lazy_sequence.force +# Output: +# mapping 1 +# selecting 10 +# mapping 2 +# selecting 20 +# mapping 3 +# selecting 30 +# mapping 4 +# selecting 40 +# mapping 5 +# selecting 50 +# Applying force... +p result # Output: [30, 40, 50] +``` + +## Dependencies + +This gem depends on the following mruby core gems: + +- `mruby-enumerator` +- `mruby-enum-ext` + +## License + +MIT License + +## Author + +mruby developers + +## Acknowledgements + +Based on +Inspired by +Reference: (ja) diff --git a/mrbgems/mruby-enum-lazy/mrblib/lazy.rb b/mrbgems/mruby-enum-lazy/mrblib/lazy.rb index e4f116a933..25e728004e 100644 --- a/mrbgems/mruby-enum-lazy/mrblib/lazy.rb +++ b/mrbgems/mruby-enum-lazy/mrblib/lazy.rb @@ -1,20 +1,34 @@ module Enumerable - # = Enumerable#lazy implementation # - # Enumerable#lazy returns an instance of Enumerator::Lazy. - # You can use it just like as normal Enumerable object, - # except these methods act as 'lazy': + # call-seq: + # enum.lazy -> lazy_enumerator + # + # Returns an Enumerator::Lazy, which redefines most Enumerable + # methods to postpone enumeration and enumerate values only on an + # as-needed basis. + # + # === Example + # + # The following program finds pythagorean triples: + # + # def pythagorean_triples + # (1..Float::INFINITY).lazy.flat_map {|z| + # (1..z).flat_map {|x| + # (x..z).select {|y| + # x*x + y*y == z*z + # }.map {|y| + # [x, y, z] + # } + # } + # } + # end + # # show first ten pythagorean triples + # p pythagorean_triples.take(10).force # take is lazy, so force is needed + # p pythagorean_triples.first(10) # first is eager + # # show pythagorean triples less than 100 + # p pythagorean_triples.take_while { |*, z| z < 100 }.force # - # - map collect - # - select find_all - # - reject - # - grep - # - drop - # - drop_while - # - take_while - # - flat_map collect_concat - # - zip def lazy Enumerator::Lazy.new(self) end @@ -27,10 +41,25 @@ class Enumerator # Inspired by https://github.com/antimon2/enumerable_lz # http://jp.rubyist.net/magazine/?0034-Enumerable_lz (ja) class Lazy < Enumerator + # + # call-seq: + # Lazy.new(obj, &block) + # + # Creates a new Lazy enumerator. When the enumerator is actually enumerated + # (e.g. by calling #force), obj will be enumerated and each value passed + # to the given block. The block can yield values back by calling yielder.yield. + # For example, to create a method that acts like Array#select: + # + # def select + # Lazy.new(self) do |yielder, value| + # yielder.yield(value) if yield(value) + # end + # end + # def initialize(obj, &block) super(){|yielder| begin - obj.each{|x| + obj.each {|x| if block block.call(yielder, x) else @@ -42,18 +71,52 @@ def initialize(obj, &block) } end + # + # call-seq: + # lazy.to_enum(method = :each, *args) -> lazy_enum + # lazy.to_enum(method = :each, *args) {|*args| ... } -> lazy_enum + # lazy.enum_for(method = :each, *args) -> lazy_enum + # lazy.enum_for(method = :each, *args) {|*args| ... } -> lazy_enum + # + # Similar to Object#to_enum, except it returns a lazy enumerator. + # This makes it easy to define Enumerable methods that will + # naturally remain lazy if called on a lazy enumerator. + # + # For example: + # + # module Enumerable + # def filter_map(&block) + # map(&block).compact + # end + # end + # def to_enum(meth=:each, *args, &block) unless self.respond_to?(meth) raise ArgumentError, "undefined method #{meth}" end lz = Lazy.new(self, &block) - lz.obj = self - lz.meth = meth - lz.args = args + obj = self + lz.instance_eval { + @obj = obj + @meth = meth + @args = args + } lz end alias enum_for to_enum + # + # call-seq: + # lazy.map {|obj| block } -> lazy_enumerator + # lazy.collect {|obj| block } -> lazy_enumerator + # + # Like Enumerable#map, but chains operation to be lazy-evaluated. + # + # (1..Float::INFINITY).lazy.map {|i| i**2 } + # #=> #:map> + # (1..Float::INFINITY).lazy.map {|i| i**2 }.first(3) + # #=> [1, 4, 9] + # def map(&block) Lazy.new(self){|yielder, val| yielder << block.call(val) @@ -61,6 +124,16 @@ def map(&block) end alias collect map + # + # call-seq: + # lazy.select {|obj| block } -> lazy_enumerator + # lazy.find_all {|obj| block } -> lazy_enumerator + # + # Like Enumerable#select, but chains operation to be lazy-evaluated. + # + # (1..Float::INFINITY).lazy.select {|i| i.even? }.first(3) + # #=> [2, 4, 6] + # def select(&block) Lazy.new(self){|yielder, val| if block.call(val) @@ -70,6 +143,15 @@ def select(&block) end alias find_all select + # + # call-seq: + # lazy.reject {|obj| block } -> lazy_enumerator + # + # Like Enumerable#reject, but chains operation to be lazy-evaluated. + # + # (1..Float::INFINITY).lazy.reject {|i| i.even? }.first(3) + # #=> [1, 3, 5] + # def reject(&block) Lazy.new(self){|yielder, val| unless block.call(val) @@ -78,6 +160,15 @@ def reject(&block) } end + # + # call-seq: + # lazy.grep(pattern) -> lazy_enumerator + # + # Like Enumerable#grep, but chains operation to be lazy-evaluated. + # + # (1..Float::INFINITY).lazy.grep(1..10).force + # #=> [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + # def grep(pattern) Lazy.new(self){|yielder, val| if pattern === val @@ -86,6 +177,32 @@ def grep(pattern) } end + # + # call-seq: + # lazy.grep_v(pattern) -> lazy_enumerator + # + # Like Enumerable#grep_v, but chains operation to be lazy-evaluated. + # + # (1..Float::INFINITY).lazy.grep_v(2..4).first(3) + # #=> [1, 5, 6] + # + def grep_v(pattern) + Lazy.new(self){|yielder, val| + unless pattern === val + yielder << val + end + } + end + + # + # call-seq: + # lazy.drop(n) -> lazy_enumerator + # + # Like Enumerable#drop, but chains operation to be lazy-evaluated. + # + # (1..Float::INFINITY).lazy.drop(3).first(3) + # #=> [4, 5, 6] + # def drop(n) dropped = 0 Lazy.new(self){|yielder, val| @@ -97,6 +214,15 @@ def drop(n) } end + # + # call-seq: + # lazy.drop_while {|obj| block } -> lazy_enumerator + # + # Like Enumerable#drop_while, but chains operation to be lazy-evaluated. + # + # (1..Float::INFINITY).lazy.drop_while {|i| i < 4 }.first(3) + # #=> [4, 5, 6] + # def drop_while(&block) dropping = true Lazy.new(self){|yielder, val| @@ -111,6 +237,15 @@ def drop_while(&block) } end + # + # call-seq: + # lazy.take(n) -> lazy_enumerator + # + # Like Enumerable#take, but chains operation to be lazy-evaluated. + # + # (1..Float::INFINITY).lazy.take(3).force + # #=> [1, 2, 3] + # def take(n) if n == 0 return Lazy.new(self){raise StopIteration} @@ -125,6 +260,15 @@ def take(n) } end + # + # call-seq: + # lazy.take_while {|obj| block } -> lazy_enumerator + # + # Like Enumerable#take_while, but chains operation to be lazy-evaluated. + # + # (1..Float::INFINITY).lazy.take_while {|i| i < 4 }.force + # #=> [1, 2, 3] + # def take_while(&block) Lazy.new(self){|yielder, val| if block.call(val) @@ -135,21 +279,43 @@ def take_while(&block) } end + # + # call-seq: + # lazy.flat_map {|obj| block } -> lazy_enumerator + # lazy.collect_concat {|obj| block } -> lazy_enumerator + # + # Like Enumerable#flat_map, but chains operation to be lazy-evaluated. + # + # ["foo", "bar"].lazy.flat_map {|i| i.each_char.lazy}.force + # #=> ["f", "o", "o", "b", "a", "r"] + # def flat_map(&block) Lazy.new(self){|yielder, val| - ary = block.call(val) - # TODO: check ary is an Array - ary.each{|x| - yielder << x - } + result = block.call(val) + if result.respond_to?(:each) + result.each {|x| yielder << x } + else + yielder << result + end } end alias collect_concat flat_map + # + # call-seq: + # lazy.zip(arg, ...) -> lazy_enumerator + # lazy.zip(arg, ...) {|arr| block } -> lazy_enumerator + # + # Like Enumerable#zip, but chains operation to be lazy-evaluated. + # However, if a block is given to zip, values are enumerated immediately. + # + # (1..Float::INFINITY).lazy.zip(('a'..'z').cycle).first(3) + # #=> [[1, "a"], [2, "b"], [3, "c"]] + # def zip(*args, &block) enums = [self] + args Lazy.new(self){|yielder, val| - ary = enums.map{|e| e.next} + ary = enums.map {|e| e.next} if block yielder << block.call(ary) else @@ -158,6 +324,16 @@ def zip(*args, &block) } end + # + # call-seq: + # lazy.uniq -> lazy_enumerator + # lazy.uniq {|item| block } -> lazy_enumerator + # + # Like Enumerable#uniq, but chains operation to be lazy-evaluated. + # + # (1..Float::INFINITY).lazy.map {|i| i % 3}.uniq.first(3) + # #=> [1, 2, 0] + # def uniq(&block) hash = {} Lazy.new(self){|yielder, val| @@ -173,6 +349,36 @@ def uniq(&block) } end + # + # call-seq: + # lazy.tap_each {|obj| block } -> lazy_enumerator + # + # Yields each element to the block for side effects (e.g. logging, + # debugging) and passes it through unmodified. + # + # (1..Float::INFINITY).lazy + # .tap_each {|i| puts "saw: #{i}" } + # .select(&:even?) + # .first(3) + # #=> [2, 4, 6] (prints "saw: 1", "saw: 2", ... along the way) + # + def tap_each(&block) + Lazy.new(self){|yielder, val| + block.call(val) + yielder << val + } + end + + # + # call-seq: + # lazy.force -> array + # + # Forces lazy evaluation and returns an array containing the values + # enumerated by the lazy enumerator. This is an alias for to_a. + # + # (1..Float::INFINITY).lazy.take(3).force + # #=> [1, 2, 3] + # alias force to_a end end diff --git a/mrbgems/mruby-enum-lazy/test/lazy.rb b/mrbgems/mruby-enum-lazy/test/lazy.rb index e8277f3d34..0464441ee6 100644 --- a/mrbgems/mruby-enum-lazy/test/lazy.rb +++ b/mrbgems/mruby-enum-lazy/test/lazy.rb @@ -46,6 +46,37 @@ def a.b(b=nil) assert_equal [0*1, 2*3, 4*5, 6*7], lazy_enum.map { |a| a.first * a.last }.first(4) end +assert("Enumerator::Lazy#flat_map with array from block") do + assert_equal [1, 10, 2, 20, 3, 30], [1, 2, 3].lazy.flat_map {|x| [x, x*10]}.force +end + +assert("Enumerator::Lazy#flat_map with non-enumerable from block") do + assert_equal [1, 2, 3], [1, 2, 3].lazy.flat_map {|x| x}.force +end + +assert("Enumerator::Lazy#flat_map with nested array from block") do + assert_equal [[1, 2], [3, 4]], [1, 3].lazy.flat_map {|x| [[x, x+1]]}.force +end + +assert("Enumerator::Lazy#grep_v") do + lazy_grep_v = (0..).lazy.grep_v(2..4) + assert_kind_of Enumerator::Lazy, lazy_grep_v + assert_equal [0, 1, 5, 6], lazy_grep_v.first(4) +end + +assert("Enumerator::Lazy#tap_each") do + seen = [] + result = [1, 2, 3, 4, 5].lazy.tap_each{|x| seen << x }.select{|x| x % 2 == 0 }.force + assert_equal [2, 4], result + assert_equal [1, 2, 3, 4, 5], seen +end + +assert("Enumerator::Lazy#tap_each laziness") do + seen = [] + [1, 2, 3, 4, 5].lazy.tap_each{|x| seen << x }.first(3) + assert_equal [1, 2, 3], seen +end + assert("Enumerator::Lazy#zip with cycle") do e1 = [1, 2, 3].cycle e2 = [:a, :b].cycle diff --git a/mrbgems/mruby-enumerator/README.md b/mrbgems/mruby-enumerator/README.md new file mode 100644 index 0000000000..55164a9c63 --- /dev/null +++ b/mrbgems/mruby-enumerator/README.md @@ -0,0 +1,89 @@ +# mruby-enumerator + +The `mruby-enumerator` mrbgem provides the `Enumerator` class, which allows for both internal and external iteration in mruby. + +## Purpose and Functionality + +Enumerators are useful when you want to work with collections of data in a flexible way. They allow you to: + +- **Iterate externally:** Fetch elements one by one using methods like `next`. +- **Chain operations:** Combine multiple iteration steps (e.g., mapping and then selecting) without creating intermediate arrays. +- **Create custom iterators:** Define your own iteration logic. + +## Creating an Enumerator + +You can create an `Enumerator` in several ways: + +1. **Using `Kernel#to_enum` or `Kernel#enum_for`:** This is the most common way. You can turn any object that has an `each` method (or a similar iteration method) into an Enumerator. + + ```ruby + e = [1, 2, 3].to_enum # Creates an enumerator for the array + e = "hello".enum_for(:each_byte) # Creates an enumerator for iterating over bytes + ``` + +2. **Using `Enumerator.new`:** You can create an Enumerator by providing a block that defines how values are yielded. + + ```ruby + fib = Enumerator.new do |yielder| + a = b = 1 + loop do + yielder << a + a, b = b, a + b + end + end + + fib.take(5) # => [1, 1, 2, 3, 5] + ``` + +## Key Methods + +The `Enumerator` class includes many helpful methods from the `Enumerable` module, as well as some of its own: + +- **`next`**: Returns the next value from the enumerator. Raises `StopIteration` if the enumerator is at the end. +- **`peek`**: Returns the next value without advancing the iterator. +- **`rewind`**: Resets the enumerator to its beginning. +- **`with_index(offset = 0)`**: Iterates over elements, providing both the element and its index (with an optional offset). Returns a new Enumerator if no block is given. + + ```ruby + ["a", "b"].each.with_index(1) do |char, index| + puts "#{index}: #{char}" + end + # Output: + # 1: a + # 2: b + ``` + +- **`each_with_index`**: Similar to `with_index(0)`. +- **`with_object(obj)`**: Iterates over elements, passing an arbitrary object along with each element. Returns the given object. Returns a new Enumerator if no block is given. + + ```ruby + (1..3).each.with_object([]) do |i, arr| + arr << i * 2 + end # => [2, 4, 6] + ``` + +- **`each(*args)`**: Iterates over the enumerator. If arguments are provided, they are passed to the underlying iteration method. +- **`feed(value)`**: Sets a value to be returned by the `yield` inside the enumerator on its next call. + +## Chaining Enumerators + +One of the powerful features of Enumerators is the ability to chain operations: + +```ruby +data = [1, 2, 3, 4, 5] + +result = data.to_enum + .with_index # [[1, 0], [2, 1], [3, 2], [4, 3], [5, 4]] + .select { |num, idx| num.even? } # [[2, 1], [4, 3]] + .map { |num, idx| "#{idx}:#{num}" } # ["1:2", "3:4"] + +p result # => ["1:2", "3:4"] +``` + +This avoids creating intermediate arrays for each step, making the code more efficient and readable for complex data transformations. + +## Integration + +This mrbgem integrates the `Enumerator` class into mruby, making it available for use in your mruby projects. It also extends `Kernel` with `to_enum` and `enum_for`, and `Enumerable` with methods like `zip`, `chunk`, and `chunk_while` that leverage `Enumerator`. + +Refer to the source code and tests for more detailed examples and advanced usage. diff --git a/mrbgems/mruby-enumerator/mrblib/enumerator.rb b/mrbgems/mruby-enumerator/mrblib/enumerator.rb index 58efceb17e..c6f42eefa9 100644 --- a/mrbgems/mruby-enumerator/mrblib/enumerator.rb +++ b/mrbgems/mruby-enumerator/mrblib/enumerator.rb @@ -36,7 +36,7 @@ # # foo: two # # foo: three # -# This allows you to chain Enumerators together. For example, you +# This allows you to chain Enumerators together. For example, you # can map a list's elements to strings containing the index # and the element as a string via: # @@ -96,7 +96,7 @@ class Enumerator # # In the first form, iteration is defined by the given block, in # which a "yielder" object, given as block parameter, can be used to - # yield a value by calling the +yield+ method (aliased as +<<+): + # yield a value by calling the `yield` method (aliased as +<<+): # # fib = Enumerator.new do |y| # a = b = 1 @@ -112,12 +112,12 @@ class Enumerator # given object using the given method with the given arguments passed. This # form is left only for internal use. # - # Use of this form is discouraged. Use Kernel#enum_for or Kernel#to_enum + # Use of this form is discouraged. Use Kernel#enum_for or Kernel#to_enum # instead. def initialize(obj=NONE, meth=:each, *args, **kwd, &block) if block obj = Generator.new(&block) - elsif obj == NONE + elsif NONE.equal?(obj) raise ArgumentError, "wrong number of arguments (given 0, expected 1+)" end @@ -132,16 +132,20 @@ def initialize(obj=NONE, meth=:each, *args, **kwd, &block) @stop_exc = false end - attr_accessor :obj, :meth, :args, :kwd - attr_reader :fib - - def initialize_copy(obj) + private def initialize_copy(obj) raise TypeError, "can't copy type #{obj.class}" unless obj.kind_of? Enumerator - raise TypeError, "can't copy execution context" if obj.fib - @obj = obj.obj - @meth = obj.meth - @args = obj.args - @kwd = obj.kwd + raise TypeError, "can't copy execution context" if obj.instance_eval{@fib} + meth = args = kwd = fib = nil + obj.instance_eval { + obj = @obj + meth = @meth + args = @args + kwd = @kwd + } + @obj = obj + @meth = meth + @args = args + @kwd = kwd @fib = nil @lookahead = nil @feedvalue = nil @@ -154,13 +158,13 @@ def initialize_copy(obj) # e.with_index(offset = 0) # # Iterates the given block for each element with an index, which - # starts from +offset+. If no block is given, returns a new Enumerator - # that includes the index, starting from +offset+ + # starts from `offset`. If no block is given, returns a new Enumerator + # that includes the index, starting from `offset` # - # +offset+:: the starting index to use + # `offset`:: the starting index to use # def with_index(offset=0, &block) - return to_enum :with_index, offset unless block + return to_enum(:with_index, offset) unless block if offset.nil? offset = 0 @@ -169,9 +173,9 @@ def with_index(offset=0, &block) end n = offset - 1 - enumerator_block_call do |*i| + __enumerator_block_call do |*i| n += 1 - block.call i.__svalue, n + block.call(i.__svalue, n) end end @@ -195,8 +199,8 @@ def each_with_index(&block) # e.with_object(obj) {|(*args), obj| ... } # e.with_object(obj) # - # Iterates the given block for each element with an arbitrary object, +obj+, - # and returns +obj+ + # Iterates the given block for each element with an arbitrary object, `obj`, + # and returns `obj` # # If no block is given, returns a new Enumerator. # @@ -219,12 +223,21 @@ def each_with_index(&block) def with_object(object, &block) return to_enum(:with_object, object) unless block - enumerator_block_call do |i| - block.call [i,object] + __enumerator_block_call do |i| + block.call([i,object]) end object end + ## + # call-seq: + # enum.inspect -> string + # + # Returns a string representation of the enumerator. + # + # [1, 2, 3].each.inspect #=> "#" + # [1, 2, 3].map.inspect #=> "#" + # def inspect if @args && @args.size > 0 args = @args.join(", ") @@ -234,6 +247,24 @@ def inspect end end + ## + # call-seq: + # enum.size -> int, float, or nil + # + # Returns the size of the enumerator, or nil if it cannot be calculated lazily. + # + # [1, 2, 3].each.size #=> 3 + # (1..100).each.size #=> 100 + # loop.size #=> nil + # + def size + if @size + @size + elsif @obj.respond_to?(:size) + @obj.size + end + end + ## # call-seq: # enum.each { |elm| block } -> obj @@ -274,30 +305,30 @@ def each(*argv, &block) obj = self if 0 < argv.length obj = self.dup - args = obj.args + args = obj.instance_eval{@args} if !args.empty? args = args.dup args.concat argv else args = argv.dup end - obj.args = args + obj.instance_eval{@args = args} end return obj unless block - enumerator_block_call(&block) + __enumerator_block_call(&block) end - def enumerator_block_call(&block) + def __enumerator_block_call(&block) @obj.__send__ @meth, *@args, **@kwd, &block end - private :enumerator_block_call + private :__enumerator_block_call ## # call-seq: # e.next -> object # # Returns the next object in the enumerator, and move the internal position - # forward. When the position reached at the end, StopIteration is raised. + # forward. When the position reached at the end, StopIteration is raised. # # === Example # @@ -308,7 +339,7 @@ def enumerator_block_call(&block) # p e.next #=> 3 # p e.next #raises StopIteration # - # Note that enumeration sequence by +next+ does not affect other non-external + # Note that enumeration sequence by `next` does not affect other non-external # enumeration methods, unless the underlying iteration methods itself has # side-effect # @@ -321,11 +352,11 @@ def next # e.next_values -> array # # Returns the next object as an array in the enumerator, and move the - # internal position forward. When the position reached at the end, + # internal position forward. When the position reached at the end, # StopIteration is raised. # - # This method can be used to distinguish yield and yield - # nil. + # This method can be used to distinguish `yield` and `yield + # nil`. # # === Example # @@ -357,7 +388,7 @@ def next # # yield nil [nil] nil # # yield [1, 2] [[1, 2]] [1, 2] # - # Note that +next_values+ does not affect other non-external enumeration + # Note that `next_values` does not affect other non-external enumeration # methods unless underlying iteration method itself has side-effect # def next_values @@ -405,7 +436,7 @@ def next_values # e.peek -> object # # Returns the next object in the enumerator, but doesn't move the internal - # position forward. If the position is already at the end, StopIteration + # position forward. If the position is already at the end, StopIteration # is raised. # # === Example @@ -429,7 +460,7 @@ def peek # e.peek_values -> array # # Returns the next object as an array, similar to Enumerator#next_values, but - # doesn't move the internal position forward. If the position is already at + # doesn't move the internal position forward. If the position is already at # the end, StopIteration is raised. # # === Example @@ -479,7 +510,7 @@ def rewind # call-seq: # e.feed obj -> nil # - # Sets the value to be returned by the next yield inside +e+. + # Sets the value to be returned by the next yield inside `e`. # # If the value is not set, the yield returns nil. # @@ -564,17 +595,17 @@ def << *args # Enumerator.produce(initial = nil) { |val| } -> enumerator # # Creates an infinite enumerator from any block, just called over and - # over. Result of the previous iteration is passed to the next one. - # If +initial+ is provided, it is passed to the first iteration, and + # over. Result of the previous iteration is passed to the next one. + # If `initial` is provided, it is passed to the first iteration, and # becomes the first element of the enumerator; if it is not provided, - # first iteration receives +nil+, and its result becomes first + # first iteration receives `nil`, and its result becomes first # element of the iterator. # # Raising StopIteration from the block stops an iteration. # # Examples of usage: # - # Enumerator.produce(1, &:succ) # => enumerator of 1, 2, 3, 4, .... + # Enumerator.produce(1, &:succ) # => enumerator of 1, 2, 3, 4, ... # # Enumerator.produce { rand(10) } # => infinite random number sequence # @@ -583,7 +614,7 @@ def << *args def Enumerator.produce(init=NONE, &block) raise ArgumentError, "no block given" if block.nil? Enumerator.new do |y| - if init == NONE + if NONE.equal?(init) val = nil else val = init @@ -606,8 +637,8 @@ module Kernel # obj.to_enum(method = :each, *args) -> enum # obj.enum_for(method = :each, *args) -> enum # - # Creates a new Enumerator which will enumerate by calling +method+ on - # +obj+, passing +args+ if any. + # Creates a new Enumerator which will enumerate by calling `method` on + # `obj`, passing `args` if any. # # === Examples # @@ -647,8 +678,8 @@ module Kernel # # => returns an Enumerator when called without a block # enum.first(4) # => [1, 1, 1, 2] # - def to_enum(meth=:each, *args) - Enumerator.new self, meth, *args + def to_enum(meth=:each, *args, **kwd) + Enumerator.new self, meth, *args, **kwd end alias enum_for to_enum end @@ -689,4 +720,142 @@ def zip(*args, &block) result end + + ## + # call-seq: + # enum.chunk -> enumerator + # enum.chunk { |arr| block } -> enumerator + # + # Each element in the returned enumerator is a 2-element array consisting of: + # + # - A value returned by the block. + # - An array ("chunk") containing the element for which that value was returned, + # and all following elements for which the block returned the same value: + # + # So that: + # + # - Each block return value that is different from its predecessor + # begins a new chunk. + # - Each block return value that is the same as its predecessor + # continues the same chunk. + # + # Example: + # + # e = (0..10).chunk {|i| (i / 3).floor } # => # + # # The enumerator elements. + # e.next # => [0, [0, 1, 2]] + # e.next # => [1, [3, 4, 5]] + # e.next # => [2, [6, 7, 8]] + # e.next # => [3, [9, 10]] + # + # You can use the special symbol `:_alone` to force an element + # into its own separate chuck: + # + # a = [0, 0, 1, 1] + # e = a.chunk{|i| i.even? ? :_alone : true } + # e.to_a # => [[:_alone, [0]], [:_alone, [0]], [true, [1, 1]]] + # + # You can use the special symbol `:_separator` or `nil` + # to force an element to be ignored (not included in any chunk): + # + # a = [0, 0, -1, 1, 1] + # e = a.chunk{|i| i < 0 ? :_separator : true } + # e.to_a # => [[true, [0, 0]], [true, [1, 1]]] + def chunk(&block) + return to_enum(:chunk) unless block + + enum = self + Enumerator.new do |y| + last_value, arr = nil, [] + enum.each do |element| + value = block.call(element) + case value + when :_alone + y.yield [last_value, arr] if arr.size > 0 + y.yield [value, [element]] + last_value, arr = nil, [] + when :_separator, nil + y.yield [last_value, arr] if arr.size > 0 + last_value, arr = nil, [] + when last_value + arr << element + else + raise 'symbols beginning with an underscore are reserved' if value.is_a?(Symbol) && value.to_s[0] == '_' + y.yield [last_value, arr] if arr.size > 0 + last_value, arr = value, [element] + end + end + y.yield [last_value, arr] if arr.size > 0 + end + end + + + ## + # call-seq: + # enum.chunk_while {|elt_before, elt_after| bool } -> an_enumerator + # + # Creates an enumerator for each chunked elements. + # The beginnings of chunks are defined by the block. + # + # This method splits each chunk using adjacent elements, + # _elt_before_ and _elt_after_, + # in the receiver enumerator. + # This method split chunks between _elt_before_ and _elt_after_ where + # the block returns `false`. + # + # The block is called the length of the receiver enumerator minus one. + # + # The result enumerator yields the chunked elements as an array. + # So `each` method can be called as follows: + # + # enum.chunk_while { |elt_before, elt_after| bool }.each { |ary| ... } + # + # Other methods of the Enumerator class and Enumerable module, + # such as `to_a`, `map`, etc., are also usable. + # + # For example, one-by-one increasing subsequence can be chunked as follows: + # + # a = [1,2,4,9,10,11,12,15,16,19,20,21] + # b = a.chunk_while {|i, j| i+1 == j } + # p b.to_a #=> [[1, 2], [4], [9, 10, 11, 12], [15, 16], [19, 20, 21]] + # c = b.map {|a| a.length < 3 ? a : "#{a.first}-#{a.last}" } + # p c #=> [[1, 2], [4], "9-12", [15, 16], "19-21"] + # d = c.join(",") + # p d #=> "1,2,4,9-12,15,16,19-21" + # + # Increasing (non-decreasing) subsequence can be chunked as follows: + # + # a = [0, 9, 2, 2, 3, 2, 7, 5, 9, 5] + # p a.chunk_while {|i, j| i <= j }.to_a + # #=> [[0, 9], [2, 2, 3], [2, 7], [5, 9], [5]] + # + # Adjacent evens and odds can be chunked as follows: + # (Enumerable#chunk is another way to do it.) + # + # a = [7, 5, 9, 2, 0, 7, 9, 4, 2, 0] + # p a.chunk_while {|i, j| i.even? == j.even? }.to_a + # #=> [[7, 5, 9], [2, 0], [7, 9], [4, 2, 0]] + # + # Enumerable#slice_when does the same, except splitting when the block + # returns `true` instead of `false`. + # + def chunk_while(&block) + enum = self + Enumerator.new do |y| + n = 0 + last_value, arr = nil, [] + enum.each do |element| + if n > 0 + unless block.call(last_value, element) + y.yield arr + arr = [] + end + end + arr.push(element) + n += 1 + last_value = element + end + y.yield arr if arr.size > 0 + end + end end diff --git a/mrbgems/mruby-enumerator/test/enumerator.rb b/mrbgems/mruby-enumerator/test/enumerator.rb index 3e0c6c3bea..d206c290c8 100644 --- a/mrbgems/mruby-enumerator/test/enumerator.rb +++ b/mrbgems/mruby-enumerator/test/enumerator.rb @@ -598,3 +598,67 @@ def (o = Object.new).each ], enum.to_a } end + +assert("Enumerable#chunk") do + chunk = [1, 2, 3, 1, 2].chunk + assert_equal Enumerator, chunk.class + result = chunk.with_index { |elt, i| elt - i }.to_a + assert_equal [[1, [1, 2, 3]], [-2, [1, 2]]], result + + assert_equal Enumerator, [].chunk {}.class + + e = [1, 2, 3] + recorded = [] + e.chunk { |x| recorded << x }.to_a + assert_equal [1, 2, 3], recorded + + e = [1, 2, 3, 2, 3, 2, 1] + result = e.chunk { |x| x < 3 && 1 || 0 }.to_a + assert_equal [[1, [1, 2]], [0, [3]], [1, [2]], [0, [3]], [1, [2, 1]]], result + + e = [1, 2, 3] + assert_equal [[1, 2], [3]], e.chunk { |x| x > 2 }.map(&:last) + + e = [1, 2, 3, 2, 1] + result = e.chunk { |x| x < 2 && :_alone }.to_a + assert_equal [[:_alone, [1]], [false, [2, 3, 2]], [:_alone, [1]]], result + + e = [[1, 2]] + inner_value = [] + e.chunk { |*x| inner_value << x }.to_a + assert_equal [[[1, 2]]], inner_value + + e = [1, 2, 3, 3, 2, 1] + result = e.chunk { |x| x == 2 ? :_separator : 1 }.to_a + assert_equal [[1, [1]], [1, [3, 3]], [1, [1]]], result + + e = [1, 2, 3, 2, 1] + result = e.chunk { |x| x == 2 ? nil : 1 }.to_a + assert_equal [[1, [1]], [1, [3]], [1, [1]]], result + + + e = [1, 2, 3, 2, 1] + assert_raise(RuntimeError) { e.chunk { |x| :_arbitrary }.to_a } + + e = [1, 2, 3] + assert_raise(ArgumentError) { e.chunk(1) {} } + + e = [1, 2, 3, 2, 1] + enum = e.chunk { |x| true } + assert_nil enum.size +end + +assert("Enumerable#chunk_while") do + a = [1,2,4,9,10,11,12,15,16,19,20,21] + b = a.chunk_while {|i, j| i+1 == j } + assert_equal [[1, 2], [4], [9, 10, 11, 12], [15, 16], [19, 20, 21]], b.to_a + c = b.map {|a| a.length < 3 ? a : "#{a.first}-#{a.last}" } + assert_equal [[1, 2], [4], "9-12", [15, 16], "19-21"], c + + a = [0, 9, 2, 2, 3, 2, 7, 5, 9, 5] + assert_equal [[0], [9], [2], [2], [3], [2], [7], [5], [9], [5]], a.chunk_while {|i, j| false }.to_a + assert_equal [[0, 9], [2, 2, 3], [2, 7], [5, 9], [5]], a.chunk_while {|i, j| i <= j }.to_a + + a = [7, 5, 9, 2, 0, 7, 9, 4, 2, 0] + assert_equal [[7, 5, 9], [2, 0], [7, 9], [4, 2, 0]], a.chunk_while {|i, j| i % 2 == j % 2 }.to_a +end diff --git a/mrbgems/mruby-env/README.md b/mrbgems/mruby-env/README.md new file mode 100644 index 0000000000..37f8f894d8 --- /dev/null +++ b/mrbgems/mruby-env/README.md @@ -0,0 +1,33 @@ +# mruby-env + +ENV object for environment variable access. + +This gem is a built-in replacement for [iij/mruby-env](https://github.com/iij/mruby-env), +providing a superset of its API. + +## Methods + +`[]`, `[]=`, `assoc`, `clear`, `delete`, `each`, `each_key`, +`each_value`, `empty?`, `fetch`, `filter`/`select`, `freeze`, +`has_key?`/`include?`/`key?`/`member?`, `has_value?`/`value?`, +`inspect`, `key`, `keys`, `length`/`size`, `merge!`/`update`, +`rassoc`, `reject`, `replace`, `slice`, `store`, `to_a`, `to_h`, +`to_s`, `values` + +ENV includes `Enumerable` via `mruby-enumerator`. + +## Example + +```ruby +ENV["MY_VAR"] = "hello" +ENV["MY_VAR"] #=> "hello" +ENV.delete("MY_VAR") #=> "hello" + +ENV.keys #=> ["PATH", "HOME", ...] +ENV.each { |k, v| puts "#{k}=#{v}" } +ENV.select { |k, v| k.start_with?("RUBY") } +``` + +## License + +MIT License - see the mruby license. diff --git a/mrbgems/mruby-env/mrbgem.rake b/mrbgems/mruby-env/mrbgem.rake new file mode 100644 index 0000000000..8ea123e3b3 --- /dev/null +++ b/mrbgems/mruby-env/mrbgem.rake @@ -0,0 +1,6 @@ +MRuby::Gem::Specification.new('mruby-env') do |spec| + spec.license = 'MIT' + spec.authors = ['mruby developers'] + spec.summary = 'ENV object for environment variable access' + spec.add_dependency('mruby-enumerator', :core => 'mruby-enumerator') +end diff --git a/mrbgems/mruby-env/mrblib/env.rb b/mrbgems/mruby-env/mrblib/env.rb new file mode 100644 index 0000000000..e8183dd8a6 --- /dev/null +++ b/mrbgems/mruby-env/mrblib/env.rb @@ -0,0 +1,153 @@ +class << ENV + def each(&block) + return to_enum(:each) unless block + keys.each do |k| + v = self[k] + block.call([k, v]) if v + end + self + end + alias each_pair each + + def each_key(&block) + return to_enum(:each_key) unless block + keys.each(&block) + self + end + + def each_value(&block) + return to_enum(:each_value) unless block + __values.each(&block) + self + end + + def values + __values + end + + def delete(key, &block) + val = __delete(key) + if val.nil? && block + block.call(key) + else + val + end + end + + def clear + __clear + self + end + + def fetch(key, *args, &block) + val = self[key] + return val unless val.nil? + if block + block.call(key) + elsif args.length > 0 + args[0] + else + raise KeyError, "key not found: \"#{key}\"" + end + end + + def store(key, val) + self[key] = val + end + + def to_h(&block) + h = {} + each do |k, v| + if block + k, v = block.call(k, v) + end + h[k] = v + end + h + end + + def to_a + each_pair.to_a + end + + def inspect + to_h.inspect + end + + def empty? + size == 0 + end + + def has_value?(val) + __values.include?(val) + end + alias value? has_value? + + def assoc(key) + val = self[key] + val.nil? ? nil : [key, val] + end + + def rassoc(val) + each do |k, v| + return [k, v] if v == val + end + nil + end + + def key(val) + each do |k, v| + return k if v == val + end + nil + end + + def replace(hash) + clear + hash.each do |k, v| + self[k] = v + end + self + end + + def update(hash, &block) + hash.each do |k, v| + if block && key?(k) + self[k] = block.call(k, self[k], v) + else + self[k] = v + end + end + self + end + alias merge! update + + def select(&block) + return to_enum(:select) unless block + to_h.select(&block) + end + alias filter select + + def reject(&block) + return to_enum(:reject) unless block + to_h.reject(&block) + end + + def slice(*keys) + h = {} + keys.each do |k| + v = self[k] + h[k] = v unless v.nil? + end + h + end + + alias include? key? + alias has_key? key? + alias member? key? + alias length size + + def freeze + raise TypeError, "cannot freeze ENV" + end +end diff --git a/mrbgems/mruby-env/src/env.c b/mrbgems/mruby-env/src/env.c new file mode 100644 index 0000000000..7a621b466a --- /dev/null +++ b/mrbgems/mruby-env/src/env.c @@ -0,0 +1,223 @@ +/* +** env.c - ENV object for environment variable access +*/ + +#include +#include +#include +#include +#include + +#include +#include + +#ifdef _WIN32 +#include +#define environ _environ +#else +#ifdef __APPLE__ +#include +#define environ (*_NSGetEnviron()) +#else +extern char **environ; +#endif +#endif + +static void +env_check_key(mrb_state *mrb, mrb_value key) +{ + mrb_ensure_string_type(mrb, key); + if (memchr(RSTRING_PTR(key), '=', RSTRING_LEN(key))) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "bad environment variable name: contains '='"); + } + if (memchr(RSTRING_PTR(key), '\0', RSTRING_LEN(key))) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "bad environment variable name: contains null byte"); + } +} + +/* ENV[] */ +static mrb_value +mrb_env_aref(mrb_state *mrb, mrb_value self) +{ + mrb_value key; + const char *val; + + mrb_get_args(mrb, "o", &key); + env_check_key(mrb, key); + val = getenv(RSTRING_PTR(key)); + if (val == NULL) return mrb_nil_value(); + return mrb_str_new_cstr(mrb, val); +} + +/* ENV[]= */ +static mrb_value +mrb_env_aset(mrb_state *mrb, mrb_value self) +{ + mrb_value key, val; + + mrb_get_args(mrb, "oo", &key, &val); + env_check_key(mrb, key); + + if (mrb_nil_p(val)) { +#ifdef _WIN32 + _putenv_s(RSTRING_PTR(key), ""); +#else + unsetenv(RSTRING_PTR(key)); +#endif + return mrb_nil_value(); + } + + mrb_ensure_string_type(mrb, val); + if (memchr(RSTRING_PTR(val), '\0', RSTRING_LEN(val))) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "bad environment variable value: contains null byte"); + } +#ifdef _WIN32 + _putenv_s(RSTRING_PTR(key), RSTRING_PTR(val)); +#else + setenv(RSTRING_PTR(key), RSTRING_PTR(val), 1); +#endif + return val; +} + +/* ENV.__delete(key) - returns old value or nil */ +static mrb_value +mrb_env_delete(mrb_state *mrb, mrb_value self) +{ + mrb_value key; + const char *val; + mrb_value result; + + mrb_get_args(mrb, "o", &key); + env_check_key(mrb, key); + val = getenv(RSTRING_PTR(key)); + if (val == NULL) return mrb_nil_value(); + result = mrb_str_new_cstr(mrb, val); +#ifdef _WIN32 + _putenv_s(RSTRING_PTR(key), ""); +#else + unsetenv(RSTRING_PTR(key)); +#endif + return result; +} + +/* ENV.keys */ +static mrb_value +mrb_env_keys(mrb_state *mrb, mrb_value self) +{ + mrb_value ary = mrb_ary_new(mrb); + char **env; + int ai = mrb_gc_arena_save(mrb); + + for (env = environ; *env != NULL; env++) { + char *eq = strchr(*env, '='); + if (eq) { + mrb_ary_push(mrb, ary, mrb_str_new(mrb, *env, (mrb_int)(eq - *env))); + mrb_gc_arena_restore(mrb, ai); + } + } + return ary; +} + +/* ENV.__values */ +static mrb_value +mrb_env_values(mrb_state *mrb, mrb_value self) +{ + mrb_value ary = mrb_ary_new(mrb); + char **env; + int ai = mrb_gc_arena_save(mrb); + + for (env = environ; *env != NULL; env++) { + char *eq = strchr(*env, '='); + if (eq) { + mrb_ary_push(mrb, ary, mrb_str_new_cstr(mrb, eq + 1)); + mrb_gc_arena_restore(mrb, ai); + } + } + return ary; +} + +/* ENV.size */ +static mrb_value +mrb_env_size(mrb_state *mrb, mrb_value self) +{ + mrb_int count = 0; + char **env; + + for (env = environ; *env != NULL; env++) { + count++; + } + return mrb_fixnum_value(count); +} + +/* ENV.key? */ +static mrb_value +mrb_env_has_key(mrb_state *mrb, mrb_value self) +{ + mrb_value key; + + mrb_get_args(mrb, "o", &key); + env_check_key(mrb, key); + return mrb_bool_value(getenv(RSTRING_PTR(key)) != NULL); +} + +/* ENV.to_s */ +static mrb_value +mrb_env_to_s(mrb_state *mrb, mrb_value self) +{ + return mrb_str_new_lit(mrb, "ENV"); +} + +/* ENV.__clear */ +static mrb_value +mrb_env_clear(mrb_state *mrb, mrb_value self) +{ + while (*environ) { + char *eq = strchr(*environ, '='); + if (eq) { + mrb_int len = (mrb_int)(eq - *environ); + char *key = (char*)mrb_malloc(mrb, len + 1); + memcpy(key, *environ, len); + key[len] = '\0'; +#ifdef _WIN32 + _putenv_s(key, ""); +#else + unsetenv(key); +#endif + mrb_free(mrb, key); + } + else { + break; + } + } + return self; +} + +void +mrb_mruby_env_gem_init(mrb_state *mrb) +{ + mrb_value env = mrb_obj_new(mrb, mrb->object_class, 0, NULL); + struct RObject *eobj = mrb_obj_ptr(env); + + mrb_define_global_const(mrb, "ENV", env); + + mrb_define_singleton_method_id(mrb, eobj, MRB_OPSYM(aref), mrb_env_aref, MRB_ARGS_REQ(1)); + mrb_define_singleton_method_id(mrb, eobj, MRB_OPSYM(aset), mrb_env_aset, MRB_ARGS_REQ(2)); + mrb_define_singleton_method_id(mrb, eobj, MRB_SYM(__delete), mrb_env_delete, MRB_ARGS_REQ(1)); + mrb_define_singleton_method_id(mrb, eobj, MRB_SYM(keys), mrb_env_keys, MRB_ARGS_NONE()); + mrb_define_singleton_method_id(mrb, eobj, MRB_SYM(__values), mrb_env_values, MRB_ARGS_NONE()); + mrb_define_singleton_method_id(mrb, eobj, MRB_SYM(size), mrb_env_size, MRB_ARGS_NONE()); + mrb_define_singleton_method_id(mrb, eobj, MRB_SYM_Q(key), mrb_env_has_key, MRB_ARGS_REQ(1)); + mrb_define_singleton_method_id(mrb, eobj, MRB_SYM(to_s), mrb_env_to_s, MRB_ARGS_NONE()); + mrb_define_singleton_method_id(mrb, eobj, MRB_SYM(__clear), mrb_env_clear, MRB_ARGS_NONE()); + + /* include Enumerable in ENV's singleton class */ + { + struct RClass *sc = mrb_singleton_class_ptr(mrb, env); + mrb_include_module(mrb, sc, mrb_module_get_id(mrb, MRB_SYM(Enumerable))); + } +} + +void +mrb_mruby_env_gem_final(mrb_state *mrb) +{ +} diff --git a/mrbgems/mruby-env/test/env.rb b/mrbgems/mruby-env/test/env.rb new file mode 100644 index 0000000000..3acdddad69 --- /dev/null +++ b/mrbgems/mruby-env/test/env.rb @@ -0,0 +1,243 @@ +## +# ENV test + +assert('ENV class') do + assert_equal Object, ENV.class +end + +assert('ENV.to_s') do + assert_equal "ENV", ENV.to_s +end + +assert('ENV[] and ENV[]=') do + ENV["MRUBY_ENV_TEST_A"] = "hello" + assert_equal "hello", ENV["MRUBY_ENV_TEST_A"] + ENV["MRUBY_ENV_TEST_A"] = nil + assert_nil ENV["MRUBY_ENV_TEST_A"] +end + +assert('ENV[] returns nil for missing key') do + assert_nil ENV["MRUBY_ENV_TEST_NONEXISTENT"] +end + +assert('ENV.store') do + ENV.store("MRUBY_ENV_TEST_B", "world") + assert_equal "world", ENV["MRUBY_ENV_TEST_B"] + ENV.delete("MRUBY_ENV_TEST_B") +end + +assert('ENV.delete') do + ENV["MRUBY_ENV_TEST_C"] = "val" + assert_equal "val", ENV.delete("MRUBY_ENV_TEST_C") + assert_nil ENV["MRUBY_ENV_TEST_C"] +end + +assert('ENV.delete returns nil for missing key') do + assert_nil ENV.delete("MRUBY_ENV_TEST_NONEXISTENT") +end + +assert('ENV.delete with block') do + result = ENV.delete("MRUBY_ENV_TEST_NONEXISTENT") { |k| "missing: #{k}" } + assert_equal "missing: MRUBY_ENV_TEST_NONEXISTENT", result +end + +assert('ENV.key?') do + ENV["MRUBY_ENV_TEST_D"] = "yes" + assert_true ENV.key?("MRUBY_ENV_TEST_D") + assert_false ENV.key?("MRUBY_ENV_TEST_NONEXISTENT") + ENV.delete("MRUBY_ENV_TEST_D") +end + +assert('ENV.include?/has_key?/member?') do + ENV["MRUBY_ENV_TEST_E"] = "val" + assert_true ENV.include?("MRUBY_ENV_TEST_E") + assert_true ENV.has_key?("MRUBY_ENV_TEST_E") + assert_true ENV.member?("MRUBY_ENV_TEST_E") + ENV.delete("MRUBY_ENV_TEST_E") +end + +assert('ENV.keys') do + ENV["MRUBY_ENV_TEST_F"] = "val" + k = ENV.keys + assert_kind_of Array, k + assert_true k.include?("MRUBY_ENV_TEST_F") + ENV.delete("MRUBY_ENV_TEST_F") +end + +assert('ENV.values') do + ENV["MRUBY_ENV_TEST_G"] = "unique_val_42" + v = ENV.values + assert_kind_of Array, v + assert_true v.include?("unique_val_42") + ENV.delete("MRUBY_ENV_TEST_G") +end + +assert('ENV.size/length') do + s1 = ENV.size + ENV["MRUBY_ENV_TEST_H"] = "val" + assert_equal s1 + 1, ENV.size + assert_equal ENV.size, ENV.length + ENV.delete("MRUBY_ENV_TEST_H") +end + +assert('ENV.empty?') do + assert_equal [true, false].include?(ENV.empty?), true +end + +assert('ENV.has_value?/value?') do + ENV["MRUBY_ENV_TEST_I"] = "unique_find_me" + assert_true ENV.has_value?("unique_find_me") + assert_true ENV.value?("unique_find_me") + assert_false ENV.has_value?("MRUBY_ENV_NEVER_EXISTS_VALUE") + ENV.delete("MRUBY_ENV_TEST_I") +end + +assert('ENV.each/each_pair') do + ENV["MRUBY_ENV_TEST_J"] = "iter_val" + found = false + ENV.each do |k, v| + if k == "MRUBY_ENV_TEST_J" && v == "iter_val" + found = true + end + end + assert_true found + ENV.delete("MRUBY_ENV_TEST_J") +end + +assert('ENV.each_key') do + ENV["MRUBY_ENV_TEST_K"] = "val" + found = false + ENV.each_key { |k| found = true if k == "MRUBY_ENV_TEST_K" } + assert_true found + ENV.delete("MRUBY_ENV_TEST_K") +end + +assert('ENV.each_value') do + ENV["MRUBY_ENV_TEST_L"] = "val_each_v" + found = false + ENV.each_value { |v| found = true if v == "val_each_v" } + assert_true found + ENV.delete("MRUBY_ENV_TEST_L") +end + +assert('ENV.fetch') do + ENV["MRUBY_ENV_TEST_M"] = "fetch_val" + assert_equal "fetch_val", ENV.fetch("MRUBY_ENV_TEST_M") + assert_equal "default", ENV.fetch("MRUBY_ENV_TEST_NONE", "default") + assert_equal "block", ENV.fetch("MRUBY_ENV_TEST_NONE") { "block" } + assert_raise(KeyError) { ENV.fetch("MRUBY_ENV_TEST_NONE") } + ENV.delete("MRUBY_ENV_TEST_M") +end + +assert('ENV.to_h') do + ENV["MRUBY_ENV_TEST_N"] = "to_h_val" + h = ENV.to_h + assert_kind_of Hash, h + assert_equal "to_h_val", h["MRUBY_ENV_TEST_N"] + ENV.delete("MRUBY_ENV_TEST_N") +end + +assert('ENV.to_a') do + ENV["MRUBY_ENV_TEST_O"] = "to_a_val" + a = ENV.to_a + assert_kind_of Array, a + assert_true a.include?(["MRUBY_ENV_TEST_O", "to_a_val"]) + ENV.delete("MRUBY_ENV_TEST_O") +end + +assert('ENV.assoc') do + ENV["MRUBY_ENV_TEST_P"] = "assoc_val" + assert_equal ["MRUBY_ENV_TEST_P", "assoc_val"], ENV.assoc("MRUBY_ENV_TEST_P") + assert_nil ENV.assoc("MRUBY_ENV_TEST_NONEXISTENT") + ENV.delete("MRUBY_ENV_TEST_P") +end + +assert('ENV.rassoc') do + ENV["MRUBY_ENV_TEST_Q"] = "rassoc_unique_val" + result = ENV.rassoc("rassoc_unique_val") + assert_equal "MRUBY_ENV_TEST_Q", result[0] + assert_equal "rassoc_unique_val", result[1] + assert_nil ENV.rassoc("MRUBY_ENV_NEVER_EXISTS_VALUE") + ENV.delete("MRUBY_ENV_TEST_Q") +end + +assert('ENV.key') do + ENV["MRUBY_ENV_TEST_R"] = "key_search_val" + assert_equal "MRUBY_ENV_TEST_R", ENV.key("key_search_val") + assert_nil ENV.key("MRUBY_ENV_NEVER_EXISTS_VALUE") + ENV.delete("MRUBY_ENV_TEST_R") +end + +assert('ENV.inspect') do + s = ENV.inspect + assert_kind_of String, s +end + +assert('ENV.select/filter') do + ENV["MRUBY_ENV_TEST_S"] = "sel_val" + result = ENV.select { |k, v| k == "MRUBY_ENV_TEST_S" } + assert_kind_of Hash, result + assert_equal "sel_val", result["MRUBY_ENV_TEST_S"] + ENV.delete("MRUBY_ENV_TEST_S") +end + +assert('ENV.reject') do + ENV["MRUBY_ENV_TEST_T"] = "rej_val" + result = ENV.reject { |k, v| k != "MRUBY_ENV_TEST_T" } + assert_equal({"MRUBY_ENV_TEST_T" => "rej_val"}, result) + ENV.delete("MRUBY_ENV_TEST_T") +end + +assert('ENV.replace') do + ENV["MRUBY_ENV_TEST_U1"] = "old1" + ENV.replace({"MRUBY_ENV_TEST_U2" => "new2"}) + assert_nil ENV["MRUBY_ENV_TEST_U1"] + assert_equal "new2", ENV["MRUBY_ENV_TEST_U2"] + ENV.delete("MRUBY_ENV_TEST_U2") +end + +assert('ENV.update/merge!') do + ENV["MRUBY_ENV_TEST_V"] = "orig" + ENV.update({"MRUBY_ENV_TEST_V" => "updated", "MRUBY_ENV_TEST_W" => "new"}) + assert_equal "updated", ENV["MRUBY_ENV_TEST_V"] + assert_equal "new", ENV["MRUBY_ENV_TEST_W"] + ENV.delete("MRUBY_ENV_TEST_V") + ENV.delete("MRUBY_ENV_TEST_W") +end + +assert('ENV.update with block') do + ENV["MRUBY_ENV_TEST_X"] = "old" + ENV.update({"MRUBY_ENV_TEST_X" => "new"}) { |k, o, n| "#{o}_#{n}" } + assert_equal "old_new", ENV["MRUBY_ENV_TEST_X"] + ENV.delete("MRUBY_ENV_TEST_X") +end + +assert('ENV.slice') do + ENV["MRUBY_ENV_TEST_Y"] = "s1" + ENV["MRUBY_ENV_TEST_Z"] = "s2" + h = ENV.slice("MRUBY_ENV_TEST_Y", "MRUBY_ENV_TEST_Z", "MRUBY_ENV_TEST_NONEXISTENT") + assert_equal({"MRUBY_ENV_TEST_Y" => "s1", "MRUBY_ENV_TEST_Z" => "s2"}, h) + ENV.delete("MRUBY_ENV_TEST_Y") + ENV.delete("MRUBY_ENV_TEST_Z") +end + +assert('ENV.freeze raises TypeError') do + assert_raise(TypeError) { ENV.freeze } +end + +assert('ENV raises TypeError for non-string key') do + assert_raise(TypeError) { ENV[123] } +end + +assert('ENV raises TypeError for non-string value') do + assert_raise(TypeError) { ENV["MRUBY_ENV_TEST_ERR"] = 123 } +end + +assert('ENV.each returns enumerator without block') do + e = ENV.each + assert_kind_of Enumerator, e +end + +assert('ENV is Enumerable') do + assert_true ENV.is_a?(Enumerable) +end diff --git a/mrbgems/mruby-errno/README.md b/mrbgems/mruby-errno/README.md index aa9a782b5e..89b687991d 100644 --- a/mrbgems/mruby-errno/README.md +++ b/mrbgems/mruby-errno/README.md @@ -2,6 +2,64 @@ Errno module for mruby +This mrbgem provides the `Errno` module, which allows mruby programs to access system error numbers and their corresponding error messages. This is particularly useful for handling errors returned by underlying system calls. + +## Functionality + +The `mruby-errno` gem defines the `Errno` module and the `SystemCallError` class. + +### Accessing Error Constants + +The `Errno` module provides constants for various system errors. Each error constant corresponds to a specific system error number. + +Example: + +```ruby +p Errno::EPERM::Errno # Output: 1 (or the system-specific value for EPERM) +p Errno::EPERM.new.message # Output: "Operation not permitted" +``` + +### Raising and Rescuing System Call Errors + +The `SystemCallError` class is the base class for system call errors. You can raise instances of `SystemCallError` or its subclasses (like `Errno::EPERM`) and rescue them in your code. + +Example: + +```ruby +begin + raise Errno::EACCES, "my_method" +rescue SystemCallError => e + p e.class # Output: Errno::EACCES + p e.errno # Output: 13 (or the system-specific value for EACCES) + p e.message # Output: "Permission denied - my_method" + p e.to_s # Output: "Permission denied - my_method" +end + +# You can also rescue specific Errno constants +begin + # Simulate a system call that might fail + # For example, trying to open a file without permissions + raise Errno::ENOENT, "missing_file.txt" +rescue Errno::ENOENT => e + puts "Caught an ENOENT error: #{e.message}" + # Output: Caught an ENOENT error: No such file or directory - missing_file.txt +end +``` + +### Errno::NOERROR + +The `Errno::NOERROR` constant represents the absence of an error, typically with a value of 0. + +Example: + +```ruby +p Errno::NOERROR::Errno # Output: 0 +``` + +## Defining Error Types + +New error types and their corresponding numbers are defined in the `known_errors.def` file. The `gen.rb` script processes this file to generate the necessary C and Ruby code for the `Errno` module. This allows `mruby-errno` to be easily extended with new system error definitions. + ## License Copyright (c) 2013 Internet Initiative Japan Inc. diff --git a/mrbgems/mruby-errno/mrblib/errno.rb b/mrbgems/mruby-errno/mrblib/errno.rb index 52899396b0..b85f705eee 100644 --- a/mrbgems/mruby-errno/mrblib/errno.rb +++ b/mrbgems/mruby-errno/mrblib/errno.rb @@ -1,14 +1,49 @@ module Errno + # + # call-seq: + # Errno.const_defined?(name) -> true or false + # + # Returns true if the given name is defined as an errno constant, + # false otherwise. This method checks both system-defined errno + # constants and those defined by the superclass. + # + # Errno.const_defined?(:ENOENT) #=> true + # Errno.const_defined?(:EINVAL) #=> true + # Errno.const_defined?(:UNKNOWN) #=> false + # def Errno.const_defined?(name) __errno_defined?(name) or super end + # + # call-seq: + # Errno.const_missing(name) -> errno_class + # + # Called when an undefined constant is referenced. This method + # attempts to define the errno constant if it exists in the system, + # otherwise delegates to the superclass. + # + # Errno::ENOENT # triggers const_missing if not yet defined + # #=> Errno::ENOENT + # def Errno.const_missing(name) __errno_define(name) or super end - # Module#constants is defined in mruby-metaprog - # So, it may be raised NoMethodError + # + # call-seq: + # Errno.constants -> array + # + # Returns an array of all errno constant names available on the system. + # This includes both already defined constants and those that can be + # dynamically defined. + # + # Errno.constants + # #=> [:EPERM, :ENOENT, :ESRCH, :EINTR, :EIO, ...] + # + # Note: Module#constants is defined in mruby-metaprog, so this method + # may raise NoMethodError if that gem is not available. + # def Errno.constants __errno_list(super) end diff --git a/mrbgems/mruby-errno/src/errno.c b/mrbgems/mruby-errno/src/errno.c index 43d4a1eaac..881be2b134 100644 --- a/mrbgems/mruby-errno/src/errno.c +++ b/mrbgems/mruby-errno/src/errno.c @@ -1,24 +1,18 @@ -#include "mruby.h" -#include "mruby/array.h" -#include "mruby/class.h" -#include "mruby/error.h" -#include "mruby/hash.h" -#include "mruby/numeric.h" -#include "mruby/string.h" -#include "mruby/variable.h" -#include "mruby/internal.h" -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include static const struct { -#ifdef MRB_NO_PRESYM -#define itsdefined(name, sym) { #name, name }, - const char *name; -#else #define itsdefined(name, sym) { sym, name }, mrb_sym sym; -#endif int eno; } e2c[] = { #define itsnotdefined(name, sym) @@ -28,13 +22,8 @@ static const struct { }; static const struct { -#ifdef MRB_NO_PRESYM -#define itsnotdefined(name, sym) { #name }, - const char *name; -#else #define itsnotdefined(name, sym) { sym }, mrb_sym sym; -#endif } noe2c[] = { #define itsdefined(name, sym) #include "known_errors_def.cstub" @@ -42,11 +31,7 @@ static const struct { #undef itsnotdefined }; -#ifdef MRB_NO_PRESYM -#define ENTRY_SYM(e) mrb_intern_static(mrb, (e).name, strlen((e).name)) -#else #define ENTRY_SYM(e) (e).sym -#endif #define E2C_LEN (sizeof(e2c) / sizeof(e2c[0])) #define NOE2C_LEN (sizeof(noe2c) / sizeof(noe2c[0])) @@ -71,39 +56,10 @@ mrb_errno_define_exxx(mrb_state *mrb, mrb_sym name, int eno) return e; } -#ifndef MRB_NO_PRESYM typedef mrb_sym sym_ref; #define sym_ref_init(mrb, id) (id) #define errno_name_matched_p(errentry, ref) ((errentry).sym == *(ref)) -#else -typedef struct { - const char *name; - size_t len; -} sym_ref; - -static sym_ref -sym_ref_init(mrb_state *mrb, mrb_sym id) -{ - mrb_int len = 0; - const char *name = mrb_sym_name_len(mrb, id, &len); - sym_ref ename = { name, (size_t)len }; - return ename; -} - -#define errno_name_matched_p(errentry, ref) errno_name_matched_p_0((errentry).name, (ref)) -static mrb_bool -errno_name_matched_p_0(const char *name, const sym_ref *ref) -{ - if (ref->len == strlen(name) && memcmp(ref->name, name, ref->len) == 0) { - return TRUE; - } - else { - return FALSE; - } -} -#endif // MRB_NO_PRESYM - static mrb_bool ary_included_in_head(mrb_state *mrb, mrb_value ary, mrb_value obj, mrb_ssize head) { @@ -118,6 +74,10 @@ ary_included_in_head(mrb_state *mrb, mrb_value ary, mrb_value obj, mrb_ssize hea return FALSE; } +/* + * Internal method used by the Errno module to check if a specific + * error constant exists on this platform. + */ static mrb_value mrb_errno_defined_p(mrb_state *mrb, mrb_value self) { @@ -140,6 +100,10 @@ mrb_errno_defined_p(mrb_state *mrb, mrb_value self) return mrb_false_value(); } +/* + * Internal method used by the Errno module to define errno classes + * for error constants that exist on this platform. + */ static mrb_value mrb_errno_define(mrb_state *mrb, mrb_value self) { @@ -163,13 +127,16 @@ mrb_errno_define(mrb_state *mrb, mrb_value self) return mrb_nil_value(); } +/* + * Internal method used by the Errno module to populate an array + * with all errno symbols available on this platform. + */ static mrb_value mrb_errno_list(mrb_state *mrb, mrb_value self) { mrb_value list; mrb_get_args(mrb, "A", &list); - mrb_ensure_array_type(mrb, list); mrb_ary_modify(mrb, mrb_ary_ptr(list)); mrb_ssize head = RARRAY_LEN(list); @@ -224,6 +191,16 @@ mrb_sce_init(mrb_state *mrb, mrb_value self, mrb_value m, mrb_value no) mrb_exc_mesg_set(mrb, mrb_exc_ptr(self), str); } +/* + * call-seq: + * errno_class.new(message = nil) -> errno_exception + * + * Creates a new instance of a specific errno exception class. + * The optional message parameter provides additional context. + * + * Errno::ENOENT.new #=> # + * Errno::ENOENT.new("custom message") #=> # + */ static mrb_value mrb_exxx_init(mrb_state *mrb, mrb_value self) { @@ -234,6 +211,21 @@ mrb_exxx_init(mrb_state *mrb, mrb_value self) return self; } +/* + * call-seq: + * SystemCallError.new(message) -> system_call_error + * SystemCallError.new(errno) -> system_call_error + * SystemCallError.new(message, errno) -> system_call_error + * + * Creates a new SystemCallError exception. Can be called with: + * - A message string only + * - An errno number only + * - Both a message string and errno number + * + * SystemCallError.new("custom error") #=> # + * SystemCallError.new(2) #=> # + * SystemCallError.new("failed", 2) #=> # + */ static mrb_value mrb_sce_init_m(mrb_state *mrb, mrb_value self) { @@ -256,6 +248,19 @@ mrb_sce_init_m(mrb_state *mrb, mrb_value self) return self; } +/* + * call-seq: + * system_call_error.errno -> integer or nil + * + * Returns the errno number associated with this SystemCallError. + * Returns nil if no errno was set. + * + * begin + * File.open("/nonexistent") + * rescue SystemCallError => e + * e.errno #=> 2 (ENOENT) + * end + */ static mrb_value mrb_sce_errno(mrb_state *mrb, mrb_value self) { @@ -266,29 +271,37 @@ mrb_sce_errno(mrb_state *mrb, mrb_value self) sym = MRB_SYM(Errno); if (mrb_const_defined_at(mrb, mrb_obj_value(c), sym)) { return mrb_const_get(mrb, mrb_obj_value(c), sym); - } else { + } + else { sym = MRB_SYM(errno); return mrb_attr_get(mrb, self, sym); } } +/* + * call-seq: + * SystemCallError._sys_fail(errno, message = nil) + * + * Internal method that raises a SystemCallError with the given errno + * and optional message. This method does not return as it raises an exception. + * + * SystemCallError._sys_fail(2) # raises Errno::ENOENT + * SystemCallError._sys_fail(2, "failed") # raises Errno::ENOENT with message + */ static mrb_value mrb_sce_sys_fail(mrb_state *mrb, mrb_value cls) { - struct RClass *sce; mrb_value msg, no; - mrb_int argc; mrb->c->ci->mid = 0; - sce = mrb_class_ptr(cls); - argc = mrb_get_args(mrb, "o|S", &no, &msg); + struct RClass *sce = mrb_class_ptr(cls); + mrb_int argc = mrb_get_args(mrb, "o|S", &no, &msg); struct RBasic* e = mrb_obj_alloc(mrb, MRB_TT_EXCEPTION, sce); mrb_value exc = mrb_obj_value(e); if (argc == 1) { msg = mrb_nil_value(); } - exc = mrb_obj_value(e); mrb_sce_init(mrb, exc, msg, no); mrb_exc_raise(mrb, exc); return mrb_nil_value(); /* NOTREACHED */ @@ -297,23 +310,19 @@ mrb_sce_sys_fail(mrb_state *mrb, mrb_value cls) void mrb_mruby_errno_gem_init(mrb_state *mrb) { - struct RClass *e, *eno, *sce, *ste; - - ste = mrb->eStandardError_class; - - sce = mrb_define_class(mrb, "SystemCallError", ste); - mrb_define_class_method(mrb, sce, "_sys_fail", mrb_sce_sys_fail, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, sce, "errno", mrb_sce_errno, MRB_ARGS_NONE()); - mrb_define_method(mrb, sce, "initialize", mrb_sce_init_m, MRB_ARGS_ARG(1, 1)); + struct RClass *sce = mrb_define_class_id(mrb, MRB_SYM(SystemCallError), E_STANDARD_ERROR); + mrb_define_class_method_id(mrb, sce, MRB_SYM(_sys_fail), mrb_sce_sys_fail, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, sce, MRB_SYM(errno), mrb_sce_errno, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, sce, MRB_SYM(initialize), mrb_sce_init_m, MRB_ARGS_OPT(2)); - eno = mrb_define_module_id(mrb, MRB_SYM(Errno)); - mrb_define_class_method(mrb, eno, "__errno_defined?", mrb_errno_defined_p, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, eno, "__errno_define", mrb_errno_define, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, eno, "__errno_list", mrb_errno_list, MRB_ARGS_REQ(1)); + struct RClass *eno = mrb_define_module_id(mrb, MRB_SYM(Errno)); + mrb_define_class_method_id(mrb, eno, MRB_SYM_Q(__errno_defined), mrb_errno_defined_p, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, eno, MRB_SYM(__errno_define), mrb_errno_define, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, eno, MRB_SYM(__errno_list), mrb_errno_list, MRB_ARGS_REQ(1)); - e = mrb_define_class_under_id(mrb, eno, MRB_SYM(NOERROR), sce); + struct RClass *e = mrb_define_class_under_id(mrb, eno, MRB_SYM(NOERROR), sce); mrb_define_const_id(mrb, e, MRB_SYM(Errno), mrb_fixnum_value(0)); - //mrb_define_method(mrb, e, "===", mrb_exxx_cmp, MRB_ARGS_REQ(1)); + //mrb_define_method_id(mrb, e, MRB_SYM(eqq), mrb_exxx_cmp, MRB_ARGS_REQ(1)); // Pre-allocation for Errno::ENOMEM only mrb_errno_define_exxx(mrb, MRB_SYM(ENOMEM), ENOMEM); diff --git a/mrbgems/mruby-errno/test/errno.rb b/mrbgems/mruby-errno/test/errno.rb index a19f8abafa..0bbb7e04ff 100644 --- a/mrbgems/mruby-errno/test/errno.rb +++ b/mrbgems/mruby-errno/test/errno.rb @@ -24,7 +24,7 @@ end assert('SystemCallError#inspect') do - assert_equal("unknown error - a (SystemCallError)", SystemCallError.new("a").inspect) + assert_equal("#", SystemCallError.new("a").inspect) end assert('Errno::NOERROR') do @@ -50,9 +50,8 @@ end assert('Errno::EPERM#inspect') do - msg = Errno::EPERM.new.message - assert_equal("#{msg} (Errno::EPERM)", Errno::EPERM.new.inspect) - - msg = Errno::EPERM.new.message - assert_equal("#{msg} - a (Errno::EPERM)", Errno::EPERM.new("a").inspect) + e = Errno::EPERM.new + msg = e.message + assert_equal("#", e.inspect) + assert_equal("#", Errno::EPERM.new("a").inspect) end diff --git a/mrbgems/mruby-error/README.md b/mrbgems/mruby-error/README.md new file mode 100644 index 0000000000..a73bb10a32 --- /dev/null +++ b/mrbgems/mruby-error/README.md @@ -0,0 +1,103 @@ +# mruby-error + +The `mruby-error` mrbgem provides a set of C-level APIs for structured exception handling within mruby. These functions allow C extensions or embedded mruby code to implement error handling patterns similar to Ruby's `begin`, `rescue`, and `ensure` keywords. This is particularly useful when writing C code that needs to interact with mruby's exception system in a robust way. + +## `mrb_protect` + +The `mrb_protect` function is used to execute a C function (`body`) and capture any exceptions that might be raised during its execution. This allows you to run potentially unsafe operations and handle errors gracefully. + +**C Signature:** + +```c +mrb_value mrb_protect(mrb_state *mrb, mrb_func_t body, mrb_value data, mrb_bool *state); +``` + +**Parameters:** + +- `mrb_state *mrb`: The current mruby state. +- `mrb_func_t body`: A function pointer to the C function to be executed. This function should have the signature `mrb_value (*body)(mrb_state *mrb, mrb_value data)`. +- `mrb_value data`: A `mrb_value` that will be passed as an argument to the `body` function. +- `mrb_bool *state`: A pointer to a boolean. After `mrb_protect` returns, this boolean will be: + - `FALSE` (0) if the `body` function executed without raising an exception. + - `TRUE` (1) if an exception was raised within the `body` function. + +**Return Value:** + +- If no exception occurs (`*state` is `FALSE`), `mrb_protect` returns the value returned by the `body` function. +- If an exception occurs (`*state` is `TRUE`), `mrb_protect` returns the exception object. + +## `mrb_ensure` + +The `mrb_ensure` function ensures that a specific C function (`ensure`) is executed, regardless of whether another C function (`body`) completes normally or raises an exception. This is analogous to Ruby's `ensure` clause. + +**C Signature:** + +```c +mrb_value mrb_ensure(mrb_state *mrb, mrb_func_t body, mrb_value b_data, mrb_func_t ensure, mrb_value e_data); +``` + +**Parameters:** + +- `mrb_state *mrb`: The current mruby state. +- `mrb_func_t body`: A function pointer to the main C function to be executed. +- `mrb_value b_data`: A `mrb_value` passed as data to the `body` function. +- `mrb_func_t ensure`: A function pointer to the C function that will always be executed after the `body` function. +- `mrb_value e_data`: A `mrb_value` passed as data to the `ensure` function. + +**Behavior:** +The `body` function is executed first. After its completion (either normally or due to an exception), the `ensure` function is executed. If the `body` function raised an exception, that exception is re-thrown after the `ensure` function has finished. The return value of `mrb_ensure` is the result of the `body` function if no exception occurred. + +## `mrb_rescue` + +The `mrb_rescue` function executes a C function (`body`) and, if an exception that is a `StandardError` (or a subclass of `StandardError`) is raised, it executes a specified `rescue` C function. This is similar to a `rescue` clause in Ruby that doesn't specify a particular exception type (which defaults to `StandardError`). + +**C Signature:** + +```c +mrb_value mrb_rescue(mrb_state *mrb, mrb_func_t body, mrb_value b_data, mrb_func_t rescue, mrb_value r_data); +``` + +**Parameters:** + +- `mrb_state *mrb`: The current mruby state. +- `mrb_func_t body`: A function pointer to the main C function to be executed. +- `mrb_value b_data`: A `mrb_value` passed as data to the `body` function. +- `mrb_func_t rescue`: A function pointer to the C function that will be executed if a `StandardError` (or its subclass) is caught. +- `mrb_value r_data`: A `mrb_value` passed as data to the `rescue` function. + +**Behavior:** +If the `body` function executes without raising an exception, its result is returned. If a `StandardError` (or one of its descendants) is raised, the `rescue` function is executed, and its result becomes the return value of `mrb_rescue`. If an exception occurs that is not a `StandardError` (or its subclass), it is not caught by this function and will propagate up the call stack. Similarly, if the `rescue` block itself raises an exception, that exception will propagate. + +## `mrb_rescue_exceptions` + +The `mrb_rescue_exceptions` function provides more fine-grained exception handling than `mrb_rescue`. It executes a C function (`body`) and, if an exception matching one of the specified classes is raised, it executes a `rescue` C function. This is analogous to Ruby's `rescue SpecificError1, SpecificError2 => e` syntax. + +**C Signature:** + +```c +mrb_value mrb_rescue_exceptions(mrb_state *mrb, mrb_func_t body, mrb_value b_data, mrb_func_t rescue, mrb_value r_data, mrb_int len, struct RClass **classes); +``` + +**Parameters:** + +- `mrb_state *mrb`: The current mruby state. +- `mrb_func_t body`: A function pointer to the main C function to be executed. +- `mrb_value b_data`: A `mrb_value` passed as data to the `body` function. +- `mrb_func_t rescue`: A function pointer to the C function that will be executed if a matching exception is caught. +- `mrb_value r_data`: A `mrb_value` passed as data to the `rescue` function. +- `mrb_int len`: The number of exception classes provided in the `classes` array. +- `struct RClass **classes`: An array of pointers to mruby `RClass` objects representing the exception classes to be rescued. + +**Behavior:** +If the `body` function executes without raising an exception, its result is returned. If an exception is raised that is an instance of one of the classes specified in the `classes` array (or a subclass of one of them), the `rescue` function is executed, and its result becomes the return value of `mrb_rescue_exceptions`. If an exception occurs that does not match any of the specified classes, it is not caught and will propagate. If the `rescue` block itself raises an exception, that exception will propagate. + +## Usage Examples + +The C functions provided by this mrbgem are typically used when writing mruby C extensions that need to interact with Ruby code or manage resources carefully in the presence of potential exceptions. + +For concrete examples of how these functions are used, please refer to the test files within this mrbgem: + +- `test/exception.c`: Shows how these C functions are called directly. +- `test/exception.rb`: Demonstrates the behavior of these C functions from the Ruby side, through the `ExceptionTest` module (defined in `test/exception.c`). + +These tests illustrate how to set up callback functions and how the error handling mechanisms behave in practice. diff --git a/mrbgems/mruby-error/src/exception.c b/mrbgems/mruby-error/src/exception.c index ffa7d53bf8..b2b280356f 100644 --- a/mrbgems/mruby-error/src/exception.c +++ b/mrbgems/mruby-error/src/exception.c @@ -1,11 +1,15 @@ #include #include +/* Helper structure to pass function and data to protection wrapper */ struct protect_data { - mrb_func_t body; - mrb_value data; + mrb_func_t body; /* Function to be executed under protection */ + mrb_value data; /* Data to be passed to the function */ }; +/* Helper function that wraps user function calls for exception protection. + * Extracts function and data from protect_data structure and calls the + * user function with proper parameters. Used internally by mrb_protect. */ static mrb_value protect_body(mrb_state *mrb, void *p) { @@ -13,6 +17,19 @@ protect_body(mrb_state *mrb, void *p) return dp->body(mrb, dp->data); } +/* + * Executes a function under exception protection. + * + * @param mrb mruby state + * @param body Function to execute under protection + * @param data Data to pass to the function + * @param state Pointer to store exception state (true if exception occurred) + * @return Return value from body function, or exception object if error occurred + * + * This function provides a C API equivalent to Ruby's begin/rescue blocks. + * If an exception occurs during execution of body, it will be caught and + * the exception object returned, with *state set to true. + */ MRB_API mrb_value mrb_protect(mrb_state *mrb, mrb_func_t body, mrb_value data, mrb_bool *state) { @@ -20,6 +37,21 @@ mrb_protect(mrb_state *mrb, mrb_func_t body, mrb_value data, mrb_bool *state) return mrb_protect_error(mrb, protect_body, &protect_data, state); } +/* + * Executes a function with guaranteed cleanup (ensure block). + * + * @param mrb mruby state + * @param body Main function to execute + * @param b_data Data to pass to body function + * @param ensure Cleanup function that always executes + * @param e_data Data to pass to ensure function + * @return Return value from body function + * + * This function provides a C API equivalent to Ruby's begin/ensure blocks. + * The ensure function is guaranteed to execute regardless of whether the + * body function completes normally or raises an exception. If an exception + * occurs in the body, it will be re-raised after the ensure block executes. + */ MRB_API mrb_value mrb_ensure(mrb_state *mrb, mrb_func_t body, mrb_value b_data, mrb_func_t ensure, mrb_value e_data) { @@ -36,6 +68,20 @@ mrb_ensure(mrb_state *mrb, mrb_func_t body, mrb_value b_data, mrb_func_t ensure, return result; } +/* + * Executes a function with exception handling for StandardError and its subclasses. + * + * @param mrb mruby state + * @param body Main function to execute + * @param b_data Data to pass to body function + * @param rescue Exception handler function + * @param r_data Data to pass to rescue function + * @return Return value from body function, or rescue function if StandardError occurred + * + * This function provides a C API equivalent to Ruby's begin/rescue blocks that + * catch StandardError. It's a convenience wrapper around mrb_rescue_exceptions + * that automatically handles StandardError and its subclasses. + */ MRB_API mrb_value mrb_rescue(mrb_state *mrb, mrb_func_t body, mrb_value b_data, mrb_func_t rescue, mrb_value r_data) @@ -43,6 +89,22 @@ mrb_rescue(mrb_state *mrb, mrb_func_t body, mrb_value b_data, return mrb_rescue_exceptions(mrb, body, b_data, rescue, r_data, 1, &mrb->eStandardError_class); } +/* + * Executes a function with exception handling for specific exception classes. + * + * @param mrb mruby state + * @param body Main function to execute + * @param b_data Data to pass to body function + * @param rescue Exception handler function + * @param r_data Data to pass to rescue function + * @param len Number of exception classes to handle + * @param classes Array of exception classes to catch + * @return Return value from body function, or rescue function if matching exception occurred + * + * This function provides a C API equivalent to Ruby's begin/rescue blocks with + * specific exception class handling. Only exceptions that are instances of the + * specified classes will be caught; others will be re-raised. + */ MRB_API mrb_value mrb_rescue_exceptions(mrb_state *mrb, mrb_func_t body, mrb_value b_data, mrb_func_t rescue, mrb_value r_data, mrb_int len, struct RClass **classes) @@ -53,7 +115,7 @@ mrb_rescue_exceptions(mrb_state *mrb, mrb_func_t body, mrb_value b_data, mrb_fun mrb_value result = mrb_protect_error(mrb, protect_body, &protect_data, &error); if (error) { mrb_bool error_matched = FALSE; - for (mrb_int i = 0; i < len; ++i) { + for (mrb_int i = 0; i < len; i++) { if (mrb_obj_is_kind_of(mrb, result, classes[i])) { error_matched = TRUE; break; diff --git a/mrbgems/mruby-error/test/exception.c b/mrbgems/mruby-error/test/exception.c index 4de0e96076..e5292bba28 100644 --- a/mrbgems/mruby-error/test/exception.c +++ b/mrbgems/mruby-error/test/exception.c @@ -12,9 +12,10 @@ static mrb_value run_protect(mrb_state *mrb, mrb_value self) { mrb_value b; - mrb_value ret[2]; - mrb_bool state; mrb_get_args(mrb, "&", &b); + + mrb_bool state; + mrb_value ret[2]; ret[0] = mrb_protect(mrb, protect_cb, b, &state); ret[1] = mrb_bool_value(state); return mrb_ary_new_from_values(mrb, 2, ret); diff --git a/mrbgems/mruby-eval/README.md b/mrbgems/mruby-eval/README.md new file mode 100644 index 0000000000..638874190b --- /dev/null +++ b/mrbgems/mruby-eval/README.md @@ -0,0 +1,26 @@ +# mruby-eval + +This mrbgem provides methods for evaluating Ruby code from strings in mruby. + +## Features + +The `mruby-eval` gem implements the following methods: + +- **`eval`**: Evaluates a string as Ruby code. This is a private method on `Kernel`. +- **`instance_eval`**: Evaluates a string or a block within the context of an object. This is a method on `BasicObject`. +- **`class_eval` / `module_eval`**: Evaluates a string or a block within the context of a class or module. This is a method on `Module`. +- **`Binding#eval`**: Evaluates a string within the context of a `Binding` object. + +## Usage + +For detailed usage and examples, please refer to the standard Ruby documentation for these methods. The mruby implementation aims to be compatible with the behavior of these methods in CRuby. + +## Contributing + +Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change. + +Please make sure to update tests as appropriate. + +## License + +mruby-eval is released under the MIT License. See the [LICENSE](../../LICENSE) file in the main mruby repository for details. diff --git a/mrbgems/mruby-eval/mrbgem.rake b/mrbgems/mruby-eval/mrbgem.rake index cb8835b324..d2986fd3b9 100644 --- a/mrbgems/mruby-eval/mrbgem.rake +++ b/mrbgems/mruby-eval/mrbgem.rake @@ -4,4 +4,7 @@ MRuby::Gem::Specification.new('mruby-eval') do |spec| spec.summary = 'standard Kernel#eval method' add_dependency 'mruby-compiler', :core => 'mruby-compiler' + add_dependency 'mruby-binding', :core => 'mruby-binding' + spec.add_test_dependency('mruby-metaprog', :core => 'mruby-metaprog') + spec.add_test_dependency('mruby-method', :core => 'mruby-method') end diff --git a/mrbgems/mruby-eval/src/eval.c b/mrbgems/mruby-eval/src/eval.c index 5fead153b7..24fee236f0 100644 --- a/mrbgems/mruby-eval/src/eval.c +++ b/mrbgems/mruby-eval/src/eval.c @@ -1,21 +1,27 @@ #include +#include #include #include #include #include #include #include -#include #include #include -struct REnv *mrb_env_new(mrb_state *mrb, struct mrb_context *c, mrb_callinfo *ci, int nstacks, mrb_value *stack, struct RClass *tc); -void mrb_codedump_all(mrb_state*, struct RProc*); +/* provided by mruby-binding */ +mrb_bool mrb_binding_p(mrb_state *mrb, mrb_value binding); +const struct RProc * mrb_binding_extract_proc(mrb_state *mrb, mrb_value binding); +struct REnv * mrb_binding_extract_env(mrb_state *mrb, mrb_value binding); + +/* provided by mruby-compiler */ +typedef mrb_bool mrb_parser_foreach_top_variable_func(mrb_state *mrb, mrb_sym sym, void *user); +void mrb_parser_foreach_top_variable(mrb_state *mrb, struct mrb_parser_state *p, mrb_parser_foreach_top_variable_func *func, void *user); static struct RProc* create_proc_from_string(mrb_state *mrb, const char *s, mrb_int len, mrb_value binding, const char *file, mrb_int line) { - mrbc_context *cxt; + mrb_ccontext *cxt; struct mrb_parser_state *p; struct RProc *proc; const struct RProc *scope; @@ -25,22 +31,16 @@ create_proc_from_string(mrb_state *mrb, const char *s, mrb_int len, mrb_value bi struct mrb_context *c = mrb->c; if (!mrb_nil_p(binding)) { - mrb_value scope_obj; - if (!mrb_class_defined_id(mrb, MRB_SYM(Binding)) - || !mrb_obj_is_kind_of(mrb, binding, mrb_class_get_id(mrb, MRB_SYM(Binding)))) { + if (!mrb_binding_p(mrb, binding)) { mrb_raisef(mrb, E_TYPE_ERROR, "wrong argument type %C (expected binding)", - mrb_obj_class(mrb, binding)); + mrb_obj_class(mrb, binding)); } - scope_obj = mrb_iv_get(mrb, binding, MRB_SYM(proc)); - mrb_assert(mrb_proc_p(scope_obj)); - scope = mrb_proc_ptr(scope_obj); + scope = mrb_binding_extract_proc(mrb, binding); if (MRB_PROC_CFUNC_P(scope)) { e = NULL; } else { - mrb_value env = mrb_iv_get(mrb, binding, MRB_SYM(env)); - mrb_assert(mrb_env_p(env)); - e = (struct REnv *)mrb_obj_ptr(env); + e = mrb_binding_extract_env(mrb, binding); mrb_assert(e != NULL); } } @@ -59,10 +59,10 @@ create_proc_from_string(mrb_state *mrb, const char *s, mrb_int len, mrb_value bi file = "(eval)"; } - cxt = mrbc_context_new(mrb); + cxt = mrb_ccontext_new(mrb); cxt->lineno = (uint16_t)line; - mrbc_filename(mrb, cxt, file); + mrb_ccontext_filename(mrb, cxt, file); cxt->capture_errors = TRUE; cxt->no_optimize = TRUE; cxt->upper = scope && MRB_PROC_CFUNC_P(scope) ? NULL : scope; @@ -71,7 +71,7 @@ create_proc_from_string(mrb_state *mrb, const char *s, mrb_int len, mrb_value bi /* only occur when memory ran out */ if (!p) { - mrbc_context_free(mrb, cxt); + mrb_ccontext_free(mrb, cxt); mrb_raise(mrb, E_RUNTIME_ERROR, "Failed to create parser state (out of memory)"); } @@ -79,7 +79,7 @@ create_proc_from_string(mrb_state *mrb, const char *s, mrb_int len, mrb_value bi /* parse error */ mrb_value str; - mrbc_context_free(mrb, cxt); + mrb_ccontext_free(mrb, cxt); if (!p->error_buffer[0].message) { mrb_parser_free(p); mrb_raise(mrb, E_SYNTAX_ERROR, "compile error"); @@ -103,7 +103,7 @@ create_proc_from_string(mrb_state *mrb, const char *s, mrb_int len, mrb_value bi if (proc == NULL) { /* codegen error */ mrb_parser_free(p); - mrbc_context_free(mrb, cxt); + mrb_ccontext_free(mrb, cxt); mrb_raise(mrb, E_SCRIPT_ERROR, "codegen error"); } if (c->ci > c->cibase) { @@ -133,22 +133,149 @@ create_proc_from_string(mrb_state *mrb, const char *s, mrb_int len, mrb_value bi /* mrb_codedump_all(mrb, proc); */ mrb_parser_free(p); - mrbc_context_free(mrb, cxt); + mrb_ccontext_free(mrb, cxt); return proc; } static mrb_value -exec_irep(mrb_state *mrb, mrb_value self, struct RProc *proc) +eval_irep(mrb_state *mrb, mrb_value self, struct RProc *proc) { + mrb_callinfo *ci = mrb->c->ci; + /* no argument passed from eval() */ - mrb->c->ci->n = 0; - mrb->c->ci->nk = 0; + ci->n = 0; + ci->nk = 0; + /* clear visibility */ + MRB_CI_SET_VISIBILITY_BREAK(ci); /* clear block */ - mrb->c->ci->stack[1] = mrb_nil_value(); + ci->stack[1] = mrb_nil_value(); return mrb_exec_irep(mrb, self, proc); } +static void +binding_eval_error_check(mrb_state *mrb, struct mrb_parser_state *p, const char *file) +{ + if (!p) { + mrb_raise(mrb, E_RUNTIME_ERROR, "Failed to create parser state (out of memory)"); + } + + if (0 < p->nerr) { + if (p->mrb->exc) { + mrb_exc_raise(mrb, mrb_obj_value(p->mrb->exc)); + } + + mrb_value str; + + if (file) { + str = mrb_format(mrb, "file %s line %d: %s", + file, + p->error_buffer[0].lineno, + p->error_buffer[0].message); + } + else { + str = mrb_format(mrb, "line %d: %s", + p->error_buffer[0].lineno, + p->error_buffer[0].message); + } + mrb_exc_raise(mrb, mrb_exc_new_str(mrb, E_SYNTAX_ERROR, str)); + } +} + +#define LV_BUFFERS 8 + +struct expand_lvspace { + mrb_irep *irep; + struct REnv *env; + int numvar; + mrb_sym syms[LV_BUFFERS]; +}; + +static mrb_bool +expand_lvspace(mrb_state *mrb, mrb_sym sym, void *user) +{ + struct expand_lvspace *p = (struct expand_lvspace*)user; + mrb_int symlen; + const char *symname = mrb_sym_name_len(mrb, sym, &symlen); + + if (symname && symlen > 0) { + if (symname[0] != '&' && symname[0] != '*') { + p->syms[p->numvar++] = sym; + if (p->numvar >= LV_BUFFERS) { + mrb_proc_merge_lvar(mrb, p->irep, p->env, p->numvar, p->syms, NULL); + p->numvar = 0; + } + } + } + + return TRUE; +} + +struct binding_eval_prepare_body { + mrb_value binding; + const char *file; + mrb_ccontext *cxt; + struct mrb_parser_state *pstate; +}; + +static mrb_value +binding_eval_prepare_body(mrb_state *mrb, void *opaque) +{ + struct binding_eval_prepare_body *p = (struct binding_eval_prepare_body*)opaque; + binding_eval_error_check(mrb, p->pstate, p->file); + + struct expand_lvspace args = { + (mrb_irep*)p->cxt->upper->body.irep, + mrb_binding_extract_env(mrb, p->binding), + 0, + { 0 } + }; + mrb_parser_foreach_top_variable(mrb, p->pstate, expand_lvspace, &args); + if (args.numvar > 0) { + mrb_proc_merge_lvar(mrb, args.irep, args.env, args.numvar, args.syms, NULL); + } + + return mrb_nil_value(); +} + +static void +binding_eval_prepare(mrb_state *mrb, mrb_value binding, const char *expr, mrb_int exprlen, const char *file) +{ + struct binding_eval_prepare_body d = { binding }; + const struct RProc *proc = mrb_binding_extract_proc(mrb, binding); + mrb_assert(!MRB_PROC_CFUNC_P(proc)); + + d.cxt = mrb_ccontext_new(mrb); + d.file = mrb_ccontext_filename(mrb, d.cxt, file ? file : "(eval)"); + d.cxt->capture_errors = TRUE; + d.cxt->upper = proc; + d.pstate = mrb_parse_nstring(mrb, expr, exprlen, d.cxt); + + mrb_value ret; + MRB_ENSURE(mrb, ret, binding_eval_prepare_body, &d) { + if (d.pstate) mrb_parser_free(d.pstate); + if (d.cxt) mrb_ccontext_free(mrb, d.cxt); + } +} + +/* + * call-seq: + * eval(string, binding = nil, filename = nil, lineno = 1) -> obj + * + * Evaluates the Ruby expression(s) in string. If binding is given, + * which must be a Binding object, the evaluation is performed in its + * context. If filename is given, it is used for error reporting. + * If lineno is given, it is used as the starting line number for error reporting. + * + * eval("1 + 2") #=> 3 + * eval("x = 10; x * 2") #=> 20 + * + * x = 5 + * b = binding + * eval("x", b) #=> 5 + * eval("x = 100", b) #=> 100 + * x #=> 100 + */ static mrb_value f_eval(mrb_state *mrb, mrb_value self) { @@ -157,73 +284,139 @@ f_eval(mrb_state *mrb, mrb_value self) mrb_value binding = mrb_nil_value(); const char *file = NULL; mrb_int line = 1; - struct RProc *proc; mrb_get_args(mrb, "s|ozi", &s, &len, &binding, &file, &line); - proc = create_proc_from_string(mrb, s, len, binding, file, line); + if (!mrb_nil_p(binding)) { + binding_eval_prepare(mrb, binding, s, len, file); + } + struct RProc *proc = create_proc_from_string(mrb, s, len, binding, file, line); if (!mrb_nil_p(binding)) { self = mrb_iv_get(mrb, binding, MRB_SYM(recv)); } mrb_assert(!MRB_PROC_CFUNC_P(proc)); - return exec_irep(mrb, self, proc); + return eval_irep(mrb, self, proc); } +/* + * call-seq: + * obj.instance_eval(string, filename = nil, lineno = 1) -> obj + * obj.instance_eval {|obj| block } -> obj + * + * Evaluates a string containing Ruby source code, or the given block, + * within the context of the receiver (obj). In order to set the context, + * the variable self is set to obj while the code is executing, giving + * the code access to obj's instance variables and private methods. + * + * class KlassWithSecret + * def initialize + * @secret = 99 + * end + * private + * def the_secret + * "Ssssh! The secret is #{@secret}." + * end + * end + * k = KlassWithSecret.new + * k.instance_eval { @secret } #=> 99 + * k.instance_eval { the_secret } #=> "Ssssh! The secret is 99." + * k.instance_eval("@secret = 5") #=> 5 + */ static mrb_value -f_instance_eval(mrb_state *mrb, mrb_value self) +object_eval(mrb_state *mrb, mrb_value self, mrb_bool class_eval) { - if (!mrb_block_given_p(mrb)) { - const char *s; - mrb_int len; - const char *file = NULL; - mrb_int line = 1; - mrb_value cv; - struct RProc *proc; - - mrb_get_args(mrb, "s|zi", &s, &len, &file, &line); - cv = mrb_singleton_class(mrb, self); - proc = create_proc_from_string(mrb, s, len, mrb_nil_value(), file, line); - MRB_PROC_SET_TARGET_CLASS(proc, mrb_class_ptr(cv)); - mrb_assert(!MRB_PROC_CFUNC_P(proc)); - mrb_vm_ci_target_class_set(mrb->c->ci, mrb_class_ptr(cv)); - return exec_irep(mrb, self, proc); - } - else { + if (mrb_block_given_p(mrb)) { mrb_get_args(mrb, ""); - return mrb_obj_instance_eval(mrb, self); + return class_eval ? mrb_mod_module_eval(mrb, self) : mrb_obj_instance_eval(mrb, self); } + + const char *s; + mrb_int len; + const char *file = NULL; + mrb_int line = 1; + mrb_get_args(mrb, "s|zi", &s, &len, &file, &line); + + struct RClass *c = class_eval ? mrb_class_ptr(self) : mrb_singleton_class_ptr(mrb, self); + struct RProc *proc = create_proc_from_string(mrb, s, len, mrb_nil_value(), file, line); + MRB_PROC_SET_TARGET_CLASS(proc, c); + mrb_assert(!MRB_PROC_CFUNC_P(proc)); + mrb_vm_ci_target_class_set(mrb->c->ci, c); + return eval_irep(mrb, self, proc); +} + +static mrb_value +f_instance_eval(mrb_state *mrb, mrb_value self) +{ + return object_eval(mrb, self, FALSE); } +/* + * call-seq: + * mod.class_eval(string, filename = nil, lineno = 1) -> obj + * mod.class_eval {|mod| block } -> obj + * mod.module_eval(string, filename = nil, lineno = 1) -> obj + * mod.module_eval {|mod| block } -> obj + * + * Evaluates the string or block in the context of mod, except that when + * a block is given, constant/class variable lookup is not affected. + * This can be used to add methods to a class. module_eval returns the + * result of evaluating its argument. + * + * class Thing + * end + * a = %q{def hello() "Hello there!" end} + * Thing.module_eval(a) + * puts Thing.new.hello() #=> "Hello there!" + * + * Thing.class_eval("@@var = 99") + * Thing.class_eval { @@var } #=> 99 + */ static mrb_value f_class_eval(mrb_state *mrb, mrb_value self) { - if (!mrb_block_given_p(mrb)) { - const char *s; - mrb_int len; - const char *file = NULL; - mrb_int line = 1; - struct RProc *proc; - - mrb_get_args(mrb, "s|zi", &s, &len, &file, &line); - proc = create_proc_from_string(mrb, s, len, mrb_nil_value(), file, line); - MRB_PROC_SET_TARGET_CLASS(proc, mrb_class_ptr(self)); - mrb_assert(!MRB_PROC_CFUNC_P(proc)); - mrb_vm_ci_target_class_set(mrb->c->ci, mrb_class_ptr(self)); - return exec_irep(mrb, self, proc); - } - else { - mrb_get_args(mrb, ""); - return mrb_mod_module_eval(mrb, self); + return object_eval(mrb, self, TRUE); +} + +/* + * call-seq: + * binding.eval(string, filename = nil, lineno = 1) -> obj + * + * Evaluates the given string in the context of the binding. + * This is equivalent to calling eval(string, binding, filename, lineno). + * + * def get_binding(param) + * binding + * end + * b = get_binding("hello") + * b.eval("param") #=> "hello" + * b.eval("x = 10; x + param.length") #=> 15 + */ +static mrb_value +mrb_binding_eval(mrb_state *mrb, mrb_value binding) +{ + mrb_callinfo *ci = mrb->c->ci; + int argc = ci->n; + mrb_value *argv = ci->stack + 1; + + if (argc < 15) { + argv[0] = mrb_ary_new_from_values(mrb, argc, argv); + argv[1] = argv[argc]; /* copy block */ + ci->n = 15; } + mrb_ary_splice(mrb, argv[0], 1, 0, binding); /* insert binding as 2nd argument */ + return f_eval(mrb, binding); } void mrb_mruby_eval_gem_init(mrb_state* mrb) { - mrb_define_module_function(mrb, mrb->kernel_module, "eval", f_eval, MRB_ARGS_ARG(1, 3)); + mrb_define_private_method_id(mrb, mrb->kernel_module, MRB_SYM(eval), f_eval, MRB_ARGS_ARG(1, 3)); mrb_define_method_id(mrb, mrb_class_get_id(mrb, MRB_SYM(BasicObject)), MRB_SYM(instance_eval), f_instance_eval, MRB_ARGS_OPT(3)|MRB_ARGS_BLOCK()); - mrb_define_method_id(mrb, mrb_class_get_id(mrb, MRB_SYM(Module)), MRB_SYM(module_eval), f_class_eval, MRB_ARGS_OPT(3)|MRB_ARGS_BLOCK()); - mrb_define_method_id(mrb, mrb_class_get_id(mrb, MRB_SYM(Module)), MRB_SYM(class_eval), f_class_eval, MRB_ARGS_OPT(3)|MRB_ARGS_BLOCK()); + mrb_define_method_id(mrb, mrb->module_class, MRB_SYM(module_eval), f_class_eval, MRB_ARGS_OPT(3)|MRB_ARGS_BLOCK()); + mrb_define_method_id(mrb, mrb->module_class, MRB_SYM(class_eval), f_class_eval, MRB_ARGS_OPT(3)|MRB_ARGS_BLOCK()); + + struct RClass *binding = mrb_class_get_id(mrb, MRB_SYM(Binding)); + mrb_define_method_id(mrb, binding, MRB_SYM(eval), mrb_binding_eval, MRB_ARGS_ANY()); } void diff --git a/mrbgems/mruby-eval/test/binding.rb b/mrbgems/mruby-eval/test/binding.rb new file mode 100644 index 0000000000..1f03d6b2c3 --- /dev/null +++ b/mrbgems/mruby-eval/test/binding.rb @@ -0,0 +1,81 @@ +assert("Binding#eval") do + b = nil + 1.times { x, y, z = 1, 2, 3; [x,y,z]; b = binding } + assert_equal([1, 2, 3], b.eval("[x, y, z]")) + here = self + assert_equal(here, b.eval("self")) +end + +assert("Binding#local_variables") do + block = Proc.new do |a| + b = 1 + binding + end + bind = block.call(0) + assert_equal [:a, :b, :bind, :block], bind.local_variables.sort + bind.eval("x = 2") + assert_equal [:a, :b, :bind, :block, :x], bind.local_variables.sort +end + +assert("Binding#local_variable_set") do + bind = binding + 1.times { + assert_equal(9, bind.local_variable_set(:x, 9)) + assert_equal(9, bind.eval("x")) + assert_equal([:bind, :x], bind.eval("local_variables.sort")) + } +end + +assert("Binding#local_variable_get") do + bind = binding + x = 1 + 1.times { + y = 2 + assert_equal(1, bind.local_variable_get(:x)) + x = 10 + assert_equal(10, bind.local_variable_get(:x)) + assert_raise(NameError) { bind.local_variable_get(:y) } + bind.eval("z = 3") + assert_equal(3, bind.local_variable_get(:z)) + bind.eval("y = 5") + assert_equal(5, bind.local_variable_get(:y)) + assert_equal(2, y) + } +end + +assert "Binding#eval with Binding.new via UnboundMethod" do + assert_raise(NoMethodError) { Class.instance_method(:new).bind_call(Binding) } +end + +assert "Binding#eval with Binding.new via Method" do + # The following test is OK if SIGSEGV does not occur + cx = Class.new(Binding) + cx.define_singleton_method(:allocate, &Object.method(:allocate)) + Class.instance_method(:new).bind_call(cx).__send__(:eval,"") + + assert_true true +end + +assert "access local variables into procs" do + bx = binding + block = bx.eval("a = 1; proc { a }") + bx.eval("a = 2") + assert_equal 2, block.call +end + +assert "Binding#eval on another target class" do + obj = Object.new + Module.new do + self::BINDING = obj.instance_eval { binding } + + def self.eval(code) + self::BINDING.eval code + end + + self.eval "def self.m1; :m1; end" + self.eval "def m2; :m2; end" + end + + assert_equal :m1, obj.m1 + assert_equal :m2, obj.m2 +end diff --git a/mrbgems/mruby-eval/test/eval.rb b/mrbgems/mruby-eval/test/eval.rb index 7e28ade07e..79b304787e 100644 --- a/mrbgems/mruby-eval/test/eval.rb +++ b/mrbgems/mruby-eval/test/eval.rb @@ -1,52 +1,50 @@ -assert('Kernel.eval', '15.3.1.2.3') do - assert_equal(10) { Kernel.eval '1 * 10' } - assert_equal('aaa') { Kernel.eval "'a' * 3" } +# Kernel.eval is not provided by mruby. '15.3.1.2.3' + +assert('Kernel#eval', '15.3.1.3.12') do + assert_equal(10) { eval '1 * 10' } + assert_equal('aaa') { eval "'a' * 3" } assert_equal(10) { a = 10 - Kernel.eval "a" + eval "a" } assert_equal(20) { a = 10 - Kernel.eval "a = 20" + eval "a = 20" a } assert_equal(15) { c = 5 lambda { a = 10 - Kernel.eval "c = a + c" + eval "c = a + c" }.call c } assert_equal(5) { c = 5 lambda { - Kernel.eval 'lambda { c }.call' + eval 'lambda { c }.call' }.call } assert_equal(15) { c = 5 lambda { a = 10 - Kernel.eval 'lambda { c = a + c }.call' + eval 'lambda { c = a + c }.call' }.call c } assert_equal(2) { a = 10 - Kernel.eval 'def f(a); b=a+1; end' + eval 'def f(a); b=a+1; end' f(1) } end -assert('Kernel#eval', '15.3.1.3.12') do - assert_equal(10) { eval '1 * 10' } -end - assert('rest arguments of eval') do - assert_raise(TypeError) { Kernel.eval('0', 0, 'test', 0) } + assert_raise(TypeError) { eval('0', 0, 'test', 0) } assert_equal ['test', 'test.rb', 10] do - Kernel.eval('[\'test\', __FILE__, __LINE__]', nil, 'test.rb', 10) + eval('[\'test\', __FILE__, __LINE__]', nil, 'test.rb', 10) end end @@ -68,7 +66,7 @@ assert_equal ['', o, o], o.instance_eval("[''].each { |s| break [s, o, self] }") end -assert('Kernel.#eval(string) context') do +assert('Kernel#eval(string) context') do class TestEvalConstScope EVAL_CONST_CLASS = 'class' def const_string @@ -115,7 +113,7 @@ def bar(x) assert_equal(2){f2.baz} end -assert('Kernel.#eval(string) Issue #4021') do +assert('Kernel#eval(string) Issue #4021') do assert_equal('FOO') { (eval <<'EOS').call } foo = "FOO" Proc.new { foo } @@ -162,3 +160,26 @@ def hoge.fuga(a, &b) b = c.class_eval("class A; def a; 55; end; end; class B; def b; A; end; end; B") assert_equal 55, b.new.b.new.a end + +assert 'method visibility with eval' do + c = Class.new do + eval <<~CODE + private + def bad! + "BAD!" + end + CODE + + def good! + "GOOD!" + end + end + + assert_raise NoMethodError do + c.new.bad! + end + + assert_equal "GOOD!" do + c.new.good! + end +end diff --git a/mrbgems/mruby-exit/src/mruby-exit.c b/mrbgems/mruby-exit/src/mruby_exit.c similarity index 82% rename from mrbgems/mruby-exit/src/mruby-exit.c rename to mrbgems/mruby-exit/src/mruby_exit.c index 658d188e67..13da43201e 100644 --- a/mrbgems/mruby-exit/src/mruby-exit.c +++ b/mrbgems/mruby-exit/src/mruby_exit.c @@ -2,7 +2,6 @@ #include #include #include -#include #ifndef EXIT_SUCCESS # define EXIT_SUCCESS 0 @@ -32,7 +31,7 @@ get_status(mrb_state *mrb) * optional parameter is used to return a status code to the invoking * environment. * - * +true+ and +false+ of _status_ means success and failure + * `true` and `false` of _status_ means success and failure * respectively. The interpretation of other integer values are * system dependent. * @@ -72,9 +71,9 @@ f_exit_bang(mrb_state *mrb, mrb_value self) void mrb_mruby_exit_gem_init(mrb_state* mrb) { - mrb_define_class_id(mrb, MRB_SYM(SystemExit), mrb->eException_class); - mrb_define_method_id(mrb, mrb->kernel_module, MRB_SYM(exit), f_exit, MRB_ARGS_OPT(1)); - mrb_define_method_id(mrb, mrb->kernel_module, MRB_SYM_B(exit), f_exit_bang, MRB_ARGS_OPT(1)); + mrb_define_class_id(mrb, MRB_SYM(SystemExit), E_EXCEPTION); + mrb_define_private_method_id(mrb, mrb->kernel_module, MRB_SYM(exit), f_exit, MRB_ARGS_OPT(1)); + mrb_define_private_method_id(mrb, mrb->kernel_module, MRB_SYM_B(exit), f_exit_bang, MRB_ARGS_OPT(1)); } void diff --git a/mrbgems/mruby-fiber/src/fiber.c b/mrbgems/mruby-fiber/src/fiber.c index d4d872d18d..c7655c4a7b 100644 --- a/mrbgems/mruby-fiber/src/fiber.c +++ b/mrbgems/mruby-fiber/src/fiber.c @@ -1,24 +1,87 @@ #include #include #include +#include +#include +#include #include +#include #define fiber_ptr(o) ((struct RFiber*)mrb_ptr(o)) #define FIBER_STACK_INIT_SIZE 64 -#define FIBER_CI_INIT_SIZE 8 +#define FIBER_CI_INIT_SIZE 4 /* copied from vm.c */ #define CINFO_RESUMED 3 +static mrb_value +init_fiber(mrb_state *mrb, struct RFiber *f, const struct RProc *p) +{ + static const struct mrb_context mrb_context_zero = { 0 }; + + if (f->cxt) { + mrb_raise(mrb, E_RUNTIME_ERROR, "cannot initialize twice"); + } + if (MRB_PROC_CFUNC_P(p)) { + mrb_raise(mrb, E_FIBER_ERROR, "tried to create Fiber from C defined method"); + } + + struct mrb_context *c = (struct mrb_context*)mrb_malloc(mrb, sizeof(struct mrb_context)); + *c = mrb_context_zero; + f->cxt = c; + + /* initialize VM stack */ + size_t slen = FIBER_STACK_INIT_SIZE; + if (p->body.irep->nregs > slen) { + slen += p->body.irep->nregs; + } + c->stbase = (mrb_value*)mrb_malloc(mrb, slen*sizeof(mrb_value)); + c->stend = c->stbase + slen; + + { + mrb_value *s = c->stbase + 1; + mrb_value *send = c->stend; + + while (s < send) { + SET_NIL_VALUE(*s); + s++; + } + } + + /* copy receiver from a block */ + c->stbase[0] = mrb->c->ci->stack[0]; + + /* initialize callinfo stack */ + static const mrb_callinfo ci_zero = { 0 }; + c->cibase = (mrb_callinfo*)mrb_malloc(mrb, FIBER_CI_INIT_SIZE * sizeof(mrb_callinfo)); + c->ciend = c->cibase + FIBER_CI_INIT_SIZE; + c->ci = c->cibase; + c->cibase[0] = ci_zero; + + /* adjust return callinfo */ + mrb_callinfo *ci = c->ci; + mrb_vm_ci_target_class_set(ci, MRB_PROC_TARGET_CLASS(p)); + mrb_vm_ci_proc_set(ci, p); + mrb_field_write_barrier(mrb, (struct RBasic*)f, (struct RBasic*)p); + ci->stack = c->stbase; + ci[1] = ci[0]; + c->ci++; /* push dummy callinfo */ + + c->fib = f; + c->status = MRB_FIBER_CREATED; + + return mrb_obj_value(f); +} + /* * call-seq: * Fiber.new{...} -> obj * - * Creates a fiber, whose execution is suspend until it is explicitly - * resumed using Fiber#resume method. + * Creates a fiber, whose execution is suspended until it is explicitly + * resumed using `Fiber#resume` method. * The code running inside the fiber can give up control by calling - * Fiber.yield in which case it yields control back to caller - * (the caller of the Fiber#resume). + * `Fiber.yield` in which case it yields control back to caller + * (the caller of the `Fiber#resume`). * * Upon yielding or termination the Fiber returns the value of the last * executed expression @@ -40,10 +103,10 @@ * 2 * resuming dead fiber (FiberError) * - * The Fiber#resume method accepts an arbitrary number of - * parameters, if it is the first call to resume then they + * The `Fiber#resume` method accepts an arbitrary number of + * parameters, if it is the first call to `resume` then they * will be passed as block arguments. Otherwise they will be the return - * value of the call to Fiber.yield + * value of the call to `Fiber.yield` * * Example: * @@ -65,67 +128,9 @@ static mrb_value fiber_init(mrb_state *mrb, mrb_value self) { - static const struct mrb_context mrb_context_zero = { 0 }; - struct RFiber *f = fiber_ptr(self); - struct mrb_context *c; - struct RProc *p; - mrb_callinfo *ci; mrb_value blk; - size_t slen; - mrb_get_args(mrb, "&!", &blk); - - if (f->cxt) { - mrb_raise(mrb, E_RUNTIME_ERROR, "cannot initialize twice"); - } - p = mrb_proc_ptr(blk); - if (MRB_PROC_CFUNC_P(p)) { - mrb_raise(mrb, E_FIBER_ERROR, "tried to create Fiber from C defined method"); - } - - c = (struct mrb_context*)mrb_malloc(mrb, sizeof(struct mrb_context)); - *c = mrb_context_zero; - f->cxt = c; - - /* initialize VM stack */ - slen = FIBER_STACK_INIT_SIZE; - if (p->body.irep->nregs > slen) { - slen += p->body.irep->nregs; - } - c->stbase = (mrb_value *)mrb_malloc(mrb, slen*sizeof(mrb_value)); - c->stend = c->stbase + slen; - - { - mrb_value *p = c->stbase; - mrb_value *pend = c->stend; - - while (p < pend) { - SET_NIL_VALUE(*p); - p++; - } - } - - /* copy receiver from a block */ - c->stbase[0] = mrb->c->ci->stack[0]; - - /* initialize callinfo stack */ - c->cibase = (mrb_callinfo *)mrb_calloc(mrb, FIBER_CI_INIT_SIZE, sizeof(mrb_callinfo)); - c->ciend = c->cibase + FIBER_CI_INIT_SIZE; - c->ci = c->cibase; - - /* adjust return callinfo */ - ci = c->ci; - mrb_vm_ci_target_class_set(ci, MRB_PROC_TARGET_CLASS(p)); - mrb_vm_ci_proc_set(ci, p); - mrb_field_write_barrier(mrb, (struct RBasic*)mrb_obj_ptr(self), (struct RBasic*)p); - ci->stack = c->stbase; - ci[1] = ci[0]; - c->ci++; /* push dummy callinfo */ - - c->fib = f; - c->status = MRB_FIBER_CREATED; - - return self; + return init_fiber(mrb, fiber_ptr(self), mrb_proc_ptr(blk)); } static struct mrb_context* @@ -149,7 +154,7 @@ fiber_result(mrb_state *mrb, const mrb_value *a, mrb_int len) } /* mark return from context modifying method */ -#define MARK_CONTEXT_MODIFY(c) (c)->ci->u.target_class = NULL +#define MARK_CONTEXT_MODIFY(c) (c)->ci->u.keep_context = NULL static void fiber_check_cfunc(mrb_state *mrb, struct mrb_context *c) @@ -163,6 +168,17 @@ fiber_check_cfunc(mrb_state *mrb, struct mrb_context *c) } } +static void +fiber_check_cfunc_recursive(mrb_state *mrb, struct mrb_context *c) +{ + for (;; c = c->prev) { + fiber_check_cfunc(mrb, c); + if (c == mrb->root_c || !c->prev) { + break; + } + } +} + static void fiber_switch_context(mrb_state *mrb, struct mrb_context *c) { @@ -173,40 +189,68 @@ fiber_switch_context(mrb_state *mrb, struct mrb_context *c) mrb->c = c; } +/* + * Argument mesg is limited to a string literal or "static const" string. + * Also, it must be called as `return fiber_error(...)`. + */ +static mrb_value +fiber_error(mrb_state *mrb, const char *mesg) +{ + mrb_value str = mrb_str_new_static(mrb, mesg, strlen(mesg)); + mrb_value exc = mrb_exc_new_str(mrb, E_FIBER_ERROR, str); + + if (mrb->jmp) { + mrb_exc_raise(mrb, exc); + } + + mrb->exc = mrb_obj_ptr(exc); + + return exc; +} + +/* This function must be called as `return fiber_switch(...)` */ static mrb_value fiber_switch(mrb_state *mrb, mrb_value self, mrb_int len, const mrb_value *a, mrb_bool resume, mrb_bool vmexec) { struct mrb_context *c = fiber_check(mrb, self); struct mrb_context *old_c = mrb->c; - enum mrb_fiber_state status; mrb_value value; - fiber_check_cfunc(mrb, c); - status = c->status; + if (resume && c == mrb->c) { + return fiber_error(mrb, "attempt to resume the current fiber"); + } + + enum mrb_fiber_state status = c->status; switch (status) { case MRB_FIBER_TRANSFERRED: if (resume) { - mrb_raise(mrb, E_FIBER_ERROR, "resuming transferred fiber"); + return fiber_error(mrb, "resuming transferred fiber"); } break; case MRB_FIBER_RUNNING: case MRB_FIBER_RESUMED: - mrb_raise(mrb, E_FIBER_ERROR, "double resume"); + return fiber_error(mrb, "double resume"); break; case MRB_FIBER_TERMINATED: - mrb_raise(mrb, E_FIBER_ERROR, "resuming dead fiber"); + return fiber_error(mrb, "resuming dead fiber"); break; default: break; } - old_c->status = resume ? MRB_FIBER_RESUMED : MRB_FIBER_TRANSFERRED; - c->prev = resume ? mrb->c : (c->prev ? c->prev : mrb->root_c); + fiber_check_cfunc(mrb, c); + if (resume) { + old_c->status = MRB_FIBER_RESUMED; + c->prev = mrb->c; + } + else { + old_c->status = MRB_FIBER_TRANSFERRED; + // c->prev = mrb->root_c; + c->prev = NULL; + } fiber_switch_context(mrb, c); if (status == MRB_FIBER_CREATED) { - mrb_value *b, *e; - if (!c->ci->proc) { - mrb_raise(mrb, E_FIBER_ERROR, "double resume (current)"); + return fiber_error(mrb, "double resume (current)"); } if (vmexec) { c->ci--; /* pop dummy callinfo */ @@ -218,26 +262,38 @@ fiber_switch(mrb_state *mrb, mrb_value self, mrb_int len, const mrb_value *a, mr } else { mrb_stack_extend(mrb, len+2); /* for receiver and (optional) block */ - b = c->stbase+1; - e = b + len; + + mrb_value *b = c->stbase+1; + mrb_value *e = b + len; + while (bcibase->n = (uint8_t)len; - value = c->stbase[0] = MRB_PROC_ENV(c->cibase->proc)->stack[0]; + struct REnv *env = MRB_PROC_ENV(c->cibase->proc); + if (env && env->stack) { + value = env->stack[0]; + } + else { + value = mrb_top_self(mrb); + } + c->stbase[0] = value; } else { value = fiber_result(mrb, a, len); if (vmexec) { + if (c->ci > c->cibase) c->ci--; /* pop dummy callinfo */ c->ci[1].stack[0] = value; } } if (vmexec) { + int cci = old_c->ci->cci; c->vmexec = TRUE; value = mrb_vm_exec(mrb, c->ci->proc, c->ci->pc); mrb->c = old_c; + old_c->ci->cci = cci; /* restore values as they may have changed in Fiber.yield */ } else { MARK_CONTEXT_MODIFY(c); @@ -249,16 +305,16 @@ fiber_switch(mrb_state *mrb, mrb_value self, mrb_int len, const mrb_value *a, mr * call-seq: * fiber.resume(args, ...) -> obj * - * Resumes the fiber from the point at which the last Fiber.yield + * Resumes the fiber from the point at which the last `Fiber.yield` * was called, or starts running it if it is the first call to - * resume. Arguments passed to resume will be the value of - * the Fiber.yield expression or will be passed as block - * parameters to the fiber's block if this is the first resume. + * `resume`. Arguments passed to resume will be the value of + * the `Fiber.yield` expression or will be passed as block + * parameters to the fiber's block if this is the first `resume`. * * Alternatively, when resume is called it evaluates to the arguments passed - * to the next Fiber.yield statement inside the fiber's block + * to the next `Fiber.yield` statement inside the fiber's block * or to the block value if it runs to completion without any - * Fiber.yield + * `Fiber.yield` */ static mrb_value fiber_resume(mrb_state *mrb, mrb_value self) @@ -267,7 +323,6 @@ fiber_resume(mrb_state *mrb, mrb_value self) mrb_int len; mrb_bool vmexec = FALSE; - fiber_check_cfunc(mrb, mrb->c); mrb_get_args(mrb, "*!", &a, &len); if (mrb->c->ci->cci > 0) { vmexec = TRUE; @@ -275,7 +330,6 @@ fiber_resume(mrb_state *mrb, mrb_value self) return fiber_switch(mrb, self, len, a, TRUE, vmexec); } -/* resume thread with given arguments */ MRB_API mrb_value mrb_fiber_resume(mrb_state *mrb, mrb_value fib, mrb_int len, const mrb_value *a) { @@ -308,17 +362,80 @@ fiber_eq(mrb_state *mrb, mrb_value self) return mrb_bool_value(fiber_ptr(self) == fiber_ptr(other)); } +/* + * call-seq: + * fiber.to_s -> string + * fiber.inspect -> string + * + * Returns fiber object information as a string. + * + * If the file information cannot be obtained, it is replaced with `(unknown):0`. + * Also, if the fiber is terminated, it will be replaced in the same way (mruby limitation). + */ +static mrb_value +fiber_to_s(mrb_state *mrb, mrb_value self) +{ + fiber_check(mrb, self); + const struct RFiber *f = fiber_ptr(self); + + /* Cache status to avoid redundant lookups */ + enum mrb_fiber_state status = f->cxt->status; + + /* Pre-allocate buffer - 150 bytes handles typical fiber strings */ + mrb_value s = mrb_str_buf_new(mrb, 150); + + mrb_str_cat_lit(mrb, s, "#<"); + mrb_value cname = mrb_class_path(mrb, mrb_class_real(mrb_class(mrb, self))); + if (mrb_nil_p(cname)) { + mrb_str_cat_lit(mrb, s, "Fiber:"); + } + else { + mrb_str_cat_str(mrb, s, cname); + mrb_str_cat_lit(mrb, s, ":"); + } + mrb_str_cat_str(mrb, s, mrb_ptr_to_str(mrb, mrb_ptr(self))); + + const char *file; + int32_t line; + const struct RProc *p; + if (status != MRB_FIBER_TERMINATED && + !MRB_PROC_CFUNC_P(p = f->cxt->cibase->proc) && !MRB_PROC_ALIAS_P(p) && + mrb_debug_get_position(mrb, p->body.irep, 0, &line, &file)) { + mrb_str_cat_lit(mrb, s, " "); + mrb_str_cat_cstr(mrb, s, file); + mrb_str_cat_lit(mrb, s, ":"); + char buf[16]; + mrb_str_cat_cstr(mrb, s, mrb_int_to_cstr(buf, sizeof(buf), line, 10)); + } + + const char *st; + switch (status) { + case MRB_FIBER_CREATED: st = "created"; break; + case MRB_FIBER_RUNNING: st = "resumed"; break; + case MRB_FIBER_RESUMED: st = "suspended by resuming"; break; + case MRB_FIBER_SUSPENDED: st = "suspended"; break; + case MRB_FIBER_TRANSFERRED: st = "suspended"; break; + case MRB_FIBER_TERMINATED: st = "terminated"; break; + default: st = "UNKNOWN STATUS (BUG)"; break; + } + mrb_str_cat_lit(mrb, s, " ("); + mrb_str_cat_cstr(mrb, s, st); + mrb_str_cat_lit(mrb, s, ")>"); + + return s; +} + /* * call-seq: * fiber.transfer(args, ...) -> obj * * Transfers control to receiver fiber of the method call. - * Unlike resume the receiver wouldn't be pushed to call + * Unlike `resume` the receiver wouldn't be pushed to call * stack of fibers. Instead it will switch to the call stack of * transferring fiber. * When resuming a fiber that was transferred to another fiber it would * cause double resume error. Though when the fiber is re-transferred - * and Fiber.yield is called, the fiber would be resumable. + * and `Fiber.yield` is called, the fiber would be resumable. */ static mrb_value fiber_transfer(mrb_state *mrb, mrb_value self) @@ -327,9 +444,13 @@ fiber_transfer(mrb_state *mrb, mrb_value self) const mrb_value* a; mrb_int len; - fiber_check_cfunc(mrb, mrb->c); + fiber_check_cfunc_recursive(mrb, mrb->c); mrb_get_args(mrb, "*!", &a, &len); + if (c->status == MRB_FIBER_RESUMED) { + mrb_raise(mrb, E_FIBER_ERROR, "attempt to transfer to a resuming fiber"); + } + if (c == mrb->root_c) { mrb->c->status = MRB_FIBER_TRANSFERRED; fiber_switch_context(mrb, c); @@ -344,26 +465,28 @@ fiber_transfer(mrb_state *mrb, mrb_value self) return fiber_switch(mrb, self, len, a, FALSE, FALSE); } -/* yield values to the caller fiber */ -/* mrb_fiber_yield() must be called as `return mrb_fiber_yield(...)` */ MRB_API mrb_value mrb_fiber_yield(mrb_state *mrb, mrb_int len, const mrb_value *a) { struct mrb_context *c = mrb->c; if (!c->prev) { - mrb_raise(mrb, E_FIBER_ERROR, "can't yield from root fiber"); + return fiber_error(mrb, "attempt to yield on a not resumed fiber"); + } + if (c == mrb->root_c) { + return fiber_error(mrb, "can't yield from root fiber"); + } + if (c->prev->status == MRB_FIBER_TRANSFERRED) { + return fiber_error(mrb, "attempt to yield on a not resumed fiber"); } fiber_check_cfunc(mrb, c); - c->prev->status = MRB_FIBER_RUNNING; c->status = MRB_FIBER_SUSPENDED; fiber_switch_context(mrb, c->prev); c->prev = NULL; if (c->vmexec) { c->vmexec = FALSE; mrb->c->ci->cci = CINFO_RESUMED; - c->ci--; /* pop callinfo for yield */ } MARK_CONTEXT_MODIFY(mrb->c); return fiber_result(mrb, a, len); @@ -375,11 +498,14 @@ mrb_fiber_yield(mrb_state *mrb, mrb_int len, const mrb_value *a) * * Yields control back to the context that resumed the fiber, passing * along any arguments that were passed to it. The fiber will resume - * processing at this point when resume is called next. - * Any arguments passed to the next resume will be the + * processing at this point when `resume` is called next. + * Any arguments passed to the next `resume` will be the * * mruby limitation: Fiber resume/yield cannot cross C function boundary. * thus you cannot yield from #initialize which is called by mrb_funcall(). + * + * This method cannot be called from C using `mrb_funcall()`. + * Use `mrb_fiber_yield()` function instead. */ static mrb_value fiber_yield(mrb_state *mrb, mrb_value self) @@ -410,24 +536,36 @@ fiber_current(mrb_state *mrb, mrb_value self) return mrb_obj_value(mrb->c->fib); } +MRB_API mrb_value +mrb_fiber_new(mrb_state *mrb, const struct RProc *p) +{ + struct RClass *c = mrb_class_get_id(mrb, MRB_SYM(Fiber)); + if (MRB_INSTANCE_TT(c) != MRB_TT_FIBER) { + mrb_raise(mrb, E_TYPE_ERROR, "wrong Fiber class"); + } + + struct RFiber *f = MRB_OBJ_ALLOC(mrb, MRB_TT_FIBER, c); + return init_fiber(mrb, f, p); +} + void mrb_mruby_fiber_gem_init(mrb_state* mrb) { - struct RClass *c; - - c = mrb_define_class(mrb, "Fiber", mrb->object_class); + struct RClass *c = mrb_define_class_id(mrb, MRB_SYM(Fiber), mrb->object_class); MRB_SET_INSTANCE_TT(c, MRB_TT_FIBER); - mrb_define_method(mrb, c, "initialize", fiber_init, MRB_ARGS_NONE()|MRB_ARGS_BLOCK()); - mrb_define_method(mrb, c, "resume", fiber_resume, MRB_ARGS_ANY()); - mrb_define_method(mrb, c, "transfer", fiber_transfer, MRB_ARGS_ANY()); - mrb_define_method(mrb, c, "alive?", fiber_alive_p, MRB_ARGS_NONE()); - mrb_define_method(mrb, c, "==", fiber_eq, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, c, MRB_SYM(initialize), fiber_init, MRB_ARGS_NONE()|MRB_ARGS_BLOCK()); + mrb_define_method_id(mrb, c, MRB_SYM(resume), fiber_resume, MRB_ARGS_ANY()); + mrb_define_method_id(mrb, c, MRB_SYM(transfer), fiber_transfer, MRB_ARGS_ANY()); + mrb_define_method_id(mrb, c, MRB_SYM_Q(alive), fiber_alive_p, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, c, MRB_OPSYM(eq), fiber_eq, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, c, MRB_SYM(to_s), fiber_to_s, MRB_ARGS_NONE()); + mrb_define_alias_id(mrb, c, MRB_SYM(inspect), MRB_SYM(to_s)); - mrb_define_class_method(mrb, c, "yield", fiber_yield, MRB_ARGS_ANY()); - mrb_define_class_method(mrb, c, "current", fiber_current, MRB_ARGS_NONE()); + mrb_define_class_method_id(mrb, c, MRB_SYM(yield), fiber_yield, MRB_ARGS_ANY()); + mrb_define_class_method_id(mrb, c, MRB_SYM(current), fiber_current, MRB_ARGS_NONE()); - mrb_define_class(mrb, "FiberError", mrb->eStandardError_class); + mrb_define_class_id(mrb, MRB_SYM(FiberError), E_STANDARD_ERROR); } void diff --git a/mrbgems/mruby-fiber/test/fiber.rb b/mrbgems/mruby-fiber/test/fiber.rb index b6ecb798c8..ea541acf37 100644 --- a/mrbgems/mruby-fiber/test/fiber.rb +++ b/mrbgems/mruby-fiber/test/fiber.rb @@ -1,208 +1,210 @@ -assert('Fiber.new') do - f = Fiber.new{} - assert_kind_of Fiber, f -end +begin + $fiber_test_activity = __FILE__ -assert('Fiber#resume') do - f = Fiber.new{|x| x } - assert_equal 2, f.resume(2) -end + assert('Fiber.new') do + f = Fiber.new{} + assert_kind_of Fiber, f + end -assert('Fiber#transfer') do - f2 = nil - f1 = Fiber.new do |v| - Fiber.yield v - f2.transfer - end - f2 = Fiber.new do - f1.transfer(1) - f1.transfer(1) - Fiber.yield 2 - end - assert_equal 1, f2.resume - assert_raise(FiberError) { f2.resume } - assert_equal 2, f2.transfer - assert_raise(FiberError) { f1.resume } - f1.transfer - f2.resume - assert_false f1.alive? - assert_false f2.alive? -end + assert('Fiber#resume') do + f = Fiber.new{|x| x } + assert_equal 2, f.resume(2) + end -assert('Fiber#alive?') do - f = Fiber.new{ Fiber.yield } - f.resume - assert_true f.alive? - f.resume - assert_false f.alive? -end + assert('Fiber#transfer') do + ary = [] + f2 = nil + f1 = Fiber.new{ + ary << f2.transfer(:foo) + :ok + } + f2 = Fiber.new{ + ary << f1.transfer(:baz) + :ng + } + assert_equal(:ok, f1.transfer) + assert_equal([:baz], ary) + assert_false f1.alive? + end -assert('Fiber#==') do - root = Fiber.current - assert_equal root, root - assert_equal root, Fiber.current - assert_false root != Fiber.current - f = Fiber.new { - assert_false root == Fiber.current - } - f.resume - assert_false f == root - assert_true f != root -end + assert('Fiber#alive?') do + f = Fiber.new{ Fiber.yield } + f.resume + assert_true f.alive? + f.resume + assert_false f.alive? + end -assert('Fiber.yield') do - f = Fiber.new{|x| Fiber.yield x } - assert_equal 3, f.resume(3) - assert_true f.alive? -end + assert('Fiber#==') do + root = Fiber.current + assert_equal root, root + assert_equal root, Fiber.current + assert_false root != Fiber.current + f = Fiber.new { + assert_false root == Fiber.current + } + f.resume + assert_false f == root + assert_true f != root + end -assert('FiberError') do - assert_equal StandardError, FiberError.superclass -end + assert('Fiber.yield') do + f = Fiber.new{|x| Fiber.yield x } + assert_equal 3, f.resume(3) + assert_true f.alive? + end -assert('Fiber iteration') do - f1 = Fiber.new{ - [1,2,3].each{|x| Fiber.yield(x)} - } - f2 = Fiber.new{ - [9,8,7].each{|x| Fiber.yield(x)} - } - a = [] - 3.times { - a << f1.resume - a << f2.resume - } - assert_equal [1,9,2,8,3,7], a -end + assert('FiberError') do + assert_equal StandardError, FiberError.superclass + end + + assert('Fiber iteration') do + f1 = Fiber.new{ + [1,2,3].each{|x| Fiber.yield(x)} + } + f2 = Fiber.new{ + [9,8,7].each{|x| Fiber.yield(x)} + } + a = [] + 3.times { + a << f1.resume + a << f2.resume + } + assert_equal [1,9,2,8,3,7], a + end -assert('Fiber with splat in the block argument list') { - assert_equal([1], Fiber.new{|*x|x}.resume(1)) -} + assert('Fiber with splat in the block argument list') { + assert_equal([1], Fiber.new{|*x|x}.resume(1)) + } -assert('Fiber raises on resume when dead') do - assert_raise(FiberError) do - f = Fiber.new{} - f.resume - assert_false f.alive? - f.resume + assert('Fiber raises on resume when dead') do + assert_raise(FiberError) do + f = Fiber.new{} + f.resume + assert_false f.alive? + f.resume + end end -end -assert('Yield raises when called on root fiber') do - assert_raise(FiberError) { Fiber.yield } -end + assert('Yield raises when called on root fiber') do + assert_raise(FiberError) { Fiber.yield } + end -assert('Double resume of Fiber') do - f1 = Fiber.new {} - f2 = Fiber.new { - f1.resume - assert_raise(FiberError) { f2.resume } - Fiber.yield 0 - } - assert_equal 0, f2.resume - f2.resume - assert_false f1.alive? - assert_false f2.alive? -end + assert('Double resume of Fiber') do + f1 = Fiber.new {} + f2 = Fiber.new { + f1.resume + assert_raise(FiberError) { f2.resume } + Fiber.yield 0 + } + assert_equal 0, f2.resume + f2.resume + assert_false f1.alive? + assert_false f2.alive? + end -assert('Recursive resume of Fiber') do - f1, f2 = nil, nil - f1 = Fiber.new { assert_raise(FiberError) { f2.resume } } - f2 = Fiber.new { - f1.resume - Fiber.yield 0 - } - f3 = Fiber.new { + assert('Recursive resume of Fiber') do + f1, f2 = nil, nil + f1 = Fiber.new { assert_raise(FiberError) { f2.resume } } + f2 = Fiber.new { + f1.resume + Fiber.yield 0 + } + f3 = Fiber.new { + f2.resume + } + assert_equal 0, f3.resume f2.resume - } - assert_equal 0, f3.resume - f2.resume - assert_false f1.alive? - assert_false f2.alive? - assert_false f3.alive? -end + assert_false f1.alive? + assert_false f2.alive? + assert_false f3.alive? + end -assert('Root fiber resume') do - root = Fiber.current - assert_raise(FiberError) { root.resume } - f = Fiber.new { + assert('Root fiber resume') do + root = Fiber.current assert_raise(FiberError) { root.resume } - } - f.resume - assert_false f.alive? -end + f = Fiber.new { + assert_raise(FiberError) { root.resume } + } + f.resume + assert_false f.alive? + end -assert('Fiber without block') do - assert_raise(ArgumentError) { Fiber.new } -end + assert('Fiber without block') do + assert_raise(ArgumentError) { Fiber.new } + end -assert('Transfer to self.') do - result = [] - f = Fiber.new { result << :start; f.transfer; result << :end } - f.transfer - assert_equal [:start, :end], result + assert('Transfer to self.') do + result = [] + f = Fiber.new { result << :start; f.transfer; result << :end } + f.transfer + assert_equal [:start, :end], result - result = [] - f = Fiber.new { result << :start; f.transfer; result << :end } - f.resume - assert_equal [:start, :end], result -end - -assert('Resume transferred fiber') do - f = Fiber.new { - assert_raise(FiberError) { f.resume } - } - f.transfer -end + result = [] + f = Fiber.new { result << :start; f.transfer; result << :end } + f.resume + assert_equal [:start, :end], result + end -assert('Root fiber transfer.') do - result = nil - root = Fiber.current - f = Fiber.new { - result = :ok - root.transfer - } - f.resume - assert_true f.alive? - assert_equal :ok, result -end + assert('Resume transferred fiber') do + f = Fiber.new { + assert_raise(FiberError) { f.resume } + } + f.transfer + end -assert('Break nested fiber with root fiber transfer') do - root = Fiber.current + assert('Root fiber transfer.') do + result = nil + root = Fiber.current + f = Fiber.new { + result = :ok + root.transfer + } + f.transfer + assert_true f.alive? + assert_equal :ok, result + end - result = nil - f2 = nil - f1 = Fiber.new { - Fiber.yield f2.resume - result = :f1 - } - f2 = Fiber.new { - result = :to_root - root.transfer :from_f2 - result = :f2 - } - assert_equal :from_f2, f1.resume - assert_equal :to_root, result - assert_equal :f2, f2.transfer - assert_equal :f2, result - assert_false f2.alive? - assert_equal :f1, f1.resume - assert_equal :f1, result - assert_false f1.alive? -end + assert('Break nested fiber with root fiber transfer') do + root = Fiber.current + + result = nil + f2 = nil + f1 = Fiber.new { + root.transfer(f2.transfer) + result = :f1 + } + f2 = Fiber.new { + result = :to_root + root.transfer :from_f2 + result = :f2 + } + assert_equal :from_f2, f1.transfer + assert_equal :to_root, result + assert_equal :f2, f2.transfer + assert_equal :f2, result + assert_false f2.alive? + assert_equal nil, f1.transfer + assert_equal :f1, f1.transfer + assert_equal :f1, result + assert_false f1.alive? + end -assert('CRuby Fiber#transfer test.') do - ary = [] - f2 = nil - f1 = Fiber.new{ - ary << f2.transfer(:foo) - :ok - } - f2 = Fiber.new{ - ary << f1.transfer(:baz) - :ng - } - assert_equal :ok, f1.transfer - assert_equal [:baz], ary + assert('CRuby Fiber#transfer test.') do + ary = [] + f2 = nil + f1 = Fiber.new{ + ary << f2.transfer(:foo) + :ok + } + f2 = Fiber.new{ + ary << f1.transfer(:baz) + :ng + } + assert_equal :ok, f1.transfer + assert_equal [:baz], ary + end +ensure + $fiber_test_activity = nil end diff --git a/mrbgems/mruby-fiber/test/fiber2.rb b/mrbgems/mruby-fiber/test/fiber2.rb new file mode 100644 index 0000000000..a6ff8a791d --- /dev/null +++ b/mrbgems/mruby-fiber/test/fiber2.rb @@ -0,0 +1,155 @@ +# This file tests fiber switching crossing C functions + +unless RUBY_ENGINE == "mruby" + class Fiber + alias resume_by_c_func resume + alias resume_by_c_method resume + + class << self + alias yield_by_c_func yield + + def yield_by_c_method(*args) + raise FiberError, "ycan't cross C function boundary" + end + end + end + + def Proc.c_tunnel + yield + end +end + +begin + $fiber_test_activity = __FILE__ + + assert('Call Fiber#resume nested with C') do + assert_equal "ok1", Fiber.new { Fiber.new { "ok1" }.resume_by_c_func }.resume_by_c_func + assert_equal "ok2", Fiber.new { Fiber.new { "ok2" }.resume_by_c_method }.resume_by_c_func + assert_equal "ok3", Fiber.new { Fiber.new { "ok3" }.resume_by_c_func }.resume_by_c_method + assert_equal "ok4", Fiber.new { Fiber.new { "ok4" }.resume_by_c_method }.resume_by_c_method + assert_equal "ok5", Fiber.new { Proc.c_tunnel { Fiber.new { "ok5" }.resume_by_c_func } }.resume_by_c_func + assert_equal "ok6", Fiber.new { Proc.c_tunnel { Fiber.new { "ok6" }.resume_by_c_method } }.resume_by_c_func + assert_equal "ok7", Fiber.new { Proc.c_tunnel { Fiber.new { "ok7" }.resume_by_c_func } }.resume_by_c_method + assert_equal "ok8", Fiber.new { Proc.c_tunnel { Fiber.new { "ok8" }.resume_by_c_method } }.resume_by_c_method + assert_equal "ok9", Fiber.new { Proc.c_tunnel { Fiber.new { "ok9" }.resume } }.resume_by_c_func + assert_equal "ok10", Fiber.new { Proc.c_tunnel { Fiber.new { "ok10" }.resume } }.resume_by_c_method + end + + assert('Call Fiber#resume and Fiber.yield mixed with C.') do + assert_equal 1, Fiber.new { Fiber.yield 1 }.resume_by_c_func + assert_equal 2, Fiber.new { Fiber.yield 2 }.resume_by_c_method + assert_equal 3, Fiber.new { Fiber.yield_by_c_func 3 }.resume + assert_equal 4, Fiber.new { Fiber.yield_by_c_func 4 }.resume_by_c_func + assert_equal 5, Fiber.new { Fiber.yield_by_c_func 5 }.resume_by_c_method + assert_raise(FiberError) { Fiber.new { Fiber.yield_by_c_method "bad" }.resume } + assert_raise(FiberError) { Fiber.new { Fiber.yield_by_c_method "bad" }.resume_by_c_func } + assert_raise(FiberError) { Fiber.new { Fiber.yield_by_c_method "bad" }.resume_by_c_method } + + result = [] + f1 = Fiber.new { result << Fiber.new { Fiber.yield 1; "bad" }.resume_by_c_func; 2 } + f2 = Fiber.new { result << f1.resume; 3 } + result << f2.resume + assert_equal [1, 2, 3], result + + f1 = Fiber.new { + -> { + Fiber.yield 1 + Fiber.yield_by_c_func 2 + f2 = Fiber.new { + -> { + Fiber.yield_by_c_func 3 + Fiber.yield 4 + Fiber.yield_by_c_func 5 + Fiber.yield 6 + }.call + 7 + } + Fiber.yield f2.resume_by_c_func + Fiber.yield f2.resume + Fiber.yield f2.resume_by_c_method + Fiber.yield f2.resume + Fiber.yield f2.resume_by_c_func + Fiber.yield 8 + }.call + Fiber.yield 9 + 10 + } + result = [] + 10.times { result << f1.resume } + assert_equal [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], result + end + + assert('Call Fiber#resume and Fiber.yield mixed with C and raising exceptions') do + f = Fiber.new do + raise ZeroDivisionError + rescue + Fiber.yield "rescue" + "pass1" + ensure + Fiber.yield "ensure" + end + assert_equal "rescue", f.resume_by_c_method + assert_equal "ensure", f.resume_by_c_method + assert_equal "pass1", f.resume_by_c_method + assert_raise(FiberError) { f.resume_by_c_method } + + f = Fiber.new do + raise ZeroDivisionError + rescue + Fiber.yield "rescue" + "pass2" + ensure + Fiber.yield "ensure" + end + assert_equal "rescue", f.resume_by_c_func + assert_equal "ensure", f.resume_by_c_func + assert_equal "pass2", f.resume_by_c_func + assert_raise(FiberError) { f.resume_by_c_func } + + f2 = Fiber.new do + -> do + Fiber.yield 1 + raise "3" + ensure + Fiber.yield 2 + end.call + "NOT REACH 1" + end + f1 = Fiber.new do + Fiber.yield f2.resume_by_c_func + begin + Fiber.yield f2.resume + Fiber.yield f2.resume_by_c_method + Fiber.yield "NOT REACH 2" + rescue => e + Fiber.yield e.message + Fiber.yield 4 + ensure + Fiber.yield 5 + end + Fiber.yield 6 + 7 + end + result = [] + 7.times { result << f1.resume } + assert_equal [1, 2, "3", 4, 5, 6, 7], result + end + + assert('Call Fiber#transfer with C') do + assert_equal "ok1", Fiber.new { Fiber.new { "ok1" }.resume_by_c_method }.transfer + assert_equal "ok2", Fiber.new { Fiber.new { "ok2" }.resume_by_c_func }.transfer + assert_raise(FiberError) { Proc.c_tunnel { Fiber.new { "BAD!" }.transfer } } + + b = Fiber.current + a = Fiber.new { + Proc.c_tunnel { + Fiber.new { + b.transfer + }.resume + } + } + assert_raise(FiberError) { a.transfer } + end +ensure + $fiber_test_activity = nil +end diff --git a/mrbgems/mruby-fiber/test/fibertest.c b/mrbgems/mruby-fiber/test/fibertest.c new file mode 100644 index 0000000000..16d791d0c6 --- /dev/null +++ b/mrbgems/mruby-fiber/test/fibertest.c @@ -0,0 +1,87 @@ +#include +#include +#include +#include +#include + +static mrb_value +fiber_s_yield_by_c_func(mrb_state *mrb, mrb_value self) +{ + mrb_value a = mrb_get_arg1(mrb); + return mrb_fiber_yield(mrb, 1, &a); +} + +static mrb_value +fiber_s_yield_by_c_method(mrb_state *mrb, mrb_value self) +{ + mrb_value a = mrb_get_arg1(mrb); + return mrb_funcall_argv(mrb, self, mrb_intern_lit(mrb, "yield"), 1, &a); +} + +static mrb_value +fiber_resume_by_c_func(mrb_state *mrb, mrb_value self) +{ + ptrdiff_t ci_index = mrb->c->ci - mrb->c->cibase; + mrb_value ret = mrb_fiber_resume(mrb, self, 0, NULL); + if (ci_index != mrb->c->ci - mrb->c->cibase) { + mrb_raisef(mrb, E_EXCEPTION, + "[BUG] INVALID CI POSITION (expected %d, but actual %d) [BUG]", + (int)ci_index, (int)(mrb->c->ci - mrb->c->cibase)); + } + return ret; +} + +static mrb_value +fiber_resume_by_c_method(mrb_state *mrb, mrb_value self) +{ + ptrdiff_t ci_index = mrb->c->ci - mrb->c->cibase; + mrb_value ret = mrb_funcall_argv(mrb, self, mrb_intern_lit(mrb, "resume"), 0, NULL); + if (ci_index != mrb->c->ci - mrb->c->cibase) { + mrb_raisef(mrb, E_EXCEPTION, + "[BUG] INVALID CI POSITION (expected %d, but actual %d) [BUG]", + (int)ci_index, (int)(mrb->c->ci - mrb->c->cibase)); + } + return ret; +} + +static mrb_value +fiber_transfer_by_c(mrb_state *mrb, mrb_value self) +{ + return mrb_funcall_argv(mrb, self, mrb_intern_lit(mrb, "transfer"), 0, NULL); +} + +static mrb_value +proc_s_c_tunnel(mrb_state *mrb, mrb_value self) +{ + mrb_value b; + mrb_get_args(mrb, "&!", &b); + return mrb_yield_argv(mrb, b, 0, NULL); +} + +static void +check_activity(mrb_state *mrb) +{ + mrb_value act = mrb_gv_get(mrb, mrb_intern_lit(mrb, "$fiber_test_activity")); + if (mrb_test(act)) { + act = mrb_obj_as_string(mrb, act); + fprintf(stderr, "\n\t<<<%s%.*s>>>\n", + "mruby VM has an unexpected outage in ", (int)RSTRING_LEN(act), RSTRING_PTR(act)); + abort(); + } +} + +void +mrb_mruby_fiber_gem_test(mrb_state *mrb) +{ + struct RClass *fiber_class = mrb_class_get(mrb, "Fiber"); + mrb_define_class_method(mrb, fiber_class, "yield_by_c_func", fiber_s_yield_by_c_func, MRB_ARGS_ANY()); + mrb_define_class_method(mrb, fiber_class, "yield_by_c_method", fiber_s_yield_by_c_method, MRB_ARGS_ANY()); + mrb_define_method(mrb, fiber_class, "resume_by_c_func", fiber_resume_by_c_func, MRB_ARGS_NONE()); + mrb_define_method(mrb, fiber_class, "resume_by_c_method", fiber_resume_by_c_method, MRB_ARGS_NONE()); + mrb_define_method(mrb, fiber_class, "transfer_by_c", fiber_transfer_by_c, MRB_ARGS_NONE()); + + mrb_define_class_method(mrb, mrb->proc_class, "c_tunnel", proc_s_c_tunnel, MRB_ARGS_NONE() | MRB_ARGS_BLOCK()); + + mrb_gv_set(mrb, mrb_intern_lit(mrb, "$fiber_test_activity"), mrb_nil_value()); + mrb_state_atexit(mrb, check_activity); +} diff --git a/mrbgems/mruby-hash-ext/README.md b/mrbgems/mruby-hash-ext/README.md new file mode 100644 index 0000000000..dc5765efd8 --- /dev/null +++ b/mrbgems/mruby-hash-ext/README.md @@ -0,0 +1,392 @@ +# mruby-hash-ext + +This mrbgem extends the core `Hash` class in mruby, providing a collection of additional methods to enhance its functionality. These extensions offer more ways to manipulate and interact with hashes, drawing inspiration from common Ruby hash methods. + +## How to Use + +To incorporate `mruby-hash-ext` into your mruby project, add it to your `build_config.rb` file. For example: + +```ruby +MRuby::Build.new do |conf| + # ... other configurations ... + conf.gem :github => 'mruby/mruby-hash-ext' + # or if you have it locally: + # conf.gem "#{root}/mrbgems/mruby-hash-ext" +end +``` + +Then, rebuild your mruby project. The extended hash methods will then be available for use. + +## Implemented Methods + +This gem implements the following methods for the `Hash` class: + +- `values_at(*keys)`: Returns an array containing the values associated with the given keys. +- `slice(*keys)`: Returns a hash containing only the given keys and their values. +- `except(*keys)`: Returns a hash excluding the given keys and their values. +- `Hash.[](*object)`: Creates a new hash populated with the given objects. +- `#merge!(other_hash..)`, `#update(other_hash..)`: Adds the contents of `other_hash` to `hsh`. +- `#compact!`: Removes all nil values from the hash. +- `#compact`: Returns a new hash with the nil values/key pairs removed. +- `#fetch(key [, default])`, `#fetch(key) {| key | block }`: Returns a value from the hash for the given key. +- `#delete_if {| key, value | block }`: Deletes every key-value pair from `hsh` for which `block` evaluates to `true`. +- `#flatten`: Returns a new array that is a one-dimensional flattening of this hash. +- `#invert`: Returns a new hash created by using `hsh`'s values as keys, and the keys as values. +- `#keep_if {| key, value | block }`: Deletes every key-value pair from `hsh` for which `block` evaluates to false. +- `#key(value)`: Returns the key of an occurrence of a given value. +- `#to_h`: Returns `self`. If called on a subclass of Hash, converts the receiver to a Hash object. +- `#< other_hash`: Returns `true` if `hsh` is a subset of `other_hash`. +- `#<= other_hash`: Returns `true` if `hsh` is a subset of `other_hash` or equal to `other_hash`. +- `#> other_hash`: Returns `true` if `other_hash` is a subset of `hsh`. +- `#>= other_hash`: Returns `true` if `other_hash` is a subset of `hsh` or equal to `hsh`. +- `#dig(key, ...)`: Extracts the nested value specified by the sequence of keys. +- `#transform_keys {|key| block }`: Returns a new hash with keys transformed by the block. +- `#transform_keys! {|key| block }`: Modifies the hash by transforming its keys using the block. +- `#transform_values {|value| block }`: Returns a new hash with values transformed by the block. +- `#transform_values! {|value| block }`: Modifies the hash by transforming its values using the block. +- `#to_proc`: Returns a proc that maps a key to its value in the hash. +- `#fetch_values(key, ...)`: Returns an array of values for the given keys, raising KeyError if any are not found. +- `#filter {| key, value | block }` (Alias for `select`): Returns a new hash containing entries for which the block returns true. +- `#filter! {| key, value | block }` (Alias for `select!`): Modifies the hash, keeping only entries for which the block returns true. + +### `values_at(*keys) -> array` + +Returns an array containing the values associated with the given keys. If a key is not found, `nil` is returned for that key's position in the array. + +```ruby +h = { "cat" => "feline", "dog" => "canine", "cow" => "bovine" } +h.values_at("cow", "cat") #=> ["bovine", "feline"] +h.values_at("dog", "mouse") #=> ["canine", nil] +h.values_at() #=> [] +``` + +### `slice(*keys) -> new_hash` + +Returns a new hash containing only the given keys and their associated values from the original hash. If a key is not found in the original hash, it's ignored. + +```ruby +h = { a: 100, b: 200, c: 300 } +h.slice(:a) #=> {a: 100} +h.slice(:b, :c, :d) #=> {b: 200, c: 300} (ignores :d as it's not in h) +h.slice() #=> {} +``` + +### `except(*keys) -> new_hash` + +Returns a new hash containing all key-value pairs from the original hash except for those specified by the given keys. If a key is not found in the original hash, it's ignored. + +```ruby +h = { a: 100, b: 200, c: 300 } +h.except(:a) #=> {b: 200, c: 300} +h.except(:b, :c, :d) #=> {a: 100} (ignores :d as it's not in h) +h.except() #=> {a: 100, b: 200, c: 300} +``` + +### `Hash.[](*object)` + +Creates a new hash populated with the given objects. + +- **`Hash[key, value, ...]`**: Creates a new hash with key-value pairs. +- **`Hash[[ [key, value], ... ]]`**: Creates a new hash from an array of key-value pairs. +- **`Hash[object]`**: Creates a new hash from an object convertible to a hash. + +```ruby +h1 = Hash["a", 100, "b", 200] #=> {"a"=>100, "b"=>200} +h2 = Hash[[ ["a", 100], ["b", 200] ]] #=> {"a"=>100, "b"=>200} +h3 = Hash["a" => 100, "b" => 200] #=> {"a"=>100, "b"=>200} +``` + +### `#merge!(other_hash..) -> hsh` + +### `#merge!(other_hash..){|key, oldval, newval| block} -> hsh` + +(Alias: `#update`) + +Adds the contents of `other_hash` to `hsh`. If no block is specified, entries with duplicate keys are overwritten with the values from `other_hash`. Otherwise, the value of each duplicate key is determined by calling the block with the key, its value in `hsh`, and its value in `other_hash`. + +```ruby +h1 = { "a" => 100, "b" => 200 } +h2 = { "b" => 254, "c" => 300 } +h1.merge!(h2) #=> {"a"=>100, "b"=>254, "c"=>300} + +h1 = { "a" => 100, "b" => 200 } +h2 = { "b" => 254, "c" => 300 } +h1.merge!(h2) { |key, v1, v2| v1 } + #=> {"a"=>100, "b"=>200, "c"=>300} +``` + +### `#compact! -> hsh` + +Removes all nil values from the hash. Returns the hash. Returns `nil` if the hash does not contain nil values. + +```ruby +h = { a: 1, b: false, c: nil } +h.compact! #=> { a: 1, b: false } +``` + +### `#compact -> new_hsh` + +Returns a new hash with the nil values/key pairs removed. + +```ruby +h = { a: 1, b: false, c: nil } +h.compact #=> { a: 1, b: false } +h #=> { a: 1, b: false, c: nil } +``` + +### `#fetch(key [, default] ) -> obj` + +### `#fetch(key) {| key | block } -> obj` + +Returns a value from the hash for the given key. +If the key can't be found, there are several options: + +- With no other arguments, it will raise a `KeyError` exception. +- If `default` is given, then that will be returned. +- If the optional code block is specified, then that will be run and its result returned. + +```ruby +h = { "a" => 100, "b" => 200 } +h.fetch("a") #=> 100 +h.fetch("z", "go fish") #=> "go fish" +h.fetch("z") { |el| "go fish, #{el}"} #=> "go fish, z" + +# h.fetch("z") # Raises KeyError: key not found: "z" +``` + +### `#delete_if {| key, value | block } -> hsh` + +### `#delete_if -> an_enumerator` + +Deletes every key-value pair from `hsh` for which `block` evaluates to `true`. +If no block is given, an enumerator is returned instead. + +```ruby +h = { "a" => 100, "b" => 200, "c" => 300 } +h.delete_if {|key, value| key >= "b" } #=> {"a"=>100} +``` + +### `#flatten -> an_array` + +### `#flatten(level) -> an_array` + +Returns a new array that is a one-dimensional flattening of this hash. That is, for every key or value that is an array, extract its elements into the new array. Unlike `Array#flatten`, this method does not flatten recursively by default. The optional `level` argument determines the level of recursion to flatten. + +```ruby +a = {1=> "one", 2 => [2,"two"], 3 => "three"} +a.flatten # => [1, "one", 2, [2, "two"], 3, "three"] +a.flatten(2) # => [1, "one", 2, 2, "two", 3, "three"] +``` + +### `#invert -> new_hash` + +Returns a new hash created by using `hsh`'s values as keys, and the keys as values. If a value appears more than once, the last key encountered will be used due to hash key uniqueness. + +```ruby +h = { "n" => 100, "m" => 100, "y" => 300, "d" => 200, "a" => 0 } +h.invert #=> {0=>"a", 100=>"m", 200=>"d", 300=>"y"} +``` + +### `#keep_if {| key, value | block } -> hsh` + +### `#keep_if -> an_enumerator` + +Deletes every key-value pair from `hsh` for which `block` evaluates to `false`. +If no block is given, an enumerator is returned instead. + +```ruby +h = { "a" => 1, "b" => 2, "c" => 3, "d" => 4 } +h.keep_if {|key, value| value % 2 == 0 } #=> {"b"=>2, "d"=>4} +``` + +### `#key(value) -> key` + +Returns the key of an occurrence of a given value. If the value is not found, returns `nil`. + +```ruby +h = { "a" => 100, "b" => 200, "c" => 300, "d" => 300 } +h.key(200) #=> "b" +h.key(300) #=> "c" (returns the first key found for the value) +h.key(999) #=> nil +``` + +### `#to_h -> hsh or new_hash` + +Returns `self`. If called on a subclass of Hash, converts the receiver to a Hash object. For a Hash instance, it simply returns `self`. + +```ruby +h = { "a" => 1, "b" => 2 } +h.to_h #=> {"a"=>1, "b"=>2} + +class MyHash < Hash; end +my_h = MyHash["c" => 3, "d" => 4] +my_h.to_h #=> {"c"=>3, "d"=>4} (returns a Hash object, not MyHash) +``` + +### `#< other_hash -> true or false` + +Returns `true` if `hsh` is a proper subset of `other_hash` (i.e., `other_hash` contains all key/value pairs of `hsh`, and `other_hash` has at least one additional key/value pair). + +```ruby +h1 = {a:1, b:2} +h2 = {a:1, b:2, c:3} +h1 < h2 #=> true +h2 < h1 #=> false +h1 < h1 #=> false +``` + +### `#<= other_hash -> true or false` + +Returns `true` if `hsh` is a subset of `other_hash` or is equal to `other_hash` (i.e., `other_hash` contains all key/value pairs of `hsh`). + +```ruby +h1 = {a:1, b:2} +h2 = {a:1, b:2, c:3} +h1 <= h2 #=> true +h2 <= h1 #=> false +h1 <= h1 #=> true +``` + +### `#> other_hash -> true or false` + +Returns `true` if `other_hash` is a proper subset of `hsh` (i.e., `hsh` contains all key/value pairs of `other_hash`, and `hsh` has at least one additional key/value pair). + +```ruby +h1 = {a:1, b:2} +h2 = {a:1, b:2, c:3} +h1 > h2 #=> false +h2 > h1 #=> true +h1 > h1 #=> false +``` + +### `#>= other_hash -> true or false` + +Returns `true` if `other_hash` is a subset of `hsh` or is equal to `hsh` (i.e., `hsh` contains all key/value pairs of `other_hash`). + +```ruby +h1 = {a:1, b:2} +h2 = {a:1, b:2, c:3} +h1 >= h2 #=> false +h2 >= h1 #=> true +h1 >= h1 #=> true +``` + +### `#dig(key,...) -> object` + +Extracts the nested value specified by the sequence of `key` objects by calling `dig` at each step. Returns `nil` if any intermediate step is `nil`. + +```ruby +h = { a: { b: { c: 1 } } } +h.dig(:a, :b, :c) #=> 1 +h.dig(:a, :x, :c) #=> nil + +g = { a: [1, 2, {b: 3}] } +g.dig(:a, 2, :b) #=> 3 +g.dig(:a, 1, :b) #=> nil (element at index 1 is 2, which does not respond to #dig) +``` + +### `#transform_keys {|key| block } -> new_hash` + +### `#transform_keys -> an_enumerator` + +Returns a new hash, with the keys computed from running the block once for each key in the hash, and the values unchanged. +If no block is given, an enumerator is returned instead. + +```ruby +h = { a: 1, b: 2, c: 3 } +h.transform_keys {|k| k.to_s.upcase } #=> {"A"=>1, "B"=>2, "C"=>3} +``` + +### `#transform_keys! {|key| block } -> hsh` + +### `#transform_keys! -> an_enumerator` + +Invokes the given block once for each key in `hsh`, replacing it with the new key returned by the block, and then returns `hsh`. +If no block is given, an enumerator is returned instead. + +```ruby +h = { a: 1, b: 2, c: 3 } +h.transform_keys! {|k| k.to_s.upcase } #=> {"A"=>1, "B"=>2, "C"=>3} +h #=> {"A"=>1, "B"=>2, "C"=>3} +``` + +### `#transform_values {|value| block } -> new_hash` + +### `#transform_values -> an_enumerator` + +Returns a new hash with the results of running the block once for every value. This method does not change the keys. +If no block is given, an enumerator is returned instead. + +```ruby +h = { a: 1, b: 2, c: 3 } +h.transform_values {|v| v * v } #=> {a: 1, b: 4, c: 9} +``` + +### `#transform_values! {|value| block } -> hsh` + +### `#transform_values! -> an_enumerator` + +Invokes the given block once for each value in the hash, replacing it with the new value returned by the block, and then returns `hsh`. +If no block is given, an enumerator is returned instead. + +```ruby +h = { a: 1, b: 2, c: 3 } +h.transform_values! {|v| v * v } #=> {a: 1, b: 4, c: 9} +h #=> {a: 1, b: 4, c: 9} +``` + +### `#to_proc -> a_proc` + +Returns a `Proc` object that maps a key to its corresponding value in the hash. This allows a hash to be used as a block argument. + +```ruby +h = {a: 1, b: 2} +p = h.to_proc +p.call(:a) #=> 1 + +['a', 'b', 'c'].map(&h) #=> [1, 2, nil] (uses h[x] for each element) +``` + +### `#fetch_values(key, ...) -> array` + +### `#fetch_values(key, ...) { |key| block } -> array` + +Returns an array containing the values associated with the given keys. Raises `KeyError` if any of the keys can't be found, unless a block is provided to compute a default value. + +```ruby +h = { "cat" => "feline", "dog" => "canine", "cow" => "bovine" } + +h.fetch_values("cow", "cat") #=> ["bovine", "feline"] +# h.fetch_values("cow", "bird") # raises KeyError: key not found: "bird" +h.fetch_values("cow", "bird") { |k| k.upcase } #=> ["bovine", "BIRD"] +``` + +### `#filter {| key, value | block } -> new_hsh` + +### `#filter -> an_enumerator` + +(Alias for `select`) +Returns a new hash consisting of entries for which the block returns a true value. +If no block is given, an enumerator is returned instead. + +```ruby +h = { "a" => 100, "b" => 200, "c" => 300 } +h.filter {|key, value| key < "b"} #=> {"a"=>100} +h.filter {|key, value| value < 200} #=> {"a"=>100} +``` + +### `#filter! {| key, value | block } -> hsh_or_nil` + +### `#filter! -> an_enumerator` + +(Alias for `select!`) +Equivalent to `Hash#keep_if`, but returns `nil` if no changes were made. +Deletes every key-value pair from `hsh` for which `block` evaluates to `false`. +If no block is given, an enumerator is returned instead. + +```ruby +h = { "a" => 100, "b" => 200, "c" => 300 } +h.filter! {|key, value| key < "b"} #=> {"a"=>100} +h #=> {"a"=>100} +h.filter! {|key, value| key < "a"} #=> nil (no changes) +``` diff --git a/mrbgems/mruby-hash-ext/mrblib/hash.rb b/mrbgems/mruby-hash-ext/mrblib/hash.rb index ee48117736..e115598724 100644 --- a/mrbgems/mruby-hash-ext/mrblib/hash.rb +++ b/mrbgems/mruby-hash-ext/mrblib/hash.rb @@ -1,71 +1,15 @@ class Hash - # ISO does not define Hash#each_pair, so each_pair is defined in gem. + # ISO does not define Hash#each_pair, so each_pair is defined in gem. alias each_pair each - ## - # call-seq: - # Hash[ key, value, ... ] -> new_hash - # Hash[ [ [key, value], ... ] ] -> new_hash - # Hash[ object ] -> new_hash - # - # Creates a new hash populated with the given objects. - # - # Similar to the literal `{ _key_ => _value_, ... }`. In the first - # form, keys and values occur in pairs, so there must be an even number of - # arguments. - # - # The second and third form take a single argument which is either an array - # of key-value pairs or an object convertible to a hash. - # - # Hash["a", 100, "b", 200] #=> {"a"=>100, "b"=>200} - # Hash[ [ ["a", 100], ["b", 200] ] ] #=> {"a"=>100, "b"=>200} - # Hash["a" => 100, "b" => 200] #=> {"a"=>100, "b"=>200} - # - - def self.[](*object) - length = object.length - if length == 1 - o = object[0] - if Hash === o - h = self.new - o.each { |k, v| h[k] = v } - return h - elsif o.respond_to?(:to_a) - h = self.new - o.to_a.each do |i| - raise ArgumentError, "wrong element type #{i.class} (expected array)" unless i.respond_to?(:to_a) - k, v = nil - case i.size - when 2 - k = i[0] - v = i[1] - when 1 - k = i[0] - else - raise ArgumentError, "invalid number of elements (#{i.size} for 1..2)" - end - h[k] = v - end - return h - end - end - unless length % 2 == 0 - raise ArgumentError, 'odd number of arguments for Hash' - end - h = self.new - 0.step(length - 2, 2) do |i| - h[object[i]] = object[i + 1] - end - h - end ## # call-seq: # hsh.merge!(other_hash..) -> hsh # hsh.merge!(other_hash..){|key, oldval, newval| block} -> hsh # - # Adds the contents of _other_hash_ to _hsh_. If no block is specified, + # Adds the contents of _other_hash_ to _hsh_. If no block is specified, # entries with duplicate keys are overwritten with the values from # _other_hash_, otherwise the value of each duplicate key is determined by # calling the block with the key, its value in _hsh_ and its value in @@ -88,13 +32,9 @@ def merge!(*others, &block) other = others[i] i += 1 raise TypeError, "Hash required (#{other.class} given)" unless Hash === other - if block - other.each_key{|k| - self[k] = (self.has_key?(k))? block.call(k, self[k], other[k]): other[k] - } - else - other.each_key{|k| self[k] = other[k]} - end + other.each_key {|k| + self[k] = (self.has_key?(k))? block.call(k, self[k], other[k]): other[k] + } end self end @@ -113,16 +53,7 @@ def merge!(*others, &block) # def compact! - keys = self.keys - nk = keys.select{|k| - self[k] != nil - } - return nil if (keys.size == nk.size) - h = {} - nk.each {|k| - h[k] = self[k] - } - self.replace(h) + self.__compact end ## @@ -136,12 +67,8 @@ def compact! # h #=> { a: 1, b: false, c: nil } # def compact - h = {} - self.keys.select{|k| - self[k] != nil - }.each {|k| - h[k] = self[k] - } + h=self.dup + h.__compact h end @@ -152,7 +79,7 @@ def compact # # Returns a value from the hash for the given key. If the key can't be # found, there are several options: With no other arguments, it will - # raise an KeyError exception; if default is + # raise an `KeyError` exception; if *default* is # given, then that will be returned; if the optional code block is # specified, then that will be run and its result returned. # @@ -177,7 +104,7 @@ def fetch(key, none=NONE, &block) unless self.key?(key) if block block.call(key) - elsif none != NONE + elsif !NONE.equal?(none) none else raise KeyError, "Key not found: #{key.inspect}" @@ -192,8 +119,8 @@ def fetch(key, none=NONE, &block) # hsh.delete_if {| key, value | block } -> hsh # hsh.delete_if -> an_enumerator # - # Deletes every key-value pair from hsh for which block - # evaluates to true. + # Deletes every key-value pair from *hsh* for which *block* + # evaluates to `true`. # # If no block is given, an enumerator is returned instead. # @@ -202,7 +129,7 @@ def fetch(key, none=NONE, &block) # def delete_if(&block) - return to_enum :delete_if unless block + return to_enum(:delete_if) unless block self.each do |k, v| self.delete(k) if block.call(k, v) @@ -217,9 +144,9 @@ def delete_if(&block) # # Returns a new array that is a one-dimensional flattening of this # hash. That is, for every key or value that is an array, extract - # its elements into the new array. Unlike Array#flatten, this - # method does not flatten recursively by default. The optional - # level argument determines the level of recursion to flatten. + # its elements into the new array. Unlike Array#flatten, this + # method does not flatten recursively by default. The optional + # *level* argument determines the level of recursion to flatten. # # a = {1=> "one", 2 => [2,"two"], 3 => "three"} # a.flatten # => [1, "one", 2, [2, "two"], 3, "three"] @@ -234,7 +161,7 @@ def flatten(level=1) # call-seq: # hsh.invert -> new_hash # - # Returns a new hash created by using hsh's values as keys, and + # Returns a new hash created by using *hsh*'s values as keys, and # the keys as values. # # h = { "n" => 100, "m" => 100, "y" => 300, "d" => 200, "a" => 0 } @@ -252,14 +179,14 @@ def invert # hsh.keep_if {| key, value | block } -> hsh # hsh.keep_if -> an_enumerator # - # Deletes every key-value pair from hsh for which block + # Deletes every key-value pair from *hsh* for which *block* # evaluates to false. # # If no block is given, an enumerator is returned instead. # def keep_if(&block) - return to_enum :keep_if unless block + return to_enum(:keep_if) unless block self.each do |k, v| unless block.call([k, v]) @@ -269,31 +196,12 @@ def keep_if(&block) self end - ## - # call-seq: - # hsh.key(value) -> key - # - # Returns the key of an occurrence of a given value. If the value is - # not found, returns nil. - # - # h = { "a" => 100, "b" => 200, "c" => 300, "d" => 300 } - # h.key(200) #=> "b" - # h.key(300) #=> "c" - # h.key(999) #=> nil - # - - def key(val) - self.each do |k, v| - return k if v == val - end - nil - end ## # call-seq: # hsh.to_h -> hsh or new_hash # - # Returns +self+. If called on a subclass of Hash, converts + # Returns `self`. If called on a subclass of Hash, converts # the receiver to a Hash object. # def to_h @@ -304,8 +212,8 @@ def to_h # call-seq: # hash < other -> true or false # - # Returns true if hash is subset of - # other. + # Returns `true` if *hash* is subset of + # *other*. # # h1 = {a:1, b:2} # h2 = {a:1, b:2, c:3} @@ -324,8 +232,8 @@ def <(hash) # call-seq: # hash <= other -> true or false # - # Returns true if hash is subset of - # other or equals to other. + # Returns `true` if *hash* is subset of + # *other* or equals to *other*. # # h1 = {a:1, b:2} # h2 = {a:1, b:2, c:3} @@ -344,8 +252,8 @@ def <=(hash) # call-seq: # hash > other -> true or false # - # Returns true if other is subset of - # hash. + # Returns `true` if *other* is subset of + # *hash*. # # h1 = {a:1, b:2} # h2 = {a:1, b:2, c:3} @@ -364,8 +272,8 @@ def >(hash) # call-seq: # hash >= other -> true or false # - # Returns true if other is subset of - # hash or equals to hash. + # Returns `true` if *other* is subset of + # *hash* or equals to *hash*. # # h1 = {a:1, b:2} # h2 = {a:1, b:2, c:3} @@ -384,9 +292,9 @@ def >=(hash) # call-seq: # hsh.dig(key,...) -> object # - # Extracts the nested value specified by the sequence of key - # objects by calling +dig+ at each step, returning +nil+ if any - # intermediate step is +nil+. + # Extracts the nested value specified by the sequence of *key* + # objects by calling `dig` at each step, returning `nil` if any + # intermediate step is `nil`. # def dig(idx,*args) n = self[idx] @@ -408,7 +316,7 @@ def dig(idx,*args) # If no block is given, an enumerator is returned instead. # def transform_keys(&block) - return to_enum :transform_keys unless block + return to_enum(:transform_keys) unless block hash = {} self.keys.each do |k| new_key = block.call(k) @@ -421,19 +329,15 @@ def transform_keys(&block) # hsh.transform_keys! {|key| block } -> hsh # hsh.transform_keys! -> an_enumerator # - # Invokes the given block once for each key in hsh, replacing it - # with the new key returned by the block, and then returns hsh. + # Invokes the given block once for each key in *hsh*, replacing it + # with the new key returned by the block, and then returns *hsh*. # # If no block is given, an enumerator is returned instead. # def transform_keys!(&block) - return to_enum :transform_keys! unless block - self.keys.each do |k| - value = self[k] - self.__delete(k) - k = block.call(k) if block - self[k] = value - end + return to_enum(:transform_keys!) unless block + hash = self.transform_keys(&block) + self.replace(hash) self end ## @@ -448,7 +352,7 @@ def transform_keys!(&block) # If no block is given, an enumerator is returned instead. # def transform_values(&b) - return to_enum :transform_values unless block_given? + return to_enum(:transform_values) unless block_given? hash = {} self.keys.each do |k| hash[k] = yield(self[k]) @@ -462,12 +366,12 @@ def transform_values(&b) # hsh.transform_values! -> an_enumerator # # Invokes the given block once for each value in the hash, replacing - # with the new value returned by the block, and then returns hsh. + # with the new value returned by the block, and then returns *hsh*. # # If no block is given, an enumerator is returned instead. # def transform_values!(&b) - return to_enum :transform_values! unless block_given? + return to_enum(:transform_values!) unless block_given? self.keys.each do |k| self[k] = yield(self[k]) end @@ -484,8 +388,8 @@ def to_proc # hsh.fetch_values(key, ...) { |key| block } -> array # # Returns an array containing the values associated with the given keys - # but also raises KeyError when one of keys can't be found. - # Also see Hash#values_at and Hash#fetch. + # but also raises `KeyError` when one of keys can't be found. + # Also see `Hash#values_at` and `Hash#fetch`. # # h = { "cat" => "feline", "dog" => "canine", "cow" => "bovine" } # diff --git a/mrbgems/mruby-hash-ext/src/hash-ext.c b/mrbgems/mruby-hash-ext/src/hash-ext.c deleted file mode 100644 index 9c85858fe6..0000000000 --- a/mrbgems/mruby-hash-ext/src/hash-ext.c +++ /dev/null @@ -1,111 +0,0 @@ -/* -** hash.c - Hash class -** -** See Copyright Notice in mruby.h -*/ - -#include -#include -#include - -/* - * call-seq: - * hsh.values_at(key, ...) -> array - * - * Return an array containing the values associated with the given keys. - * Also see Hash.select. - * - * h = { "cat" => "feline", "dog" => "canine", "cow" => "bovine" } - * h.values_at("cow", "cat") #=> ["bovine", "feline"] - */ - -static mrb_value -hash_values_at(mrb_state *mrb, mrb_value hash) -{ - const mrb_value *argv; - mrb_value result; - mrb_int argc, i; - int ai; - - mrb_get_args(mrb, "*", &argv, &argc); - result = mrb_ary_new_capa(mrb, argc); - ai = mrb_gc_arena_save(mrb); - for (i = 0; i < argc; i++) { - mrb_ary_push(mrb, result, mrb_hash_get(mrb, hash, argv[i])); - mrb_gc_arena_restore(mrb, ai); - } - return result; -} - -/* - * call-seq: - * hsh.slice(*keys) -> a_hash - * - * Returns a hash containing only the given keys and their values. - * - * h = { a: 100, b: 200, c: 300 } - * h.slice(:a) #=> {:a=>100} - * h.slice(:b, :c, :d) #=> {:b=>200, :c=>300} - */ -static mrb_value -hash_slice(mrb_state *mrb, mrb_value hash) -{ - const mrb_value *argv; - mrb_value result; - mrb_int argc, i; - - mrb_get_args(mrb, "*", &argv, &argc); - result = mrb_hash_new_capa(mrb, argc); - if (argc == 0) return result; /* empty hash */ - for (i = 0; i < argc; i++) { - mrb_value key = argv[i]; - mrb_value val; - - val = mrb_hash_fetch(mrb, hash, key, mrb_undef_value()); - if (!mrb_undef_p(val)) { - mrb_hash_set(mrb, result, key, val); - } - } - return result; -} - -/* - * call-seq: - * hsh.except(*keys) -> a_hash - * - * Returns a hash excluding the given keys and their values. - * - * h = { a: 100, b: 200, c: 300 } - * h.except(:a) #=> {:b=>200, :c=>300} - * h.except(:b, :c, :d) #=> {:a=>100} - */ -static mrb_value -hash_except(mrb_state *mrb, mrb_value hash) -{ - const mrb_value *argv; - mrb_value result; - mrb_int argc, i; - - mrb_get_args(mrb, "*", &argv, &argc); - result = mrb_hash_dup(mrb, hash); - for (i = 0; i < argc; i++) { - mrb_hash_delete_key(mrb, result, argv[i]); - } - return result; -} - -void -mrb_mruby_hash_ext_gem_init(mrb_state *mrb) -{ - struct RClass *h; - - h = mrb->hash_class; - mrb_define_method(mrb, h, "values_at", hash_values_at, MRB_ARGS_ANY()); - mrb_define_method(mrb, h, "slice", hash_slice, MRB_ARGS_ANY()); - mrb_define_method(mrb, h, "except", hash_except, MRB_ARGS_ANY()); -} - -void -mrb_mruby_hash_ext_gem_final(mrb_state *mrb) -{ -} diff --git a/mrbgems/mruby-hash-ext/src/hash_ext.c b/mrbgems/mruby-hash-ext/src/hash_ext.c new file mode 100644 index 0000000000..567f96366d --- /dev/null +++ b/mrbgems/mruby-hash-ext/src/hash_ext.c @@ -0,0 +1,391 @@ +/* +** hash.c - Hash class +** +** See Copyright Notice in mruby.h +*/ + +#include +#include +#include +#include +#include + +/* + * call-seq: + * hsh.values_at(key, ...) -> array + * + * Return an array containing the values associated with the given keys. + * Also see `Hash.select`. + * + * h = { "cat" => "feline", "dog" => "canine", "cow" => "bovine" } + * h.values_at("cow", "cat") #=> ["bovine", "feline"] + */ + +static mrb_value +hash_values_at(mrb_state *mrb, mrb_value hash) +{ + const mrb_value *argv; + mrb_int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + mrb_value result = mrb_ary_new_capa(mrb, argc); + if (argc == 0) return result; + + int ai = mrb_gc_arena_save(mrb); + for (mrb_int i = 0; i < argc; i++) { + mrb_ary_push(mrb, result, mrb_hash_get(mrb, hash, argv[i])); + mrb_gc_arena_restore(mrb, ai); + } + return result; +} + +/* + * call-seq: + * hsh.slice(*keys) -> a_hash + * + * Returns a hash containing only the given keys and their values. + * + * h = { a: 100, b: 200, c: 300 } + * h.slice(:a) #=> {:a=>100} + * h.slice(:b, :c, :d) #=> {:b=>200, :c=>300} + */ +static mrb_value +hash_slice(mrb_state *mrb, mrb_value hash) +{ + const mrb_value *argv; + mrb_int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + mrb_value result = mrb_hash_new_capa(mrb, argc); + if (argc == 0) return result; /* empty hash */ + for (mrb_int i = 0; i < argc; i++) { + mrb_value key = argv[i]; + mrb_value val; + + val = mrb_hash_fetch(mrb, hash, key, mrb_undef_value()); + if (!mrb_undef_p(val)) { + mrb_hash_set(mrb, result, key, val); + } + } + return result; +} + +struct slice_bang_i_arg { + mrb_value keep_keys; + mrb_value keys_to_remove; +}; + +/* + * Iterator for `slice!`. + * + * Iterates over a hash, identifying keys that are not present in the `keep_keys` + * hash. Keys identified for removal are appended to the `keys_to_remove` array. + * + * @param mrb The mruby state. + * @param key The key of the current hash entry. + * @param val The value of the current hash entry (unused). + * @param data A pointer to a `slice_bang_i_arg` struct, which contains + * `keep_keys` and `keys_to_remove`. + * @return Always returns `0` to ensure the iteration continues over all + * hash entries. + */ +static int +slice_bang_i(mrb_state *mrb, mrb_value key, mrb_value val, void *data) +{ + struct slice_bang_i_arg *args = (struct slice_bang_i_arg *)data; + if (!mrb_hash_key_p(mrb, args->keep_keys, key)) { + mrb_ary_push(mrb, args->keys_to_remove, key); + } + return 0; /* Continue iteration */ +} + +/* + * call-seq: + * hsh.slice!(*keys) -> a_hash + * + * Deletes keys from hsh that are not in `keys`. + * Returns a new hash containing the deleted key-value pairs. + * + * h = { a: 1, b: 2, c: 3, d: 4 } + * h.slice!(:a, :c) #=> { b: 2, d: 4 } + * h #=> { a: 1, c: 3 } + */ +static mrb_value +hash_slice_bang(mrb_state *mrb, mrb_value self) +{ + struct slice_bang_i_arg args; + const mrb_value *argv; + mrb_int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + + args.keep_keys = mrb_hash_new_capa(mrb, argc); + for (mrb_int i = 0; i < argc; i++) { + mrb_hash_set(mrb, args.keep_keys, argv[i], mrb_true_value()); + } + + args.keys_to_remove = mrb_ary_new(mrb); + mrb_hash_foreach(mrb, mrb_hash_ptr(self), slice_bang_i, &args); + + mrb_int len = RARRAY_LEN(args.keys_to_remove); + mrb_value removed_hash = mrb_hash_new_capa(mrb, len); + for (mrb_int i = 0; i < len; i++) { + mrb_value key = mrb_ary_ref(mrb, args.keys_to_remove, i); + mrb_value val = mrb_hash_delete_key(mrb, self, key); + mrb_hash_set(mrb, removed_hash, key, val); + } + + return removed_hash; +} + +/* + * call-seq: + * hsh.except(*keys) -> a_hash + * + * Returns a hash excluding the given keys and their values. + * + * h = { a: 100, b: 200, c: 300 } + * h.except(:a) #=> {:b=>200, :c=>300} + * h.except(:b, :c, :d) #=> {:a=>100} + */ +static mrb_value +hash_except(mrb_state *mrb, mrb_value hash) +{ + const mrb_value *argv; + mrb_int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + mrb_value result = mrb_hash_dup(mrb, hash); + for (mrb_int i = 0; i < argc; i++) { + mrb_hash_delete_key(mrb, result, argv[i]); + } + return result; +} + +/* + * call-seq: + * Hash[ key, value, ... ] -> new_hash + * Hash[ [ [key, value], ... ] ] -> new_hash + * Hash[ object ] -> new_hash + * + * Creates a new hash populated with the given objects. + * + * Similar to the literal `{ _key_ => _value_, ... }`. In the first + * form, keys and values occur in pairs, so there must be an even number of + * arguments. + * + * The second and third form take a single argument which is either an array + * of key-value pairs or an object convertible to a hash. + * + * Hash["a", 100, "b", 200] #=> {"a"=>100, "b"=>200} + * Hash[ [ ["a", 100], ["b", 200] ] ] #=> {"a"=>100, "b"=>200} + * Hash["a" => 100, "b" => 200] #=> {"a"=>100, "b"=>200} + */ +static mrb_value +hash_s_create(mrb_state *mrb, mrb_value klass) +{ + const mrb_value *argv; + mrb_int argc; + mrb_value hash; + + mrb_get_args(mrb, "*", &argv, &argc); + + if (argc == 1) { + mrb_value obj = argv[0]; + + /* Case 1: Hash argument - copy constructor */ + if (mrb_hash_p(obj)) { + hash = mrb_hash_new(mrb); + mrb_hash_merge(mrb, hash, obj); + /* Set the correct class if it's a subclass */ + if (mrb_class_ptr(klass) != mrb->hash_class) { + mrb_obj_ptr(hash)->c = mrb_class_ptr(klass); + } + return hash; + } + + /* Case 2: Array argument with nested arrays */ + if (mrb_array_p(obj)) { + mrb_int ary_len = RARRAY_LEN(obj); + hash = mrb_hash_new_capa(mrb, ary_len); + + for (mrb_int i = 0; i < ary_len; i++) { + mrb_value elem = mrb_ary_ref(mrb, obj, i); + mrb_value key = mrb_nil_value(), val = mrb_nil_value(); + + if (!mrb_array_p(elem)) { + mrb_raisef(mrb, E_ARGUMENT_ERROR, + "wrong element type %C (expected array)", mrb_obj_class(mrb, elem)); + } + + mrb_int elem_len = RARRAY_LEN(elem); + + switch (elem_len) { + case 2: + key = mrb_ary_ref(mrb, elem, 0); + val = mrb_ary_ref(mrb, elem, 1); + break; + case 1: + key = mrb_ary_ref(mrb, elem, 0); + val = mrb_nil_value(); + break; + case 0: + default: + mrb_raisef(mrb, E_ARGUMENT_ERROR, + "invalid number of elements (%i for 1..2)", elem_len); + } + + mrb_hash_set(mrb, hash, key, val); + } + /* Set the correct class if it's a subclass */ + if (mrb_class_ptr(klass) != mrb->hash_class) { + mrb_obj_ptr(hash)->c = mrb_class_ptr(klass); + } + return hash; + } + } + + /* Case 3: Multiple arguments as key-value pairs */ + if (argc % 2 != 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "odd number of arguments for Hash"); + } + + hash = mrb_hash_new_capa(mrb, argc / 2); + for (mrb_int i = 0; i < argc; i += 2) { + mrb_hash_set(mrb, hash, argv[i], argv[i + 1]); + } + + /* Set the correct class if it's a subclass */ + if (mrb_class_ptr(klass) != mrb->hash_class) { + mrb_obj_ptr(hash)->c = mrb_class_ptr(klass); + } + + return hash; +} + +/* Data structure for hash_key search */ +struct key_search { + mrb_value target; + mrb_value result; + mrb_bool found; +}; + +/* + * Iterator for `key`. + * + * This function is designed to be used with `mrb_hash_foreach`. It iterates + * over hash entries to find a key that corresponds to a specific target value. + * + * When a match is found, it stores the key, sets a flag, and stops the + * iteration. + * + * @param mrb The mruby state. + * @param key The key of the current hash entry. + * @param val The value of the current hash entry. + * @param data A pointer to a `key_search` struct, which contains the + * target value and holds the result. + * @return Returns `1` to stop the iteration once a match is found, + * otherwise returns `0` to continue. + */ +static int +hash_key_i(mrb_state *mrb, mrb_value key, mrb_value val, void *data) +{ + struct key_search *search = (struct key_search*)data; + if (mrb_equal(mrb, val, search->target)) { + search->result = key; + search->found = TRUE; + return 1; /* Stop iteration */ + } + return 0; /* Continue iteration */ +} + +/* + * call-seq: + * hsh.key(value) -> key + * + * Returns the key of an occurrence of a given value. If the value is + * not found, returns `nil`. + * + * h = { "a" => 100, "b" => 200, "c" => 300, "d" => 300 } + * h.key(200) #=> "b" + * h.key(300) #=> "c" or "d" + * h.key(999) #=> nil + */ +static mrb_value +hash_key(mrb_state *mrb, mrb_value hash) +{ + mrb_value val; + struct key_search search; + + mrb_get_args(mrb, "o", &val); + + search.target = val; + search.result = mrb_nil_value(); + search.found = FALSE; + + mrb_hash_foreach(mrb, mrb_hash_ptr(hash), hash_key_i, &search); + + return search.found ? search.result : mrb_nil_value(); +} + +/* + * call-seq: + * hsh.__merge(*others) -> hsh + * + * Merges multiple hashes into hsh. This is an internal method + * used by merge! for non-block cases. + * + * Raises ArgumentError if no arguments given. + * Raises TypeError if any argument is not a Hash. + * + * h = { a: 1, b: 2 } + * h.__merge({ c: 3 }, { d: 4 }) #=> { a: 1, b: 2, c: 3, d: 4 } + */ +static mrb_value +hash_merge(mrb_state *mrb, mrb_value hash) +{ + const mrb_value *argv; + mrb_int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + + /* Validate arguments */ + if (argc == 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (given 0, expected 1+)"); + } + + /* Merge multiple hashes in C */ + for (mrb_int i = 0; i < argc; i++) { + if (!mrb_hash_p(argv[i])) { + mrb_raisef(mrb, E_TYPE_ERROR, "no implicit conversion of %C into Hash", + mrb_obj_class(mrb, argv[i])); + } + mrb_hash_merge(mrb, hash, argv[i]); + } + + return hash; +} + +static const mrb_mt_entry hash_ext_rom_entries[] = { + MRB_MT_ENTRY(hash_values_at, MRB_SYM(values_at), MRB_ARGS_ANY()), + MRB_MT_ENTRY(hash_slice, MRB_SYM(slice), MRB_ARGS_ANY()), + MRB_MT_ENTRY(hash_slice_bang, MRB_SYM_B(slice), MRB_ARGS_ANY()), + MRB_MT_ENTRY(hash_except, MRB_SYM(except), MRB_ARGS_ANY()), + MRB_MT_ENTRY(hash_key, MRB_SYM(key), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(hash_merge, MRB_SYM(__merge), MRB_ARGS_ANY()), +}; + +void +mrb_mruby_hash_ext_gem_init(mrb_state *mrb) +{ + struct RClass *h; + + h = mrb->hash_class; + MRB_MT_INIT_ROM(mrb, h, hash_ext_rom_entries); + mrb_define_class_method_id(mrb, h, MRB_OPSYM(aref), hash_s_create, MRB_ARGS_ANY()); +} + +void +mrb_mruby_hash_ext_gem_final(mrb_state *mrb) +{ +} diff --git a/mrbgems/mruby-hash-ext/test/hash.rb b/mrbgems/mruby-hash-ext/test/hash.rb index e60eb09b21..13b8df6129 100644 --- a/mrbgems/mruby-hash-ext/test/hash.rb +++ b/mrbgems/mruby-hash-ext/test/hash.rb @@ -46,27 +46,27 @@ end assert('Hash#merge!') do + # Single hash merge a = { 'abc_key' => 'abc_value', 'cba_key' => 'cba_value' } - b = { 'cba_key' => 'XXX', 'xyz_key' => 'xyz_value' } - + b = { 'cba_key' => 'XXX', 'xyz_key' => 'xyz_value' } result_1 = a.merge! b + assert_equal({'abc_key' => 'abc_value', 'cba_key' => 'XXX', + 'xyz_key' => 'xyz_value' }, result_1) + # Block handling a = { 'abc_key' => 'abc_value', 'cba_key' => 'cba_value' } result_2 = a.merge!(b) do |key, original, new| original end - - assert_equal({'abc_key' => 'abc_value', 'cba_key' => 'XXX', - 'xyz_key' => 'xyz_value' }, result_1) assert_equal({'abc_key' => 'abc_value', 'cba_key' => 'cba_value', 'xyz_key' => 'xyz_value' }, result_2) - assert_raise(TypeError) do - { 'abc_key' => 'abc_value' }.merge! "a" - end - - # multiple arguments + # Multiple arguments assert_equal({a:1,b:2,c:3}, {a:1}.merge!({b:2},{c:3})) + + # Error cases + assert_raise(ArgumentError) { {}.merge!() } + assert_raise(TypeError) { {}.merge!("not a hash") } end assert('Hash#values_at') do @@ -89,8 +89,8 @@ assert('Hash#compact!') do h = { "cat" => "feline", "dog" => nil, "cow" => false } - h.compact! - assert_equal({ "cat" => "feline", "cow" => false }, h) + assert_equal({ "cat" => "feline", "cow" => false }, h.compact!) + assert_nil(h.compact!) end assert('Hash#fetch') do @@ -292,6 +292,23 @@ assert_equal({:b=>200, :c=>300}, h.slice(:b, :c, :d)) end +assert("Hash#slice!") do + h = { a: 1, b: 2, c: 3, d: 4 } + removed = h.slice!(:a, :c) + assert_equal({ a: 1, c: 3 }, h) + assert_equal({ b: 2, d: 4 }, removed) + + h = { a: 1, b: 2 } + removed = h.slice!() + assert_equal({}, h) + assert_equal({ a: 1, b: 2 }, removed) + + h = { a: 1, b: 2 } + removed = h.slice!(:a, :b, :c) + assert_equal({ a: 1, b: 2 }, h) + assert_equal({}, removed) +end + assert("Hash#except") do h = { a: 100, b: 200, c: 300 } assert_equal({:b=>200, :c=>300}, h.except(:a)) diff --git a/mrbgems/mruby-io/README.md b/mrbgems/mruby-io/README.md index 3e601b62ac..390289e29e 100644 --- a/mrbgems/mruby-io/README.md +++ b/mrbgems/mruby-io/README.md @@ -14,10 +14,10 @@ Add the line below to your build configuration. ### IO -* +- | method | mruby-io | memo | -|----------------------------|----------|----------| +| -------------------------- | -------- | -------- | | IO.binread | | | | IO.binwrite | | | | IO.copy_stream | | | @@ -33,8 +33,8 @@ Add the line below to your build configuration. | IO.write | | | | IO#<< | | | | IO#advise | | | -| IO#autoclose= | | | -| IO#autoclose? | | | +| IO#autoclose= | o | | +| IO#autoclose? | o | | | IO#binmode | | | | IO#binmode? | | | | IO#bytes | | obsolete | @@ -99,10 +99,10 @@ Add the line below to your build configuration. ### File -* +- | method | mruby-io | memo | -|-----------------------------|----------|----------| +| --------------------------- | -------- | -------- | | File.absolute_path | | | | File.atime | | | | File.basename | o | | @@ -158,17 +158,22 @@ Add the line below to your build configuration. | File.writable? | | FileTest | | File.writable_real? | | FileTest | | File.zero? | o | FileTest | -| File#atime | | | +| File#atime | o | | | File#chmod | | | | File#chown | | | -| File#ctime | | | +| File#ctime | o | | | File#flock | o | | | File#lstat | | | -| File#mtime | | | -| File#path, File#to_path | o | | +| File#mtime | o | | +| File#path | o | | | File#size | | | | File#truncate | | | +## Porting Note + +If your (non Windows) platform does not support `getpwnam(3)` for some reason, define `MRB_IO_NO_PWNAM`. +See [mruby#5358](https://github.com/mruby/mruby/issues/5358). + ## License Copyright (c) 2013 Internet Initiative Japan Inc. diff --git a/mrbgems/mruby-io/include/io_hal.h b/mrbgems/mruby-io/include/io_hal.h new file mode 100644 index 0000000000..cce23f3ac3 --- /dev/null +++ b/mrbgems/mruby-io/include/io_hal.h @@ -0,0 +1,450 @@ +/* +** io_hal.h - IO Hardware Abstraction Layer (HAL) +** +** See Copyright Notice in mruby.h +** +** This header defines the HAL interface for platform-specific I/O operations. +** Platform-specific implementations (hal-posix-io, hal-win-io, etc.) must +** provide all functions declared here. +*/ + +#ifndef MRUBY_IO_HAL_H +#define MRUBY_IO_HAL_H + +#include + +/* + * Platform-independent type definitions + */ + +/* File status structure - platform-independent representation */ +typedef struct mrb_io_stat { + mrb_int st_dev; /* Device ID */ + mrb_int st_ino; /* Inode number */ + mrb_int st_mode; /* File mode/permissions */ + mrb_int st_nlink; /* Number of hard links */ + mrb_int st_uid; /* User ID */ + mrb_int st_gid; /* Group ID */ + mrb_int st_rdev; /* Device ID (if special file) */ + mrb_int st_size; /* File size in bytes */ + mrb_int st_atime; /* Last access time */ + mrb_int st_mtime; /* Last modification time */ + mrb_int st_ctime; /* Last status change time */ + mrb_int st_blksize; /* Block size for filesystem I/O */ + mrb_int st_blocks; /* Number of 512B blocks allocated */ +} mrb_io_stat; + +/* Timeval structure for select() */ +typedef struct mrb_io_timeval { + mrb_int tv_sec; /* Seconds */ + mrb_int tv_usec; /* Microseconds */ +} mrb_io_timeval; + +/* File descriptor set for select() */ +typedef struct mrb_io_fdset mrb_io_fdset; + +/* + * File mode constants (POSIX-style) + */ + +/* File type masks */ +#define MRB_IO_S_IFMT 0170000 /* Type of file mask */ +#define MRB_IO_S_IFSOCK 0140000 /* Socket */ +#define MRB_IO_S_IFLNK 0120000 /* Symbolic link */ +#define MRB_IO_S_IFREG 0100000 /* Regular file */ +#define MRB_IO_S_IFBLK 0060000 /* Block device */ +#define MRB_IO_S_IFDIR 0040000 /* Directory */ +#define MRB_IO_S_IFCHR 0020000 /* Character device */ +#define MRB_IO_S_IFIFO 0010000 /* FIFO */ + +/* File type test macros */ +#define MRB_IO_S_ISREG(m) (((m) & MRB_IO_S_IFMT) == MRB_IO_S_IFREG) +#define MRB_IO_S_ISDIR(m) (((m) & MRB_IO_S_IFMT) == MRB_IO_S_IFDIR) +#define MRB_IO_S_ISCHR(m) (((m) & MRB_IO_S_IFMT) == MRB_IO_S_IFCHR) +#define MRB_IO_S_ISBLK(m) (((m) & MRB_IO_S_IFMT) == MRB_IO_S_IFBLK) +#define MRB_IO_S_ISFIFO(m) (((m) & MRB_IO_S_IFMT) == MRB_IO_S_IFIFO) +#define MRB_IO_S_ISLNK(m) (((m) & MRB_IO_S_IFMT) == MRB_IO_S_IFLNK) +#define MRB_IO_S_ISSOCK(m) (((m) & MRB_IO_S_IFMT) == MRB_IO_S_IFSOCK) + +/* File lock constants */ +#define MRB_IO_LOCK_SH 1 /* Shared lock */ +#define MRB_IO_LOCK_EX 2 /* Exclusive lock */ +#define MRB_IO_LOCK_NB 4 /* Non-blocking */ +#define MRB_IO_LOCK_UN 8 /* Unlock */ + +/* Seek constants */ +#define MRB_IO_SEEK_SET 0 /* Seek from beginning */ +#define MRB_IO_SEEK_CUR 1 /* Seek from current position */ +#define MRB_IO_SEEK_END 2 /* Seek from end */ + +/* + * HAL Interface - File Operations + */ + +/** + * Get file status by path + * + * @param mrb mruby state + * @param path File path (UTF-8) + * @param st Output stat structure + * @return 0 on success, -1 on error (sets errno) + */ +int mrb_hal_io_stat(mrb_state *mrb, const char *path, mrb_io_stat *st); + +/** + * Get file status by descriptor + * + * @param mrb mruby state + * @param fd File descriptor + * @param st Output stat structure + * @return 0 on success, -1 on error (sets errno) + */ +int mrb_hal_io_fstat(mrb_state *mrb, int fd, mrb_io_stat *st); + +/** + * Get link status (don't follow symlinks) + * + * @param mrb mruby state + * @param path File path (UTF-8) + * @param st Output stat structure + * @return 0 on success, -1 on error (sets errno) + */ +int mrb_hal_io_lstat(mrb_state *mrb, const char *path, mrb_io_stat *st); + +/** + * Change file permissions + * + * @param mrb mruby state + * @param path File path (UTF-8) + * @param mode New permission mode + * @return 0 on success, -1 on error (sets errno) + */ +int mrb_hal_io_chmod(mrb_state *mrb, const char *path, mrb_int mode); + +/** + * Set/get file creation mask + * + * @param mrb mruby state + * @param mask New umask value (if < 0, only returns current value) + * @return Previous umask value + */ +mrb_int mrb_hal_io_umask(mrb_state *mrb, mrb_int mask); + +/** + * Truncate file to specified length + * + * @param mrb mruby state + * @param fd File descriptor + * @param length New file length + * @return 0 on success, -1 on error (sets errno) + */ +int mrb_hal_io_ftruncate(mrb_state *mrb, int fd, mrb_int length); + +/** + * Apply or remove advisory lock on file + * + * @param mrb mruby state + * @param fd File descriptor + * @param operation Lock operation (MRB_IO_LOCK_*) + * @return 0 on success, -1 on error (sets errno) + */ +int mrb_hal_io_flock(mrb_state *mrb, int fd, int operation); + +/** + * Delete a file + * + * @param mrb mruby state + * @param path File path (UTF-8) + * @return 0 on success, -1 on error (sets errno) + */ +int mrb_hal_io_unlink(mrb_state *mrb, const char *path); + +/** + * Rename a file + * + * @param mrb mruby state + * @param oldpath Old file path (UTF-8) + * @param newpath New file path (UTF-8) + * @return 0 on success, -1 on error (sets errno) + */ +int mrb_hal_io_rename(mrb_state *mrb, const char *oldpath, const char *newpath); + +/** + * Create a symbolic link + * + * @param mrb mruby state + * @param target Target path (UTF-8) + * @param linkpath Link path (UTF-8) + * @return 0 on success, -1 on error (sets errno) + */ +int mrb_hal_io_symlink(mrb_state *mrb, const char *target, const char *linkpath); + +/** + * Read value of a symbolic link + * + * @param mrb mruby state + * @param path Symlink path (UTF-8) + * @param buf Buffer to store result (UTF-8) + * @param bufsize Buffer size + * @return Number of bytes placed in buf, -1 on error (sets errno) + */ +mrb_int mrb_hal_io_readlink(mrb_state *mrb, const char *path, char *buf, size_t bufsize); + +/** + * Resolve pathname to absolute path + * + * @param mrb mruby state + * @param path Pathname (UTF-8) + * @param resolved Buffer for resolved path (must be at least PATH_MAX size) + * @return Pointer to resolved on success, NULL on error (sets errno) + */ +char* mrb_hal_io_realpath(mrb_state *mrb, const char *path, char *resolved); + +/** + * Get current working directory + * + * @param mrb mruby state + * @param buf Buffer to store result (UTF-8) + * @param size Buffer size + * @return Pointer to buf on success, NULL on error (sets errno) + */ +char* mrb_hal_io_getcwd(mrb_state *mrb, char *buf, size_t size); + +/** + * Get environment variable + * + * @param mrb mruby state + * @param name Variable name + * @return Value string (UTF-8) or NULL if not found + */ +const char* mrb_hal_io_getenv(mrb_state *mrb, const char *name); + +/** + * Get user's home directory + * + * @param mrb mruby state + * @param username User name (NULL for current user) + * @return Home directory path (UTF-8) or NULL on error (sets errno) + */ +const char* mrb_hal_io_gethome(mrb_state *mrb, const char *username); + +/* + * HAL Interface - Core I/O Operations + */ + +/** + * Open file + * + * @param mrb mruby state + * @param path File path (UTF-8) + * @param flags Open flags (O_RDONLY, O_WRONLY, O_RDWR, etc.) + * @param mode Creation mode (used if O_CREAT is set) + * @return File descriptor on success, -1 on error (sets errno) + */ +int mrb_hal_io_open(mrb_state *mrb, const char *path, int flags, mrb_int mode); + +/** + * Close file descriptor + * + * @param mrb mruby state + * @param fd File descriptor + * @return 0 on success, -1 on error (sets errno) + */ +int mrb_hal_io_close(mrb_state *mrb, int fd); + +/** + * Read from file descriptor + * + * @param mrb mruby state + * @param fd File descriptor + * @param buf Buffer to store data + * @param count Maximum bytes to read + * @return Number of bytes read, 0 on EOF, -1 on error (sets errno) + */ +mrb_int mrb_hal_io_read(mrb_state *mrb, int fd, void *buf, size_t count); + +/** + * Write to file descriptor + * + * @param mrb mruby state + * @param fd File descriptor + * @param buf Data to write + * @param count Number of bytes to write + * @return Number of bytes written, -1 on error (sets errno) + */ +mrb_int mrb_hal_io_write(mrb_state *mrb, int fd, const void *buf, size_t count); + +/** + * Reposition file offset + * + * @param mrb mruby state + * @param fd File descriptor + * @param offset Offset value + * @param whence Reference point (MRB_IO_SEEK_SET/CUR/END) + * @return New offset from beginning of file, -1 on error (sets errno) + */ +mrb_int mrb_hal_io_lseek(mrb_state *mrb, int fd, mrb_int offset, int whence); + +/** + * Duplicate file descriptor + * + * @param mrb mruby state + * @param fd File descriptor to duplicate + * @return New descriptor on success, -1 on error (sets errno) + */ +int mrb_hal_io_dup(mrb_state *mrb, int fd); + +/** + * Manipulate file descriptor + * + * @param mrb mruby state + * @param fd File descriptor + * @param cmd Command (F_GETFD, F_SETFD, etc.) + * @param arg Command argument + * @return Depends on command, -1 on error (sets errno) + */ +int mrb_hal_io_fcntl(mrb_state *mrb, int fd, int cmd, int arg); + +/** + * Check if descriptor refers to terminal + * + * @param mrb mruby state + * @param fd File descriptor + * @return 1 if TTY, 0 if not, -1 on error (sets errno) + */ +int mrb_hal_io_isatty(mrb_state *mrb, int fd); + +/** + * Create pipe + * + * @param mrb mruby state + * @param fds Array to store two file descriptors [read_end, write_end] + * @return 0 on success, -1 on error (sets errno) + */ +int mrb_hal_io_pipe(mrb_state *mrb, int fds[2]); + +/* + * HAL Interface - Process Operations + */ + +/** + * Spawn a new process + * + * Creates a new process and executes the command. File descriptors can be + * redirected for stdin/stdout/stderr (-1 means don't redirect). + * + * POSIX: Uses fork() + dup2() + execl() + * Windows: Uses CreateProcess() with STARTUPINFO + * + * @param mrb mruby state + * @param cmd Command to execute (shell command) + * @param stdin_fd File descriptor to use for stdin (-1 = don't redirect) + * @param stdout_fd File descriptor to use for stdout (-1 = don't redirect) + * @param stderr_fd File descriptor to use for stderr (-1 = don't redirect) + * @param pid Output parameter for process ID + * @return 0 on success, -1 on error (sets errno) + */ +int mrb_hal_io_spawn_process(mrb_state *mrb, const char *cmd, + int stdin_fd, int stdout_fd, int stderr_fd, + int *pid); + +/** + * Wait for process to change state + * + * @param mrb mruby state + * @param pid Process ID to wait for + * @param status Output parameter for exit status + * @param options Wait options (0 for blocking wait) + * @return Process ID on success, -1 on error (sets errno) + */ +int mrb_hal_io_waitpid(mrb_state *mrb, int pid, int *status, int options); + +/* + * HAL Interface - I/O Multiplexing + */ + +/** + * Allocate file descriptor set + * + * @param mrb mruby state + * @return Pointer to fdset or NULL on error + */ +mrb_io_fdset* mrb_hal_io_fdset_alloc(mrb_state *mrb); + +/** + * Free file descriptor set + * + * @param mrb mruby state + * @param fdset File descriptor set to free + */ +void mrb_hal_io_fdset_free(mrb_state *mrb, mrb_io_fdset *fdset); + +/** + * Clear file descriptor set + * + * @param mrb mruby state + * @param fdset File descriptor set + */ +void mrb_hal_io_fdset_zero(mrb_state *mrb, mrb_io_fdset *fdset); + +/** + * Add descriptor to set + * + * @param mrb mruby state + * @param fd File descriptor + * @param fdset File descriptor set + */ +void mrb_hal_io_fdset_set(mrb_state *mrb, int fd, mrb_io_fdset *fdset); + +/** + * Check if descriptor is in set + * + * @param mrb mruby state + * @param fd File descriptor + * @param fdset File descriptor set + * @return Non-zero if fd is in set, 0 otherwise + */ +int mrb_hal_io_fdset_isset(mrb_state *mrb, int fd, mrb_io_fdset *fdset); + +/** + * Monitor multiple file descriptors + * + * @param mrb mruby state + * @param nfds Highest file descriptor number + 1 + * @param readfds Set of descriptors to check for reading (NULL = ignore) + * @param writefds Set of descriptors to check for writing (NULL = ignore) + * @param errorfds Set of descriptors to check for errors (NULL = ignore) + * @param timeout Timeout (NULL = block indefinitely) + * @return Number of ready descriptors, 0 on timeout, -1 on error (sets errno) + */ +int mrb_hal_io_select(mrb_state *mrb, int nfds, + mrb_io_fdset *readfds, + mrb_io_fdset *writefds, + mrb_io_fdset *errorfds, + mrb_io_timeval *timeout); + +/* + * HAL Initialization/Finalization + */ + +/** + * Initialize I/O HAL + * + * Called during gem initialization. Platform-specific HAL should perform + * any necessary setup here. + * + * @param mrb mruby state + */ +void mrb_hal_io_init(mrb_state *mrb); + +/** + * Finalize I/O HAL + * + * Called during gem finalization. Platform-specific HAL should perform + * any necessary cleanup here. + * + * @param mrb mruby state + */ +void mrb_hal_io_final(mrb_state *mrb); + +#endif /* MRUBY_IO_HAL_H */ diff --git a/mrbgems/mruby-io/include/mruby/ext/io.h b/mrbgems/mruby-io/include/mruby/io.h similarity index 65% rename from mrbgems/mruby-io/include/mruby/ext/io.h rename to mrbgems/mruby-io/include/mruby/io.h index 714f58fd16..e76241ba61 100644 --- a/mrbgems/mruby-io/include/mruby/ext/io.h +++ b/mrbgems/mruby-io/include/mruby/io.h @@ -15,22 +15,34 @@ extern "C" { #endif -#if defined(MRB_WITHOUT_IO_PREAD_PWRITE) -# undef MRB_WITH_IO_PREAD_PWRITE -#elif !defined(MRB_WITH_IO_PREAD_PWRITE) -# if defined(__unix__) || defined(__MACH__) -# define MRB_WITH_IO_PREAD_PWRITE +#if defined(MRB_NO_IO_PREAD_PWRITE) || defined(MRB_WITHOUT_IO_PREAD_PWRITE) +# undef MRB_USE_IO_PREAD_PWRITE +#elif !defined(MRB_USE_IO_PREAD_PWRITE) +# if defined(__unix__) || defined(__MACH__) || defined(MRB_WITH_IO_PREAD_PWRITE) +# define MRB_USE_IO_PREAD_PWRITE # endif #endif +#define MRB_IO_BUF_SIZE 4096 + +struct mrb_io_buf { + short start; + short len; + char mem[MRB_IO_BUF_SIZE]; +}; + struct mrb_io { int fd; /* file descriptor, or -1 */ int fd2; /* file descriptor to write if it's different from fd, or -1 */ int pid; /* child's pid (for pipes) */ unsigned int readable:1, writable:1, + eof:1, sync:1, - is_socket:1; + is_socket:1, + close_fd:1, + close_fd2:1; + struct mrb_io_buf *buf; }; #define MRB_O_RDONLY 0x0000 @@ -53,14 +65,8 @@ struct mrb_io { #define MRB_O_DSYNC 0x00008000 #define MRB_O_RSYNC 0x00010000 -#define MRB_O_RDONLY_P(f) ((mrb_bool)(((f) & MRB_O_ACCMODE) == MRB_O_RDONLY)) -#define MRB_O_WRONLY_P(f) ((mrb_bool)(((f) & MRB_O_ACCMODE) == MRB_O_WRONLY)) -#define MRB_O_RDWR_P(f) ((mrb_bool)(((f) & MRB_O_ACCMODE) == MRB_O_RDWR)) -#define MRB_O_READABLE_P(f) ((mrb_bool)((((f) & MRB_O_ACCMODE) | 2) == 2)) -#define MRB_O_WRITABLE_P(f) ((mrb_bool)(((((f) & MRB_O_ACCMODE) + 1) & 2) == 2)) - -#define E_IO_ERROR (mrb_exc_get(mrb, "IOError")) -#define E_EOF_ERROR (mrb_exc_get(mrb, "EOFError")) +#define E_IO_ERROR mrb_exc_get_id(mrb, MRB_ERROR_SYM(IOError)) +#define E_EOF_ERROR mrb_exc_get_id(mrb, MRB_ERROR_SYM(EOFError)) int mrb_io_fileno(mrb_state *mrb, mrb_value io); diff --git a/mrbgems/mruby-io/mrbgem.rake b/mrbgems/mruby-io/mrbgem.rake index 3bb3febd31..b9118713b7 100644 --- a/mrbgems/mruby-io/mrbgem.rake +++ b/mrbgems/mruby-io/mrbgem.rake @@ -3,8 +3,10 @@ MRuby::Gem::Specification.new('mruby-io') do |spec| spec.authors = ['Internet Initiative Japan Inc.', 'mruby developers'] spec.summary = 'IO and File class' + spec.build.defines << "HAVE_MRUBY_IO_GEM" + spec.add_test_dependency 'mruby-time', core: 'mruby-time' + if spec.for_windows? spec.linker.libraries << "ws2_32" end - spec.add_test_dependency 'mruby-time', core: 'mruby-time' end diff --git a/mrbgems/mruby-io/mrblib/file.rb b/mrbgems/mruby-io/mrblib/file.rb index 9398acef67..a1c62d6d71 100644 --- a/mrbgems/mruby-io/mrblib/file.rb +++ b/mrbgems/mruby-io/mrblib/file.rb @@ -1,6 +1,21 @@ class File < IO + # The path to the file attr_accessor :path + # + # call-seq: + # File.new(filename, mode="r") -> file + # File.new(filename [, mode [, perm]]) -> file + # File.new(fd [, mode]) -> file + # + # Opens the file named by filename according to the given mode and returns + # a new File object. If a file descriptor is given instead of a filename, + # the new File object will be associated with that descriptor. + # + # f = File.new("testfile", "r") + # f = File.new("newfile", "w+") + # f = File.new("tmpfile", "a") + # def initialize(fd_or_path, mode = "r", perm = 0666) if fd_or_path.kind_of? Integer super(fd_or_path, mode) @@ -11,134 +26,74 @@ def initialize(fd_or_path, mode = "r", perm = 0666) end end - def mtime - t = self._mtime + # + # call-seq: + # file.atime -> time + # + # Returns the last access time for file, or epoch if the platform + # doesn't have access time. + # + # File.new("testfile").atime #=> Wed Apr 09 08:51:48 CDT 2003 + # + def atime + t = self._atime t && Time.at(t) end - def self.join(*names) - return "" if names.empty? - - names.map! do |name| - case name - when String - name - when Array - if names == name - raise ArgumentError, "recursive array" - end - join(*name) - else - raise TypeError, "no implicit conversion of #{name.class} into String" - end - end - - return names[0] if names.size == 1 - - if names[0][-1] == File::SEPARATOR - s = names[0][0..-2] - else - s = names[0].dup - end - - (1..names.size-2).each { |i| - t = names[i] - if t[0] == File::SEPARATOR and t[-1] == File::SEPARATOR - t = t[1..-2] - elsif t[0] == File::SEPARATOR - t = t[1..-1] - elsif t[-1] == File::SEPARATOR - t = t[0..-2] - end - s += File::SEPARATOR + t if t != "" - } - if names[-1][0] == File::SEPARATOR - s += File::SEPARATOR + names[-1][1..-1] - else - s += File::SEPARATOR + names[-1] - end - s - end - - def self._concat_path(path, base_path) - if path[0] == "/" || path[1] == ':' # Windows root! - expanded_path = path - elsif path[0] == "~" - if (path[1] == "/" || path[1] == nil) - dir = path[1, path.size] - home_dir = _gethome - - unless home_dir - raise ArgumentError, "couldn't find HOME environment -- expanding '~'" - end - - expanded_path = home_dir - expanded_path += dir if dir - expanded_path += "/" - else - splitted_path = path.split("/") - user = splitted_path[0][1, splitted_path[0].size] - dir = "/" + splitted_path[1, splitted_path.size].join("/") - - home_dir = _gethome(user) - - unless home_dir - raise ArgumentError, "user #{user} doesn't exist" - end - - expanded_path = home_dir - expanded_path += dir if dir - expanded_path += "/" - end - else - expanded_path = _concat_path(base_path, _getwd) - expanded_path += "/" + path - end - - expanded_path + # + # call-seq: + # file.ctime -> time + # + # Returns the change time for file (that is, the time directory + # information about the file was changed, not the file itself). + # + # File.new("testfile").ctime #=> Wed Apr 09 08:53:13 CDT 2003 + # + def ctime + t = self._ctime + t && Time.at(t) end - def self.expand_path(path, default_dir = '.') - expanded_path = _concat_path(path, default_dir) - drive_prefix = "" - if File::ALT_SEPARATOR && expanded_path.size > 2 && - ("A".."Z").include?(expanded_path[0].upcase) && expanded_path[1] == ":" - drive_prefix = expanded_path[0, 2] - expanded_path = expanded_path[2, expanded_path.size] - end - expand_path_array = [] - if File::ALT_SEPARATOR && expanded_path.include?(File::ALT_SEPARATOR) - expanded_path.gsub!(File::ALT_SEPARATOR, '/') - end - while expanded_path.include?('//') - expanded_path = expanded_path.gsub('//', '/') - end - - if expanded_path != "/" - expanded_path.split('/').each do |path_token| - if path_token == '..' - if expand_path_array.size > 1 - expand_path_array.pop - end - elsif path_token == '.' - # nothing to do. - else - expand_path_array << path_token - end - end - - expanded_path = expand_path_array.join("/") - if expanded_path.empty? - expanded_path = '/' - end - end - if drive_prefix.empty? - expanded_path - else - drive_prefix + expanded_path.gsub("/", File::ALT_SEPARATOR) - end + # + # call-seq: + # file.mtime -> time + # + # Returns the modification time for file. + # + # File.new("testfile").mtime #=> Wed Apr 09 08:53:14 CDT 2003 + # + def mtime + t = self._mtime + t && Time.at(t) end + # + # call-seq: + # file.inspect -> string + # + # Return a string describing this File object. + # + # File.new("testfile").inspect #=> "#" + # + def inspect + "<#{self.class}:#{@path}>" + end + + + # + # call-seq: + # File.foreach(name) {|line| block } -> nil + # File.foreach(name) -> an_enumerator + # + # Executes the block for every line in the named I/O port, where lines + # are separated by sep. + # + # File.foreach("testfile") {|x| print "GOT ", x } + # GOT This is line one + # GOT This is line two + # GOT This is line three + # GOT And so on... + # def self.foreach(file) if block_given? self.open(file) do |f| @@ -149,60 +104,6 @@ def self.foreach(file) end end - def self.directory?(file) - FileTest.directory?(file) - end - - def self.exist?(file) - FileTest.exist?(file) - end - - def self.exists?(file) - FileTest.exists?(file) - end - - def self.file?(file) - FileTest.file?(file) - end - - def self.pipe?(file) - FileTest.pipe?(file) - end - - def self.size(file) - FileTest.size(file) - end - - def self.size?(file) - FileTest.size?(file) - end - - def self.socket?(file) - FileTest.socket?(file) - end - def self.symlink?(file) - FileTest.symlink?(file) - end - def self.zero?(file) - FileTest.zero?(file) - end - - def self.extname(filename) - fname = self.basename(filename) - epos = fname.rindex('.') - return '' if epos == 0 || epos.nil? - return fname[epos..-1] - end - - def self.path(filename) - if filename.kind_of?(String) - filename - elsif filename.respond_to?(:to_path) - filename.to_path - else - raise TypeError, "no implicit conversion of #{filename.class} into String" - end - end end diff --git a/mrbgems/mruby-io/mrblib/file_constants.rb b/mrbgems/mruby-io/mrblib/file_constants.rb index bd77d53fe6..40ca9fb0aa 100644 --- a/mrbgems/mruby-io/mrblib/file_constants.rb +++ b/mrbgems/mruby-io/mrblib/file_constants.rb @@ -1,13 +1,24 @@ class File + # File name matching constants used with File.fnmatch and Dir.glob module Constants + # Makes File.fnmatch case sensitive on systems where it's case insensitive by default FNM_SYSCASE = 0 + + # Disables the special meaning of the backslash escape character FNM_NOESCAPE = 1 + + # Pathname wildcard doesn't match '/' (directory separator) FNM_PATHNAME = 2 + + # Allows patterns to match hidden files (those starting with '.') FNM_DOTMATCH = 4 + + # Makes the pattern case insensitive (overrides FNM_SYSCASE) FNM_CASEFOLD = 8 end end class File + # Include Constants module to make FNM_* constants available directly on File class include File::Constants end diff --git a/mrbgems/mruby-io/mrblib/io.rb b/mrbgems/mruby-io/mrblib/io.rb index 9192d7bf2c..d5307016b6 100644 --- a/mrbgems/mruby-io/mrblib/io.rb +++ b/mrbgems/mruby-io/mrblib/io.rb @@ -1,16 +1,27 @@ ## # IO +# +# ISO 15.2.20 class IOError < StandardError; end class EOFError < IOError; end class IO - SEEK_SET = 0 - SEEK_CUR = 1 - SEEK_END = 2 - - BUF_SIZE = 4096 - + # + # call-seq: + # IO.open(fd, mode="r" [, opt]) -> io + # IO.open(fd, mode="r" [, opt]) {|io| block } -> obj + # + # With no associated block, IO.open is a synonym for IO.new. If the optional + # code block is given, it will be passed io as an argument, and the IO object + # will automatically be closed when the block terminates. In this instance, + # IO.open returns the value of the block. + # + # fd = IO.sysopen("/dev/tty", "w") + # a = IO.open(fd,"w") + # $stderr.puts "Hello" + # a.close + # def self.open(*args, &block) io = self.new(*args) @@ -26,6 +37,21 @@ def self.open(*args, &block) end end + # + # call-seq: + # IO.popen(cmd, mode="r" [, opt]) -> io + # IO.popen(cmd, mode="r" [, opt]) {|io| block } -> obj + # + # Runs the specified command as a subprocess; the subprocess's standard input + # and output will be connected to the returned IO object. + # + # p IO.popen("date").read #=> "Wed Apr 9 08:56:30 CDT 2003\n" + # IO.popen("dc", "r+") {|f| + # f.puts "5 2 *" + # f.close_write + # puts f.read + # } + # def self.popen(command, mode = 'r', **opts, &block) if !self.respond_to?(:_popen) raise NotImplementedError, "popen is not supported on this platform" @@ -44,6 +70,27 @@ def self.popen(command, mode = 'r', **opts, &block) end end + # + # call-seq: + # IO.pipe -> [read_io, write_io] + # IO.pipe {|read_io, write_io| ... } -> obj + # + # Creates a pair of pipe endpoints (connected to each other) and returns + # them as a two-element array of IO objects: [read_io, write_io]. + # + # rd, wr = IO.pipe + # if fork + # wr.close + # puts rd.read + # rd.close + # Process.wait + # else + # rd.close + # wr.write "Hello, parent!" + # wr.close + # exit + # end + # def self.pipe(&block) if !self.respond_to?(:_pipe) raise NotImplementedError, "pipe is not supported on this platform" @@ -61,17 +108,26 @@ def self.pipe(&block) end end + # + # call-seq: + # IO.read(name, [length [, offset]] ) -> string + # IO.read(name, [length [, offset]], mode: mode) -> string + # + # Opens the file, optionally seeks to the given offset, then returns length + # bytes (defaulting to the rest of the file). read ensures the file is + # closed before returning. + # + # IO.read("testfile") #=> "This is line one\nThis is line two\n" + # IO.read("testfile", 20) #=> "This is line one\nTh" + # IO.read("testfile", 20, 10) #=> "ne one\nThis is line " + # def self.read(path, length=nil, offset=0, mode: "r") str = "" fd = -1 io = nil begin - if path[0] == "|" - io = IO.popen(path[1..-1], mode) - else - fd = IO.sysopen(path, mode) - io = IO.open(fd, mode) - end + fd = IO.sysopen(path, mode) + io = IO.open(fd, mode) io.seek(offset) if offset > 0 str = io.read(length) ensure @@ -84,216 +140,77 @@ def self.read(path, length=nil, offset=0, mode: "r") str end - def flush - # mruby-io always writes immediately (no output buffer). - raise IOError, "closed stream" if self.closed? - self - end + + # + # call-seq: + # ios.hash -> integer + # + # Compute a hash based on the IO object. Two IO objects with the same + # content will have the same hash code (and will compare using eql?). + # We must define IO#hash here because IO includes Enumerable and + # Enumerable#hash will call IO#read() otherwise. + # def hash # We must define IO#hash here because IO includes Enumerable and - # Enumerable#hash will call IO#read... + # Enumerable#hash will call IO#read() otherwise self.__id__ end - def write(string) - str = string.is_a?(String) ? string : string.to_s - return 0 if str.empty? - unless @buf.empty? - # reset real pos ignore buf - seek(pos, SEEK_SET) - end - len = syswrite(str) - len - end - - def <<(str) - write(str) - self - end - def eof? - _check_readable - begin - _read_buf - return @buf.empty? - rescue EOFError - return true - end - end + # Alias for eof? alias_method :eof, :eof? - - def pos - raise IOError if closed? - sysseek(0, SEEK_CUR) - @buf.bytesize - end + # Alias for pos alias_method :tell, :pos + # + # call-seq: + # ios.pos = integer -> integer + # + # Seeks to the given position (in bytes) in ios. It is not guaranteed that + # seeking to the right position when ios is textmode. + # + # f = File.new("testfile") + # f.pos = 17 + # f.gets #=> "This is line two\n" + # def pos=(i) seek(i, SEEK_SET) end + # + # call-seq: + # ios.rewind -> 0 + # + # Positions ios to the beginning of input, resetting lineno to zero. + # + # f = File.new("testfile") + # f.readline #=> "This is line one\n" + # f.rewind #=> 0 + # f.lineno #=> 0 + # f.readline #=> "This is line one\n" + # def rewind seek(0, SEEK_SET) end - def seek(i, whence = SEEK_SET) - raise IOError if closed? - sysseek(i, whence) - @buf = '' - 0 - end - - def _read_buf - return @buf if @buf && @buf.bytesize > 0 - sysread(BUF_SIZE, @buf) - end - - def ungetc(substr) - raise TypeError.new "expect String, got #{substr.class}" unless substr.is_a?(String) - if @buf.empty? - @buf.replace(substr) - else - @buf[0,0] = substr - end - nil - end - - def ungetbyte(c) - if c.is_a? String - c = c.getbyte(0) - else - c &= 0xff - end - s = " " - s.setbyte(0,c) - ungetc s - end - - def read(length = nil, outbuf = "") - unless length.nil? - unless length.is_a? Integer - raise TypeError.new "can't convert #{length.class} into Integer" - end - if length < 0 - raise ArgumentError.new "negative length: #{length} given" - end - if length == 0 - return "" # easy case - end - end - - array = [] - while true - begin - _read_buf - rescue EOFError - array = nil if array.empty? and (not length.nil?) and length != 0 - break - end - - if length - consume = (length <= @buf.bytesize) ? length : @buf.bytesize - array.push IO._bufread(@buf, consume) - length -= consume - break if length == 0 - else - array.push @buf - @buf = '' - end - end - - if array.nil? - outbuf.replace("") - nil - else - outbuf.replace(array.join) - end - end - - def readline(arg = "\n", limit = nil) - case arg - when String - rs = arg - when Integer - rs = "\n" - limit = arg - else - raise ArgumentError - end - - if rs.nil? - return read - end - - if rs == "" - rs = "\n\n" - end - - array = [] - while true - begin - _read_buf - rescue EOFError - array = nil if array.empty? - break - end - - if limit && limit <= @buf.size - array.push @buf[0, limit] - @buf[0, limit] = "" - break - elsif idx = @buf.index(rs) - len = idx + rs.size - array.push @buf[0, len] - @buf[0, len] = "" - break - else - array.push @buf - @buf = '' - end - end - - raise EOFError.new "end of file reached" if array.nil? - - array.join - end - - def gets(*args) - begin - readline(*args) - rescue EOFError - nil - end - end - - def readchar - _read_buf - _readchar(@buf) - end - - def getc - begin - readchar - rescue EOFError - nil - end - end - - def readbyte - _read_buf - IO._bufread(@buf, 1).getbyte(0) - end - - def getbyte - readbyte - rescue EOFError - nil - end + # + # call-seq: + # ios.each(sep=$/) {|line| block } -> ios + # ios.each(limit) {|line| block } -> ios + # ios.each(sep,limit) {|line| block } -> ios + # ios.each(...) -> an_enumerator + # + # Executes the block for every line in ios, where lines are separated by sep. + # ios must be opened for reading. If no block is given, an enumerator is returned instead. + # + # f = File.new("testfile") + # f.each {|line| puts "#{f.lineno}: #{line}" } + # # 15.2.20.5.3 def each(&block) - return to_enum unless block + return to_enum(:each) unless block while line = self.gets block.call(line) @@ -301,6 +218,19 @@ def each(&block) self end + # + # call-seq: + # ios.each_byte {|byte| block } -> ios + # ios.each_byte -> an_enumerator + # + # Calls the given block once for each byte (0..255) in ios, passing the byte + # as an argument. The stream must be opened for reading or an IOError will be raised. + # + # f = File.new("testfile") + # checksum = 0 + # f.each_byte {|x| checksum ^= x } #=> # + # checksum #=> 12 + # # 15.2.20.5.4 def each_byte(&block) return to_enum(:each_byte) unless block @@ -311,9 +241,20 @@ def each_byte(&block) self end - # 15.2.20.5.5 + # Alias for each - 15.2.20.5.5 alias each_line each + # + # call-seq: + # ios.each_char {|c| block } -> ios + # ios.each_char -> an_enumerator + # + # Calls the given block once for each character in ios, passing the character + # as an argument. The stream must be opened for reading or an IOError will be raised. + # + # f = File.new("testfile") + # ios.each_char {|c| print c, ' ' } #=> # + # def each_char(&block) return to_enum(:each_char) unless block @@ -323,54 +264,40 @@ def each_char(&block) self end - def readlines - ary = [] - while (line = gets) - ary << line - end - ary - end - - def puts(*args) - i = 0 - len = args.size - while i < len - s = args[i] - if s.kind_of?(Array) - puts(*s) - else - s = s.to_s - write s - write "\n" if (s[-1] != "\n") - end - i += 1 - end - write "\n" if len == 0 - nil - end - def print(*args) - i = 0 - len = args.size - while i < len - write args[i].to_s - i += 1 - end - end + # + # call-seq: + # ios.printf(format_string [, obj, ...]) -> nil + # + # Formats and writes to ios, converting parameters under control of the format string. + # See sprintf for details of the format string. + # + # $stdout.printf "Number: %5.2f,\nString: %s\n", 1.23, "hello" + # Number: 1.23, + # String: hello + # def printf(*args) write sprintf(*args) nil end + # Alias for fileno - returns the integer file descriptor for ios alias_method :to_i, :fileno + # Alias for isatty - returns true if ios is associated with a terminal device alias_method :tty?, :isatty end +# Standard input stream - connected to file descriptor 0 STDIN = IO.open(0, "r") +# Standard output stream - connected to file descriptor 1 STDOUT = IO.open(1, "w") +# Standard error stream - connected to file descriptor 2 STDERR = IO.open(2, "w") +# Global variable for standard input $stdin = STDIN +# Global variable for standard output $stdout = STDOUT +# Global variable for standard error $stderr = STDERR diff --git a/mrbgems/mruby-io/mrblib/kernel.rb b/mrbgems/mruby-io/mrblib/kernel.rb index 9cb3b56502..2b72a7e94a 100644 --- a/mrbgems/mruby-io/mrblib/kernel.rb +++ b/mrbgems/mruby-io/mrblib/kernel.rb @@ -1,9 +1,37 @@ module Kernel - def `(cmd) - IO.popen(cmd) { |io| io.read } + # + # call-seq: + # `cmd` -> string + # + # Returns the standard output of running cmd in a subshell. + # The built-in syntax %x{...} uses this method. + # + # `date` #=> "Wed Apr 9 08:56:30 CDT 2003\n" + # `ls testdir`.split[1] #=> "main.rb" + # `echo oops && exit 99` #=> "oops\n" + # + private def `(cmd) #` + IO.popen(cmd) {|io| io.read } end - def open(file, *rest, &block) + # + # call-seq: + # open(name [, mode [, perm]] [, opt]) -> io or nil + # open(name [, mode [, perm]] [, opt]) {|io| block } -> obj + # + # Creates an IO object connected to the given stream, file, or subprocess. + # If path starts with a pipe character ("|"), a subprocess is created, + # connected to the caller by a pair of pipes. The returned IO object may + # be used to write to the standard input and read from the standard output + # of this subprocess. + # + # open("testfile") #=> # + # open("| date") #=> # + # open("testfile") do |f| + # print f.gets + # end + # + private def open(file, *rest, &block) raise ArgumentError unless file.is_a?(String) if file[0] == "|" @@ -13,19 +41,135 @@ def open(file, *rest, &block) end end - def print(*args) - $stdout.print(*args) + # + # call-seq: + # p(obj) -> obj + # p(obj1, obj2, ...) -> [obj1, obj2, ...] + # p() -> nil + # + # For each object, directly writes obj.inspect followed by a newline + # to the program's standard output. + # + # S = Struct.new(:name, :state) + # s = S['dave', 'TX'] + # p s #=> # + # + private def p(*a) + for e in a + $stdout.write e.inspect + $stdout.write "\n" + end + len = a.size + return nil if len == 0 + return a[0] if len == 1 + a + end + + # + # call-seq: + # print(obj, ...) -> nil + # + # Prints each object in turn to $stdout. If the output field separator + # ($,) is not nil, its contents will appear between each field. + # If the output record separator ($\) is not nil, it will be appended + # to the output. + # + # print "cat", [1,2,3], 99, "\n" + # $, = ", " + # $\ = "\n" + # print "cat", [1,2,3], 99 + # + private def print(...) + $stdout.print(...) + end + + # + # call-seq: + # putc(int) -> nil + # + # Equivalent to $stdout.putc(int). + # If int is Integer, writes the byte (mod 256). + # If int is String, writes the first character. + # Returns nil. + # + private def putc(c) + $stdout.putc(c) + nil + end + + # + # call-seq: + # puts(obj, ...) -> nil + # + # Equivalent to $stdout.puts(obj, ...). + # + # puts "this", "is", "a", "test" + # + private def puts(...) + $stdout.puts(...) + end + + # + # call-seq: + # printf(io, string [, obj ... ]) -> nil + # printf(string [, obj ... ]) -> nil + # + # Equivalent to io.write(sprintf(string, obj, ...)) or + # $stdout.write(sprintf(string, obj, ...)). + # + # printf "Number: %5.2f,\nString: %s\n", 1.23, "hello" + # + private def printf(...) + $stdout.printf(...) end - def puts(*args) - $stdout.puts(*args) + # + # call-seq: + # gets(sep=$/) -> string or nil + # gets(limit) -> string or nil + # gets(sep,limit) -> string or nil + # + # Returns (and assigns to $_) the next line from the list of files in ARGV + # (or $*), or from standard input if no files are present on the command line. + # Returns nil at end of file. + # + # print "Enter your name: " + # name = gets + # print "Hello #{name}" + # + private def gets(...) + $stdin.gets(...) end - def printf(*args) - $stdout.printf(*args) + # + # call-seq: + # readline(sep=$/) -> string + # readline(limit) -> string + # readline(sep,limit) -> string + # + # Equivalent to gets, except readline raises EOFError at end of file. + # + # print "Enter your name: " + # name = readline + # print "Hello #{name}" + # + private def readline(...) + $stdin.readline(...) end - def gets(*args) - $stdin.gets(*args) + # + # call-seq: + # readlines(sep=$/) -> array + # readlines(limit) -> array + # readlines(sep,limit) -> array + # + # Returns an array containing the lines returned by calling gets(sep) + # until the end of file. + # + # lines = readlines + # lines[0] #=> "This is line one\n" + # + private def readlines(...) + $stdin.readlines(...) end end diff --git a/mrbgems/mruby-io/ports/posix/io_hal.c b/mrbgems/mruby-io/ports/posix/io_hal.c new file mode 100644 index 0000000000..254bc6c6c6 --- /dev/null +++ b/mrbgems/mruby-io/ports/posix/io_hal.c @@ -0,0 +1,573 @@ +/* +** io_hal.c - POSIX HAL implementation for mruby-io +** +** See Copyright Notice in mruby.h +** +** POSIX implementation for I/O operations using standard POSIX APIs. +** Supported platforms: Linux, macOS, BSD, Unix +*/ + +#include +#include "io_hal.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifndef __DJGPP__ +#include +#endif + +/* Maximum path length */ +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +/* + * Helper Functions + */ + +/* Convert POSIX struct stat to mrb_io_stat */ +static void +convert_stat(const struct stat *src, mrb_io_stat *dst) +{ + /* Extract time values FIRST while macros are still defined. + * On POSIX systems, st_atime may be a macro for st_atim.tv_sec */ + time_t atime_val, mtime_val, ctime_val; +#if defined(st_atime) + /* st_atime is a macro - use it to extract from src */ + atime_val = src->st_atime; + mtime_val = src->st_mtime; + ctime_val = src->st_ctime; +#elif defined(__APPLE__) || defined(__FreeBSD__) || \ + defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) + /* BSD/macOS: st_atime is typically a direct member */ + atime_val = src->st_atime; + mtime_val = src->st_mtime; + ctime_val = src->st_ctime; +#else + /* POSIX.1-2008: use st_atim.tv_sec directly */ + atime_val = src->st_atim.tv_sec; + mtime_val = src->st_mtim.tv_sec; + ctime_val = src->st_ctim.tv_sec; +#endif + + /* Undefine macros to avoid interference with mrb_io_stat fields */ +#undef st_atime +#undef st_mtime +#undef st_ctime + + dst->st_dev = (mrb_int)src->st_dev; + dst->st_ino = (mrb_int)src->st_ino; + dst->st_mode = (mrb_int)src->st_mode; + dst->st_nlink = (mrb_int)src->st_nlink; + dst->st_uid = (mrb_int)src->st_uid; + dst->st_gid = (mrb_int)src->st_gid; + dst->st_rdev = (mrb_int)src->st_rdev; + dst->st_size = (mrb_int)src->st_size; + dst->st_atime = (mrb_int)atime_val; + dst->st_mtime = (mrb_int)mtime_val; + dst->st_ctime = (mrb_int)ctime_val; + +#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE + dst->st_blksize = (mrb_int)src->st_blksize; +#else + dst->st_blksize = 512; +#endif +#ifdef HAVE_STRUCT_STAT_ST_BLOCKS + dst->st_blocks = (mrb_int)src->st_blocks; +#else + dst->st_blocks = (dst->st_size + 511) / 512; +#endif +} + +/* + * File Operations + */ + +int +mrb_hal_io_stat(mrb_state *mrb, const char *path, mrb_io_stat *st) +{ + struct stat s; + (void)mrb; + + if (stat(path, &s) == -1) { + return -1; + } + convert_stat(&s, st); + return 0; +} + +int +mrb_hal_io_fstat(mrb_state *mrb, int fd, mrb_io_stat *st) +{ + struct stat s; + (void)mrb; + + if (fstat(fd, &s) == -1) { + return -1; + } + convert_stat(&s, st); + return 0; +} + +int +mrb_hal_io_lstat(mrb_state *mrb, const char *path, mrb_io_stat *st) +{ + struct stat s; + (void)mrb; + + if (lstat(path, &s) == -1) { + return -1; + } + convert_stat(&s, st); + return 0; +} + +int +mrb_hal_io_chmod(mrb_state *mrb, const char *path, mrb_int mode) +{ + (void)mrb; + return chmod(path, (mode_t)mode); +} + +mrb_int +mrb_hal_io_umask(mrb_state *mrb, mrb_int mask) +{ + mode_t old; + (void)mrb; + + if (mask < 0) { + /* Just query current value */ + old = umask(0); + umask(old); + } + else { + old = umask((mode_t)mask); + } + return (mrb_int)old; +} + +int +mrb_hal_io_ftruncate(mrb_state *mrb, int fd, mrb_int length) +{ + (void)mrb; + return ftruncate(fd, (off_t)length); +} + +int +mrb_hal_io_flock(mrb_state *mrb, int fd, int operation) +{ + (void)mrb; + + while (flock(fd, operation) == -1) { + if (errno == EINTR) { + continue; /* Retry on interrupt */ + } + return -1; + } + return 0; +} + +int +mrb_hal_io_unlink(mrb_state *mrb, const char *path) +{ + (void)mrb; + return unlink(path); +} + +int +mrb_hal_io_rename(mrb_state *mrb, const char *oldpath, const char *newpath) +{ + (void)mrb; + return rename(oldpath, newpath); +} + +int +mrb_hal_io_symlink(mrb_state *mrb, const char *target, const char *linkpath) +{ + (void)mrb; + return symlink(target, linkpath); +} + +mrb_int +mrb_hal_io_readlink(mrb_state *mrb, const char *path, char *buf, size_t bufsize) +{ + (void)mrb; + return (mrb_int)readlink(path, buf, bufsize); +} + +char* +mrb_hal_io_realpath(mrb_state *mrb, const char *path, char *resolved) +{ + (void)mrb; + return realpath(path, resolved); +} + +char* +mrb_hal_io_getcwd(mrb_state *mrb, char *buf, size_t size) +{ + (void)mrb; + return getcwd(buf, size); +} + +const char* +mrb_hal_io_getenv(mrb_state *mrb, const char *name) +{ + (void)mrb; + return getenv(name); +} + +const char* +mrb_hal_io_gethome(mrb_state *mrb, const char *username) +{ + const char *home; + + if (username == NULL || *username == '\0') { + /* Get current user's home */ + home = getenv("HOME"); + if (home == NULL) { + errno = ENOENT; + return NULL; + } + } + else { + /* Get specified user's home */ + struct passwd *pwd = getpwnam(username); + if (pwd == NULL) { + errno = ENOENT; + return NULL; + } + home = pwd->pw_dir; + } + + return home; +} + +/* + * Core I/O Operations + */ + +int +mrb_hal_io_open(mrb_state *mrb, const char *path, int flags, mrb_int mode) +{ + int fd; + (void)mrb; + + fd = open(path, flags, (mode_t)mode); + if (fd == -1) { + return -1; + } + + /* Set close-on-exec for non-standard descriptors */ +#if defined(F_GETFD) && defined(F_SETFD) && defined(FD_CLOEXEC) + if (fd > 2) { + int fd_flags = fcntl(fd, F_GETFD); + if (fd_flags != -1) { + fcntl(fd, F_SETFD, fd_flags | FD_CLOEXEC); + } + } +#endif + + return fd; +} + +int +mrb_hal_io_close(mrb_state *mrb, int fd) +{ + (void)mrb; + return close(fd); +} + +mrb_int +mrb_hal_io_read(mrb_state *mrb, int fd, void *buf, size_t count) +{ + (void)mrb; + return (mrb_int)read(fd, buf, count); +} + +mrb_int +mrb_hal_io_write(mrb_state *mrb, int fd, const void *buf, size_t count) +{ + (void)mrb; + return (mrb_int)write(fd, buf, count); +} + +mrb_int +mrb_hal_io_lseek(mrb_state *mrb, int fd, mrb_int offset, int whence) +{ + int posix_whence; + (void)mrb; + + /* Convert MRB_IO_SEEK_* to POSIX SEEK_* */ + switch (whence) { + case MRB_IO_SEEK_SET: posix_whence = SEEK_SET; break; + case MRB_IO_SEEK_CUR: posix_whence = SEEK_CUR; break; + case MRB_IO_SEEK_END: posix_whence = SEEK_END; break; + default: + errno = EINVAL; + return -1; + } + + return (mrb_int)lseek(fd, (off_t)offset, posix_whence); +} + +int +mrb_hal_io_dup(mrb_state *mrb, int fd) +{ + int new_fd; + (void)mrb; + + new_fd = dup(fd); + if (new_fd == -1) { + return -1; + } + + /* Set close-on-exec */ +#if defined(F_GETFD) && defined(F_SETFD) && defined(FD_CLOEXEC) + if (new_fd > 2) { + int fd_flags = fcntl(new_fd, F_GETFD); + if (fd_flags != -1) { + fcntl(new_fd, F_SETFD, fd_flags | FD_CLOEXEC); + } + } +#endif + + return new_fd; +} + +int +mrb_hal_io_fcntl(mrb_state *mrb, int fd, int cmd, int arg) +{ + (void)mrb; + return fcntl(fd, cmd, arg); +} + +int +mrb_hal_io_isatty(mrb_state *mrb, int fd) +{ + (void)mrb; + return isatty(fd) ? 1 : 0; +} + +int +mrb_hal_io_pipe(mrb_state *mrb, int fds[2]) +{ + int ret; + (void)mrb; + + ret = pipe(fds); + if (ret == -1) { + return -1; + } + + /* Set close-on-exec on both ends */ +#if defined(F_GETFD) && defined(F_SETFD) && defined(FD_CLOEXEC) + for (int i = 0; i < 2; i++) { + int fd_flags = fcntl(fds[i], F_GETFD); + if (fd_flags != -1) { + fcntl(fds[i], F_SETFD, fd_flags | FD_CLOEXEC); + } + } +#endif + + return 0; +} + +/* + * Process Operations + */ + +int +mrb_hal_io_spawn_process(mrb_state *mrb, const char *cmd, + int stdin_fd, int stdout_fd, int stderr_fd, + int *pid) +{ + pid_t child_pid; + (void)mrb; + + /* Skip leading whitespace */ + while (*cmd == ' ' || *cmd == '\t' || *cmd == '\n') { + cmd++; + } + + if (!*cmd) { + errno = ENOENT; + return -1; + } + + child_pid = fork(); + if (child_pid == -1) { + /* Fork failed */ + return -1; + } + + if (child_pid == 0) { + /* Child process */ + + /* Redirect stdin */ + if (stdin_fd != -1) { + dup2(stdin_fd, STDIN_FILENO); + if (stdin_fd > 2) close(stdin_fd); + } + + /* Redirect stdout */ + if (stdout_fd != -1) { + dup2(stdout_fd, STDOUT_FILENO); + if (stdout_fd > 2) close(stdout_fd); + } + + /* Redirect stderr */ + if (stderr_fd != -1) { + dup2(stderr_fd, STDERR_FILENO); + if (stderr_fd > 2) close(stderr_fd); + } + + /* Close all other file descriptors */ + int max_fd = sysconf(_SC_OPEN_MAX); + if (max_fd == -1) max_fd = 1024; + for (int i = 3; i < max_fd; i++) { + close(i); + } + + /* Execute command via shell */ + execl("/bin/sh", "sh", "-c", cmd, (char*)NULL); + + /* If execl returns, it failed */ + _exit(127); + } + + /* Parent process */ + *pid = (int)child_pid; + return 0; +} + +int +mrb_hal_io_waitpid(mrb_state *mrb, int pid, int *status, int options) +{ + pid_t result; + int stat; + (void)mrb; + + result = waitpid((pid_t)pid, &stat, options); + if (result == -1) { + return -1; + } + + if (status != NULL) { + *status = stat; + } + + return (int)result; +} + +/* + * I/O Multiplexing + */ + +struct mrb_io_fdset { + fd_set fds; +}; + +mrb_io_fdset* +mrb_hal_io_fdset_alloc(mrb_state *mrb) +{ + mrb_io_fdset *fdset = (mrb_io_fdset*)mrb_malloc(mrb, sizeof(mrb_io_fdset)); + FD_ZERO(&fdset->fds); + return fdset; +} + +void +mrb_hal_io_fdset_free(mrb_state *mrb, mrb_io_fdset *fdset) +{ + if (fdset) { + mrb_free(mrb, fdset); + } +} + +void +mrb_hal_io_fdset_zero(mrb_state *mrb, mrb_io_fdset *fdset) +{ + (void)mrb; + if (fdset) { + FD_ZERO(&fdset->fds); + } +} + +void +mrb_hal_io_fdset_set(mrb_state *mrb, int fd, mrb_io_fdset *fdset) +{ + (void)mrb; + if (fd < 0 || fd >= FD_SETSIZE) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "fd is out of range"); + return; + } + if (fdset) { + FD_SET(fd, &fdset->fds); + } +} + +int +mrb_hal_io_fdset_isset(mrb_state *mrb, int fd, mrb_io_fdset *fdset) +{ + (void)mrb; + if (fd < 0 || fd >= FD_SETSIZE) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "fd is out of range"); + return 0; + } + if (fdset) { + return FD_ISSET(fd, &fdset->fds); + } + return 0; +} + +int +mrb_hal_io_select(mrb_state *mrb, int nfds, + mrb_io_fdset *readfds, + mrb_io_fdset *writefds, + mrb_io_fdset *errorfds, + mrb_io_timeval *timeout) +{ + fd_set *r = readfds ? &readfds->fds : NULL; + fd_set *w = writefds ? &writefds->fds : NULL; + fd_set *e = errorfds ? &errorfds->fds : NULL; + struct timeval *tv = NULL; + struct timeval tv_storage; + (void)mrb; + + if (timeout) { + tv_storage.tv_sec = (time_t)timeout->tv_sec; + tv_storage.tv_usec = (suseconds_t)timeout->tv_usec; + tv = &tv_storage; + } + + return select(nfds, r, w, e, tv); +} + +/* + * HAL Initialization/Finalization + */ + +void +mrb_hal_io_init(mrb_state *mrb) +{ + (void)mrb; + /* No special initialization needed for POSIX */ +} + +void +mrb_hal_io_final(mrb_state *mrb) +{ + (void)mrb; + /* No special cleanup needed for POSIX */ +} diff --git a/mrbgems/mruby-io/ports/win/io_hal.c b/mrbgems/mruby-io/ports/win/io_hal.c new file mode 100644 index 0000000000..3d60673952 --- /dev/null +++ b/mrbgems/mruby-io/ports/win/io_hal.c @@ -0,0 +1,620 @@ +/* +** io_hal.c - Windows HAL implementation for mruby-io +** +** See Copyright Notice in mruby.h +** +** Windows implementation for I/O operations using Win32 APIs. +** Supported platforms: Windows, MinGW +*/ + +#include +#include "io_hal.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Maximum path length */ +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +/* + * Helper Functions + */ + +/* Convert Windows struct _stat64 to mrb_io_stat */ +static void +convert_stat(const struct _stat64 *src, mrb_io_stat *dst) +{ + dst->st_dev = (mrb_int)src->st_dev; + dst->st_ino = (mrb_int)src->st_ino; + dst->st_mode = (mrb_int)src->st_mode; + dst->st_nlink = (mrb_int)src->st_nlink; + dst->st_uid = 0; /* Windows doesn't have Unix-style UIDs */ + dst->st_gid = 0; /* Windows doesn't have Unix-style GIDs */ + dst->st_rdev = (mrb_int)src->st_rdev; + dst->st_size = (mrb_int)src->st_size; + dst->st_atime = (mrb_int)src->st_atime; + dst->st_mtime = (mrb_int)src->st_mtime; + dst->st_ctime = (mrb_int)src->st_ctime; + dst->st_blksize = 512; + dst->st_blocks = (dst->st_size + 511) / 512; +} + +/* Convert errno to Windows errno */ +static void +set_errno_from_win_error(DWORD error) +{ + switch (error) { + case ERROR_FILE_NOT_FOUND: + case ERROR_PATH_NOT_FOUND: + errno = ENOENT; + break; + case ERROR_ACCESS_DENIED: + errno = EACCES; + break; + case ERROR_NOT_ENOUGH_MEMORY: + errno = ENOMEM; + break; + case ERROR_INVALID_HANDLE: + errno = EBADF; + break; + case ERROR_ALREADY_EXISTS: + errno = EEXIST; + break; + default: + errno = EIO; + break; + } +} + +/* + * File Operations + */ + +int +mrb_hal_io_stat(mrb_state *mrb, const char *path, mrb_io_stat *st) +{ + struct _stat64 s; + (void)mrb; + + if (_stat64(path, &s) == -1) { + return -1; + } + convert_stat(&s, st); + return 0; +} + +int +mrb_hal_io_fstat(mrb_state *mrb, int fd, mrb_io_stat *st) +{ + struct _stat64 s; + (void)mrb; + + if (_fstat64(fd, &s) == -1) { + return -1; + } + convert_stat(&s, st); + return 0; +} + +int +mrb_hal_io_lstat(mrb_state *mrb, const char *path, mrb_io_stat *st) +{ + /* Windows doesn't distinguish lstat from stat */ + return mrb_hal_io_stat(mrb, path, st); +} + +int +mrb_hal_io_chmod(mrb_state *mrb, const char *path, mrb_int mode) +{ + (void)mrb; + return _chmod(path, (int)mode); +} + +mrb_int +mrb_hal_io_umask(mrb_state *mrb, mrb_int mask) +{ + int old; + (void)mrb; + + if (mask < 0) { + /* Just query current value */ + old = _umask(0); + _umask(old); + } + else { + old = _umask((int)mask); + } + return (mrb_int)old; +} + +int +mrb_hal_io_ftruncate(mrb_state *mrb, int fd, mrb_int length) +{ + (void)mrb; + return _chsize_s(fd, (__int64)length); +} + +int +mrb_hal_io_flock(mrb_state *mrb, int fd, int operation) +{ + HANDLE h; + OVERLAPPED overlapped; + DWORD flags = 0; + (void)mrb; + + h = (HANDLE)_get_osfhandle(fd); + if (h == INVALID_HANDLE_VALUE) { + errno = EBADF; + return -1; + } + + memset(&overlapped, 0, sizeof(overlapped)); + + if (operation & MRB_IO_LOCK_UN) { + if (!UnlockFileEx(h, 0, MAXDWORD, MAXDWORD, &overlapped)) { + set_errno_from_win_error(GetLastError()); + return -1; + } + return 0; + } + + if (operation & MRB_IO_LOCK_EX) { + flags |= LOCKFILE_EXCLUSIVE_LOCK; + } + if (operation & MRB_IO_LOCK_NB) { + flags |= LOCKFILE_FAIL_IMMEDIATELY; + } + + if (!LockFileEx(h, flags, 0, MAXDWORD, MAXDWORD, &overlapped)) { + set_errno_from_win_error(GetLastError()); + return -1; + } + + return 0; +} + +int +mrb_hal_io_unlink(mrb_state *mrb, const char *path) +{ + (void)mrb; + return _unlink(path); +} + +int +mrb_hal_io_rename(mrb_state *mrb, const char *oldpath, const char *newpath) +{ + (void)mrb; + return rename(oldpath, newpath); +} + +int +mrb_hal_io_symlink(mrb_state *mrb, const char *target, const char *linkpath) +{ + (void)target; + (void)linkpath; + /* Symlinks require special privileges on Windows */ + mrb_raise(mrb, E_NOTIMP_ERROR, "symlink is not supported on Windows"); + return -1; /* not reached */ +} + +mrb_int +mrb_hal_io_readlink(mrb_state *mrb, const char *path, char *buf, size_t bufsize) +{ + (void)path; + (void)buf; + (void)bufsize; + /* Symlinks require special handling on Windows */ + mrb_raise(mrb, E_NOTIMP_ERROR, "readlink is not supported on Windows"); + return -1; /* not reached */ +} + +char* +mrb_hal_io_realpath(mrb_state *mrb, const char *path, char *resolved) +{ + DWORD ret; + (void)mrb; + + ret = GetFullPathName(path, PATH_MAX, resolved, NULL); + if (ret == 0 || ret >= PATH_MAX) { + set_errno_from_win_error(GetLastError()); + return NULL; + } + return resolved; +} + +char* +mrb_hal_io_getcwd(mrb_state *mrb, char *buf, size_t size) +{ + (void)mrb; + return _getcwd(buf, (int)size); +} + +const char* +mrb_hal_io_getenv(mrb_state *mrb, const char *name) +{ + (void)mrb; + return getenv(name); +} + +const char* +mrb_hal_io_gethome(mrb_state *mrb, const char *username) +{ + const char *home; + (void)mrb; + + if (username != NULL && *username != '\0') { + /* Windows doesn't have a simple way to get other users' home directories */ + errno = ENOSYS; + return NULL; + } + + /* Try USERPROFILE first, then HOMEDRIVE+HOMEPATH */ + home = getenv("USERPROFILE"); + if (home == NULL) { + const char *homedrive = getenv("HOMEDRIVE"); + const char *homepath = getenv("HOMEPATH"); + if (homedrive && homepath) { + static char homebuf[PATH_MAX]; + snprintf(homebuf, PATH_MAX, "%s%s", homedrive, homepath); + return homebuf; + } + errno = ENOENT; + return NULL; + } + + return home; +} + +/* + * Core I/O Operations + */ + +int +mrb_hal_io_open(mrb_state *mrb, const char *path, int flags, mrb_int mode) +{ + int fd; + (void)mrb; + + /* Windows uses _open with slightly different flags */ + fd = _open(path, flags | _O_BINARY, (int)mode); + if (fd == -1) { + return -1; + } + + /* Set close-on-exec for non-standard descriptors */ + if (fd > 2) { + SetHandleInformation((HANDLE)_get_osfhandle(fd), HANDLE_FLAG_INHERIT, 0); + } + + return fd; +} + +int +mrb_hal_io_close(mrb_state *mrb, int fd) +{ + (void)mrb; + return _close(fd); +} + +mrb_int +mrb_hal_io_read(mrb_state *mrb, int fd, void *buf, size_t count) +{ + (void)mrb; + return (mrb_int)_read(fd, buf, (unsigned int)count); +} + +mrb_int +mrb_hal_io_write(mrb_state *mrb, int fd, const void *buf, size_t count) +{ + (void)mrb; + return (mrb_int)_write(fd, buf, (unsigned int)count); +} + +mrb_int +mrb_hal_io_lseek(mrb_state *mrb, int fd, mrb_int offset, int whence) +{ + int win_whence; + (void)mrb; + + /* Convert MRB_IO_SEEK_* to Windows SEEK_* */ + switch (whence) { + case MRB_IO_SEEK_SET: win_whence = SEEK_SET; break; + case MRB_IO_SEEK_CUR: win_whence = SEEK_CUR; break; + case MRB_IO_SEEK_END: win_whence = SEEK_END; break; + default: + errno = EINVAL; + return -1; + } + + return (mrb_int)_lseeki64(fd, (__int64)offset, win_whence); +} + +int +mrb_hal_io_dup(mrb_state *mrb, int fd) +{ + int new_fd; + (void)mrb; + + new_fd = _dup(fd); + if (new_fd == -1) { + return -1; + } + + /* Set close-on-exec */ + if (new_fd > 2) { + SetHandleInformation((HANDLE)_get_osfhandle(new_fd), HANDLE_FLAG_INHERIT, 0); + } + + return new_fd; +} + +int +mrb_hal_io_fcntl(mrb_state *mrb, int fd, int cmd, int arg) +{ + /* Windows has limited fcntl support */ + (void)mrb; + (void)fd; + (void)cmd; + (void)arg; + errno = ENOSYS; + return -1; +} + +int +mrb_hal_io_isatty(mrb_state *mrb, int fd) +{ + (void)mrb; + return _isatty(fd) ? 1 : 0; +} + +int +mrb_hal_io_pipe(mrb_state *mrb, int fds[2]) +{ + int ret; + (void)mrb; + + ret = _pipe(fds, 4096, _O_BINARY); + if (ret == -1) { + return -1; + } + + /* Set close-on-exec on both ends */ + SetHandleInformation((HANDLE)_get_osfhandle(fds[0]), HANDLE_FLAG_INHERIT, 0); + SetHandleInformation((HANDLE)_get_osfhandle(fds[1]), HANDLE_FLAG_INHERIT, 0); + + return 0; +} + +/* + * Process Operations + */ + +int +mrb_hal_io_spawn_process(mrb_state *mrb, const char *cmd, + int stdin_fd, int stdout_fd, int stderr_fd, + int *pid) +{ + STARTUPINFO si; + PROCESS_INFORMATION pi; + HANDLE h_stdin = INVALID_HANDLE_VALUE; + HANDLE h_stdout = INVALID_HANDLE_VALUE; + HANDLE h_stderr = INVALID_HANDLE_VALUE; + char cmdline[8192]; + BOOL ret; + (void)mrb; + + /* Skip leading whitespace */ + while (*cmd == ' ' || *cmd == '\t' || *cmd == '\n') { + cmd++; + } + + if (!*cmd) { + errno = ENOENT; + return -1; + } + + /* Build command line - use cmd.exe to execute */ + snprintf(cmdline, sizeof(cmdline), "cmd.exe /c %s", cmd); + + /* Setup startup info */ + memset(&si, 0, sizeof(si)); + si.cb = sizeof(si); + si.dwFlags = STARTF_USESTDHANDLES; + + /* Convert file descriptors to handles and make them inheritable */ + if (stdin_fd != -1) { + h_stdin = (HANDLE)_get_osfhandle(stdin_fd); + SetHandleInformation(h_stdin, HANDLE_FLAG_INHERIT, HANDLE_FLAG_INHERIT); + si.hStdInput = h_stdin; + } + else { + si.hStdInput = GetStdHandle(STD_INPUT_HANDLE); + } + + if (stdout_fd != -1) { + h_stdout = (HANDLE)_get_osfhandle(stdout_fd); + SetHandleInformation(h_stdout, HANDLE_FLAG_INHERIT, HANDLE_FLAG_INHERIT); + si.hStdOutput = h_stdout; + } + else { + si.hStdOutput = GetStdHandle(STD_OUTPUT_HANDLE); + } + + if (stderr_fd != -1) { + h_stderr = (HANDLE)_get_osfhandle(stderr_fd); + SetHandleInformation(h_stderr, HANDLE_FLAG_INHERIT, HANDLE_FLAG_INHERIT); + si.hStdError = h_stderr; + } + else { + si.hStdError = GetStdHandle(STD_ERROR_HANDLE); + } + + /* Create process */ + memset(&pi, 0, sizeof(pi)); + ret = CreateProcess(NULL, cmdline, NULL, NULL, TRUE, 0, NULL, NULL, &si, &pi); + + if (!ret) { + set_errno_from_win_error(GetLastError()); + return -1; + } + + /* Close thread handle - we don't need it */ + CloseHandle(pi.hThread); + + /* Store process handle as PID (will be used in waitpid) */ + *pid = (int)(intptr_t)pi.hProcess; + + return 0; +} + +int +mrb_hal_io_waitpid(mrb_state *mrb, int pid, int *status, int options) +{ + HANDLE h = (HANDLE)(intptr_t)pid; + DWORD wait_result; + DWORD exit_code; + DWORD timeout; + (void)mrb; + + /* Convert options to timeout */ + timeout = (options != 0) ? 0 : INFINITE; + + wait_result = WaitForSingleObject(h, timeout); + + if (wait_result == WAIT_TIMEOUT) { + return 0; /* Non-blocking wait, no change */ + } + + if (wait_result != WAIT_OBJECT_0) { + set_errno_from_win_error(GetLastError()); + return -1; + } + + /* Get exit code */ + if (!GetExitCodeProcess(h, &exit_code)) { + set_errno_from_win_error(GetLastError()); + return -1; + } + + if (status != NULL) { + /* Store exit code in status (shifted to match Unix convention) */ + *status = (int)(exit_code << 8); + } + + /* Close process handle */ + CloseHandle(h); + + return pid; +} + +/* + * I/O Multiplexing + */ + +struct mrb_io_fdset { + fd_set fds; +}; + +mrb_io_fdset* +mrb_hal_io_fdset_alloc(mrb_state *mrb) +{ + mrb_io_fdset *fdset = (mrb_io_fdset*)mrb_malloc(mrb, sizeof(mrb_io_fdset)); + FD_ZERO(&fdset->fds); + return fdset; +} + +void +mrb_hal_io_fdset_free(mrb_state *mrb, mrb_io_fdset *fdset) +{ + if (fdset) { + mrb_free(mrb, fdset); + } +} + +void +mrb_hal_io_fdset_zero(mrb_state *mrb, mrb_io_fdset *fdset) +{ + (void)mrb; + if (fdset) { + FD_ZERO(&fdset->fds); + } +} + +void +mrb_hal_io_fdset_set(mrb_state *mrb, int fd, mrb_io_fdset *fdset) +{ + (void)mrb; + if (fdset->fds.fd_count >= FD_SETSIZE) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "too many sockets for fd_set"); + return; + } + + if (fdset) { + FD_SET(fd, &fdset->fds); + } +} + +int +mrb_hal_io_fdset_isset(mrb_state *mrb, int fd, mrb_io_fdset *fdset) +{ + (void)mrb; + if (fdset) { + return FD_ISSET(fd, &fdset->fds); + } + return 0; +} + +int +mrb_hal_io_select(mrb_state *mrb, int nfds, + mrb_io_fdset *readfds, + mrb_io_fdset *writefds, + mrb_io_fdset *errorfds, + mrb_io_timeval *timeout) +{ + fd_set *r = readfds ? &readfds->fds : NULL; + fd_set *w = writefds ? &writefds->fds : NULL; + fd_set *e = errorfds ? &errorfds->fds : NULL; + struct timeval *tv = NULL; + struct timeval tv_storage; + (void)mrb; + (void)nfds; /* Windows select() doesn't use nfds */ + + if (timeout) { + tv_storage.tv_sec = (long)timeout->tv_sec; + tv_storage.tv_usec = (long)timeout->tv_usec; + tv = &tv_storage; + } + + return select(0, r, w, e, tv); +} + +/* + * HAL Initialization/Finalization + */ + +void +mrb_hal_io_init(mrb_state *mrb) +{ + (void)mrb; + /* Initialize Winsock for select() support */ + WSADATA wsaData; + WSAStartup(MAKEWORD(2, 2), &wsaData); +} + +void +mrb_hal_io_final(mrb_state *mrb) +{ + (void)mrb; + /* Cleanup Winsock */ + WSACleanup(); +} diff --git a/mrbgems/mruby-io/src/file.c b/mrbgems/mruby-io/src/file.c index fe7861798f..d5f7816b34 100644 --- a/mrbgems/mruby-io/src/file.c +++ b/mrbgems/mruby-io/src/file.c @@ -2,13 +2,15 @@ ** file.c - File class */ -#include "mruby.h" -#include "mruby/class.h" -#include "mruby/data.h" -#include "mruby/string.h" -#include "mruby/ext/io.h" -#include "mruby/error.h" -#include "mruby/presym.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include "io_hal.h" #include #include @@ -18,7 +20,19 @@ #include #include #include -#if defined(_WIN32) || defined(_WIN64) + +/* Undefine system macros that conflict with mrb_io_stat field names */ +#ifdef st_atime +#undef st_atime +#endif +#ifdef st_mtime +#undef st_mtime +#endif +#ifdef st_ctime +#undef st_ctime +#endif + +#if defined(_WIN32) #include #include #define NULL_FILE "NUL" @@ -38,95 +52,101 @@ #define GETCWD getcwd #define CHMOD(a, b) chmod(a,b) #include +#ifndef __DJGPP__ #include +#endif #include #include #endif #define FILE_SEPARATOR "/" -#if defined(_WIN32) || defined(_WIN64) +#if defined(_WIN32) #define PATH_SEPARATOR ";" #define FILE_ALT_SEPARATOR "\\" #define VOLUME_SEPARATOR ":" + #define DIRSEP_P(ch) (((ch) == '/') | ((ch) == '\\')) + #define VOLSEP_P(ch) ((ch) == ':') + #define UNC_PATH_P(path) (DIRSEP_P((path)[0]) && DIRSEP_P((path)[1])) + #define DRIVE_LETTER_P(path) (((size_t)(((path)[0]) | 0x20) - 'a' <= (size_t)'z' - 'a') && (path)[1] == ':') + #define DRIVE_EQUAL_P(x, y) (((x)[0] | 0x20) == ((y)[0] | 0x20)) #else #define PATH_SEPARATOR ":" + #define DIRSEP_P(ch) ((ch) == '/') #endif +/* Use HAL lock constants */ #ifndef LOCK_SH -#define LOCK_SH 1 +#define LOCK_SH MRB_IO_LOCK_SH #endif #ifndef LOCK_EX -#define LOCK_EX 2 +#define LOCK_EX MRB_IO_LOCK_EX #endif #ifndef LOCK_NB -#define LOCK_NB 4 +#define LOCK_NB MRB_IO_LOCK_NB #endif #ifndef LOCK_UN -#define LOCK_UN 8 +#define LOCK_UN MRB_IO_LOCK_UN #endif #if !defined(_WIN32) || defined(MRB_MINGW32_LEGACY) -typedef struct stat mrb_stat; # define mrb_stat(path, sb) stat(path, sb) # define mrb_fstat(fd, sb) fstat(fd, sb) #elif defined MRB_INT32 -typedef struct _stat32 mrb_stat; # define mrb_stat(path, sb) _stat32(path, sb) # define mrb_fstat(fd, sb) _fstat32(fd, sb) #else -typedef struct _stat64 mrb_stat; # define mrb_stat(path, sb) _stat64(path, sb) # define mrb_fstat(fd, sb) _fstat64(fd, sb) #endif -#ifdef _WIN32 -static int -flock(int fd, int operation) { - OVERLAPPED ov; - HANDLE h = (HANDLE)_get_osfhandle(fd); - DWORD flags; - flags = ((operation & LOCK_NB) ? LOCKFILE_FAIL_IMMEDIATELY : 0) - | ((operation & LOCK_SH) ? LOCKFILE_EXCLUSIVE_LOCK : 0); - ov = (OVERLAPPED){0}; - return LockFileEx(h, flags, 0, 0xffffffff, 0xffffffff, &ov) ? 0 : -1; -} -#endif - +/* + * call-seq: + * File.umask([mask]) -> integer + * + * Returns the current umask value for this process. If the optional + * `mask` argument is given, set the umask to that value and return + * the previous value. + * + * File.umask(0006) #=> 18 + * File.umask #=> 6 + */ static mrb_value mrb_file_s_umask(mrb_state *mrb, mrb_value klass) { -#if defined(_WIN32) || defined(_WIN64) - /* nothing to do on windows */ - return mrb_fixnum_value(0); - -#else mrb_int mask, omask; + if (mrb_get_args(mrb, "|i", &mask) == 0) { - omask = umask(0); - umask(omask); - } else { - omask = umask(mask); + omask = mrb_hal_io_umask(mrb, -1); + } + else { + omask = mrb_hal_io_umask(mrb, mask); } return mrb_fixnum_value(omask); -#endif } +/* + * call-seq: + * File.delete(file_name, ...) -> integer + * File.unlink(file_name, ...) -> integer + * + * Deletes the named file(s). Returns the number of files deleted. + * + * File.delete("a.txt", "b.txt") #=> 2 + */ static mrb_value mrb_file_s_unlink(mrb_state *mrb, mrb_value obj) { const mrb_value *argv; - mrb_int argc, i; - char *path; + mrb_int argc; mrb_get_args(mrb, "*", &argv, &argc); - for (i = 0; i < argc; i++) { - const char *utf8_path; + for (int i = 0; i < argc; i++) { mrb_value pathv = argv[i]; mrb_ensure_string_type(mrb, pathv); - utf8_path = RSTRING_CSTR(mrb, pathv); - path = mrb_locale_from_utf8(utf8_path, -1); - if (UNLINK(path) < 0) { + const char *utf8_path = RSTRING_CSTR(mrb, pathv); + char *path = mrb_locale_from_utf8(utf8_path, -1); + if (mrb_hal_io_unlink(mrb, path) < 0) { mrb_locale_free(path); mrb_sys_fail(mrb, utf8_path); } @@ -135,18 +155,28 @@ mrb_file_s_unlink(mrb_state *mrb, mrb_value obj) return mrb_fixnum_value(argc); } +/* + * call-seq: + * File.rename(old_name, new_name) -> 0 + * + * Renames the given file to the new name. + * + * File.rename("a.txt", "b.txt") #=> 0 + */ static mrb_value mrb_file_s_rename(mrb_state *mrb, mrb_value obj) { mrb_value from, to; - char *src, *dst; mrb_get_args(mrb, "SS", &from, &to); - src = mrb_locale_from_utf8(RSTRING_CSTR(mrb, from), -1); - dst = mrb_locale_from_utf8(RSTRING_CSTR(mrb, to), -1); - if (rename(src, dst) < 0) { -#if defined(_WIN32) || defined(_WIN64) - if (CHMOD(dst, 0666) == 0 && UNLINK(dst) == 0 && rename(src, dst) == 0) { + char *src = mrb_locale_from_utf8(RSTRING_CSTR(mrb, from), -1); + char *dst = mrb_locale_from_utf8(RSTRING_CSTR(mrb, to), -1); + if (mrb_hal_io_rename(mrb, src, dst) < 0) { +#if defined(_WIN32) + /* Windows retry: try chmod+unlink+rename if initial rename fails */ + if (mrb_hal_io_chmod(mrb, dst, 0666) == 0 && + mrb_hal_io_unlink(mrb, dst) == 0 && + mrb_hal_io_rename(mrb, src, dst) == 0) { mrb_locale_free(src); mrb_locale_free(dst); return mrb_fixnum_value(0); @@ -162,106 +192,173 @@ mrb_file_s_rename(mrb_state *mrb, mrb_value obj) return mrb_fixnum_value(0); } +#define SKIP_DIRSEP(p) for (; DIRSEP_P(*(p)); (p)++) +#define NEXT_DIRSEP(p) for (; *(p) != '\0' && !DIRSEP_P(*(p)); (p)++) + +static const char* +scan_dirname(const char *path, mrb_int level) +{ + const char *p = path + strlen(path); + if (level < 1) return p; + for (; p > path && DIRSEP_P(p[-1]); p--) + ; + for (; level > 0; level--) { + for (; p > path && !DIRSEP_P(p[-1]); p--) + ; + for (; p > path && DIRSEP_P(p[-1]); p--) + ; + } + return p > path ? p : path; +} + +/* + * call-seq: + * File.dirname(file_name) -> string + * + * Returns the directory part of a file name. + * + * File.dirname("/usr/bin/ruby") #=> "/usr/bin" + */ static mrb_value mrb_file_dirname(mrb_state *mrb, mrb_value klass) { -#if defined(_WIN32) || defined(_WIN64) - char dname[_MAX_DIR], vname[_MAX_DRIVE]; - char buffer[_MAX_DRIVE + _MAX_DIR]; - const char *utf8_path; - char *path; - size_t ridx; - mrb_get_args(mrb, "z", &utf8_path); - path = mrb_locale_from_utf8(utf8_path, -1); - _splitpath(path, vname, dname, NULL, NULL); - snprintf(buffer, _MAX_DRIVE + _MAX_DIR, "%s%s", vname, dname); - mrb_locale_free(path); - ridx = strlen(buffer); - if (ridx == 0) { - strncpy(buffer, ".", 2); /* null terminated */ - } else if (ridx > 1) { - ridx--; - while (ridx > 0 && (buffer[ridx] == '/' || buffer[ridx] == '\\')) { - buffer[ridx] = '\0'; /* remove last char */ - ridx--; - } - } - return mrb_str_new_cstr(mrb, buffer); -#else - char *dname, *path; - mrb_value s; - mrb_get_args(mrb, "S", &s); - path = mrb_locale_from_utf8(mrb_str_to_cstr(mrb, s), -1); + const char *path; + mrb_int level = 1; + mrb_get_args(mrb, "z|i", &path, &level); - if ((dname = dirname(path)) == NULL) { - mrb_locale_free(path); - mrb_sys_fail(mrb, "dirname"); + if (level < 0) { + mrb_raisef(mrb, E_ARGUMENT_ERROR, "negative level: %i", level); + } + + const char *p = path; +#ifdef _WIN32 + if (UNC_PATH_P(p)) { + p += 2; + SKIP_DIRSEP(p); + path = p - 2; /* if consecutive, point to the trailing slash */ + NEXT_DIRSEP(p); + const char *o = p; + SKIP_DIRSEP(p); + if (*p == '\0') { + p = o; + } + else { + NEXT_DIRSEP(p); + p = scan_dirname(p, level); + } + return mrb_str_new(mrb, path, p - path); + } + else if (ISALPHA(p[0]) && p[1] == ':') { + p += 2; + const char *o = p; + SKIP_DIRSEP(p); + p = scan_dirname(p, level); + mrb_value s = mrb_str_new(mrb, path, p - path); + if (p == o) { + mrb_str_cat_lit(mrb, s, "."); + } + return s; } - mrb_locale_free(path); - return mrb_str_new_cstr(mrb, dname); #endif + SKIP_DIRSEP(p); + if (p > path) { + path = p - 1; /* if consecutive, point to the trailing slash */ + } + p = scan_dirname(p, level); + return (p == path) ? mrb_str_new_lit(mrb, ".") : mrb_str_new(mrb, path, p - path); } +/* + * call-seq: + * File.basename(file_name, [suffix]) -> string + * + * Returns the last component of the file name. + * + * File.basename("/usr/bin/ruby") #=> "ruby" + * File.basename("/usr/bin/ruby.exe", ".exe") #=> "ruby" + */ static mrb_value mrb_file_basename(mrb_state *mrb, mrb_value klass) { - // NOTE: Do not use mrb_locale_from_utf8 here -#if defined(_WIN32) || defined(_WIN64) - char bname[_MAX_DIR]; - char extname[_MAX_EXT]; - char *path; - size_t ridx; - char buffer[_MAX_DIR + _MAX_EXT]; - mrb_value s; - - mrb_get_args(mrb, "S", &s); - path = mrb_str_to_cstr(mrb, s); - ridx = strlen(path); - if (ridx > 0) { - ridx--; - while (ridx > 0 && (path[ridx] == '/' || path[ridx] == '\\')) { - path[ridx] = '\0'; - ridx--; - } - if (strncmp(path, "/", 2) == 0) { - return mrb_str_new_cstr(mrb, path); - } - } - _splitpath((const char*)path, NULL, NULL, bname, extname); - snprintf(buffer, _MAX_DIR + _MAX_EXT, "%s%s", bname, extname); - return mrb_str_new_cstr(mrb, buffer); -#else - char *bname, *path; - mrb_value s; - mrb_get_args(mrb, "S", &s); - path = mrb_str_to_cstr(mrb, s); - if ((bname = basename(path)) == NULL) { - mrb_sys_fail(mrb, "basename"); - } - if (strncmp(bname, "//", 3) == 0) bname[1] = '\0'; /* patch for Cygwin */ - return mrb_str_new_cstr(mrb, bname); -#endif + const char *path; + const char *suffix = NULL; + + mrb_get_args(mrb, "z|z", &path, &suffix); + + const char *endp = path + strlen(path); + if (path == endp) { + return mrb_str_new_lit(mrb, "."); + } + +#ifdef _WIN32 + if (UNC_PATH_P(path)) { + path += 2; + SKIP_DIRSEP(path); + NEXT_DIRSEP(path); // skip server name + SKIP_DIRSEP(path); + NEXT_DIRSEP(path); // skip share name + } + else if (DRIVE_LETTER_P(path)) { + path += 2; + if (path == endp) { + return mrb_str_new_lit(mrb, ""); + } + } +#endif // _WIN32 + + // Remove trailing slashes (except when path is only "/") + while (path < endp && DIRSEP_P(endp[-1])) { + endp--; + } + + // Find the last path separator + const char *base = endp; + while (path < base && !DIRSEP_P(base[-1])) { + base--; + } + + // If path is all slashes, return "/" + if (base == endp) { + return mrb_str_new_lit(mrb, "/"); + } + + // Suffix removal (CRuby compatible) + if (suffix && *suffix) { + mrb_int blen = endp - base; + mrb_int slen = strlen(suffix); + if (blen > slen && memcmp(endp - slen, suffix, slen) == 0) { + endp -= slen; + } + } + + return mrb_str_new(mrb, base, endp - base); } +/* + * call-seq: + * File.realpath(pathname, [dir_string]) -> string + * + * Returns the real (absolute) path of `pathname` in the actual + * filesystem. + * + * File.realpath("../../bin/ruby") #=> "/usr/bin/ruby" + */ static mrb_value mrb_file_realpath(mrb_state *mrb, mrb_value klass) { - mrb_value pathname, dir_string, s, result; - mrb_int argc; - char *cpath; + mrb_value pathname, dir_string; - argc = mrb_get_args(mrb, "S|S", &pathname, &dir_string); - if (argc == 2) { - s = mrb_str_dup(mrb, dir_string); - s = mrb_str_append(mrb, s, mrb_str_new_cstr(mrb, FILE_SEPARATOR)); + if (mrb_get_args(mrb, "S|S", &pathname, &dir_string) == 2) { + mrb_value s = mrb_str_dup(mrb, dir_string); + s = mrb_str_cat_cstr(mrb, s, FILE_SEPARATOR); s = mrb_str_append(mrb, s, pathname); pathname = s; } - cpath = mrb_locale_from_utf8(RSTRING_CSTR(mrb, pathname), -1); - result = mrb_str_new_capa(mrb, PATH_MAX); - if (realpath(cpath, RSTRING_PTR(result)) == NULL) { + char *cpath = mrb_locale_from_utf8(RSTRING_CSTR(mrb, pathname), -1); + mrb_value result = mrb_str_new_capa(mrb, PATH_MAX); + if (mrb_hal_io_realpath(mrb, cpath, RSTRING_PTR(result)) == NULL) { mrb_locale_free(cpath); - mrb_sys_fail(mrb, cpath); + mrb_sys_fail(mrb, RSTRING_CSTR(mrb, pathname)); return result; /* not reached */ } mrb_locale_free(cpath); @@ -269,139 +366,317 @@ mrb_file_realpath(mrb_state *mrb, mrb_value klass) return result; } -static mrb_value -mrb_file__getwd(mrb_state *mrb, mrb_value klass) +static const char* +path_getwd(mrb_state *mrb) { - mrb_value path; - char buf[MAXPATHLEN], *utf8; + char buf[MAXPATHLEN]; - if (GETCWD(buf, MAXPATHLEN) == NULL) { + if (mrb_hal_io_getcwd(mrb, buf, MAXPATHLEN) == NULL) { mrb_sys_fail(mrb, "getcwd(2)"); } - utf8 = mrb_utf8_from_locale(buf, -1); - path = mrb_str_new_cstr(mrb, utf8); + char *utf8 = mrb_utf8_from_locale(buf, -1); + mrb_value path = mrb_str_new_cstr(mrb, utf8); mrb_utf8_free(utf8); - return path; + return RSTRING_CSTR(mrb, path); } -#ifdef _WIN32 -#define IS_FILESEP(x) (x == (*(char*)(FILE_SEPARATOR)) || x == (*(char*)(FILE_ALT_SEPARATOR))) -#define IS_VOLSEP(x) (x == (*(char*)(VOLUME_SEPARATOR))) -#define IS_DEVICEID(x) (x == '.' || x == '?') -#define CHECK_UNCDEV_PATH (IS_FILESEP(path[0]) && IS_FILESEP(path[1])) - -static int -is_absolute_traditional_path(const char *path, size_t len) +static mrb_bool +path_absolute_p(const char *path) { - if (len < 3) return 0; - return (ISALPHA(path[0]) && IS_VOLSEP(path[1]) && IS_FILESEP(path[2])); +#ifdef _WIN32 + return UNC_PATH_P(path) || + (ISALPHA(path[0]) && VOLSEP_P(path[1]) && DIRSEP_P(path[2])); +#else + return DIRSEP_P(path[0]); +#endif } -static int -is_absolute_unc_path(const char *path, size_t len) { - if (len < 2) return 0; - return (CHECK_UNCDEV_PATH && !IS_DEVICEID(path[2])); -} +static void +path_parse(mrb_state *mrb, mrb_value ary, const char *path, int ai) +{ +#ifdef _WIN32 + if (DRIVE_LETTER_P(path)) { + mrb_ary_set(mrb, ary, 0, mrb_str_new(mrb, path, 2)); + path += 2; + if (DIRSEP_P(*path)) { + ARY_SET_LEN(mrb_ary_ptr(ary), 1); + } + mrb_gc_arena_restore(mrb, ai); + } + else if (UNC_PATH_P(path)) { + path += 2; + SKIP_DIRSEP(path); + const char *path0 = path; + NEXT_DIRSEP(path); + mrb_value prefix = mrb_str_new_lit(mrb, "//"); + mrb_str_cat(mrb, prefix, path0, path - path0); + ARY_SET_LEN(mrb_ary_ptr(ary), 0); + mrb_ary_push(mrb, ary, prefix); + mrb_gc_arena_restore(mrb, ai); + } + else +#endif /* _WIN32 */ + { + if (RARRAY_LEN(ary) == 0) { + mrb_ary_set(mrb, ary, 0, mrb_nil_value()); + } + else if (DIRSEP_P(*path)) { + ARY_SET_LEN(mrb_ary_ptr(ary), 1); + } + } -static int -is_absolute_device_path(const char *path, size_t len) { - if (len < 4) return 0; - return (CHECK_UNCDEV_PATH && IS_DEVICEID(path[2]) && IS_FILESEP(path[3])); + for (;;) { + SKIP_DIRSEP(path); + const char *path0 = path; + NEXT_DIRSEP(path); + ptrdiff_t len = path - path0; + if (len == 0) { + break; + } + else if (len == 1 && path0[0] == '.') { + /* do nothing */ + } + else if (len == 2 && path0[0] == '.' && path0[1] == '.') { + if (RARRAY_LEN(ary) >= 2) { + mrb_ary_pop(mrb, ary); + } + } + else { + mrb_ary_push(mrb, ary, mrb_str_new(mrb, path0, path - path0)); + mrb_gc_arena_restore(mrb, ai); + } + } } -static int -mrb_file_is_absolute_path(const char *path) +// This function decomposes path into an array based on basedir and workdir. +// The array consists of the root prefix at ary[0], zero or more directories, and finally file names. +// The root prefix is nil for non-Windows, or the drive name or UNC host name for Windows. +static mrb_value +path_split(mrb_state *mrb, const char *path, const char *basedir, const char *workdir) { - size_t len = strlen(path); - if (IS_FILESEP(path[0])) return 1; - if (len > 0) - return ( - is_absolute_traditional_path(path, len) || - is_absolute_unc_path(path, len) || - is_absolute_device_path(path, len) - ); - else - return 0; -} + mrb_value ary = mrb_ary_new(mrb); + int ai = mrb_gc_arena_save(mrb); -#undef IS_FILESEP -#undef IS_VOLSEP -#undef IS_DEVICEID -#undef CHECK_UNCDEV_PATH + if (workdir) { + path_parse(mrb, ary, workdir, ai); + } -#else -static int -mrb_file_is_absolute_path(const char *path) -{ - return (path[0] == *(char*)(FILE_SEPARATOR)); + if (basedir) { + path_parse(mrb, ary, basedir, ai); + } + + path_parse(mrb, ary, path, ai); + + return ary; } -#endif -static mrb_value -mrb_file__gethome(mrb_state *mrb, mrb_value klass) +static const char* +path_gethome(mrb_state *mrb, const char **pathp) { - mrb_int argc; - char *home; + mrb_assert(pathp && *pathp && **pathp == '~'); + + const char *home; mrb_value path; -#ifndef _WIN32 - mrb_value username; + const char *username = ++*pathp; + NEXT_DIRSEP(*pathp); + ptrdiff_t len = *pathp - username; - argc = mrb_get_args(mrb, "|S", &username); - if (argc == 0) { - home = getenv("HOME"); + if (len == 0) { + home = mrb_hal_io_gethome(mrb, NULL); if (home == NULL) { - return mrb_nil_value(); + mrb_raise(mrb, E_ARGUMENT_ERROR, "couldn't find HOME environment -- expanding '~'"); } - if (!mrb_file_is_absolute_path(home)) { + if (!path_absolute_p(home)) { mrb_raise(mrb, E_ARGUMENT_ERROR, "non-absolute home"); } - } else { - const char *cuser = RSTRING_CSTR(mrb, username); - struct passwd *pwd = getpwnam(cuser); - if (pwd == NULL) { - return mrb_nil_value(); + } + else { + const char *uname = RSTRING_CSTR(mrb, mrb_str_new(mrb, username, (mrb_int)len)); + home = mrb_hal_io_gethome(mrb, uname); + if (home == NULL) { + mrb_raisef(mrb, E_ARGUMENT_ERROR, "user %s doesn't exist", uname); } - home = pwd->pw_dir; - if (!mrb_file_is_absolute_path(home)) { - mrb_raisef(mrb, E_ARGUMENT_ERROR, "non-absolute home of ~%v", username); + if (!path_absolute_p(home)) { + mrb_raisef(mrb, E_ARGUMENT_ERROR, "non-absolute home of ~%s", uname); } } - home = mrb_locale_from_utf8(home, -1); - path = mrb_str_new_cstr(mrb, home); - mrb_locale_free(home); - return path; -#else /* _WIN32 */ - argc = mrb_get_argc(mrb); - if (argc == 0) { - home = getenv("USERPROFILE"); - if (home == NULL) { - return mrb_nil_value(); + char *home_utf8 = mrb_utf8_from_locale(home, -1); + path = mrb_str_new_cstr(mrb, home_utf8); + mrb_utf8_free(home_utf8); + + SKIP_DIRSEP(*pathp); + return RSTRING_CSTR(mrb, path); +} + +static mrb_value +path_expand(mrb_state *mrb, const char *path, const char *base, mrb_bool tilda) +{ + mrb_value ary; + + // split path components as array and normalization + if (tilda && path[0] == '~') { + base = path_gethome(mrb, &path); + ary = path_split(mrb, path, base, NULL); + } + else if (path_absolute_p(path)) { + ary = path_split(mrb, path, NULL, NULL); + } + else { + const char *wd = NULL; + if (tilda && base[0] == '~') { + wd = path_gethome(mrb, &base); } - if (!mrb_file_is_absolute_path(home)) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "non-absolute home"); +#ifndef _WIN32 + else if (!path_absolute_p(base)) { + wd = path_getwd(mrb); + } +#else + else if (DRIVE_LETTER_P(path)) { + if (DRIVE_LETTER_P(base) && DRIVE_EQUAL_P(path, base) && DIRSEP_P(base[2])) { + wd = NULL; + } + else { + wd = path_getwd(mrb); + if (UNC_PATH_P(base) || (DRIVE_LETTER_P(base) && !DRIVE_EQUAL_P(path, base))) { + base = NULL; + } + if (!DRIVE_EQUAL_P(path, wd)) { + wd = NULL; + } + } + } + else if (UNC_PATH_P(base)) { + wd = NULL; } - } else { - return mrb_nil_value(); + else { + wd = path_getwd(mrb); + } +#endif /* _WIN32 */ + ary = path_split(mrb, path, base, wd); } - home = mrb_locale_from_utf8(home, -1); - path = mrb_str_new_cstr(mrb, home); - mrb_locale_free(home); - return path; + + // join path components as string + mrb_value ret; + mrb_assert(RARRAY_LEN(ary) >= 1); +#ifndef _WIN32 + mrb_assert(mrb_nil_p(RARRAY_PTR(ary)[0])); + ret = mrb_str_new(mrb, NULL, 0); +#else + mrb_assert(mrb_string_p(RARRAY_PTR(ary)[0])); + ret = RARRAY_PTR(ary)[0]; #endif + if (RARRAY_LEN(ary) == 1) { +#ifdef _WIN32 + mrb_assert(mrb_string_p(ret)); + mrb_assert(RSTRING_LEN(ret) >= 2); // drive letter or UNC prefix + if (!DIRSEP_P(RSTRING_PTR(ret)[0])) +#endif + { + mrb_str_cat_lit(mrb, ret, "/"); + } + } + else { + for (int i = 1; i < RARRAY_LEN(ary); i++) { + mrb_str_cat_lit(mrb, ret, "/"); + mrb_assert(mrb_string_p(RARRAY_PTR(ary)[i])); + mrb_str_cat_str(mrb, ret, RARRAY_PTR(ary)[i]); + } + } + + return ret; +} + +static mrb_value +mrb_file_expand_path(mrb_state *mrb, mrb_value self) +{ + const char *path; + const char *default_dir = "."; + mrb_get_args(mrb, "z|z", &path, &default_dir); + return path_expand(mrb, path, default_dir, TRUE); +} + +/* + * call-seq: + * File.absolute_path(file_name, [dir_string]) -> string + * + * Converts a pathname to an absolute pathname. + * + * File.absolute_path("~oracle/bin/oracle") #=> "/home/oracle/bin/oracle" + */ +static mrb_value +mrb_file_absolute_path(mrb_state *mrb, mrb_value self) +{ + const char *path; + const char *default_dir = "."; + mrb_get_args(mrb, "z|z", &path, &default_dir); + return path_expand(mrb, path, default_dir, FALSE); +} + +/* + * call-seq: + * File.absolute_path?(file_name) -> true or false + * + * Returns `true` if the file name is an absolute path, `false` otherwise. + * + * File.absolute_path?("/usr/bin/ruby") #=> true + * File.absolute_path?("bin/ruby") #=> false + */ +static mrb_value +mrb_file_absolute_path_p(mrb_state *mrb, mrb_value klass) +{ + mrb_value path = mrb_get_arg1(mrb); + mrb_ensure_string_type(mrb, path); + return mrb_bool_value(path_absolute_p(RSTRING_CSTR(mrb, path))); +} + +static mrb_value +mrb_file_atime(mrb_state *mrb, mrb_value self) +{ + int fd = mrb_io_fileno(mrb, self); + mrb_io_stat st; + + mrb->c->ci->mid = 0; + if (mrb_hal_io_fstat(mrb, fd, &st) == -1) + mrb_sys_fail(mrb, "atime"); + return mrb_int_value(mrb, st.st_atime); +} + +static mrb_value +mrb_file_ctime(mrb_state *mrb, mrb_value self) +{ + int fd = mrb_io_fileno(mrb, self); + mrb_io_stat st; + + mrb->c->ci->mid = 0; + if (mrb_hal_io_fstat(mrb, fd, &st) == -1) + mrb_sys_fail(mrb, "ctime"); + return mrb_int_value(mrb, st.st_ctime); } static mrb_value mrb_file_mtime(mrb_state *mrb, mrb_value self) { int fd = mrb_io_fileno(mrb, self); - mrb_stat st; + mrb_io_stat st; - if (mrb_fstat(fd, &st) == -1) - return mrb_false_value(); - return mrb_int_value(mrb, (mrb_int)st.st_mtime); + mrb->c->ci->mid = 0; + if (mrb_hal_io_fstat(mrb, fd, &st) == -1) + mrb_sys_fail(mrb, "mtime"); + return mrb_int_value(mrb, st.st_mtime); } +/* + * call-seq: + * file.flock(locking_constant) -> 0 or false + * + * Locks or unlocks a file according to `locking_constant`. + * See `File::LOCK_*` for locking constants. + * + * f = File.new("testfile") + * f.flock(File::LOCK_EX) #=> 0 + * f.flock(File::LOCK_UN) #=> 0 + */ static mrb_value mrb_file_flock(mrb_state *mrb, mrb_value self) { @@ -409,12 +684,11 @@ mrb_file_flock(mrb_state *mrb, mrb_value self) mrb_raise(mrb, E_NOTIMP_ERROR, "flock is not supported on Illumos/Solaris/Windows"); #else mrb_int operation; - int fd; mrb_get_args(mrb, "i", &operation); - fd = mrb_io_fileno(mrb, self); + int fd = mrb_io_fileno(mrb, self); - while (flock(fd, (int)operation) == -1) { + while (mrb_hal_io_flock(mrb, fd, (int)operation) == -1) { switch (errno) { case EINTR: /* retry */ @@ -423,12 +697,12 @@ mrb_file_flock(mrb_state *mrb, mrb_value self) #if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN case EWOULDBLOCK: /* FreeBSD OpenBSD Linux */ #endif - if (operation & LOCK_NB) { + if (operation & MRB_IO_LOCK_NB) { return mrb_false_value(); } /* FALLTHRU - should not happen */ default: - mrb_sys_fail(mrb, "flock failed"); + mrb_sys_fail(mrb, "flock"); break; } } @@ -436,111 +710,104 @@ mrb_file_flock(mrb_state *mrb, mrb_value self) return mrb_fixnum_value(0); } +/* + * call-seq: + * file.size -> integer + * + * Returns the size of `file` in bytes. + * + * File.new("testfile").size #=> 66 + */ static mrb_value mrb_file_size(mrb_state *mrb, mrb_value self) { - mrb_stat st; - int fd; - - fd = mrb_io_fileno(mrb, self); - if (mrb_fstat(fd, &st) == -1) { - mrb_raise(mrb, E_RUNTIME_ERROR, "fstat failed"); - } - - if (st.st_size > MRB_INT_MAX) { -#ifdef MRB_NO_FLOAT - mrb_raise(mrb, E_RUNTIME_ERROR, "File#size too large for MRB_NO_FLOAT"); -#else - return mrb_float_value(mrb, (mrb_float)st.st_size); -#endif + mrb_io_stat st; + int fd = mrb_io_fileno(mrb, self); + if (mrb_hal_io_fstat(mrb, fd, &st) == -1) { + mrb_sys_fail(mrb, "fstat"); } - - return mrb_int_value(mrb, (mrb_int)st.st_size); + return mrb_int_value(mrb, st.st_size); } static int -mrb_ftruncate(int fd, mrb_int length) +mrb_ftruncate(mrb_state *mrb, int fd, mrb_int length) { -#ifndef _WIN32 - return ftruncate(fd, (off_t)length); -#else - HANDLE file; - __int64 cur; - - file = (HANDLE)_get_osfhandle(fd); - if (file == INVALID_HANDLE_VALUE) { - return -1; - } - - cur = _lseeki64(fd, 0, SEEK_CUR); - if (cur == -1) return -1; - - if (_lseeki64(fd, (__int64)length, SEEK_SET) == -1) return -1; - - if (!SetEndOfFile(file)) { - errno = EINVAL; /* TODO: GetLastError to errno */ - return -1; - } - - if (_lseeki64(fd, cur, SEEK_SET) == -1) return -1; - - return 0; -#endif /* _WIN32 */ + return mrb_hal_io_ftruncate(mrb, fd, length); } +/* + * call-seq: + * file.truncate(integer) -> 0 + * + * Truncates a file to a maximum of `integer` bytes. + * + * f = File.new("out", "w") + * f.write("1234567890") #=> 10 + * f.truncate(5) #=> 0 + * f.size #=> 5 + */ static mrb_value mrb_file_truncate(mrb_state *mrb, mrb_value self) { - int fd; - mrb_int length; mrb_value lenv = mrb_get_arg1(mrb); - - fd = mrb_io_fileno(mrb, self); - length = mrb_as_int(mrb, lenv); - if (mrb_ftruncate(fd, length) != 0) { - mrb_raise(mrb, E_IO_ERROR, "ftruncate failed"); + int fd = mrb_io_fileno(mrb, self); + mrb_int length = mrb_as_int(mrb, lenv); + if (mrb_ftruncate(mrb, fd, length) != 0) { + mrb_sys_fail(mrb, "ftruncate"); } return mrb_fixnum_value(0); } +/* + * call-seq: + * File.symlink(old_name, new_name) -> 0 + * + * Creates a symbolic link `new_name` for the file `old_name`. + * + * File.symlink("testfile", "link-to-test") #=> 0 + */ static mrb_value mrb_file_s_symlink(mrb_state *mrb, mrb_value klass) { -#if defined(_WIN32) || defined(_WIN64) - mrb_raise(mrb, E_NOTIMP_ERROR, "symlink is not supported on this platform"); -#else mrb_value from, to; - const char *src, *dst; - int ai = mrb_gc_arena_save(mrb); mrb_get_args(mrb, "SS", &from, &to); - src = mrb_locale_from_utf8(RSTRING_CSTR(mrb, from), -1); - dst = mrb_locale_from_utf8(RSTRING_CSTR(mrb, to), -1); - if (symlink(src, dst) == -1) { + char *src = mrb_locale_from_utf8(RSTRING_CSTR(mrb, from), -1); + char *dst = mrb_locale_from_utf8(RSTRING_CSTR(mrb, to), -1); + if (mrb_hal_io_symlink(mrb, src, dst) == -1) { mrb_locale_free(src); mrb_locale_free(dst); mrb_sys_fail(mrb, RSTRING_CSTR(mrb, mrb_format(mrb, "(%v, %v)", from, to))); } mrb_locale_free(src); mrb_locale_free(dst); - mrb_gc_arena_restore(mrb, ai); -#endif return mrb_fixnum_value(0); } +/* + * call-seq: + * File.chmod(mode_int, file_name, ...) -> integer + * + * Changes permission bits on the named file(s) to the bit pattern + * represented by `mode_int`. + * + * File.chmod(0644, "testfile", "out") #=> 2 + */ static mrb_value -mrb_file_s_chmod(mrb_state *mrb, mrb_value klass) { +mrb_file_s_chmod(mrb_state *mrb, mrb_value klass) +{ mrb_int mode; - mrb_int argc, i; + mrb_int argc; const mrb_value *filenames; int ai = mrb_gc_arena_save(mrb); mrb_get_args(mrb, "i*", &mode, &filenames, &argc); - for (i = 0; i < argc; i++) { + for (int i = 0; i < argc; i++) { + mrb_ensure_string_type(mrb, filenames[i]); const char *utf8_path = RSTRING_CSTR(mrb, filenames[i]); char *path = mrb_locale_from_utf8(utf8_path, -1); - if (CHMOD(path, mode) == -1) { + if (mrb_hal_io_chmod(mrb, path, mode) == -1) { mrb_locale_free(path); mrb_sys_fail(mrb, utf8_path); } @@ -551,77 +818,294 @@ mrb_file_s_chmod(mrb_state *mrb, mrb_value klass) { return mrb_fixnum_value(argc); } +/* + * call-seq: + * File.readlink(link_name) -> string + * + * Returns the name of the file referenced by the given link. + * + * File.symlink("testfile", "link-to-test") #=> 0 + * File.readlink("link-to-test") #=> "testfile" + */ +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + static mrb_value -mrb_file_s_readlink(mrb_state *mrb, mrb_value klass) { -#if defined(_WIN32) || defined(_WIN64) - mrb_raise(mrb, E_NOTIMP_ERROR, "readlink is not supported on this platform"); - return mrb_nil_value(); // unreachable -#else +mrb_file_s_readlink(mrb_state *mrb, mrb_value klass) +{ const char *path; - char *buf, *tmp; - size_t bufsize = 100; - ssize_t rc; - mrb_value ret; - int ai = mrb_gc_arena_save(mrb); mrb_get_args(mrb, "z", &path); - tmp = mrb_locale_from_utf8(path, -1); - buf = (char *)mrb_malloc(mrb, bufsize); - while ((rc = readlink(tmp, buf, bufsize)) == (ssize_t)bufsize && rc != -1) { - bufsize *= 2; - buf = (char *)mrb_realloc(mrb, buf, bufsize); - } + char *tmp = mrb_locale_from_utf8(path, -1); + /* Use mrb_temp_alloc for exception safety - GC will clean up on exception */ + char *buf = (char*)mrb_temp_alloc(mrb, PATH_MAX); + + mrb_int rc = mrb_hal_io_readlink(mrb, tmp, buf, PATH_MAX); mrb_locale_free(tmp); if (rc == -1) { - mrb_free(mrb, buf); mrb_sys_fail(mrb, path); } tmp = mrb_utf8_from_locale(buf, -1); - ret = mrb_str_new(mrb, tmp, rc); - mrb_locale_free(tmp); - mrb_free(mrb, buf); - mrb_gc_arena_restore(mrb, ai); + mrb_value ret = mrb_str_new(mrb, tmp, rc); + mrb_utf8_free(tmp); + return ret; -#endif } +/* + * call-seq: + * File.extname(path) -> string + * + * Returns the extension (the portion of file name in path starting from the + * last period). If path is a dotfile, or starts with a period, then the starting + * dot is not dealt with the start of the extension. + * + * File.extname("test.rb") #=> ".rb" + * File.extname("a/b/d/test.rb") #=> ".rb" + * File.extname("test") #=> "" + * File.extname(".profile") #=> "" + */ +static mrb_value +mrb_file_extname(mrb_state *mrb, mrb_value klass) +{ + char *path; + mrb_get_args(mrb, "z", &path); + + size_t len = strlen(path); + if (len == 0) { + return mrb_str_new_lit(mrb, ""); + } + + // Remove trailing slashes to find the actual filename + while (len > 1 && path[len - 1] == '/') { + len--; + } + + // Find the last path separator to get basename + ssize_t base_start = len - 1; + while (base_start >= 0 && path[base_start] != '/') { + base_start--; + } + base_start++; // move to first character after '/' + + // If the result is only slashes, no extension + if ((size_t)base_start == len) { + return mrb_str_new_lit(mrb, ""); + } + + // Look for the last '.' in the basename + ssize_t dot_pos = -1; + for (size_t i = base_start; i < len; i++) { + if (path[i] == '.') { + dot_pos = i; + } + } + + // No dot found, or dot is the first character (dotfile) + if (dot_pos == -1 || dot_pos == (ssize_t)base_start) { + return mrb_str_new_lit(mrb, ""); + } + + // Return extension from dot to end + return mrb_str_new(mrb, path + dot_pos, len - dot_pos); +} + +/* + * call-seq: + * File.path(path) -> string + * + * Returns the string representation of the path + * + * File.path("/dev/null") #=> "/dev/null" + * File.path(Pathname.new("/tmp")) #=> "/tmp" + */ +static mrb_value +mrb_file_path(mrb_state *mrb, mrb_value klass) +{ + mrb_value filename; + mrb_get_args(mrb, "S", &filename); + return filename; +} + +// Forward declaration for recursive join processing +static mrb_value mrb_file_join_process_args(mrb_state *mrb, const mrb_value *argv, mrb_int argc); + +static mrb_value +mrb_file_join_process_args(mrb_state *mrb, const mrb_value *argv, mrb_int argc) +{ + mrb_value result = mrb_ary_new_capa(mrb, argc); + + for (mrb_int i = 0; i < argc; i++) { + mrb_value arg = argv[i]; + + if (mrb_array_p(arg)) { + // Check for recursive arrays using mruby's built-in detection + if (MRB_RECURSIVE_UNARY_P(mrb, MRB_SYM(join), arg)) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "recursive array"); + } + + // Recursively process the array + mrb_value nested = mrb_file_join_process_args(mrb, RARRAY_PTR(arg), RARRAY_LEN(arg)); + + // Append nested results to our result + mrb_int nested_len = RARRAY_LEN(nested); + for (mrb_int k = 0; k < nested_len; k++) { + mrb_ary_push(mrb, result, RARRAY_PTR(nested)[k]); + } + } + else { + // Convert to string (raises TypeError if not convertible) + mrb_ensure_string_type(mrb, arg); + mrb_ary_push(mrb, result, arg); + } + } + + return result; +} + +/* + * call-seq: + * File.join(string, ...) -> string + * + * Returns a new string formed by joining the strings using the operating + * system's path separator (File::SEPARATOR). + * + * File.join("usr", "mail", "gumby") #=> "usr/mail/gumby" + * File.join("usr", "mail", "gumby") #=> "usr\\mail\\gumby" (on Windows) + */ +static mrb_value +mrb_file_join(mrb_state *mrb, mrb_value klass) +{ + const mrb_value *argv; + mrb_int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + + // Handle empty case + if (argc == 0) { + return mrb_str_new_lit(mrb, ""); + } + + // Process arguments and flatten arrays + mrb_value names = mrb_file_join_process_args(mrb, argv, argc); + + mrb_int names_len = RARRAY_LEN(names); + if (names_len == 0) { + return mrb_str_new_lit(mrb, ""); + } + + // Handle single element case + if (names_len == 1) { + return RARRAY_PTR(names)[0]; + } + + // Start building the result + mrb_value first = RARRAY_PTR(names)[0]; + mrb_value result = mrb_str_dup(mrb, first); + + // Remove trailing separator from first component + const char *sep = FILE_SEPARATOR; + mrb_int sep_len = strlen(sep); + if (RSTRING_LEN(result) > 0 && + RSTRING_LEN(result) >= sep_len && + memcmp(RSTRING_PTR(result) + RSTRING_LEN(result) - sep_len, sep, sep_len) == 0) { + mrb_str_resize(mrb, result, RSTRING_LEN(result) - sep_len); + } + + // Process middle components + for (mrb_int i = 1; i < names_len - 1; i++) { + mrb_value component = RARRAY_PTR(names)[i]; + const char *comp_str = RSTRING_PTR(component); + mrb_int comp_len = RSTRING_LEN(component); + + // Skip empty components + if (comp_len == 0) continue; + + // Remove leading separator + if (comp_len >= sep_len && memcmp(comp_str, sep, sep_len) == 0) { + comp_str += sep_len; + comp_len -= sep_len; + } + + // Remove trailing separator + if (comp_len >= sep_len && memcmp(comp_str + comp_len - sep_len, sep, sep_len) == 0) { + comp_len -= sep_len; + } + + // Add separator and component if not empty + if (comp_len > 0) { + mrb_str_cat_cstr(mrb, result, sep); + mrb_str_cat(mrb, result, comp_str, comp_len); + } + } + + // Process last component + if (names_len > 1) { + mrb_value last = RARRAY_PTR(names)[names_len - 1]; + const char *last_str = RSTRING_PTR(last); + mrb_int last_len = RSTRING_LEN(last); + + // Remove leading separator from last component + if (last_len >= sep_len && memcmp(last_str, sep, sep_len) == 0) { + last_str += sep_len; + last_len -= sep_len; + } + + // Add separator and last component + mrb_str_cat_cstr(mrb, result, sep); + mrb_str_cat(mrb, result, last_str, last_len); + } + + return result; +} + +/* ---------------------------*/ +static const mrb_mt_entry file_rom_entries[] = { + MRB_MT_ENTRY(mrb_file_flock, MRB_SYM(flock), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(mrb_file_atime, MRB_SYM(_atime), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_file_ctime, MRB_SYM(_ctime), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_file_mtime, MRB_SYM(_mtime), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_file_size, MRB_SYM(size), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_file_truncate, MRB_SYM(truncate), MRB_ARGS_REQ(1)), +}; + void mrb_init_file(mrb_state *mrb) { - struct RClass *io, *file, *cnst; - - io = mrb_class_get_id(mrb, MRB_SYM(IO)); - file = mrb_define_class(mrb, "File", io); - MRB_SET_INSTANCE_TT(file, MRB_TT_DATA); - mrb_define_class_method(mrb, file, "umask", mrb_file_s_umask, MRB_ARGS_OPT(1)); - mrb_define_class_method(mrb, file, "delete", mrb_file_s_unlink, MRB_ARGS_ANY()); - mrb_define_class_method(mrb, file, "unlink", mrb_file_s_unlink, MRB_ARGS_ANY()); - mrb_define_class_method(mrb, file, "rename", mrb_file_s_rename, MRB_ARGS_REQ(2)); - mrb_define_class_method(mrb, file, "symlink", mrb_file_s_symlink, MRB_ARGS_REQ(2)); - mrb_define_class_method(mrb, file, "chmod", mrb_file_s_chmod, MRB_ARGS_REQ(1) | MRB_ARGS_REST()); - mrb_define_class_method(mrb, file, "readlink", mrb_file_s_readlink, MRB_ARGS_REQ(1)); - - mrb_define_class_method(mrb, file, "dirname", mrb_file_dirname, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, file, "basename", mrb_file_basename, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, file, "realpath", mrb_file_realpath, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); - mrb_define_class_method(mrb, file, "_getwd", mrb_file__getwd, MRB_ARGS_NONE()); - mrb_define_class_method(mrb, file, "_gethome", mrb_file__gethome, MRB_ARGS_OPT(1)); - - mrb_define_method(mrb, file, "flock", mrb_file_flock, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, file, "_mtime", mrb_file_mtime, MRB_ARGS_NONE()); - mrb_define_method(mrb, file, "size", mrb_file_size, MRB_ARGS_NONE()); - mrb_define_method(mrb, file, "truncate", mrb_file_truncate, MRB_ARGS_REQ(1)); - - cnst = mrb_define_module_under_id(mrb, file, MRB_SYM(Constants)); + struct RClass *io = mrb_class_get_id(mrb, MRB_SYM(IO)); + struct RClass *file = mrb_define_class_id(mrb, MRB_SYM(File), io); + MRB_SET_INSTANCE_TT(file, MRB_TT_CDATA); + mrb_define_class_method_id(mrb, file, MRB_SYM(umask), mrb_file_s_umask, MRB_ARGS_OPT(1)); + mrb_define_class_method_id(mrb, file, MRB_SYM(delete), mrb_file_s_unlink, MRB_ARGS_ANY()); + mrb_define_class_method_id(mrb, file, MRB_SYM(unlink), mrb_file_s_unlink, MRB_ARGS_ANY()); + mrb_define_class_method_id(mrb, file, MRB_SYM(rename), mrb_file_s_rename, MRB_ARGS_REQ(2)); + mrb_define_class_method_id(mrb, file, MRB_SYM(symlink), mrb_file_s_symlink, MRB_ARGS_REQ(2)); + mrb_define_class_method_id(mrb, file, MRB_SYM(chmod), mrb_file_s_chmod, MRB_ARGS_REQ(1) | MRB_ARGS_REST()); + mrb_define_class_method_id(mrb, file, MRB_SYM(readlink), mrb_file_s_readlink, MRB_ARGS_REQ(1)); + + mrb_define_class_method_id(mrb, file, MRB_SYM(dirname), mrb_file_dirname, MRB_ARGS_ARG(1,1)); + mrb_define_class_method_id(mrb, file, MRB_SYM(basename), mrb_file_basename, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); + mrb_define_class_method_id(mrb, file, MRB_SYM(extname), mrb_file_extname, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, file, MRB_SYM(join), mrb_file_join, MRB_ARGS_ANY()); + mrb_define_class_method_id(mrb, file, MRB_SYM(path), mrb_file_path, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, file, MRB_SYM(realpath), mrb_file_realpath, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); + mrb_define_class_method_id(mrb, file, MRB_SYM(absolute_path), mrb_file_absolute_path, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); + mrb_define_class_method_id(mrb, file, MRB_SYM_Q(absolute_path), mrb_file_absolute_path_p, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, file, MRB_SYM(expand_path), mrb_file_expand_path, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); + + MRB_MT_INIT_ROM(mrb, file, file_rom_entries); + + struct RClass *cnst = mrb_define_module_under_id(mrb, file, MRB_SYM(Constants)); mrb_define_const_id(mrb, cnst, MRB_SYM(LOCK_SH), mrb_fixnum_value(LOCK_SH)); mrb_define_const_id(mrb, cnst, MRB_SYM(LOCK_EX), mrb_fixnum_value(LOCK_EX)); mrb_define_const_id(mrb, cnst, MRB_SYM(LOCK_UN), mrb_fixnum_value(LOCK_UN)); mrb_define_const_id(mrb, cnst, MRB_SYM(LOCK_NB), mrb_fixnum_value(LOCK_NB)); mrb_define_const_id(mrb, cnst, MRB_SYM(SEPARATOR), mrb_str_new_cstr(mrb, FILE_SEPARATOR)); mrb_define_const_id(mrb, cnst, MRB_SYM(PATH_SEPARATOR), mrb_str_new_cstr(mrb, PATH_SEPARATOR)); -#if defined(_WIN32) || defined(_WIN64) +#if defined(_WIN32) mrb_define_const_id(mrb, cnst, MRB_SYM(ALT_SEPARATOR), mrb_str_new_cstr(mrb, FILE_ALT_SEPARATOR)); #else mrb_define_const_id(mrb, cnst, MRB_SYM(ALT_SEPARATOR), mrb_nil_value()); diff --git a/mrbgems/mruby-io/src/file_test.c b/mrbgems/mruby-io/src/file_test.c index f1762369a9..e5c824b981 100644 --- a/mrbgems/mruby-io/src/file_test.c +++ b/mrbgems/mruby-io/src/file_test.c @@ -2,31 +2,18 @@ ** file_test.c - FileTest class */ -#include "mruby.h" -#include "mruby/class.h" -#include "mruby/data.h" -#include "mruby/string.h" -#include "mruby/ext/io.h" -#include "mruby/error.h" +#include +#include +#include +#include +#include +#include +#include +#include "io_hal.h" #include #include -#if defined(_WIN32) || defined(_WIN64) - #define LSTAT stat - #include -#else - #define LSTAT lstat - #include - #include - #include - #include - #include - #include -#endif - -#include - #include #include #include @@ -34,14 +21,14 @@ extern struct mrb_data_type mrb_io_type; static int -mrb_stat0(mrb_state *mrb, mrb_value obj, struct stat *st, int do_lstat) +mrb_stat0(mrb_state *mrb, mrb_value obj, mrb_io_stat *st, int do_lstat) { - if (mrb_obj_is_kind_of(mrb, obj, mrb_class_get(mrb, "IO"))) { + if (mrb_obj_is_kind_of(mrb, obj, mrb_class_get_id(mrb, MRB_SYM(IO)))) { struct mrb_io *fptr; - fptr = (struct mrb_io *)mrb_data_get_ptr(mrb, obj, &mrb_io_type); + fptr = (struct mrb_io*)mrb_data_get_ptr(mrb, obj, &mrb_io_type); if (fptr && fptr->fd >= 0) { - return fstat(fptr->fd, st); + return mrb_hal_io_fstat(mrb, fptr->fd, st); } mrb_raise(mrb, E_IO_ERROR, "closed stream"); @@ -51,9 +38,10 @@ mrb_stat0(mrb_state *mrb, mrb_value obj, struct stat *st, int do_lstat) char *path = mrb_locale_from_utf8(RSTRING_CSTR(mrb, obj), -1); int ret; if (do_lstat) { - ret = LSTAT(path, st); - } else { - ret = stat(path, st); + ret = mrb_hal_io_lstat(mrb, path, st); + } + else { + ret = mrb_hal_io_stat(mrb, path, st); } mrb_locale_free(path); return ret; @@ -61,30 +49,29 @@ mrb_stat0(mrb_state *mrb, mrb_value obj, struct stat *st, int do_lstat) } static int -mrb_stat(mrb_state *mrb, mrb_value obj, struct stat *st) +mrb_stat(mrb_state *mrb, mrb_value obj, mrb_io_stat *st) { return mrb_stat0(mrb, obj, st, 0); } -#ifdef S_ISLNK +#if defined(S_ISLNK) || defined(_S_ISLNK) || defined(S_IFLNK) || defined(_S_IFLNK) static int -mrb_lstat(mrb_state *mrb, mrb_value obj, struct stat *st) +mrb_lstat(mrb_state *mrb, mrb_value obj, mrb_io_stat *st) { return mrb_stat0(mrb, obj, st, 1); } #endif /* - * Document-method: directory? - * * call-seq: * File.directory?(file_name) -> true or false + * FileTest.directory?(file_name) -> true or false * - * Returns true if the named file is a directory, - * or a symlink that points at a directory, and false + * Returns `true` if the named file is a directory, or a symlink that points at a directory, and `false` * otherwise. * - * File.directory?(".") + * File.directory?(".") #=> true + * FileTest.directory?(".") #=> true */ static mrb_value @@ -94,7 +81,7 @@ mrb_filetest_s_directory_p(mrb_state *mrb, mrb_value klass) # define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) #endif - struct stat st; + mrb_io_stat st; mrb_value obj = mrb_get_arg1(mrb); if (mrb_stat(mrb, obj, &st) < 0) @@ -108,22 +95,27 @@ mrb_filetest_s_directory_p(mrb_state *mrb, mrb_value klass) /* * call-seq: * File.pipe?(file_name) -> true or false + * FileTest.pipe?(file_name) -> true or false + * + * Returns `true` if the named file is a pipe. * - * Returns true if the named file is a pipe. + * File.pipe?("/dev/stdin") #=> true + * FileTest.pipe?("/dev/stdin") #=> true */ static mrb_value mrb_filetest_s_pipe_p(mrb_state *mrb, mrb_value klass) { -#if defined(_WIN32) || defined(_WIN64) - mrb_raise(mrb, E_NOTIMP_ERROR, "pipe is not supported on this platform"); +#ifdef _WIN32 + /* Windows anonymous pipes are not Unix FIFOs */ + mrb_raise(mrb, E_NOTIMP_ERROR, "pipe? is not supported on Windows"); #else #ifdef S_IFIFO # ifndef S_ISFIFO # define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO) # endif - struct stat st; + mrb_io_stat st; mrb_value obj = mrb_get_arg1(mrb); if (mrb_stat(mrb, obj, &st) < 0) @@ -139,15 +131,20 @@ mrb_filetest_s_pipe_p(mrb_state *mrb, mrb_value klass) /* * call-seq: * File.symlink?(file_name) -> true or false + * FileTest.symlink?(file_name) -> true or false * - * Returns true if the named file is a symbolic link. + * Returns `true` if the named file is a symbolic link. + * + * File.symlink?("link-to-test") #=> true + * FileTest.symlink?("link-to-test") #=> true */ static mrb_value mrb_filetest_s_symlink_p(mrb_state *mrb, mrb_value klass) { -#if defined(_WIN32) || defined(_WIN64) - mrb_raise(mrb, E_NOTIMP_ERROR, "symlink is not supported on this platform"); +#ifdef _WIN32 + /* Symlinks not reliably supported on Windows */ + mrb_raise(mrb, E_NOTIMP_ERROR, "symlink? is not supported on Windows"); #else #ifndef S_ISLNK # ifdef _S_ISLNK @@ -164,31 +161,36 @@ mrb_filetest_s_symlink_p(mrb_state *mrb, mrb_value klass) #endif #ifdef S_ISLNK - struct stat st; + mrb_io_stat st; mrb_value obj = mrb_get_arg1(mrb); if (mrb_lstat(mrb, obj, &st) == -1) return mrb_false_value(); if (S_ISLNK(st.st_mode)) return mrb_true_value(); +#endif #endif return mrb_false_value(); -#endif } /* * call-seq: * File.socket?(file_name) -> true or false + * FileTest.socket?(file_name) -> true or false + * + * Returns `true` if the named file is a socket. * - * Returns true if the named file is a socket. + * File.socket?("/tmp/.X11-unix/X0") #=> true + * FileTest.socket?("/tmp/.X11-unix/X0") #=> true */ static mrb_value mrb_filetest_s_socket_p(mrb_state *mrb, mrb_value klass) { -#if defined(_WIN32) || defined(_WIN64) - mrb_raise(mrb, E_NOTIMP_ERROR, "socket is not supported on this platform"); +#ifdef _WIN32 + /* Unix domain sockets not supported on Windows */ + mrb_raise(mrb, E_NOTIMP_ERROR, "socket? is not supported on Windows"); #else #ifndef S_ISSOCK # ifdef _S_ISSOCK @@ -205,31 +207,38 @@ mrb_filetest_s_socket_p(mrb_state *mrb, mrb_value klass) #endif #ifdef S_ISSOCK - struct stat st; + mrb_io_stat st; mrb_value obj = mrb_get_arg1(mrb); if (mrb_stat(mrb, obj, &st) < 0) return mrb_false_value(); if (S_ISSOCK(st.st_mode)) return mrb_true_value(); +#endif #endif return mrb_false_value(); -#endif } /* * call-seq: * File.exist?(file_name) -> true or false * File.exists?(file_name) -> true or false + * FileTest.exist?(file_name) -> true or false + * FileTest.exists?(file_name) -> true or false + * + * Returns `true` if the named file exists. * - * Return true if the named file exists. + * File.exist?("config.h") #=> true + * File.exist?("no_such_file") #=> false + * FileTest.exist?("config.h") #=> true + * FileTest.exist?("no_such_file") #=> false */ static mrb_value mrb_filetest_s_exist_p(mrb_state *mrb, mrb_value klass) { - struct stat st; + mrb_io_stat st; mrb_value obj = mrb_get_arg1(mrb); if (mrb_stat(mrb, obj, &st) < 0) @@ -241,9 +250,12 @@ mrb_filetest_s_exist_p(mrb_state *mrb, mrb_value klass) /* * call-seq: * File.file?(file_name) -> true or false + * FileTest.file?(file_name) -> true or false * - * Returns true if the named file exists and is a - * regular file. + * Returns `true` if the named file exists and is a regular file. + * + * File.file?("testfile") #=> true + * FileTest.file?("testfile") #=> true */ static mrb_value @@ -253,7 +265,7 @@ mrb_filetest_s_file_p(mrb_state *mrb, mrb_value klass) # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) #endif - struct stat st; + mrb_io_stat st; mrb_value obj = mrb_get_arg1(mrb); if (mrb_stat(mrb, obj, &st) < 0) @@ -267,15 +279,18 @@ mrb_filetest_s_file_p(mrb_state *mrb, mrb_value klass) /* * call-seq: * File.zero?(file_name) -> true or false + * FileTest.zero?(file_name) -> true or false + * + * Returns `true` if the named file exists and has a zero size. * - * Returns true if the named file exists and has - * a zero size. + * File.zero?("testfile") #=> false + * FileTest.zero?("testfile") #=> false */ static mrb_value mrb_filetest_s_zero_p(mrb_state *mrb, mrb_value klass) { - struct stat st; + mrb_io_stat st; mrb_value obj = mrb_get_arg1(mrb); if (mrb_stat(mrb, obj, &st) < 0) @@ -289,16 +304,20 @@ mrb_filetest_s_zero_p(mrb_state *mrb, mrb_value klass) /* * call-seq: * File.size(file_name) -> integer + * FileTest.size(file_name) -> integer * - * Returns the size of file_name. + * Returns the size of `file_name`. * - * _file_name_ can be an IO object. + * `file_name` can be an IO object. + * + * File.size("testfile") #=> 66 + * FileTest.size("testfile") #=> 66 */ static mrb_value mrb_filetest_s_size(mrb_state *mrb, mrb_value klass) { - struct stat st; + mrb_io_stat st; mrb_value obj = mrb_get_arg1(mrb); if (mrb_stat(mrb, obj, &st) < 0) @@ -310,15 +329,19 @@ mrb_filetest_s_size(mrb_state *mrb, mrb_value klass) /* * call-seq: * File.size?(file_name) -> Integer or nil + * FileTest.size?(file_name) -> Integer or nil * - * Returns +nil+ if +file_name+ doesn't exist or has zero size, the size of the + * Returns `nil` if `file_name` doesn't exist or has zero size, the size of the * file otherwise. + * + * File.size?("testfile") #=> 66 + * FileTest.size?("testfile") #=> 66 */ static mrb_value mrb_filetest_s_size_p(mrb_state *mrb, mrb_value klass) { - struct stat st; + mrb_io_stat st; mrb_value obj = mrb_get_arg1(mrb); if (mrb_stat(mrb, obj, &st) < 0) @@ -334,16 +357,29 @@ mrb_init_file_test(mrb_state *mrb) { struct RClass *f; - f = mrb_define_class(mrb, "FileTest", mrb->object_class); - - mrb_define_class_method(mrb, f, "directory?", mrb_filetest_s_directory_p, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, f, "exist?", mrb_filetest_s_exist_p, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, f, "exists?", mrb_filetest_s_exist_p, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, f, "file?", mrb_filetest_s_file_p, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, f, "pipe?", mrb_filetest_s_pipe_p, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, f, "size", mrb_filetest_s_size, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, f, "size?", mrb_filetest_s_size_p, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, f, "socket?", mrb_filetest_s_socket_p, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, f, "symlink?", mrb_filetest_s_symlink_p, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, f, "zero?", mrb_filetest_s_zero_p, MRB_ARGS_REQ(1)); + f = mrb_define_module_id(mrb, MRB_SYM(FileTest)); + + mrb_define_class_method_id(mrb, f, MRB_SYM_Q(directory), mrb_filetest_s_directory_p, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, f, MRB_SYM_Q(exist), mrb_filetest_s_exist_p, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, f, MRB_SYM_Q(exists), mrb_filetest_s_exist_p, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, f, MRB_SYM_Q(file), mrb_filetest_s_file_p, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, f, MRB_SYM_Q(pipe), mrb_filetest_s_pipe_p, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, f, MRB_SYM(size), mrb_filetest_s_size, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, f, MRB_SYM_Q(size), mrb_filetest_s_size_p, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, f, MRB_SYM_Q(socket), mrb_filetest_s_socket_p, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, f, MRB_SYM_Q(symlink), mrb_filetest_s_symlink_p, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, f, MRB_SYM_Q(zero), mrb_filetest_s_zero_p, MRB_ARGS_REQ(1)); + + // Also register the same methods on File class + struct RClass *file = mrb_class_get_id(mrb, MRB_SYM(File)); + mrb_define_class_method_id(mrb, file, MRB_SYM_Q(directory), mrb_filetest_s_directory_p, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, file, MRB_SYM_Q(exist), mrb_filetest_s_exist_p, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, file, MRB_SYM_Q(exists), mrb_filetest_s_exist_p, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, file, MRB_SYM_Q(file), mrb_filetest_s_file_p, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, file, MRB_SYM_Q(pipe), mrb_filetest_s_pipe_p, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, file, MRB_SYM(size), mrb_filetest_s_size, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, file, MRB_SYM_Q(size), mrb_filetest_s_size_p, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, file, MRB_SYM_Q(socket), mrb_filetest_s_socket_p, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, file, MRB_SYM_Q(symlink), mrb_filetest_s_symlink_p, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, file, MRB_SYM_Q(zero), mrb_filetest_s_zero_p, MRB_ARGS_REQ(1)); } diff --git a/mrbgems/mruby-io/src/io.c b/mrbgems/mruby-io/src/io.c index 70d48a01e5..87d6f9a46b 100644 --- a/mrbgems/mruby-io/src/io.c +++ b/mrbgems/mruby-io/src/io.c @@ -2,22 +2,22 @@ ** io.c - IO class */ -#include "mruby.h" -#include "mruby/array.h" -#include "mruby/class.h" -#include "mruby/data.h" -#include "mruby/hash.h" -#include "mruby/string.h" -#include "mruby/variable.h" -#include "mruby/ext/io.h" -#include "mruby/error.h" -#include "mruby/internal.h" -#include "mruby/presym.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "io_hal.h" #include #include -#if defined(_WIN32) || defined(_WIN64) +#if defined(_WIN32) #include #include #include @@ -34,7 +34,7 @@ typedef long ftime_t; typedef long fsuseconds_t; typedef int fmode_t; - typedef int mrb_io_read_write_size; + typedef int fssize_t; #ifndef O_TMPFILE #define O_TMPFILE O_TEMPORARY @@ -46,9 +46,13 @@ #include typedef size_t fsize_t; typedef time_t ftime_t; +#ifdef __DJGPP__ + typedef long fsuseconds_t; +#else typedef suseconds_t fsuseconds_t; +#endif typedef mode_t fmode_t; - typedef ssize_t mrb_io_read_write_size; + typedef ssize_t fssize_t; #endif #ifdef _MSC_VER @@ -67,20 +71,18 @@ typedef mrb_int pid_t; #define OPEN_READABLE_P(f) ((mrb_bool)(OPEN_RDONLY_P(f) || OPEN_RDWR_P(f))) #define OPEN_WRITABLE_P(f) ((mrb_bool)(OPEN_WRONLY_P(f) || OPEN_RDWR_P(f))) -static void mrb_io_free(mrb_state *mrb, void *ptr); -struct mrb_data_type mrb_io_type = { "IO", mrb_io_free }; +static void io_free(mrb_state *mrb, void *ptr); +struct mrb_data_type mrb_io_type = { "IO", io_free }; -static int mrb_io_modestr_to_flags(mrb_state *mrb, const char *modestr); -static int mrb_io_mode_to_flags(mrb_state *mrb, mrb_value mode); +static int io_modestr_to_flags(mrb_state *mrb, const char *modestr); +static int io_mode_to_flags(mrb_state *mrb, mrb_value mode); static void fptr_finalize(mrb_state *mrb, struct mrb_io *fptr, int quiet); static struct mrb_io* -io_get_open_fptr(mrb_state *mrb, mrb_value self) +io_get_open_fptr(mrb_state *mrb, mrb_value io) { - struct mrb_io *fptr; - - fptr = (struct mrb_io *)mrb_data_get_ptr(mrb, self, &mrb_io_type); + struct mrb_io *fptr = (struct mrb_io*)mrb_data_get_ptr(mrb, io, &mrb_io_type); if (fptr == NULL) { mrb_raise(mrb, E_IO_ERROR, "uninitialized stream"); } @@ -90,29 +92,41 @@ io_get_open_fptr(mrb_state *mrb, mrb_value self) return fptr; } +#if !defined(MRB_NO_IO_POPEN) && defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE +# define MRB_NO_IO_POPEN 1 +#endif + +#ifndef MRB_NO_IO_POPEN static void io_set_process_status(mrb_state *mrb, pid_t pid, int status) { - struct RClass *c_process, *c_status; + struct RClass *c_status = NULL; mrb_value v; - c_status = NULL; if (mrb_class_defined_id(mrb, MRB_SYM(Process))) { - c_process = mrb_module_get_id(mrb, MRB_SYM(Process)); + struct RClass *c_process = mrb_module_get_id(mrb, MRB_SYM(Process)); if (mrb_const_defined(mrb, mrb_obj_value(c_process), MRB_SYM(Status))) { c_status = mrb_class_get_under_id(mrb, c_process, MRB_SYM(Status)); } } if (c_status != NULL) { - v = mrb_funcall_id(mrb, mrb_obj_value(c_status), MRB_SYM(new), 2, mrb_fixnum_value(pid), mrb_fixnum_value(status)); - } else { + v = mrb_funcall_argv2(mrb, mrb_obj_value(c_status), MRB_SYM(new), mrb_fixnum_value(pid), mrb_fixnum_value(status)); + } + else { v = mrb_fixnum_value(WEXITSTATUS(status)); } mrb_gv_set(mrb, mrb_intern_lit(mrb, "$?"), v); } +#endif + +static mrb_noreturn void +mode_error(mrb_state *mrb, const char *mode) +{ + mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal access mode %s", mode); +} static int -mrb_io_modestr_to_flags(mrb_state *mrb, const char *mode) +io_modestr_to_flags(mrb_state *mrb, const char *mode) { int flags; const char *m = mode; @@ -128,8 +142,7 @@ mrb_io_modestr_to_flags(mrb_state *mrb, const char *mode) flags = O_WRONLY | O_CREAT | O_APPEND; break; default: - mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal access mode %s", mode); - flags = 0; /* not reached */ + mode_error(mrb, mode); } while (*m) { @@ -139,13 +152,17 @@ mrb_io_modestr_to_flags(mrb_state *mrb, const char *mode) flags |= O_BINARY; #endif break; + case 'x': + if (mode[0] != 'w') mode_error(mrb, mode); + flags |= O_EXCL; + break; case '+': flags = (flags & ~OPEN_ACCESS_MODE_FLAGS) | O_RDWR; break; case ':': /* XXX: PASSTHROUGH*/ default: - mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal access mode %s", mode); + mode_error(mrb, mode); } } @@ -153,13 +170,13 @@ mrb_io_modestr_to_flags(mrb_state *mrb, const char *mode) } static int -mrb_io_mode_to_flags(mrb_state *mrb, mrb_value mode) +io_mode_to_flags(mrb_state *mrb, mrb_value mode) { if (mrb_nil_p(mode)) { - return mrb_io_modestr_to_flags(mrb, "r"); + return O_RDONLY; } else if (mrb_string_p(mode)) { - return mrb_io_modestr_to_flags(mrb, RSTRING_CSTR(mrb, mode)); + return io_modestr_to_flags(mrb, RSTRING_CSTR(mrb, mode)); } else { int flags = 0; @@ -222,14 +239,14 @@ mrb_io_mode_to_flags(mrb_state *mrb, mrb_value mode) } static void -mrb_fd_cloexec(mrb_state *mrb, int fd) +io_fd_cloexec(mrb_state *mrb, int fd) { #if defined(F_GETFD) && defined(F_SETFD) && defined(FD_CLOEXEC) - int flags, flags2; + int flags = fcntl(fd, F_GETFD); + int flags2; - flags = fcntl(fd, F_GETFD); - if (flags == -1) { - mrb_bug(mrb, "mrb_fd_cloexec: fcntl(%d, F_GETFD) failed: %d", fd, errno); + if (flags < 0) { + mrb_sys_fail(mrb, "cloexec GETFD"); } if (fd <= 2) { flags2 = flags & ~FD_CLOEXEC; /* Clear CLOEXEC for standard file descriptors: 0, 1, 2. */ @@ -238,82 +255,49 @@ mrb_fd_cloexec(mrb_state *mrb, int fd) flags2 = flags | FD_CLOEXEC; /* Set CLOEXEC for non-standard file descriptors: 3, 4, 5, ... */ } if (flags != flags2) { - if (fcntl(fd, F_SETFD, flags2) == -1) { - mrb_bug(mrb, "mrb_fd_cloexec: fcntl(%d, F_SETFD, %d) failed: %d", fd, flags2, errno); + if (fcntl(fd, F_SETFD, flags2) < 0) { + mrb_sys_fail(mrb, "cloexec SETFD"); } } #endif } -#if !defined(_WIN32) && !(defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) -static int -mrb_cloexec_pipe(mrb_state *mrb, int fildes[2]) -{ - int ret; - ret = pipe(fildes); - if (ret == -1) - return -1; - mrb_fd_cloexec(mrb, fildes[0]); - mrb_fd_cloexec(mrb, fildes[1]); - return ret; -} - -static int -mrb_pipe(mrb_state *mrb, int pipes[2]) -{ - int ret; - ret = mrb_cloexec_pipe(mrb, pipes); - if (ret == -1) { - if (errno == EMFILE || errno == ENFILE) { - mrb_garbage_collect(mrb); - ret = mrb_cloexec_pipe(mrb, pipes); - } - } - return ret; -} -static int -mrb_proc_exec(const char *pname) +static void +io_free(mrb_state *mrb, void *ptr) { - const char *s; - s = pname; - - while (*s == ' ' || *s == '\t' || *s == '\n') - s++; - - if (!*s) { - errno = ENOENT; - return -1; + struct mrb_io *io = (struct mrb_io*)ptr; + if (io != NULL) { + fptr_finalize(mrb, io, TRUE); + mrb_free(mrb, io); } - - execl("/bin/sh", "sh", "-c", pname, (char *)NULL); - return -1; } -#endif static void -mrb_io_free(mrb_state *mrb, void *ptr) +io_init_buf(mrb_state *mrb, struct mrb_io *fptr) { - struct mrb_io *io = (struct mrb_io *)ptr; - if (io != NULL) { - fptr_finalize(mrb, io, TRUE); - mrb_free(mrb, io); + if (fptr->readable) { + fptr->buf = (struct mrb_io_buf*)mrb_malloc(mrb, sizeof(struct mrb_io_buf)); + fptr->buf->start = 0; + fptr->buf->len = 0; } } static struct mrb_io * -mrb_io_alloc(mrb_state *mrb) +io_alloc(mrb_state *mrb) { - struct mrb_io *fptr; - - fptr = (struct mrb_io *)mrb_malloc(mrb, sizeof(struct mrb_io)); + struct mrb_io *fptr = (struct mrb_io*)mrb_malloc(mrb, sizeof(struct mrb_io)); fptr->fd = -1; fptr->fd2 = -1; fptr->pid = 0; + fptr->buf = 0; fptr->readable = 0; fptr->writable = 0; fptr->sync = 0; + fptr->eof = 0; fptr->is_socket = 0; + fptr->close_fd = 1; + fptr->close_fd2 = 1; return fptr; } @@ -321,9 +305,17 @@ mrb_io_alloc(mrb_state *mrb) #define NOFILE 64 #endif -#if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE -# define mrb_io_s_popen mrb_notimplement_m +#ifdef MRB_NO_IO_POPEN +# define io_s_popen mrb_notimplement_m #else +struct popen_params { + mrb_value klass; + const char *cmd; + int flags; + int doexec; + int opt_in, opt_out, opt_err; +}; + static int option_to_fd(mrb_state *mrb, mrb_value v) { @@ -331,7 +323,7 @@ option_to_fd(mrb_state *mrb, mrb_value v) if (mrb_nil_p(v)) return -1; switch (mrb_type(v)) { - case MRB_TT_DATA: /* IO */ + case MRB_TT_CDATA: /* IO */ return mrb_io_fileno(mrb, v); case MRB_TT_INTEGER: return (int)mrb_integer(v); @@ -342,10 +334,8 @@ option_to_fd(mrb_state *mrb, mrb_value v) return -1; /* never reached */ } -static mrb_value -mrb_io_s_popen_args(mrb_state *mrb, mrb_value klass, - const char **cmd, int *flags, int *doexec, - int *opt_in, int *opt_out, int *opt_err) +static void +parse_popen_args(mrb_state *mrb, struct popen_params *p) { mrb_value mode = mrb_nil_value(); struct { mrb_value opt_in, opt_out, opt_err; } kv; @@ -357,236 +347,114 @@ mrb_io_s_popen_args(mrb_state *mrb, mrb_value klass, NULL, }; - mrb_get_args(mrb, "zo:", cmd, &mode, &kw); + mrb_get_args(mrb, "zo:", &p->cmd, &mode, &kw); - *flags = mrb_io_mode_to_flags(mrb, mode); - *doexec = (strcmp("-", *cmd) != 0); - *opt_in = option_to_fd(mrb, kv.opt_in); - *opt_out = option_to_fd(mrb, kv.opt_out); - *opt_err = option_to_fd(mrb, kv.opt_err); - - return mrb_obj_value(mrb_data_object_alloc(mrb, mrb_class_ptr(klass), NULL, &mrb_io_type)); + p->flags = io_mode_to_flags(mrb, mode); + p->doexec = (strcmp("-", p->cmd) != 0); + p->opt_in = option_to_fd(mrb, kv.opt_in); + p->opt_out = option_to_fd(mrb, kv.opt_out); + p->opt_err = option_to_fd(mrb, kv.opt_err); } -#ifdef _WIN32 static mrb_value -mrb_io_s_popen(mrb_state *mrb, mrb_value klass) +io_s_popen(mrb_state *mrb, mrb_value klass) { - mrb_value io; - int doexec; - int opt_in, opt_out, opt_err; - const char *cmd; - - struct mrb_io *fptr; - int pid = 0, flags; - STARTUPINFO si; - PROCESS_INFORMATION pi; - SECURITY_ATTRIBUTES saAttr; - - HANDLE ifd[2]; - HANDLE ofd[2]; - - ifd[0] = INVALID_HANDLE_VALUE; - ifd[1] = INVALID_HANDLE_VALUE; - ofd[0] = INVALID_HANDLE_VALUE; - ofd[1] = INVALID_HANDLE_VALUE; + struct popen_params p; + p.klass = klass; + int pid = 0; + int pr[2] = { -1, -1 }; /* read pipe: parent reads, child writes */ + int pw[2] = { -1, -1 }; /* write pipe: parent writes, child reads */ + int readable, writable; + int stdin_fd = -1, stdout_fd = -1, stderr_fd = -1; mrb->c->ci->mid = 0; - io = mrb_io_s_popen_args(mrb, klass, &cmd, &flags, &doexec, - &opt_in, &opt_out, &opt_err); + parse_popen_args(mrb, &p); - saAttr.nLength = sizeof(SECURITY_ATTRIBUTES); - saAttr.bInheritHandle = TRUE; - saAttr.lpSecurityDescriptor = NULL; + readable = OPEN_READABLE_P(p.flags); + writable = OPEN_WRITABLE_P(p.flags); - if (OPEN_READABLE_P(flags)) { - if (!CreatePipe(&ofd[0], &ofd[1], &saAttr, 0) - || !SetHandleInformation(ofd[0], HANDLE_FLAG_INHERIT, 0)) { + /* Create pipes for communication */ + if (readable) { + if (mrb_hal_io_pipe(mrb, pr) == -1) { mrb_sys_fail(mrb, "pipe"); } } - if (OPEN_WRITABLE_P(flags)) { - if (!CreatePipe(&ifd[0], &ifd[1], &saAttr, 0) - || !SetHandleInformation(ifd[1], HANDLE_FLAG_INHERIT, 0)) { + if (writable) { + if (mrb_hal_io_pipe(mrb, pw) == -1) { + if (pr[0] != -1) { + mrb_hal_io_close(mrb, pr[0]); + mrb_hal_io_close(mrb, pr[1]); + } mrb_sys_fail(mrb, "pipe"); } } - if (doexec) { - ZeroMemory(&pi, sizeof(pi)); - ZeroMemory(&si, sizeof(si)); - si.cb = sizeof(si); - si.dwFlags |= STARTF_USESHOWWINDOW; - si.wShowWindow = SW_HIDE; - si.dwFlags |= STARTF_USESTDHANDLES; - if (OPEN_READABLE_P(flags)) { - si.hStdOutput = ofd[1]; - si.hStdError = ofd[1]; - } - if (OPEN_WRITABLE_P(flags)) { - si.hStdInput = ifd[0]; - } - if (!CreateProcess( - NULL, (char*)cmd, NULL, NULL, - TRUE, CREATE_NEW_PROCESS_GROUP, NULL, NULL, &si, &pi)) { - CloseHandle(ifd[0]); - CloseHandle(ifd[1]); - CloseHandle(ofd[0]); - CloseHandle(ofd[1]); - mrb_raisef(mrb, E_IO_ERROR, "command not found: %s", cmd); - } - CloseHandle(pi.hThread); - CloseHandle(ifd[0]); - CloseHandle(ofd[1]); - pid = pi.dwProcessId; - } - - mrb_iv_set(mrb, io, mrb_intern_lit(mrb, "@buf"), mrb_str_new_cstr(mrb, "")); - - fptr = mrb_io_alloc(mrb); - fptr->fd = _open_osfhandle((intptr_t)ofd[0], 0); - fptr->fd2 = _open_osfhandle((intptr_t)ifd[1], 0); - fptr->pid = pid; - fptr->readable = OPEN_READABLE_P(flags); - fptr->writable = OPEN_WRITABLE_P(flags); - fptr->sync = 0; - - DATA_TYPE(io) = &mrb_io_type; - DATA_PTR(io) = fptr; - return io; -} -#else -static mrb_value -mrb_io_s_popen(mrb_state *mrb, mrb_value klass) -{ - mrb_value io, result; - int doexec; - int opt_in, opt_out, opt_err; - const char *cmd; + /* Set up child process file descriptors */ + if (p.doexec) { + /* Child stdin: either write pipe read end or opt_in */ + stdin_fd = (p.opt_in != -1) ? p.opt_in : (writable ? pw[0] : -1); - struct mrb_io *fptr; - int pid, flags, fd, write_fd = -1; - int pr[2] = { -1, -1 }; - int pw[2] = { -1, -1 }; - int saved_errno; + /* Child stdout: either read pipe write end or opt_out */ + stdout_fd = (p.opt_out != -1) ? p.opt_out : (readable ? pr[1] : -1); - mrb->c->ci->mid = 0; - io = mrb_io_s_popen_args(mrb, klass, &cmd, &flags, &doexec, - &opt_in, &opt_out, &opt_err); + /* Child stderr: opt_err or stdout */ + stderr_fd = (p.opt_err != -1) ? p.opt_err : stdout_fd; - if (OPEN_READABLE_P(flags)) { - if (pipe(pr) == -1) { - mrb_sys_fail(mrb, "pipe"); + /* Spawn child process using HAL */ + if (mrb_hal_io_spawn_process(mrb, p.cmd, stdin_fd, stdout_fd, stderr_fd, &pid) == -1) { + int saved_errno = errno; + if (readable) { + mrb_hal_io_close(mrb, pr[0]); + mrb_hal_io_close(mrb, pr[1]); + } + if (writable) { + mrb_hal_io_close(mrb, pw[0]); + mrb_hal_io_close(mrb, pw[1]); + } + errno = saved_errno; + mrb_raisef(mrb, E_IO_ERROR, "command not found: %s", p.cmd); } - mrb_fd_cloexec(mrb, pr[0]); - mrb_fd_cloexec(mrb, pr[1]); - } - if (OPEN_WRITABLE_P(flags)) { - if (pipe(pw) == -1) { - if (pr[0] != -1) close(pr[0]); - if (pr[1] != -1) close(pr[1]); - mrb_sys_fail(mrb, "pipe"); + /* Close child ends of pipes in parent */ + if (readable) { + mrb_hal_io_close(mrb, pr[1]); /* close write end */ + } + if (writable) { + mrb_hal_io_close(mrb, pw[0]); /* close read end */ } - mrb_fd_cloexec(mrb, pw[0]); - mrb_fd_cloexec(mrb, pw[1]); - } - - if (!doexec) { - // XXX - fflush(stdin); - fflush(stdout); - fflush(stderr); } - result = mrb_nil_value(); - switch (pid = fork()) { - case 0: /* child */ - if (opt_in != -1) { - dup2(opt_in, 0); - } - if (opt_out != -1) { - dup2(opt_out, 1); - } - if (opt_err != -1) { - dup2(opt_err, 2); - } - if (OPEN_READABLE_P(flags)) { - close(pr[0]); - if (pr[1] != 1) { - dup2(pr[1], 1); - close(pr[1]); - } - } - if (OPEN_WRITABLE_P(flags)) { - close(pw[1]); - if (pw[0] != 0) { - dup2(pw[0], 0); - close(pw[0]); - } - } - if (doexec) { - for (fd = 3; fd < NOFILE; fd++) { - close(fd); - } - mrb_proc_exec(cmd); - mrb_raisef(mrb, E_IO_ERROR, "command not found: %s", cmd); - _exit(127); - } - result = mrb_nil_value(); - break; - - default: /* parent */ - if (OPEN_RDWR_P(flags)) { - close(pr[1]); - fd = pr[0]; - close(pw[0]); - write_fd = pw[1]; - } else if (OPEN_RDONLY_P(flags)) { - close(pr[1]); - fd = pr[0]; - } else { - close(pw[0]); - fd = pw[1]; - } - - mrb_iv_set(mrb, io, mrb_intern_lit(mrb, "@buf"), mrb_str_new_cstr(mrb, "")); + /* Set up parent IO object */ + mrb_value io = mrb_obj_value(mrb_data_object_alloc(mrb, mrb_class_ptr(klass), NULL, &mrb_io_type)); + struct mrb_io *fptr = io_alloc(mrb); - fptr = mrb_io_alloc(mrb); - fptr->fd = fd; - fptr->fd2 = write_fd; - fptr->pid = pid; - fptr->readable = OPEN_READABLE_P(flags); - fptr->writable = OPEN_WRITABLE_P(flags); - fptr->sync = 0; + if (readable && writable) { + fptr->fd = pr[0]; /* parent reads from here */ + fptr->fd2 = pw[1]; /* parent writes to here */ + } + else if (readable) { + fptr->fd = pr[0]; /* parent reads from here */ + fptr->fd2 = -1; + } + else { + fptr->fd = pw[1]; /* parent writes to here */ + fptr->fd2 = -1; + } - DATA_TYPE(io) = &mrb_io_type; - DATA_PTR(io) = fptr; - result = io; - break; + fptr->pid = pid; + fptr->readable = readable; + fptr->writable = writable; + io_init_buf(mrb, fptr); - case -1: /* error */ - saved_errno = errno; - if (OPEN_READABLE_P(flags)) { - close(pr[0]); - close(pr[1]); - } - if (OPEN_WRITABLE_P(flags)) { - close(pw[0]); - close(pw[1]); - } - errno = saved_errno; - mrb_sys_fail(mrb, "pipe_open failed"); - break; - } - return result; + DATA_TYPE(io) = &mrb_io_type; + DATA_PTR(io) = fptr; + return io; } -#endif /* _WIN32 */ -#endif /* TARGET_OS_IPHONE */ +#endif /* MRB_NO_IO_POPEN */ static int -mrb_dup(mrb_state *mrb, int fd, mrb_bool *failed) +symdup(mrb_state *mrb, int fd, mrb_bool *failed) { int new_fd; @@ -600,53 +468,56 @@ mrb_dup(mrb_state *mrb, int fd, mrb_bool *failed) } static mrb_value -mrb_io_initialize_copy(mrb_state *mrb, mrb_value copy) +io_init_copy(mrb_state *mrb, mrb_value copy) { mrb_value orig = mrb_get_arg1(mrb); - mrb_value buf; struct mrb_io *fptr_copy; struct mrb_io *fptr_orig; mrb_bool failed = TRUE; fptr_orig = io_get_open_fptr(mrb, orig); - fptr_copy = (struct mrb_io *)DATA_PTR(copy); + fptr_copy = (struct mrb_io*)DATA_PTR(copy); if (fptr_orig == fptr_copy) return copy; if (fptr_copy != NULL) { fptr_finalize(mrb, fptr_copy, FALSE); mrb_free(mrb, fptr_copy); } - fptr_copy = (struct mrb_io *)mrb_io_alloc(mrb); + fptr_copy = (struct mrb_io*)io_alloc(mrb); + fptr_copy->pid = fptr_orig->pid; + fptr_copy->readable = fptr_orig->readable; + fptr_copy->writable = fptr_orig->writable; + fptr_copy->sync = fptr_orig->sync; + fptr_copy->is_socket = fptr_orig->is_socket; + + io_init_buf(mrb, fptr_copy); DATA_TYPE(copy) = &mrb_io_type; DATA_PTR(copy) = fptr_copy; - buf = mrb_iv_get(mrb, orig, mrb_intern_lit(mrb, "@buf")); - mrb_iv_set(mrb, copy, mrb_intern_lit(mrb, "@buf"), buf); - - fptr_copy->fd = mrb_dup(mrb, fptr_orig->fd, &failed); + fptr_copy->fd = symdup(mrb, fptr_orig->fd, &failed); if (failed) { mrb_sys_fail(mrb, 0); } - mrb_fd_cloexec(mrb, fptr_copy->fd); + io_fd_cloexec(mrb, fptr_copy->fd); if (fptr_orig->fd2 != -1) { - fptr_copy->fd2 = mrb_dup(mrb, fptr_orig->fd2, &failed); + fptr_copy->fd2 = symdup(mrb, fptr_orig->fd2, &failed); if (failed) { close(fptr_copy->fd); mrb_sys_fail(mrb, 0); } - mrb_fd_cloexec(mrb, fptr_copy->fd2); + io_fd_cloexec(mrb, fptr_copy->fd2); } - fptr_copy->pid = fptr_orig->pid; - fptr_copy->readable = fptr_orig->readable; - fptr_copy->writable = fptr_orig->writable; - fptr_copy->sync = fptr_orig->sync; - fptr_copy->is_socket = fptr_orig->is_socket; - return copy; } +static mrb_noreturn void +badfd_error(mrb_state *mrb) +{ + mrb_sys_fail(mrb, "bad file descriptor"); +} + static void check_file_descriptor(mrb_state *mrb, mrb_int fd) { @@ -655,7 +526,8 @@ check_file_descriptor(mrb_state *mrb, mrb_int fd) #if MRB_INT_MIN < INT_MIN || MRB_INT_MAX > INT_MAX if (fdi != fd) { - goto badfd; + errno = EBADF; + badfd_error(mrb); } #endif @@ -670,31 +542,26 @@ check_file_descriptor(mrb_state *mrb, mrb_int fd) } if (fdi < 0 || fdi > _getmaxstdio()) { - goto badfd; + errno = EBADF; + badfd_error(mrb); } #endif /* _WIN32 */ - if (fstat(fdi, &sb) != 0) { - goto badfd; - } - - return; - -badfd: - mrb_sys_fail(mrb, "bad file descriptor"); + if (fstat(fdi, &sb) == 0) return; + if (errno == EBADF) badfd_error(mrb); } static mrb_value -mrb_io_initialize(mrb_state *mrb, mrb_value io) +io_init(mrb_state *mrb, mrb_value io) { - struct mrb_io *fptr; mrb_int fd; - mrb_value mode, opt; - int flags; - - mode = opt = mrb_nil_value(); + mrb_value mode = mrb_nil_value(); + mrb_value opt = mrb_nil_value(); - mrb_get_args(mrb, "i|oo", &fd, &mode, &opt); + if (mrb_block_given_p(mrb)) { + mrb_warn(mrb, "File.new() does not take block; use File.open() instead"); + } + mrb_get_args(mrb, "i|oH", &fd, &mode, &opt); switch (fd) { case 0: /* STDIN_FILENO */ case 1: /* STDOUT_FILENO */ @@ -704,23 +571,14 @@ mrb_io_initialize(mrb_state *mrb, mrb_value io) check_file_descriptor(mrb, fd); break; } - if (mrb_nil_p(mode)) { - mode = mrb_str_new_cstr(mrb, "r"); - } - if (mrb_nil_p(opt)) { - opt = mrb_hash_new(mrb); - } - - flags = mrb_io_mode_to_flags(mrb, mode); + int flags = io_mode_to_flags(mrb, mode); - mrb_iv_set(mrb, io, mrb_intern_lit(mrb, "@buf"), mrb_str_new_cstr(mrb, "")); - - fptr = (struct mrb_io *)DATA_PTR(io); + struct mrb_io *fptr = (struct mrb_io*)DATA_PTR(io); if (fptr != NULL) { fptr_finalize(mrb, fptr, TRUE); mrb_free(mrb, fptr); } - fptr = mrb_io_alloc(mrb); + fptr = io_alloc(mrb); DATA_TYPE(io) = &mrb_io_type; DATA_PTR(io) = fptr; @@ -728,7 +586,7 @@ mrb_io_initialize(mrb_state *mrb, mrb_value io) fptr->fd = (int)fd; fptr->readable = OPEN_READABLE_P(flags); fptr->writable = OPEN_WRITABLE_P(flags); - fptr->sync = 0; + io_init_buf(mrb, fptr); return io; } @@ -736,21 +594,22 @@ static void fptr_finalize(mrb_state *mrb, struct mrb_io *fptr, int quiet) { int saved_errno = 0; + int limit = quiet ? 3 : 0; if (fptr == NULL) { return; } - if (fptr->fd > 2) { + if (fptr->fd >= limit) { #ifdef _WIN32 if (fptr->is_socket) { - if (closesocket(fptr->fd) != 0) { + if (fptr->close_fd && closesocket(fptr->fd) != 0) { saved_errno = WSAGetLastError(); } fptr->fd = -1; } #endif - if (fptr->fd != -1) { + if (fptr->fd != -1 && fptr->close_fd) { if (close(fptr->fd) == -1) { saved_errno = errno; } @@ -758,8 +617,8 @@ fptr_finalize(mrb_state *mrb, struct mrb_io *fptr, int quiet) fptr->fd = -1; } - if (fptr->fd2 > 2) { - if (close(fptr->fd2) == -1) { + if (fptr->fd2 >= limit) { + if (fptr->close_fd2 && close(fptr->fd2) == -1) { if (saved_errno == 0) { saved_errno = errno; } @@ -767,8 +626,9 @@ fptr_finalize(mrb_state *mrb, struct mrb_io *fptr, int quiet) fptr->fd2 = -1; } +#ifndef MRB_NO_IO_POPEN if (fptr->pid != 0) { -#if !defined(_WIN32) && !defined(_WIN64) +#if !defined(_WIN32) pid_t pid; int status; do { @@ -788,6 +648,12 @@ fptr_finalize(mrb_state *mrb, struct mrb_io *fptr, int quiet) fptr->pid = 0; /* Note: we don't raise an exception when waitpid(3) fails */ } +#endif + + if (fptr->buf) { + mrb_free(mrb, fptr->buf); + fptr->buf = NULL; + } if (!quiet && saved_errno != 0) { errno = saved_errno; @@ -796,58 +662,60 @@ fptr_finalize(mrb_state *mrb, struct mrb_io *fptr, int quiet) } static struct mrb_io* -io_get_read_fptr(mrb_state *mrb, mrb_value self) +io_get_read_fptr(mrb_state *mrb, mrb_value io) { - struct mrb_io *fptr = io_get_open_fptr(mrb, self); + struct mrb_io *fptr = io_get_open_fptr(mrb, io); if (!fptr->readable) { mrb_raise(mrb, E_IO_ERROR, "not opened for reading"); } return fptr; } -static mrb_value -mrb_io_check_readable(mrb_state *mrb, mrb_value self) -{ - io_get_read_fptr(mrb, self); - return mrb_nil_value(); -} - static struct mrb_io* -io_get_write_fptr(mrb_state *mrb, mrb_value self) +io_get_write_fptr(mrb_state *mrb, mrb_value io) { - struct mrb_io *fptr = io_get_open_fptr(mrb, self); + struct mrb_io *fptr = io_get_open_fptr(mrb, io); if (!fptr->writable) { mrb_raise(mrb, E_IO_ERROR, "not opened for writing"); } return fptr; } -static mrb_value -mrb_io_isatty(mrb_state *mrb, mrb_value self) +static int +io_get_write_fd(struct mrb_io *fptr) { - struct mrb_io *fptr; + if (fptr->fd2 == -1) { + return fptr->fd; + } + else { + return fptr->fd2; + } +} - fptr = io_get_open_fptr(mrb, self); +static mrb_value +io_isatty(mrb_state *mrb, mrb_value io) +{ + struct mrb_io *fptr = io_get_open_fptr(mrb, io); if (isatty(fptr->fd) == 0) return mrb_false_value(); return mrb_true_value(); } static mrb_value -mrb_io_s_for_fd(mrb_state *mrb, mrb_value klass) +io_s_for_fd(mrb_state *mrb, mrb_value klass) { struct RClass *c = mrb_class_ptr(klass); enum mrb_vtype ttype = MRB_INSTANCE_TT(c); - mrb_value obj; /* copied from mrb_instance_alloc() */ if (ttype == 0) ttype = MRB_TT_OBJECT; - obj = mrb_obj_value((struct RObject*)mrb_obj_alloc(mrb, ttype, c)); - return mrb_io_initialize(mrb, obj); + + mrb_value obj = mrb_obj_value((struct RObject*)mrb_obj_alloc(mrb, ttype, c)); + return io_init(mrb, obj); } static mrb_value -mrb_io_s_sysclose(mrb_state *mrb, mrb_value klass) +io_s_sysclose(mrb_state *mrb, mrb_value klass) { mrb_int fd; mrb->c->ci->mid = 0; @@ -859,10 +727,11 @@ mrb_io_s_sysclose(mrb_state *mrb, mrb_value klass) } static int -mrb_cloexec_open(mrb_state *mrb, const char *pathname, mrb_int flags, mrb_int mode) +io_cloexec_open(mrb_state *mrb, const char *pathname, int flags, fmode_t mode) { - int fd, retry = FALSE; - char* fname = mrb_locale_from_utf8(pathname, -1); + int retry = FALSE; + char *fname = mrb_locale_from_utf8(pathname, -1); + int fd; #ifdef O_CLOEXEC /* O_CLOEXEC is available since Linux 2.6.23. Linux 2.6.18 silently ignore it. */ @@ -871,77 +740,56 @@ mrb_cloexec_open(mrb_state *mrb, const char *pathname, mrb_int flags, mrb_int mo flags |= O_NOINHERIT; #endif reopen: - fd = open(fname, (int)flags, (fmode_t)mode); + fd = open(fname, flags, mode); if (fd == -1) { if (!retry) { switch (errno) { - case ENFILE: - case EMFILE: + case ENFILE: + case EMFILE: mrb_garbage_collect(mrb); retry = TRUE; goto reopen; } } - mrb_sys_fail(mrb, RSTRING_CSTR(mrb, mrb_format(mrb, "open %s", pathname))); } mrb_locale_free(fname); if (fd <= 2) { - mrb_fd_cloexec(mrb, fd); + io_fd_cloexec(mrb, fd); } return fd; } static mrb_value -mrb_io_s_sysopen(mrb_state *mrb, mrb_value klass) +io_s_sysopen(mrb_state *mrb, mrb_value klass) { mrb_value path = mrb_nil_value(); mrb_value mode = mrb_nil_value(); - mrb_int fd, perm = -1; - const char *pat; - int flags; + mrb_int perm = -1; mrb_get_args(mrb, "S|oi", &path, &mode, &perm); if (perm < 0) { perm = 0666; } - pat = RSTRING_CSTR(mrb, path); - flags = mrb_io_mode_to_flags(mrb, mode); - fd = mrb_cloexec_open(mrb, pat, flags, perm); + const char *pat = RSTRING_CSTR(mrb, path); + int flags = io_mode_to_flags(mrb, mode); + mrb_int fd = io_cloexec_open(mrb, pat, flags, (fmode_t)perm); return mrb_fixnum_value(fd); } -static mrb_value mrb_io_sysread_common(mrb_state *mrb, - mrb_io_read_write_size (*readfunc)(int, void *, fsize_t, off_t), - mrb_value io, mrb_value buf, mrb_int maxlen, off_t offset); - -static mrb_io_read_write_size -mrb_sysread_dummy(int fd, void *buf, fsize_t nbytes, off_t offset) -{ - return (mrb_io_read_write_size)read(fd, buf, nbytes); -} - -static mrb_value -mrb_io_sysread(mrb_state *mrb, mrb_value io) +static void +eof_error(mrb_state *mrb) { - mrb_value buf = mrb_nil_value(); - mrb_int maxlen; - - mrb_get_args(mrb, "i|S", &maxlen, &buf); - - return mrb_io_sysread_common(mrb, mrb_sysread_dummy, io, buf, maxlen, 0); + mrb_raise(mrb, E_EOF_ERROR, "end of file reached"); } static mrb_value -mrb_io_sysread_common(mrb_state *mrb, - mrb_io_read_write_size (*readfunc)(int, void *, fsize_t, off_t), +io_read_common(mrb_state *mrb, + fssize_t (*readfunc)(int, void*, fsize_t, off_t), mrb_value io, mrb_value buf, mrb_int maxlen, off_t offset) { - struct mrb_io *fptr; - int ret; - if (maxlen < 0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "negative expanding string size"); } @@ -960,8 +808,8 @@ mrb_io_sysread_common(mrb_state *mrb, mrb_str_modify(mrb, RSTRING(buf)); } - fptr = io_get_read_fptr(mrb, io); - ret = readfunc(fptr->fd, RSTRING_PTR(buf), (fsize_t)maxlen, offset); + struct mrb_io *fptr = io_get_read_fptr(mrb, io); + int ret = readfunc(fptr->fd, RSTRING_PTR(buf), (fsize_t)maxlen, offset); if (ret < 0) { mrb_sys_fail(mrb, "sysread failed"); } @@ -969,16 +817,32 @@ mrb_io_sysread_common(mrb_state *mrb, buf = mrb_str_resize(mrb, buf, ret); } if (ret == 0 && maxlen > 0) { - mrb_raise(mrb, E_EOF_ERROR, "sysread failed: End of File"); + fptr->eof = 1; + eof_error(mrb); } return buf; } +static fssize_t +sysread(int fd, void *buf, fsize_t nbytes, off_t offset) +{ + return (fssize_t)read(fd, buf, nbytes); +} + static mrb_value -mrb_io_sysseek(mrb_state *mrb, mrb_value io) +io_sysread(mrb_state *mrb, mrb_value io) +{ + mrb_value buf = mrb_nil_value(); + mrb_int maxlen; + + mrb_get_args(mrb, "i|S", &maxlen, &buf); + + return io_read_common(mrb, sysread, io, buf, maxlen, 0); +} + +static mrb_value +io_sysseek(mrb_state *mrb, mrb_value io) { - struct mrb_io *fptr; - off_t pos; mrb_int offset, whence = -1; mrb_get_args(mrb, "i|i", &offset, &whence); @@ -986,169 +850,483 @@ mrb_io_sysseek(mrb_state *mrb, mrb_value io) whence = 0; } - fptr = io_get_open_fptr(mrb, io); - pos = lseek(fptr->fd, (off_t)offset, (int)whence); + struct mrb_io *fptr = io_get_open_fptr(mrb, io); + off_t pos = lseek(fptr->fd, (off_t)offset, (int)whence); if (pos == -1) { mrb_sys_fail(mrb, "sysseek"); } - if (pos > MRB_INT_MAX) { -#ifndef MRB_NO_FLOAT - return mrb_float_value(mrb, (mrb_float)pos); -#else - mrb_raise(mrb, E_IO_ERROR, "sysseek reached too far for MRB_NO_FLOAT"); -#endif - } else { - return mrb_int_value(mrb, pos); + fptr->eof = 0; + if (sizeof(off_t) > sizeof(mrb_int) && pos > (off_t)MRB_INT_MAX) { + mrb_raise(mrb, E_IO_ERROR, "sysseek reached too far for mrb_int"); } + return mrb_int_value(mrb, (mrb_int)pos); } static mrb_value -mrb_io_syswrite_common(mrb_state *mrb, - mrb_io_read_write_size (*writefunc)(int, const void *, fsize_t, off_t), - mrb_value io, mrb_value buf, off_t offset) +io_seek(mrb_state *mrb, mrb_value io) { - struct mrb_io *fptr; - int fd, length; - - fptr = io_get_write_fptr(mrb, io); - if (fptr->fd2 == -1) { - fd = fptr->fd; - } else { - fd = fptr->fd2; + mrb_value pos = io_sysseek(mrb, io); + struct mrb_io *fptr = io_get_open_fptr(mrb, io); + if (fptr->buf) { + fptr->buf->start = 0; + fptr->buf->len = 0; } - length = writefunc(fd, RSTRING_PTR(buf), (fsize_t)RSTRING_LEN(buf), offset); + return pos; +} + +static mrb_value +io_write_common(mrb_state *mrb, + fssize_t (*writefunc)(int, const void*, fsize_t, off_t), + struct mrb_io *fptr, const void *buf, mrb_ssize blen, off_t offset) +{ + int fd = io_get_write_fd(fptr); + fssize_t length = writefunc(fd, buf, (fsize_t)blen, offset); if (length == -1) { - mrb_sys_fail(mrb, 0); + mrb_sys_fail(mrb, "syswrite"); } - return mrb_int_value(mrb, (mrb_int)length); } -static mrb_io_read_write_size -mrb_syswrite_dummy(int fd, const void *buf, fsize_t nbytes, off_t offset) +static fssize_t +syswrite(int fd, const void *buf, fsize_t nbytes, off_t offset) { - return (mrb_io_read_write_size)write(fd, buf, nbytes); + return (fssize_t)write(fd, buf, nbytes); } static mrb_value -mrb_io_syswrite(mrb_state *mrb, mrb_value io) +io_syswrite(mrb_state *mrb, mrb_value io) { mrb_value buf; mrb_get_args(mrb, "S", &buf); - return mrb_io_syswrite_common(mrb, mrb_syswrite_dummy, io, buf, 0); + return io_write_common(mrb, syswrite, io_get_write_fptr(mrb, io), RSTRING_PTR(buf), RSTRING_LEN(buf), 0); } -static mrb_value -mrb_io_close(mrb_state *mrb, mrb_value self) + /* def write(string) */ + /* str = string.is_a?(String) ? string : string.to_s */ + /* return 0 if str.empty? */ + /* len = syswrite(str) */ + /* len */ + /* end */ + +static mrb_int +fd_write_buf(mrb_state *mrb, int fd, const char *ptr, mrb_int len) { - struct mrb_io *fptr; - fptr = io_get_open_fptr(mrb, self); - fptr_finalize(mrb, fptr, FALSE); - return mrb_nil_value(); + if (len == 0) return 0; + fssize_t sum = 0; + while (sum < (fssize_t)len) { + fssize_t n = write(fd, ptr + sum, (size_t)(len - sum)); + if (n == -1) { + if (errno == EINTR) continue; + mrb_sys_fail(mrb, "syswrite"); + } + sum += n; + } + return len; } -static mrb_value -mrb_io_close_write(mrb_state *mrb, mrb_value self) +static mrb_int +fd_write(mrb_state *mrb, int fd, mrb_value str) { - struct mrb_io *fptr; - fptr = io_get_open_fptr(mrb, self); - if (close((int)fptr->fd2) == -1) { - mrb_sys_fail(mrb, "close"); - } - return mrb_nil_value(); + str = mrb_obj_as_string(mrb, str); + return fd_write_buf(mrb, fd, RSTRING_PTR(str), RSTRING_LEN(str)); } -static mrb_value -mrb_io_closed(mrb_state *mrb, mrb_value io) +#define FD_WRITE_LIT(mrb, fd, s) fd_write_buf(mrb, fd, "" s "", sizeof(s) - 1) + +/* Helper function to prepare IO object for writing by adjusting buffer state */ +static void +io_prepare_write(mrb_state *mrb, struct mrb_io *fptr) { - struct mrb_io *fptr; - fptr = (struct mrb_io *)mrb_data_get_ptr(mrb, io, &mrb_io_type); - if (fptr == NULL || fptr->fd >= 0) { - return mrb_false_value(); + if (fptr->buf && fptr->buf->len > 0) { + int fd = io_get_write_fd(fptr); + off_t n; + + /* get current position */ + n = lseek(fd, 0, SEEK_CUR); + if (n == -1) mrb_sys_fail(mrb, "lseek"); + /* move cursor */ + n = lseek(fd, n - fptr->buf->len, SEEK_SET); + if (n == -1) mrb_sys_fail(mrb, "lseek(2)"); + fptr->buf->start = fptr->buf->len = 0; } - - return mrb_true_value(); } static mrb_value -mrb_io_pid(mrb_state *mrb, mrb_value io) +io_write(mrb_state *mrb, mrb_value io) { - struct mrb_io *fptr; - fptr = io_get_open_fptr(mrb, io); + struct mrb_io *fptr = io_get_write_fptr(mrb, io); + int fd = io_get_write_fd(fptr); - if (fptr->pid > 0) { - return mrb_fixnum_value(fptr->pid); + io_prepare_write(mrb, fptr); + + mrb_int len = 0; + if (mrb_get_argc(mrb) == 1) { + len = fd_write(mrb, fd, mrb_get_arg1(mrb)); } + else { + mrb_value *argv; + mrb_int argc; - return mrb_nil_value(); + mrb_get_args(mrb, "*", &argv, &argc); + while (argc--) { + len += fd_write(mrb, fd, *argv++); + } + } + return mrb_int_value(mrb, len); } -static struct timeval -time2timeval(mrb_state *mrb, mrb_value time) +/* Helper function to write a string followed by newline if needed */ +static void +io_puts_str(mrb_state *mrb, int fd, mrb_value str) { - struct timeval t = { 0, 0 }; + str = mrb_obj_as_string(mrb, str); + const char *ptr = RSTRING_PTR(str); + mrb_int len = RSTRING_LEN(str); - switch (mrb_type(time)) { - case MRB_TT_INTEGER: - t.tv_sec = (ftime_t)mrb_integer(time); - t.tv_usec = 0; - break; + /* Write the original string */ + fd_write(mrb, fd, str); -#ifndef MRB_NO_FLOAT - case MRB_TT_FLOAT: - t.tv_sec = (ftime_t)mrb_float(time); - t.tv_usec = (fsuseconds_t)((mrb_float(time) - t.tv_sec) * 1000000.0); - break; -#endif + /* Add newline if string doesn't end with one */ + if (len == 0 || ptr[len-1] != '\n') { + FD_WRITE_LIT(mrb, fd, "\n"); + } +} - default: - mrb_raise(mrb, E_TYPE_ERROR, "wrong argument class"); +/* Maximum nesting depth for puts with arrays; guards against cyclic and + pathologically deep arrays causing C stack overflow. */ +#define IO_PUTS_MAX_DEPTH 16 + +/* Recursive helper for puts with arrays */ +static void +io_puts_ary(mrb_state *mrb, int fd, mrb_value ary, int depth) +{ + if (depth >= IO_PUTS_MAX_DEPTH) { + FD_WRITE_LIT(mrb, fd, "[...]\n"); + return; } - return t; + mrb_int len = RARRAY_LEN(ary); + + if (len == 0) { + /* Empty array - write a single newline */ + FD_WRITE_LIT(mrb, fd, "\n"); + return; + } + + for (mrb_int i = 0; i < len; i++) { + mrb_value elem = RARRAY_PTR(ary)[i]; + if (mrb_array_p(elem)) { + io_puts_ary(mrb, fd, elem, depth + 1); + } + else { + io_puts_str(mrb, fd, elem); + } + } } -static int -mrb_io_read_data_pending(mrb_state *mrb, mrb_value io) +static mrb_value +io_puts(mrb_state *mrb, mrb_value io) { - mrb_value buf = mrb_iv_get(mrb, io, mrb_intern_lit(mrb, "@buf")); - if (mrb_string_p(buf) && RSTRING_LEN(buf) > 0) { - return 1; + struct mrb_io *fptr = io_get_write_fptr(mrb, io); + int fd = io_get_write_fd(fptr); + + /* Prepare IO for writing (handle read buffer adjustment) */ + io_prepare_write(mrb, fptr); + + mrb_value *argv; + mrb_int argc; + mrb_get_args(mrb, "*", &argv, &argc); + + if (argc == 0) { + /* No arguments - just write a newline */ + FD_WRITE_LIT(mrb, fd, "\n"); + return mrb_nil_value(); } - return 0; + + /* Process each argument */ + for (mrb_int i = 0; i < argc; i++) { + mrb_value arg = argv[i]; + if (mrb_array_p(arg)) { + io_puts_ary(mrb, fd, arg, 0); + } + else { + io_puts_str(mrb, fd, arg); + } + } + + return mrb_nil_value(); +} + +/* + * call-seq: + * ios.print() -> nil + * ios.print(obj, ...) -> nil + * + * Writes the given object(s) to ios. Objects that aren't strings will be + * converted by calling their to_s method. + */ +static mrb_value +io_print(mrb_state *mrb, mrb_value io) +{ + struct mrb_io *fptr = io_get_write_fptr(mrb, io); + int fd = io_get_write_fd(fptr); + + /* Prepare IO for writing (handle read buffer adjustment) */ + io_prepare_write(mrb, fptr); + + mrb_value *argv; + mrb_int argc; + mrb_get_args(mrb, "*", &argv, &argc); + + /* Convert each argument to string and write it */ + for (mrb_int i = 0; i < argc; i++) { + mrb_value str = mrb_obj_as_string(mrb, argv[i]); + fd_write(mrb, fd, str); + } + + return mrb_nil_value(); +} + +/* + * call-seq: + * ios.putc(obj) -> obj + * + * If obj is Integer, write the byte (mod 256). + * If obj is String, write the first character. + * Returns obj. + */ +static mrb_value +io_putc(mrb_state *mrb, mrb_value io) +{ + struct mrb_io *fptr = io_get_write_fptr(mrb, io); + int fd = io_get_write_fd(fptr); + mrb_value c = mrb_get_arg1(mrb); + const char *ptr; + mrb_int write_len; + + io_prepare_write(mrb, fptr); + + if (mrb_integer_p(c)) { + unsigned char byte = (unsigned char)(mrb_integer(c) & 0xff); + ssize_t n; + do { + n = write(fd, &byte, 1); + } while (n == -1 && errno == EINTR); + if (n == -1) mrb_sys_fail(mrb, "write"); + return c; + } + + mrb_value str; + if (mrb_string_p(c)) { + str = c; + } + else { + str = mrb_obj_as_string(mrb, c); + } + + ptr = RSTRING_PTR(str); + mrb_int len = RSTRING_LEN(str); + + if (len == 0) return c; + +#ifdef MRB_UTF8_STRING + write_len = mrb_utf8len(ptr, ptr + len); +#else + write_len = 1; /* Non-UTF8: write single byte */ +#endif + + /* Write the character bytes */ + while (write_len > 0) { + ssize_t n = write(fd, ptr, write_len); + if (n == -1) { + if (errno == EINTR) continue; + mrb_sys_fail(mrb, "write"); + } + ptr += n; + write_len -= n; + } + + return c; +} + +/* + * call-seq: + * ios << obj -> ios + * + * String Output - Writes obj to ios. obj will be converted to a string using + * to_s. + */ +static mrb_value +io_lshift(mrb_state *mrb, mrb_value io) +{ + struct mrb_io *fptr = io_get_write_fptr(mrb, io); + int fd = io_get_write_fd(fptr); + + /* Prepare IO for writing (handle read buffer adjustment) */ + io_prepare_write(mrb, fptr); + + mrb_value str = mrb_get_arg1(mrb); + str = mrb_obj_as_string(mrb, str); + fd_write(mrb, fd, str); + + return io; } +static mrb_value +io_close(mrb_state *mrb, mrb_value io) +{ + struct mrb_io *fptr; + fptr = io_get_open_fptr(mrb, io); + fptr_finalize(mrb, fptr, FALSE); + return mrb_nil_value(); +} + +/* + * call-seq: + * ios.close_write -> nil + * + * Closes the write end of a duplex I/O stream (i.e., a pipe). + * It will raise an `IOError` if the stream is not duplex. + * + * r, w = IO.pipe + * w.close_write + * r.read #=> "" + */ +static mrb_value +io_close_write(mrb_state *mrb, mrb_value io) +{ + struct mrb_io *fptr = io_get_open_fptr(mrb, io); + if (close((int)fptr->fd2) == -1) { + mrb_sys_fail(mrb, "close"); + } + return mrb_nil_value(); +} + +/* + * call-seq: + * ios.closed? -> true or false + * + * Returns `true` if the stream is closed, `false` otherwise. + * + * f = File.new("testfile") + * f.close #=> nil + * f.closed? #=> true + */ +static mrb_value +io_closed(mrb_state *mrb, mrb_value io) +{ + struct mrb_io *fptr = (struct mrb_io*)mrb_data_get_ptr(mrb, io, &mrb_io_type); + if (fptr == NULL || fptr->fd >= 0) { + return mrb_false_value(); + } + + return mrb_true_value(); +} + +static mrb_value +io_pos(mrb_state *mrb, mrb_value io) +{ + struct mrb_io *fptr = io_get_open_fptr(mrb, io); + off_t pos = lseek(fptr->fd, 0, SEEK_CUR); + if (pos == -1) mrb_sys_fail(mrb, 0); + + if (fptr->buf) { + return mrb_int_value(mrb, pos - fptr->buf->len); + } + else { + return mrb_int_value(mrb, pos); + } +} + +/* + * call-seq: + * ios.pid -> integer or nil + * + * Returns the process ID of a child process on a pipe, or `nil` if the + * stream is not a pipe. + * + * r, w = IO.pipe + * fork do + * r.close + * w.write "hello" + * w.close + * end + * w.close + * p r.pid #=> 2056 + * r.read #=> "hello" + * r.close + */ +static mrb_value +io_pid(mrb_state *mrb, mrb_value io) +{ + struct mrb_io *fptr = io_get_open_fptr(mrb, io); + + if (fptr->pid > 0) { + return mrb_fixnum_value(fptr->pid); + } + + return mrb_nil_value(); +} + +static mrb_io_timeval +time2timeval(mrb_state *mrb, mrb_value time) +{ + mrb_io_timeval t = { 0, 0 }; + + switch (mrb_type(time)) { + case MRB_TT_INTEGER: + t.tv_sec = mrb_integer(time); + t.tv_usec = 0; + break; + +#ifndef MRB_NO_FLOAT + case MRB_TT_FLOAT: + t.tv_sec = (mrb_int)mrb_float(time); + t.tv_usec = (mrb_int)((mrb_float(time) - t.tv_sec) * 1000000.0); + break; +#endif + + default: + mrb_raise(mrb, E_TYPE_ERROR, "wrong argument class"); + } + + return t; +} + +/* + * call-seq: + * IO.new(fd, mode="r") -> io + * + * Returns a new `IO` object for the given integer file descriptor `fd` and + * `mode` string. + * + * f = IO.new(1, "w") # STDOUT + * f.puts "hello" + */ + #if !defined(_WIN32) && !(defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) static mrb_value -mrb_io_s_pipe(mrb_state *mrb, mrb_value klass) +io_s_pipe(mrb_state *mrb, mrb_value klass) { - mrb_value r = mrb_nil_value(); - mrb_value w = mrb_nil_value(); - struct mrb_io *fptr_r; - struct mrb_io *fptr_w; int pipes[2]; - if (mrb_pipe(mrb, pipes) == -1) { + if (mrb_hal_io_pipe(mrb, pipes) == -1) { mrb_sys_fail(mrb, "pipe"); } - r = mrb_obj_value(mrb_data_object_alloc(mrb, mrb_class_ptr(klass), NULL, &mrb_io_type)); - mrb_iv_set(mrb, r, mrb_intern_lit(mrb, "@buf"), mrb_str_new_cstr(mrb, "")); - fptr_r = mrb_io_alloc(mrb); + mrb_value r = mrb_obj_value(mrb_data_object_alloc(mrb, mrb_class_ptr(klass), NULL, &mrb_io_type)); + struct mrb_io *fptr_r = io_alloc(mrb); fptr_r->fd = pipes[0]; fptr_r->readable = 1; - fptr_r->writable = 0; - fptr_r->sync = 0; DATA_TYPE(r) = &mrb_io_type; DATA_PTR(r) = fptr_r; + io_init_buf(mrb, fptr_r); - w = mrb_obj_value(mrb_data_object_alloc(mrb, mrb_class_ptr(klass), NULL, &mrb_io_type)); - mrb_iv_set(mrb, w, mrb_intern_lit(mrb, "@buf"), mrb_str_new_cstr(mrb, "")); - fptr_w = mrb_io_alloc(mrb); + mrb_value w = mrb_obj_value(mrb_data_object_alloc(mrb, mrb_class_ptr(klass), NULL, &mrb_io_type)); + struct mrb_io *fptr_w = io_alloc(mrb); fptr_w->fd = pipes[1]; - fptr_w->readable = 0; fptr_w->writable = 1; fptr_w->sync = 1; DATA_TYPE(w) = &mrb_io_type; @@ -1158,21 +1336,42 @@ mrb_io_s_pipe(mrb_state *mrb, mrb_value klass) } #endif +static int +mrb_io_read_data_pending(mrb_state *mrb, struct mrb_io *fptr) +{ + if (fptr->buf && fptr->buf->len > 0) return 1; + return 0; +} + +/* + * call-seq: + * IO.select(read_array, write_array=nil, error_array=nil, timeout=nil) -> array or nil + * + * Performs a `select(2)` system call on the given arrays of `IO` objects. + * + * For each array, it can contain `IO` objects or `nil`. + * + * The `timeout` argument is a number of seconds. + * + * It returns a three-element array containing the `IO` objects that are + * ready for reading, writing, or have an error, respectively. + * + * If the `timeout` is reached, it returns `nil`. + * + * r, w = IO.pipe + * IO.select([r], [w]) #=> [[#], [#], []] + */ static mrb_value -mrb_io_s_select(mrb_state *mrb, mrb_value klass) +io_s_select(mrb_state *mrb, mrb_value klass) { const mrb_value *argv; mrb_int argc; - mrb_value read, read_io, write, except, timeout, list; - struct timeval *tp, timerec; - fd_set pset, rset, wset, eset; - fd_set *rp, *wp, *ep; + mrb_value read_io, list; struct mrb_io *fptr; int pending = 0; mrb_value result; int max = 0; int interrupt_flag = 0; - int i, n; mrb_get_args(mrb, "*", &argv, &argc); @@ -1180,37 +1379,44 @@ mrb_io_s_select(mrb_state *mrb, mrb_value klass) mrb_argnum_error(mrb, argc, 1, 4); } - timeout = mrb_nil_value(); - except = mrb_nil_value(); - write = mrb_nil_value(); + mrb_value timeout = mrb_nil_value(); + mrb_value except = mrb_nil_value(); + mrb_value write = mrb_nil_value(); if (argc > 3) timeout = argv[3]; if (argc > 2) except = argv[2]; if (argc > 1) write = argv[1]; - read = argv[0]; + mrb_value read = argv[0]; + mrb_io_timeval *tp, timerec; if (mrb_nil_p(timeout)) { tp = NULL; - } else { + } + else { timerec = time2timeval(mrb, timeout); tp = &timerec; } - FD_ZERO(&pset); + mrb_io_fdset *pset = mrb_hal_io_fdset_alloc(mrb); + mrb_io_fdset *rset = NULL; + mrb_io_fdset *rp = NULL; + mrb_hal_io_fdset_zero(mrb, pset); if (!mrb_nil_p(read)) { mrb_check_type(mrb, read, MRB_TT_ARRAY); - rp = &rset; - FD_ZERO(rp); - for (i = 0; i < RARRAY_LEN(read); i++) { - read_io = RARRAY_PTR(read)[i]; + rset = mrb_hal_io_fdset_alloc(mrb); + rp = rset; + mrb_hal_io_fdset_zero(mrb, rp); + /* Hoist pointer retrieval outside loop */ + mrb_value *read_ptr = RARRAY_PTR(read); + for (int i = 0; i < RARRAY_LEN(read); i++) { + read_io = read_ptr[i]; fptr = io_get_open_fptr(mrb, read_io); - if (fptr->fd >= FD_SETSIZE) continue; - FD_SET(fptr->fd, rp); - if (mrb_io_read_data_pending(mrb, read_io)) { + mrb_hal_io_fdset_set(mrb, fptr->fd, rp); + if (mrb_io_read_data_pending(mrb, fptr)) { pending++; - FD_SET(fptr->fd, &pset); + mrb_hal_io_fdset_set(mrb, fptr->fd, pset); } if (max < fptr->fd) max = fptr->fd; @@ -1219,134 +1425,179 @@ mrb_io_s_select(mrb_state *mrb, mrb_value klass) timerec.tv_sec = timerec.tv_usec = 0; tp = &timerec; } - } else { - rp = NULL; } + mrb_io_fdset *wset = NULL; + mrb_io_fdset *wp = NULL; if (!mrb_nil_p(write)) { mrb_check_type(mrb, write, MRB_TT_ARRAY); - wp = &wset; - FD_ZERO(wp); - for (i = 0; i < RARRAY_LEN(write); i++) { - fptr = io_get_open_fptr(mrb, RARRAY_PTR(write)[i]); - if (fptr->fd >= FD_SETSIZE) continue; - FD_SET(fptr->fd, wp); + wset = mrb_hal_io_fdset_alloc(mrb); + wp = wset; + mrb_hal_io_fdset_zero(mrb, wp); + /* Hoist pointer retrieval outside loop */ + mrb_value *write_ptr = RARRAY_PTR(write); + for (int i = 0; i < RARRAY_LEN(write); i++) { + fptr = io_get_open_fptr(mrb, write_ptr[i]); + mrb_hal_io_fdset_set(mrb, fptr->fd, wp); if (max < fptr->fd) max = fptr->fd; if (fptr->fd2 >= 0) { - FD_SET(fptr->fd2, wp); + mrb_hal_io_fdset_set(mrb, fptr->fd2, wp); if (max < fptr->fd2) max = fptr->fd2; } } - } else { - wp = NULL; } + mrb_io_fdset *eset = NULL; + mrb_io_fdset *ep = NULL; if (!mrb_nil_p(except)) { mrb_check_type(mrb, except, MRB_TT_ARRAY); - ep = &eset; - FD_ZERO(ep); - for (i = 0; i < RARRAY_LEN(except); i++) { - fptr = io_get_open_fptr(mrb, RARRAY_PTR(except)[i]); - if (fptr->fd >= FD_SETSIZE) continue; - FD_SET(fptr->fd, ep); + eset = mrb_hal_io_fdset_alloc(mrb); + ep = eset; + mrb_hal_io_fdset_zero(mrb, ep); + /* Hoist pointer retrieval outside loop */ + mrb_value *except_ptr = RARRAY_PTR(except); + for (int i = 0; i < RARRAY_LEN(except); i++) { + fptr = io_get_open_fptr(mrb, except_ptr[i]); + mrb_hal_io_fdset_set(mrb, fptr->fd, ep); if (max < fptr->fd) max = fptr->fd; if (fptr->fd2 >= 0) { - FD_SET(fptr->fd2, ep); + mrb_hal_io_fdset_set(mrb, fptr->fd2, ep); if (max < fptr->fd2) max = fptr->fd2; } } - } else { - ep = NULL; } max++; + int n; retry: - n = select(max, rp, wp, ep, tp); + n = mrb_hal_io_select(mrb, max, rp, wp, ep, tp); if (n < 0) { - if (errno != EINTR) + if (errno != EINTR) { + mrb_hal_io_fdset_free(mrb, pset); + mrb_hal_io_fdset_free(mrb, rset); + mrb_hal_io_fdset_free(mrb, wset); + mrb_hal_io_fdset_free(mrb, eset); mrb_sys_fail(mrb, "select failed"); + } if (tp == NULL) goto retry; interrupt_flag = 1; } - if (!pending && n == 0) + if (!pending && n == 0) { + mrb_hal_io_fdset_free(mrb, pset); + mrb_hal_io_fdset_free(mrb, rset); + mrb_hal_io_fdset_free(mrb, wset); + mrb_hal_io_fdset_free(mrb, eset); return mrb_nil_value(); + } result = mrb_ary_new_capa(mrb, 3); - mrb_ary_push(mrb, result, rp? mrb_ary_new(mrb) : mrb_ary_new_capa(mrb, 0)); - mrb_ary_push(mrb, result, wp? mrb_ary_new(mrb) : mrb_ary_new_capa(mrb, 0)); - mrb_ary_push(mrb, result, ep? mrb_ary_new(mrb) : mrb_ary_new_capa(mrb, 0)); + mrb_ary_push(mrb, result, rp ? mrb_ary_new(mrb) : mrb_ary_new_capa(mrb, 0)); + mrb_ary_push(mrb, result, wp ? mrb_ary_new(mrb) : mrb_ary_new_capa(mrb, 0)); + mrb_ary_push(mrb, result, ep ? mrb_ary_new(mrb) : mrb_ary_new_capa(mrb, 0)); if (interrupt_flag == 0) { if (rp) { list = RARRAY_PTR(result)[0]; - for (i = 0; i < RARRAY_LEN(read); i++) { - fptr = io_get_open_fptr(mrb, RARRAY_PTR(read)[i]); - if (FD_ISSET(fptr->fd, rp) || - FD_ISSET(fptr->fd, &pset)) { - mrb_ary_push(mrb, list, RARRAY_PTR(read)[i]); + /* Hoist pointer retrieval outside loop */ + mrb_value *read_ptr = RARRAY_PTR(read); + for (int i = 0; i < RARRAY_LEN(read); i++) { + mrb_value io = read_ptr[i]; + fptr = io_get_open_fptr(mrb, io); + if (mrb_hal_io_fdset_isset(mrb, fptr->fd, rp) || + mrb_hal_io_fdset_isset(mrb, fptr->fd, pset)) { + mrb_ary_push(mrb, list, io); } } } if (wp) { list = RARRAY_PTR(result)[1]; - for (i = 0; i < RARRAY_LEN(write); i++) { - fptr = io_get_open_fptr(mrb, RARRAY_PTR(write)[i]); - if (FD_ISSET(fptr->fd, wp)) { - mrb_ary_push(mrb, list, RARRAY_PTR(write)[i]); - } else if (fptr->fd2 >= 0 && FD_ISSET(fptr->fd2, wp)) { - mrb_ary_push(mrb, list, RARRAY_PTR(write)[i]); + /* Hoist pointer retrieval outside loop */ + mrb_value *write_ptr = RARRAY_PTR(write); + for (int i = 0; i < RARRAY_LEN(write); i++) { + mrb_value io = write_ptr[i]; + fptr = io_get_open_fptr(mrb, io); + if (mrb_hal_io_fdset_isset(mrb, fptr->fd, wp)) { + mrb_ary_push(mrb, list, io); + } + else if (fptr->fd2 >= 0 && mrb_hal_io_fdset_isset(mrb, fptr->fd2, wp)) { + mrb_ary_push(mrb, list, io); } } } if (ep) { list = RARRAY_PTR(result)[2]; - for (i = 0; i < RARRAY_LEN(except); i++) { - fptr = io_get_open_fptr(mrb, RARRAY_PTR(except)[i]); - if (FD_ISSET(fptr->fd, ep)) { - mrb_ary_push(mrb, list, RARRAY_PTR(except)[i]); - } else if (fptr->fd2 >= 0 && FD_ISSET(fptr->fd2, ep)) { - mrb_ary_push(mrb, list, RARRAY_PTR(except)[i]); + /* Hoist pointer retrieval outside loop */ + mrb_value *except_ptr = RARRAY_PTR(except); + for (int i = 0; i < RARRAY_LEN(except); i++) { + mrb_value io = except_ptr[i]; + fptr = io_get_open_fptr(mrb, io); + if (mrb_hal_io_fdset_isset(mrb, fptr->fd, ep)) { + mrb_ary_push(mrb, list, io); + } + else if (fptr->fd2 >= 0 && mrb_hal_io_fdset_isset(mrb, fptr->fd2, ep)) { + mrb_ary_push(mrb, list, io); } } } } + mrb_hal_io_fdset_free(mrb, pset); + mrb_hal_io_fdset_free(mrb, rset); + mrb_hal_io_fdset_free(mrb, wset); + mrb_hal_io_fdset_free(mrb, eset); + return result; } int mrb_io_fileno(mrb_state *mrb, mrb_value io) { - struct mrb_io *fptr; - fptr = io_get_open_fptr(mrb, io); + struct mrb_io *fptr = io_get_open_fptr(mrb, io); return fptr->fd; } +/* + * call-seq: + * ios.fileno -> integer + * + * Returns the integer file descriptor number for the `IO` object. + * + * $stdin.fileno #=> 0 + * $stdout.fileno #=> 1 + */ static mrb_value -mrb_io_fileno_m(mrb_state *mrb, mrb_value io) +io_fileno(mrb_state *mrb, mrb_value io) { int fd = mrb_io_fileno(mrb, io); return mrb_fixnum_value(fd); } #if defined(F_GETFD) && defined(F_SETFD) && defined(FD_CLOEXEC) +/* + * call-seq: + * ios.close_on_exec? -> true or false + * + * Returns `true` if the `FD_CLOEXEC` flag is set for the `IO` object, `false` + * otherwise. + * + * f = IO.new(1, "w") + * f.close_on_exec? #=> true + */ static mrb_value -mrb_io_close_on_exec_p(mrb_state *mrb, mrb_value self) +io_close_on_exec_p(mrb_state *mrb, mrb_value io) { - struct mrb_io *fptr; + struct mrb_io *fptr = io_get_open_fptr(mrb, io); int ret; - fptr = io_get_open_fptr(mrb, self); - if (fptr->fd2 >= 0) { if ((ret = fcntl(fptr->fd2, F_GETFD)) == -1) mrb_sys_fail(mrb, "F_GETFD failed"); if (!(ret & FD_CLOEXEC)) return mrb_false_value(); @@ -1357,20 +1608,31 @@ mrb_io_close_on_exec_p(mrb_state *mrb, mrb_value self) return mrb_true_value(); } #else -# define mrb_io_close_on_exec_p mrb_notimplement_m +# define io_close_on_exec_p mrb_notimplement_m #endif #if defined(F_GETFD) && defined(F_SETFD) && defined(FD_CLOEXEC) +/* + * call-seq: + * ios.close_on_exec = bool -> bool + * + * Sets the `FD_CLOEXEC` flag on the `IO` object. + * + * f = IO.new(1, "w") + * f.close_on_exec = false + * f.close_on_exec? #=> false + */ static mrb_value -mrb_io_set_close_on_exec(mrb_state *mrb, mrb_value self) +io_set_close_on_exec(mrb_state *mrb, mrb_value io) { - struct mrb_io *fptr; - int flag, ret; + + struct mrb_io *fptr = io_get_open_fptr(mrb, io); mrb_bool b; - fptr = io_get_open_fptr(mrb, self); mrb_get_args(mrb, "b", &b); - flag = b ? FD_CLOEXEC : 0; + + int flag = b ? FD_CLOEXEC : 0; + int ret; if (fptr->fd2 >= 0) { if ((ret = fcntl(fptr->fd2, F_GETFD)) == -1) mrb_sys_fail(mrb, "F_GETFD failed"); @@ -1392,32 +1654,51 @@ mrb_io_set_close_on_exec(mrb_state *mrb, mrb_value self) return mrb_bool_value(b); } #else -# define mrb_io_set_close_on_exec mrb_notimplement_m +# define io_set_close_on_exec mrb_notimplement_m #endif +/* + * call-seq: + * ios.sync = bool -> bool + * + * Sets the sync mode for the `IO` object. + * + * If `true`, all output is immediately flushed to the underlying operating + * system and is not buffered internally. + * + * f = File.new("testfile", "w") + * f.sync = true + */ static mrb_value -mrb_io_set_sync(mrb_state *mrb, mrb_value self) +io_set_sync(mrb_state *mrb, mrb_value io) { - struct mrb_io *fptr; + struct mrb_io *fptr = io_get_open_fptr(mrb, io); mrb_bool b; - fptr = io_get_open_fptr(mrb, self); mrb_get_args(mrb, "b", &b); fptr->sync = b; return mrb_bool_value(b); } +/* + * call-seq: + * ios.sync -> true or false + * + * Returns the sync mode for the `IO` object. + * + * f = File.new("testfile", "w") + * f.sync #=> false + */ static mrb_value -mrb_io_sync(mrb_state *mrb, mrb_value self) +io_sync(mrb_state *mrb, mrb_value io) { - struct mrb_io *fptr; - fptr = io_get_open_fptr(mrb, self); + struct mrb_io *fptr = io_get_open_fptr(mrb, io); return mrb_bool_value(fptr->sync); } -#ifndef MRB_WITH_IO_PREAD_PWRITE -# define mrb_io_pread mrb_notimplement_m -# define mrb_io_pwrite mrb_notimplement_m +#ifndef MRB_USE_IO_PREAD_PWRITE +# define io_pread mrb_notimplement_m +# define io_pwrite mrb_notimplement_m #else static off_t value2off(mrb_state *mrb, mrb_value offv) @@ -1430,7 +1711,7 @@ value2off(mrb_state *mrb, mrb_value offv) * pread(maxlen, offset, outbuf = "") -> outbuf */ static mrb_value -mrb_io_pread(mrb_state *mrb, mrb_value io) +io_pread(mrb_state *mrb, mrb_value io) { mrb_value buf = mrb_nil_value(); mrb_value off; @@ -1438,7 +1719,7 @@ mrb_io_pread(mrb_state *mrb, mrb_value io) mrb_get_args(mrb, "io|S!", &maxlen, &off, &buf); - return mrb_io_sysread_common(mrb, pread, io, buf, maxlen, value2off(mrb, off)); + return io_read_common(mrb, pread, io, buf, maxlen, value2off(mrb, off)); } /* @@ -1446,122 +1727,589 @@ mrb_io_pread(mrb_state *mrb, mrb_value io) * pwrite(buffer, offset) -> wrote_bytes */ static mrb_value -mrb_io_pwrite(mrb_state *mrb, mrb_value io) +io_pwrite(mrb_state *mrb, mrb_value io) { mrb_value buf, off; mrb_get_args(mrb, "So", &buf, &off); - return mrb_io_syswrite_common(mrb, pwrite, io, buf, value2off(mrb, off)); + return io_write_common(mrb, pwrite, io_get_write_fptr(mrb, io), RSTRING_PTR(buf), RSTRING_LEN(buf), value2off(mrb, off)); } -#endif /* MRB_WITH_IO_PREAD_PWRITE */ +#endif /* MRB_USE_IO_PREAD_PWRITE */ -static mrb_value -io_bufread(mrb_state *mrb, mrb_value str, mrb_int len) +/* + * call-seq: + * ios.ungetc(string) -> nil + * + * Pushes back characters (passed as a parameter) onto ios, such that a + * subsequent buffered character read will return it. Has no effect with + * unbuffered reads (such as IO#sysread). + * + * f = File.new("testfile") #=> # + * c = f.getc #=> "H" + * f.ungetc(c) #=> nil + * f.getc #=> "H" + */ +/* Helper function for ungetc operations with raw data */ +static void +io_unget_data(mrb_state *mrb, struct mrb_io *fptr, const char *ptr, mrb_int len) { - mrb_value str2; - mrb_int newlen; - struct RString *s; - char *p; - - s = RSTRING(str); - mrb_str_modify(mrb, s); - p = RSTR_PTR(s); - str2 = mrb_str_new(mrb, p, len); - newlen = RSTR_LEN(s)-len; - memmove(p, p+len, newlen); - p[newlen] = '\0'; - RSTR_SET_LEN(s, newlen); + struct mrb_io_buf *buf = fptr->buf; - return str2; + if (len > SHRT_MAX) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "string too long to ungetc"); + } + if (buf->len + len > SHRT_MAX) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "total ungetc buffer exceeds maximum size"); + } + if (buf->len + len > MRB_IO_BUF_SIZE) { + fptr->buf = (struct mrb_io_buf*)mrb_realloc(mrb, buf, sizeof(struct mrb_io_buf)+buf->len+len-MRB_IO_BUF_SIZE); + buf = fptr->buf; + } + memmove(buf->mem+len, buf->mem+buf->start, buf->len); + memcpy(buf->mem, ptr, len); + buf->start = 0; + buf->len += (short)len; } static mrb_value -mrb_io_bufread(mrb_state *mrb, mrb_value self) +io_ungetc(mrb_state *mrb, mrb_value io) { + struct mrb_io *fptr = io_get_read_fptr(mrb, io); mrb_value str; - mrb_int len; - mrb->c->ci->mid = 0; - mrb_get_args(mrb, "Si", &str, &len); - mrb_assert(RSTRING_LEN(str) > 0); - mrb_assert(RSTRING_PTR(str) != NULL); - mrb_str_modify(mrb, RSTRING(str)); - return io_bufread(mrb, str, len); + mrb_get_args(mrb, "S", &str); + io_unget_data(mrb, fptr, RSTRING_PTR(str), RSTRING_LEN(str)); + return mrb_nil_value(); } +/* + * call-seq: + * ios.ungetbyte(string) -> nil + * ios.ungetbyte(integer) -> nil + * + * Pushes back bytes (passed as a parameter) onto ios, such that a subsequent + * buffered character read will return it. Only one byte may be pushed back + * before a subsequent read operation (that is, you will be able to read only + * the last of several bytes that have been pushed back). Has no effect with + * unbuffered reads (such as IO#sysread). + */ static mrb_value -mrb_io_readchar(mrb_state *mrb, mrb_value self) +io_ungetbyte(mrb_state *mrb, mrb_value io) { - mrb_value buf; - mrb_int len = 1; + struct mrb_io *fptr = io_get_read_fptr(mrb, io); + mrb_value c = mrb_get_arg1(mrb); + unsigned char byte_val; + + if (mrb_string_p(c)) { + if (RSTRING_LEN(c) == 0) { + return mrb_nil_value(); /* Empty string, do nothing */ + } + byte_val = (unsigned char)RSTRING_PTR(c)[0]; + } + else { + mrb_int val = mrb_integer(c); + byte_val = (unsigned char)(val & 0xff); + } + + /* Use helper function with single byte */ + io_unget_data(mrb, fptr, (const char*)&byte_val, 1); + return mrb_nil_value(); +} + +static void +io_buf_reset(struct mrb_io_buf *buf) +{ + buf->start = 0; + buf->len = 0; +} + +static void +io_buf_shift(struct mrb_io_buf *buf, mrb_int n) +{ + mrb_assert(n <= SHRT_MAX); + buf->start += (short)n; + buf->len -= (short)n; +} + #ifdef MRB_UTF8_STRING - unsigned char c; +static void +io_fill_buf_comp(mrb_state *mrb, struct mrb_io *fptr) +{ + struct mrb_io_buf *buf = fptr->buf; + int keep = buf->len; + + memmove(buf->mem, buf->mem+buf->start, keep); + int n = read(fptr->fd, buf->mem+keep, MRB_IO_BUF_SIZE-keep); + if (n < 0) mrb_sys_fail(mrb, 0); + if (n == 0) fptr->eof = 1; + buf->start = 0; + buf->len += (short)n; +} #endif - mrb->c->ci->mid = 0; - mrb_get_args(mrb, "S", &buf); - mrb_assert(RSTRING_LEN(buf) > 0); - mrb_assert(RSTRING_PTR(buf) != NULL); - mrb_str_modify(mrb, RSTRING(buf)); +static void +io_fill_buf(mrb_state *mrb, struct mrb_io *fptr) +{ + struct mrb_io_buf *buf = fptr->buf; + + if (buf->len > 0) return; + + int n = read(fptr->fd, buf->mem, MRB_IO_BUF_SIZE); + if (n < 0) mrb_sys_fail(mrb, 0); + if (n == 0) fptr->eof = 1; + buf->start = 0; + buf->len = (short)n; +} + +static mrb_value +io_eof(mrb_state *mrb, mrb_value io) +{ + struct mrb_io *fptr = io_get_read_fptr(mrb, io); + + if (fptr->eof) return mrb_true_value(); + if (fptr->buf->len > 0) return mrb_false_value(); + io_fill_buf(mrb, fptr); + return mrb_bool_value(fptr->eof); +} + +static void +io_buf_cat(mrb_state *mrb, mrb_value outbuf, struct mrb_io_buf *buf, mrb_int n) +{ + mrb_assert(n <= buf->len); + mrb_str_cat(mrb, outbuf, buf->mem+buf->start, n); + io_buf_shift(buf, n); +} + +static void +io_buf_cat_all(mrb_state *mrb, mrb_value outbuf, struct mrb_io_buf *buf) +{ + mrb_str_cat(mrb, outbuf, buf->mem+buf->start, buf->len); + io_buf_reset(buf); +} + +static mrb_value +io_read_all(mrb_state *mrb, struct mrb_io *fptr, mrb_value outbuf) +{ + for (;;) { + io_fill_buf(mrb, fptr); + if (fptr->eof) { + return outbuf; + } + io_buf_cat_all(mrb, outbuf, fptr->buf); + } +} + +static mrb_value +io_reset_outbuf(mrb_state *mrb, mrb_value outbuf, mrb_int len) +{ + if (mrb_nil_p(outbuf)) { + outbuf = mrb_str_new(mrb, NULL, 0); + } + else { + mrb_str_modify(mrb, mrb_str_ptr(outbuf)); + RSTR_SET_LEN(mrb_str_ptr(outbuf), 0); + } + return outbuf; +} + +/* + * call-seq: + * ios.read(length = nil, outbuf = "") -> string, outbuf, or nil + * + * Reads `length` bytes from the I/O stream. + * + * If `length` is `nil`, it reads until end of file. + * If `outbuf` is given, it will be used as the buffer. + * + * f = File.new("testfile") + * f.read(16) #=> "This is line one" + */ +static mrb_value +io_read(mrb_state *mrb, mrb_value io) +{ + mrb_value outbuf = mrb_nil_value(); + mrb_value len; + mrb_int length = 0; + mrb_bool length_given; + struct mrb_io *fptr = io_get_read_fptr(mrb, io); + + mrb_get_args(mrb, "|o?S", &len, &length_given, &outbuf); + if (length_given) { + if (mrb_nil_p(len)) { + length_given = FALSE; + } + else { + length = mrb_as_int(mrb, len); + if (length < 0) { + mrb_raisef(mrb, E_ARGUMENT_ERROR, "negative length %i given", length); + } + if (length == 0) { + return io_reset_outbuf(mrb, outbuf, 0); + } + } + } + + outbuf = io_reset_outbuf(mrb, outbuf, MRB_IO_BUF_SIZE); + if (!length_given) { /* read as much as possible */ + return io_read_all(mrb, fptr, outbuf); + } + + struct mrb_io_buf *buf = fptr->buf; + + for (;;) { + io_fill_buf(mrb, fptr); + if (fptr->eof || length == 0) { + if (RSTRING_LEN(outbuf) == 0) + return mrb_nil_value(); + return outbuf; + } + if (buf->len < length) { + length -= buf->len; + io_buf_cat_all(mrb, outbuf, buf); + } + else { + io_buf_cat(mrb, outbuf, buf, length); + return outbuf; + } + } +} + +static mrb_int +io_find_index(struct mrb_io *fptr, const char *rs, mrb_int rslen) +{ + struct mrb_io_buf *buf = fptr->buf; + + mrb_assert(rslen > 0); + const char c = rs[0]; + const mrb_int limit = buf->len - rslen + 1; + const char *p = buf->mem+buf->start; + for (mrb_int i=0; ibuf; + + mrb_get_args(mrb, "|o?i?", &rs, &rs_given, &limit, &limit_given); + + if (limit_given == FALSE) { + if (rs_given) { + if (mrb_nil_p(rs)) { + rs_given = FALSE; + } + else if (mrb_integer_p(rs)) { + limit = mrb_integer(rs); + limit_given = TRUE; + rs = mrb_nil_value(); + } + else if (!mrb_string_p(rs)) { + mrb_ensure_int_type(mrb, rs); + } + } + } + if (rs_given) { + if (mrb_nil_p(rs)) { + rs_given = FALSE; + } + else { + mrb_ensure_string_type(mrb, rs); + if (RSTRING_LEN(rs) == 0) { /* paragraph mode */ + rs = mrb_str_new_lit(mrb, "\n\n"); + } + } + } + else { + rs = mrb_str_new_lit(mrb, "\n"); + rs_given = TRUE; + } + + /* from now on rs_given==FALSE means no RS */ + if (mrb_nil_p(rs) && !limit_given) { + return io_read_all(mrb, fptr, mrb_str_new_capa(mrb, MRB_IO_BUF_SIZE)); + } + + io_fill_buf(mrb, fptr); + if (fptr->eof) return mrb_nil_value(); + + mrb_value outbuf; + if (limit_given) { + if (limit < 0) { + mrb_raisef(mrb, E_ARGUMENT_ERROR, "negative length %i given", limit); + } + if (limit == 0) return mrb_str_new(mrb, NULL, 0); + outbuf = mrb_str_new_capa(mrb, limit); + } + else { + outbuf = mrb_str_new(mrb, NULL, 0); + } + + for (;;) { + if (rs_given) { /* with RS */ + mrb_int rslen = RSTRING_LEN(rs); + mrb_int idx = io_find_index(fptr, RSTRING_PTR(rs), rslen); + if (idx >= 0) { /* found */ + mrb_int n = idx+rslen; + if (limit_given && limit < n) { + n = limit; + } + io_buf_cat(mrb, outbuf, buf, n); + return outbuf; + } + } + if (limit_given) { + if (limit <= buf->len) { + io_buf_cat(mrb, outbuf, buf, limit); + return outbuf; + } + limit -= buf->len; + } + io_buf_cat_all(mrb, outbuf, buf); + io_fill_buf(mrb, fptr); + if (fptr->eof) { + if (RSTRING_LEN(outbuf) == 0) return mrb_nil_value(); + return outbuf; + } + } +} + +static mrb_value +io_readline(mrb_state *mrb, mrb_value io) +{ + mrb_value result = io_gets(mrb, io); + if (mrb_nil_p(result)) { + eof_error(mrb); + } + return result; +} + +static mrb_value +io_readlines(mrb_state *mrb, mrb_value io) +{ + mrb_value ary = mrb_ary_new(mrb); + for (;;) { + mrb_value line = io_gets(mrb, io); + + if (mrb_nil_p(line)) return ary; + mrb_ary_push(mrb, ary, line); + } +} + +static mrb_value +io_getc(mrb_state *mrb, mrb_value io) +{ + mrb_int len = 1; + struct mrb_io *fptr = io_get_read_fptr(mrb, io); + struct mrb_io_buf *buf = fptr->buf; + + io_fill_buf(mrb, fptr); + if (fptr->eof) return mrb_nil_value(); #ifdef MRB_UTF8_STRING - c = RSTRING_PTR(buf)[0]; - if (c & 0x80) { - len = mrb_utf8len(RSTRING_PTR(buf), RSTRING_END(buf)); - if (len == 1 && RSTRING_LEN(buf) < 4) { /* partial UTF-8 */ - mrb_int blen = RSTRING_LEN(buf); - ssize_t n; - struct mrb_io *fptr = io_get_read_fptr(mrb, self); - - /* refill the buffer */ - mrb_str_resize(mrb, buf, 4096); - n = read(fptr->fd, RSTRING_PTR(buf)+blen, 4096-blen); - if (n < 0) mrb_sys_fail(mrb, "sysread failed"); - mrb_str_resize(mrb, buf, blen+n); + const char *p = &buf->mem[buf->start]; + if ((*p) & 0x80) { + len = mrb_utf8len(p, p+buf->len); + if (len == 1 && buf->len < 4) { /* partial UTF-8 */ + io_fill_buf_comp(mrb, fptr); + p = &buf->mem[buf->start]; + len = mrb_utf8len(p, p+buf->len); } - len = mrb_utf8len(RSTRING_PTR(buf), RSTRING_END(buf)); } #endif - return io_bufread(mrb, buf, len); + mrb_value str = mrb_str_new(mrb, buf->mem+buf->start, len); + io_buf_shift(buf, len); + return str; } -void -mrb_init_io(mrb_state *mrb) +static mrb_value +io_readchar(mrb_state *mrb, mrb_value io) { - struct RClass *io; + mrb_value result = io_getc(mrb, io); + if (mrb_nil_p(result)) { + eof_error(mrb); + } + return result; +} + +/* + * call-seq: + * ios.getbyte -> integer or nil + * + * Reads a byte from the `IO` stream. + * + * Returns the byte as an integer, or `nil` at end of file. + * + * f = File.new("testfile") + * f.getbyte #=> 72 + */ +static mrb_value +io_getbyte(mrb_state *mrb, mrb_value io) +{ + struct mrb_io *fptr = io_get_read_fptr(mrb, io); + struct mrb_io_buf *buf = fptr->buf; - io = mrb_define_class(mrb, "IO", mrb->object_class); - MRB_SET_INSTANCE_TT(io, MRB_TT_DATA); + io_fill_buf(mrb, fptr); + if (fptr->eof) return mrb_nil_value(); - mrb_include_module(mrb, io, mrb_module_get(mrb, "Enumerable")); /* 15.2.20.3 */ - mrb_define_class_method(mrb, io, "_popen", mrb_io_s_popen, MRB_ARGS_ARG(1,2)); - mrb_define_class_method(mrb, io, "_sysclose", mrb_io_s_sysclose, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, io, "for_fd", mrb_io_s_for_fd, MRB_ARGS_ARG(1,2)); - mrb_define_class_method(mrb, io, "select", mrb_io_s_select, MRB_ARGS_ARG(1,3)); - mrb_define_class_method(mrb, io, "sysopen", mrb_io_s_sysopen, MRB_ARGS_ARG(1,2)); + unsigned char c = buf->mem[buf->start]; + io_buf_shift(buf, 1); + return mrb_int_value(mrb, (mrb_int)c); +} + +/* + * call-seq: + * ios.readbyte -> integer + * + * Reads a byte from the `IO` stream. + * + * Returns the byte as an integer. Raises `EOFError` at end of file. + * + * f = File.new("testfile") + * f.readbyte #=> 72 + */ +static mrb_value +io_readbyte(mrb_state *mrb, mrb_value io) +{ + mrb_value result = io_getbyte(mrb, io); + if (mrb_nil_p(result)) { + eof_error(mrb); + } + return result; +} + +/* + * call-seq: + * ios.flush -> ios + * + * Flushes any buffered data within the `IO` object to the underlying + * operating system. + * + * $stdout.print "no newline" + * $stdout.flush + */ +static mrb_value +io_flush(mrb_state *mrb, mrb_value io) +{ + io_get_open_fptr(mrb, io); + return io; +} + +/* + * call-seq: + * ios.autoclose = bool -> bool + * + * Sets the autoclose flag. + * + * If the autoclose flag is set, the underlying file descriptor(s) of +ios+ + * will be closed when +ios+ is closed (explicitly via +#close+, or implicitly + * when the +IO+ object is garbage-collected). When unset, the file + * descriptor(s) are left open. + * + * f = File.open("testfile") + * IO.for_fd(f.fileno).autoclose = false + */ +static mrb_value +io_set_autoclose(mrb_state *mrb, mrb_value io) +{ + struct mrb_io *fptr = io_get_open_fptr(mrb, io); + mrb_bool b; + + mrb_get_args(mrb, "b", &b); + fptr->close_fd = b; + fptr->close_fd2 = b; + return mrb_bool_value(b); +} + +/* + * call-seq: + * ios.autoclose? -> true or false + * + * Returns +true+ if the underlying file descriptor of +ios+ will be closed + * when +ios+ is closed, otherwise +false+. + * + * f = File.open("testfile") + * f.autoclose? #=> true + * f.autoclose = false + * f.autoclose? #=> false + */ +static mrb_value +io_autoclose_p(mrb_state *mrb, mrb_value io) +{ + struct mrb_io *fptr = io_get_open_fptr(mrb, io); + return mrb_bool_value(fptr->close_fd); +} + +/* ---------------------------*/ +static const mrb_mt_entry io_rom_entries[] = { + MRB_MT_ENTRY(io_init, MRB_SYM(initialize), MRB_ARGS_ARG(1,2)), + MRB_MT_ENTRY(io_init_copy, MRB_SYM(initialize_copy), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), + MRB_MT_ENTRY(io_isatty, MRB_SYM(isatty), MRB_ARGS_NONE()), + MRB_MT_ENTRY(io_eof, MRB_SYM_Q(eof), MRB_ARGS_NONE()), /* 15.2.20.5.6 */ + MRB_MT_ENTRY(io_getc, MRB_SYM(getc), MRB_ARGS_NONE()), /* 15.2.20.5.8 */ + MRB_MT_ENTRY(io_gets, MRB_SYM(gets), MRB_ARGS_OPT(2)), /* 15.2.20.5.9 */ + MRB_MT_ENTRY(io_read, MRB_SYM(read), MRB_ARGS_OPT(2)), /* 15.2.20.5.14 */ + MRB_MT_ENTRY(io_readchar, MRB_SYM(readchar), MRB_ARGS_NONE()), /* 15.2.20.5.15 */ + MRB_MT_ENTRY(io_readline, MRB_SYM(readline), MRB_ARGS_OPT(2)), /* 15.2.20.5.16 */ + MRB_MT_ENTRY(io_readlines, MRB_SYM(readlines), MRB_ARGS_OPT(2)), /* 15.2.20.5.17 */ + MRB_MT_ENTRY(io_sync, MRB_SYM(sync), MRB_ARGS_NONE()), /* 15.2.20.5.18 */ + MRB_MT_ENTRY(io_set_sync, MRB_SYM_E(sync), MRB_ARGS_REQ(1)), /* 15.2.20.5.19 */ + MRB_MT_ENTRY(io_sysread, MRB_SYM(sysread), MRB_ARGS_ARG(1,1)), + MRB_MT_ENTRY(io_sysseek, MRB_SYM(sysseek), MRB_ARGS_ARG(1,1)), + MRB_MT_ENTRY(io_syswrite, MRB_SYM(syswrite), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(io_seek, MRB_SYM(seek), MRB_ARGS_ARG(1,1)), + MRB_MT_ENTRY(io_close, MRB_SYM(close), MRB_ARGS_NONE()), /* 15.2.20.5.1 */ + MRB_MT_ENTRY(io_close_write, MRB_SYM(close_write), MRB_ARGS_NONE()), + MRB_MT_ENTRY(io_set_close_on_exec, MRB_SYM_E(close_on_exec), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(io_close_on_exec_p, MRB_SYM_Q(close_on_exec), MRB_ARGS_NONE()), + MRB_MT_ENTRY(io_closed, MRB_SYM_Q(closed), MRB_ARGS_NONE()), /* 15.2.20.5.2 */ + MRB_MT_ENTRY(io_flush, MRB_SYM(flush), MRB_ARGS_NONE()), /* 15.2.20.5.7 */ + MRB_MT_ENTRY(io_ungetc, MRB_SYM(ungetc), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(io_ungetbyte, MRB_SYM(ungetbyte), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(io_pos, MRB_SYM(pos), MRB_ARGS_NONE()), + MRB_MT_ENTRY(io_pid, MRB_SYM(pid), MRB_ARGS_NONE()), + MRB_MT_ENTRY(io_fileno, MRB_SYM(fileno), MRB_ARGS_NONE()), + MRB_MT_ENTRY(io_write, MRB_SYM(write), MRB_ARGS_ANY()), /* 15.2.20.5.20 */ + MRB_MT_ENTRY(io_puts, MRB_SYM(puts), MRB_ARGS_ANY()), + MRB_MT_ENTRY(io_print, MRB_SYM(print), MRB_ARGS_ANY()), + MRB_MT_ENTRY(io_putc, MRB_SYM(putc), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(io_lshift, MRB_OPSYM(lshift), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(io_pread, MRB_SYM(pread), MRB_ARGS_ANY()), /* Ruby 2.5 feature */ + MRB_MT_ENTRY(io_pwrite, MRB_SYM(pwrite), MRB_ARGS_ANY()), /* Ruby 2.5 feature */ + MRB_MT_ENTRY(io_getbyte, MRB_SYM(getbyte), MRB_ARGS_NONE()), + MRB_MT_ENTRY(io_readbyte, MRB_SYM(readbyte), MRB_ARGS_NONE()), + MRB_MT_ENTRY(io_set_autoclose, MRB_SYM_E(autoclose), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(io_autoclose_p, MRB_SYM_Q(autoclose), MRB_ARGS_NONE()), +}; + +void +mrb_init_io(mrb_state *mrb) +{ + struct RClass *io = mrb_define_class_id(mrb, MRB_SYM(IO), mrb->object_class); + MRB_SET_INSTANCE_TT(io, MRB_TT_CDATA); + + mrb_include_module(mrb, io, mrb_module_get_id(mrb, MRB_SYM(Enumerable))); /* 15.2.20.3 */ + mrb_define_class_method_id(mrb, io, MRB_SYM(_popen), io_s_popen, MRB_ARGS_ARG(1,2)); + mrb_define_class_method_id(mrb, io, MRB_SYM(_sysclose), io_s_sysclose, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, io, MRB_SYM(for_fd), io_s_for_fd, MRB_ARGS_ARG(1,2)); + mrb_define_class_method_id(mrb, io, MRB_SYM(select), io_s_select, MRB_ARGS_ARG(1,3)); + mrb_define_class_method_id(mrb, io, MRB_SYM(sysopen), io_s_sysopen, MRB_ARGS_ARG(1,2)); #if !defined(_WIN32) && !(defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) - mrb_define_class_method(mrb, io, "_pipe", mrb_io_s_pipe, MRB_ARGS_NONE()); + mrb_define_class_method_id(mrb, io, MRB_SYM(_pipe), io_s_pipe, MRB_ARGS_NONE()); #endif - mrb_define_method(mrb, io, "initialize", mrb_io_initialize, MRB_ARGS_ARG(1,2)); /* 15.2.20.5.21 (x)*/ - mrb_define_method(mrb, io, "initialize_copy", mrb_io_initialize_copy, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, io, "_check_readable", mrb_io_check_readable, MRB_ARGS_NONE()); - mrb_define_method(mrb, io, "isatty", mrb_io_isatty, MRB_ARGS_NONE()); - mrb_define_method(mrb, io, "sync", mrb_io_sync, MRB_ARGS_NONE()); - mrb_define_method(mrb, io, "sync=", mrb_io_set_sync, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, io, "sysread", mrb_io_sysread, MRB_ARGS_ARG(1,1)); - mrb_define_method(mrb, io, "sysseek", mrb_io_sysseek, MRB_ARGS_ARG(1,1)); - mrb_define_method(mrb, io, "syswrite", mrb_io_syswrite, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, io, "close", mrb_io_close, MRB_ARGS_NONE()); /* 15.2.20.5.1 */ - mrb_define_method(mrb, io, "close_write", mrb_io_close_write, MRB_ARGS_NONE()); - mrb_define_method(mrb, io, "close_on_exec=", mrb_io_set_close_on_exec, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, io, "close_on_exec?", mrb_io_close_on_exec_p, MRB_ARGS_NONE()); - mrb_define_method(mrb, io, "closed?", mrb_io_closed, MRB_ARGS_NONE()); /* 15.2.20.5.2 */ - mrb_define_method(mrb, io, "pid", mrb_io_pid, MRB_ARGS_NONE()); /* 15.2.20.5.2 */ - mrb_define_method(mrb, io, "fileno", mrb_io_fileno_m, MRB_ARGS_NONE()); - mrb_define_method(mrb, io, "pread", mrb_io_pread, MRB_ARGS_ANY()); /* ruby 2.5 feature */ - mrb_define_method(mrb, io, "pwrite", mrb_io_pwrite, MRB_ARGS_ANY()); /* ruby 2.5 feature */ - - mrb_define_method(mrb, io, "_readchar", mrb_io_readchar, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, io, "_bufread", mrb_io_bufread, MRB_ARGS_REQ(2)); + MRB_MT_INIT_ROM(mrb, io, io_rom_entries); + + mrb_define_const_id(mrb, io, MRB_SYM(SEEK_SET), mrb_fixnum_value(SEEK_SET)); + mrb_define_const_id(mrb, io, MRB_SYM(SEEK_CUR), mrb_fixnum_value(SEEK_CUR)); + mrb_define_const_id(mrb, io, MRB_SYM(SEEK_END), mrb_fixnum_value(SEEK_END)); } diff --git a/mrbgems/mruby-io/src/mruby_io_gem.c b/mrbgems/mruby-io/src/mruby_io_gem.c index 6880e66780..4e01442a20 100644 --- a/mrbgems/mruby-io/src/mruby_io_gem.c +++ b/mrbgems/mruby-io/src/mruby_io_gem.c @@ -1,4 +1,5 @@ -#include "mruby.h" +#include +#include void mrb_init_io(mrb_state *mrb); void mrb_init_file(mrb_state *mrb); @@ -9,12 +10,15 @@ void mrb_init_file_test(mrb_state *mrb); void mrb_mruby_io_gem_init(mrb_state* mrb) { + mrb_hal_io_init(mrb); mrb_init_io(mrb); DONE; mrb_init_file(mrb); DONE; mrb_init_file_test(mrb); DONE; } +#undef DONE void mrb_mruby_io_gem_final(mrb_state* mrb) { + mrb_hal_io_final(mrb); } diff --git a/mrbgems/mruby-io/test/file.rb b/mrbgems/mruby-io/test/file.rb index c138506070..8da7eb14c4 100644 --- a/mrbgems/mruby-io/test/file.rb +++ b/mrbgems/mruby-io/test/file.rb @@ -3,6 +3,15 @@ MRubyIOTestUtil.io_test_setup +def assert_dirname_with_level(path, results) + assert('dirname with level') do + assert_raise(ArgumentError) { File.dirname path, -1 } + results.each do |level, expect| + assert_equal expect, File.dirname(path, level) + end + end +end + assert('File.class', '15.2.21') do assert_equal Class, File.class end @@ -36,17 +45,114 @@ assert_raise(ArgumentError) { File.basename("/a/b\0") } end +assert('File.basename with suffix') do + assert_equal 'foo', File.basename('foo.rb', '.rb') + assert_equal 'foo.rb', File.basename('foo.rb', '.py') + assert_equal 'foo.tar', File.basename('foo.tar.gz', '.gz') + assert_equal 'foo.tar.gz', File.basename('foo.tar.gz', '.zip') + assert_equal '.profile', File.basename('.profile', '.sh') + assert_equal 'foo', File.basename('foo.', '.') + assert_equal 'foo', File.basename('foo', '') + assert_equal 'foo.rb', File.basename('foo.rb', '') + assert_equal 'foo.rb', File.basename('foo.rb', '.RB') # case-sensitive +end + +if MRubyIOTestUtil.win? + assert('File.basename (for Windows)') do + assert_equal '/', File.basename('/') + assert_equal '/', File.basename('//a') + assert_equal '/', File.basename('//a/') + assert_equal '/', File.basename('//a/b') + assert_equal '/', File.basename('//a/b/') + assert_equal 'c', File.basename('//a/b/c') + assert_equal 'c', File.basename('//a/b/c/') + assert_equal '/', File.basename("\\\\a\\b") + assert_equal '', File.basename('c:') + assert_equal '/', File.basename('c:/') + assert_equal 'a', File.basename('c:/a') + assert_equal 'a', File.basename('c:/a/') + assert_equal 'b', File.basename('c:/a/b') + assert_equal '/', File.basename("c:\\") + end +else + assert('File.basename (for generic)') do + assert_equal '/', File.basename('/') + assert_equal 'a', File.basename('//a') + assert_equal 'a', File.basename('//a/') + assert_equal 'b', File.basename('//a/b') + assert_equal 'b', File.basename('//a/b/') + assert_equal 'c', File.basename('//a/b/c') + assert_equal 'c', File.basename('//a/b/c/') + assert_equal 'c:', File.basename('c:') + assert_equal 'c:', File.basename('c:/') + assert_equal 'a', File.basename('c:/a') + assert_equal 'a', File.basename('c:/a/') + assert_equal 'b', File.basename('c:/a/b') + end +end + assert('File.dirname') do assert_equal '.', File.dirname('') assert_equal '.', File.dirname('a') assert_equal '/', File.dirname('/a') + assert_equal '/', File.dirname('/a/') assert_equal 'a', File.dirname('a/b') + assert_equal 'a', File.dirname('a/b/') + assert_equal 'a/b', File.dirname('a/b/c') assert_equal '/a', File.dirname('/a/b') + assert_equal '/a', File.dirname('/a/b/') + assert_equal '/a/b', File.dirname('/a/b/c') + assert_equal '/a/b', File.dirname('/a/b/c/') + assert_equal '/', File.dirname('/a//') + assert_equal '/a', File.dirname('/a//b') + assert_equal '/a/b', File.dirname('/a/b//c//') +end + +assert('File.dirname (with level)') do + assert_dirname_with_level '', { 0 => '.', 1 => '.', 2 => '.' } + assert_dirname_with_level 'a', { 0 => 'a', 1 => '.', 2 => '.' } + assert_dirname_with_level '/a', { 0 => '/a', 1 => '/', 2 => '/' } + assert_dirname_with_level '/a/', { 0 => '/a/', 1 => '/', 2 => '/' } + assert_dirname_with_level 'a/b', { 0 => 'a/b', 1 => 'a', 2 => '.' } + assert_dirname_with_level 'a/b/', { 0 => 'a/b/', 1 => 'a', 2 => '.' } + assert_dirname_with_level 'a/b/c', { 0 => 'a/b/c', 1 => 'a/b', 2 => 'a' } + assert_dirname_with_level 'a/b/c/d', { 0 => 'a/b/c/d', 1 => 'a/b/c', 2 => 'a/b' } +end + +unless MRubyIOTestUtil.win? + assert('File.dirname (not Windows)') do + assert_equal '/a', File.dirname('//a//b/') + end +else + assert('File.dirname (on Windows)') do + assert_equal 'c:.', File.dirname('c:') + assert_equal 'c:.', File.dirname('c:a') + assert_equal 'c:.', File.dirname('c:a/') + assert_equal 'c:a', File.dirname('c:a/b') + assert_equal 'c:/', File.dirname('c:/') + assert_equal 'c:/', File.dirname('c:/a') + assert_equal 'c:/', File.dirname('c:/a/') + assert_equal 'c:/a', File.dirname('c:/a/b') + assert_equal '//.', File.dirname('//.') + assert_equal '//.', File.dirname('//./') + assert_equal '//./a', File.dirname('//./a') + assert_equal '//./a', File.dirname('//./a/') + assert_equal '//./a', File.dirname('//./a/b') + assert_equal '//./a/b', File.dirname('//./a/b/c') + assert_equal '//?', File.dirname('//?/') + assert_equal '//?/a', File.dirname('//?/a') + assert_equal '//?/a', File.dirname('//?/a/') + assert_equal '//?/a', File.dirname('//?/a/b') + assert_equal '//host1', File.dirname('//host1/') + assert_equal '//host1/a', File.dirname('//host1/a') + assert_equal '//host1/a', File.dirname('//host1/a/') + assert_equal '//host1/a', File.dirname('//host1/a/b') + end end assert('File.extname') do assert_equal '.txt', File.extname('foo/foo.txt') - assert_equal '.gz', File.extname('foo/foo.tar.gz') + assert_equal '.gz', File.extname('foo/foo.tar.gz') assert_equal '', File.extname('foo/bar') assert_equal '', File.extname('foo/.bar') assert_equal '', File.extname('foo.txt/bar') @@ -69,6 +175,32 @@ end end +assert('File#atime') do + begin + File.open("#{$mrbtest_io_wfname}.atime", 'w') do |f| + assert_equal Time, f.mtime.class + File.open("#{$mrbtest_io_wfname}.atime", 'r') do |f2| + assert_equal true, f.atime == f2.atime + end + end + ensure + File.delete("#{$mrbtest_io_wfname}.atime") + end +end + +assert('File#ctime') do + begin + File.open("#{$mrbtest_io_wfname}.ctime", 'w') do |f| + assert_equal Time, f.ctime.class + File.open("#{$mrbtest_io_wfname}.ctime", 'r') do |f2| + assert_equal true, f.ctime == f2.ctime + end + end + ensure + File.delete("#{$mrbtest_io_wfname}.ctime") + end +end + assert('File#mtime') do begin File.open("#{$mrbtest_io_wfname}.mtime", 'w') do |f| @@ -116,7 +248,7 @@ begin sep = File::ALT_SEPARATOR || File::SEPARATOR relative_path = "#{File.basename(dir)}#{sep}realpath_test" - path = "#{File._getwd}#{sep}#{relative_path}" + path = "#{MRubyIOTestUtil.getwd}#{sep}#{relative_path}" File.open(path, "w"){} assert_equal path, File.realpath(relative_path) @@ -165,32 +297,84 @@ end end -assert('File.expand_path') do - assert_equal "/", File.expand_path("..", "/tmp"), "parent path with base_dir (1)" - assert_equal "/tmp", File.expand_path("..", "/tmp/mruby"), "parent path with base_dir (2)" +if MRubyIOTestUtil.win? + assert('File.expand_path (for windows)') do + drive = MRubyIOTestUtil.getwd[0, 2] + alt1 = (drive.downcase != "c:") ? "c:" : "x:" + alt2 = (drive.downcase != "d:") ? "d:" : "y:" + + assert_equal "#{drive}/", File.expand_path("..", "/tmp"), "parent path with base_dir (1)" + assert_equal "#{drive}/tmp", File.expand_path("..", "/tmp/mruby"), "parent path with base_dir (2)" + + assert_equal "#{drive}/home", File.expand_path("/home"), "absolute" + assert_equal "#{drive}/home", File.expand_path("/home", "."), "absolute with base_dir" + + assert_equal "#{drive}/hoge", File.expand_path("/tmp/..//hoge") + assert_equal "//tmp/hoge", File.expand_path("////tmp/..///////hoge") + + assert_equal "#{drive}/", File.expand_path("../../../..", "/") + + assert_equal "#{drive}/", File.expand_path(([".."] * 100).join("/")) - assert_equal "/home", File.expand_path("/home"), "absolute" - assert_equal "/home", File.expand_path("/home", "."), "absolute with base_dir" + assert_equal "#{alt1}/x/y", File.expand_path("#{alt1}y", "/x") + assert_equal "#{alt2}/x/y", File.expand_path("#{alt2}y", "/x") + assert_equal "#{alt1}/x", File.expand_path("#{alt1}x", "#{alt2}y") + assert_equal "#{alt1}/y/x", File.expand_path("#{alt1}x", "./y") + end +else + assert('File.expand_path') do + assert_equal "/", File.expand_path("..", "/tmp"), "parent path with base_dir (1)" + assert_equal "/tmp", File.expand_path("..", "/tmp/mruby"), "parent path with base_dir (2)" + + assert_equal "/home", File.expand_path("/home"), "absolute" + assert_equal "/home", File.expand_path("/home", "."), "absolute with base_dir" - assert_equal "/hoge", File.expand_path("/tmp/..//hoge") - assert_equal "/hoge", File.expand_path("////tmp/..///////hoge") + assert_equal "/hoge", File.expand_path("/tmp/..//hoge") + assert_equal "/hoge", File.expand_path("////tmp/..///////hoge") + + assert_equal "/", File.expand_path("../../../..", "/") - assert_equal "/", File.expand_path("../../../..", "/") - if File._getwd[1] == ":" - drive_letter = File._getwd[0] - assert_equal drive_letter + ":\\", File.expand_path(([".."] * 100).join("/")) - else assert_equal "/", File.expand_path(([".."] * 100).join("/")) + + assert_equal "/x/c:y", File.expand_path("c:y", "/x") end end -assert('File.expand_path (with ENV)') do - skip unless Object.const_defined?(:ENV) && ENV['HOME'] +assert('File.expand_path (with getenv(3))') do + skip unless MRubyIOTestUtil.const_defined?(:ENV_HOME) + + assert_equal MRubyIOTestUtil::ENV_HOME, File.expand_path("~/"), "home" + assert_equal MRubyIOTestUtil::ENV_HOME, File.expand_path("~/", "/"), "home with base_dir" + + assert_equal "#{MRubyIOTestUtil::ENV_HOME}/user", File.expand_path("user", MRubyIOTestUtil::ENV_HOME), "relative with base_dir" +end - assert_equal ENV['HOME'], File.expand_path("~/"), "home" - assert_equal ENV['HOME'], File.expand_path("~/", "/"), "home with base_dir" +assert('File.absolute_path') do + assert_equal File.expand_path("./~"), File.absolute_path("~") + assert_equal File.expand_path("./~user1"), File.absolute_path("~user1") +end - assert_equal "#{ENV['HOME']}/user", File.expand_path("user", ENV['HOME']), "relative with base_dir" +if MRubyIOTestUtil.win? + assert('File.absolute_path? (for windows)') do + assert_true File.absolute_path?("c:/") + assert_true File.absolute_path?("c:/a") + assert_true File.absolute_path?("//") + assert_true File.absolute_path?("//?") + assert_true File.absolute_path?("//?/") + assert_false File.absolute_path?("") + assert_false File.absolute_path?("/") + assert_false File.absolute_path?("c:") + end +else + assert('File.absolute_path?') do + assert_true File.absolute_path?("/") + assert_true File.absolute_path?("/a") + assert_true File.absolute_path?("/a/b/") + assert_false File.absolute_path?("") + assert_false File.absolute_path?("a") + assert_false File.absolute_path?("a/b/") + assert_false File.absolute_path?("c:/") + end end assert('File.path') do @@ -230,8 +414,36 @@ begin assert_equal 1, File.chmod(0400, "#{$mrbtest_io_wfname}.chmod-test") ensure + # On Windows, must restore write permission before deletion + File.chmod(0600, "#{$mrbtest_io_wfname}.chmod-test") rescue nil File.delete("#{$mrbtest_io_wfname}.chmod-test") end end +assert('File.open with "x" mode') do + File.unlink $mrbtest_io_wfname rescue nil + assert_nothing_raised do + File.open($mrbtest_io_wfname, "wx") {} + end + assert_raise(RuntimeError) do + File.open($mrbtest_io_wfname, "wx") {} + end + + File.unlink $mrbtest_io_wfname rescue nil + assert_nothing_raised do + File.open($mrbtest_io_wfname, "w+x") {} + end + assert_raise(RuntimeError) do + File.open($mrbtest_io_wfname, "w+x") {} + end + + assert_raise(ArgumentError) do + File.open($mrbtest_io_wfname, "rx") {} + end + + assert_raise(ArgumentError) do + File.open($mrbtest_io_wfname, "ax") {} + end +end + MRubyIOTestUtil.io_test_cleanup diff --git a/mrbgems/mruby-io/test/io.rb b/mrbgems/mruby-io/test/io.rb index 88a89fee41..2aa7480dbd 100644 --- a/mrbgems/mruby-io/test/io.rb +++ b/mrbgems/mruby-io/test/io.rb @@ -2,7 +2,7 @@ # IO Test MRubyIOTestUtil.io_test_setup -$cr, $crlf, $cmd = MRubyIOTestUtil.win? ? [1, "\r\n", "cmd /c "] : [0, "\n", ""] +$cr, $cmd = MRubyIOTestUtil.win? ? [1, "cmd /c "] : [0, ""] def assert_io_open(meth) assert "assert_io_open" do @@ -16,14 +16,15 @@ def assert_io_open(meth) io1.close end - io2 = IO.__send__(meth, IO.sysopen($mrbtest_io_rfname))do |io| - if meth == :open - assert_equal $mrbtest_io_msg, io.read - else - flunk "IO.#{meth} does not take block" + if meth == :open + io2 = IO.__send__(meth, IO.sysopen($mrbtest_io_rfname))do |io| + if meth == :open + assert_equal $mrbtest_io_msg, io.read + else + flunk "IO.#{meth} does not take block" + end end end - io2.close unless meth == :open assert_raise(RuntimeError) { IO.__send__(meth, 1023) } # For Windows assert_raise(RuntimeError) { IO.__send__(meth, 1 << 26) } @@ -140,14 +141,39 @@ def assert_io_open(meth) end assert "IO#read(n) with n > IO::BUF_SIZE" do + buf_size = 4096 # copied from io.c skip "pipe is not supported on this platform" if MRubyIOTestUtil.win? IO.pipe do |r,w| - n = IO::BUF_SIZE+1 + n = buf_size+1 w.write 'a'*n assert_equal 'a'*n, r.read(n) end end +assert "IO#read(n, buf)" do + IO.open(IO.sysopen($mrbtest_io_rfname)) do |io| + buf = "12345" + assert_same buf, io.read(0, buf) + assert_equal "", buf + + buf = "12345" + assert_same buf, io.read(5, buf) + assert_equal "mruby", buf + + buf = "12345" + assert_same buf, io.read(nil, buf) + assert_equal " io test\n", buf + + buf = "12345" + assert_nil io.read(99, buf) + assert_equal "", buf + + buf = "12345" + assert_same buf, io.read(0, buf) + assert_equal "", buf + end +end + assert('IO#readchar', '15.2.20.5.15') do # almost same as IO#getc IO.open(IO.sysopen($mrbtest_io_rfname)) do |io| @@ -186,8 +212,8 @@ def assert_io_open(meth) end assert('IO#write', '15.2.20.5.20') do - io = IO.open(IO.sysopen($mrbtest_io_wfname)) - assert_equal 0, io.write("") + io = IO.open(IO.sysopen($mrbtest_io_wfname, "w"), "w") + assert_equal 1, io.write("a") io.close io = IO.open(IO.sysopen($mrbtest_io_wfname, "r+"), "r+") @@ -201,9 +227,9 @@ def assert_io_open(meth) end assert('IO#<<') do - io = IO.open(IO.sysopen($mrbtest_io_wfname)) - io << "" << "" - assert_equal 0, io.pos + io = IO.open(IO.sysopen($mrbtest_io_wfname, "w"), "w") + io << "a" << "b" + assert_equal 2, io.pos io.close end @@ -264,7 +290,7 @@ def assert_io_open(meth) assert('IO.sysopen, IO#sysread') do fd = IO.sysopen $mrbtest_io_rfname - io = IO.new fd + io = IO.new(fd) str1 = " " str2 = io.sysread(5, str1) assert_equal $mrbtest_io_msg[0,5], str1 @@ -285,14 +311,14 @@ def assert_io_open(meth) io.closed? fd = IO.sysopen $mrbtest_io_wfname, "w" - io = IO.new fd, "w" + io = IO.new(fd, "w") assert_raise(IOError) { io.sysread(1) } io.close end assert('IO.sysopen, IO#syswrite') do fd = IO.sysopen $mrbtest_io_wfname, "w" - io = IO.new fd, "w" + io = IO.new(fd, "w") str = "abcdefg" len = io.syswrite(str) assert_equal str.size, len @@ -303,26 +329,11 @@ def assert_io_open(meth) io.close end -assert('IO#_read_buf') do - fd = IO.sysopen $mrbtest_io_rfname - io = IO.new fd - def io._buf - @buf - end - msg_len = $mrbtest_io_msg.size - assert_equal '', io._buf - assert_equal $mrbtest_io_msg, io._read_buf - assert_equal $mrbtest_io_msg, io._buf - assert_equal 'mruby', io.read(5) - assert_equal 5, io.pos - assert_equal msg_len - 5, io._buf.size - assert_equal $mrbtest_io_msg[5,100], io.read - assert_equal 0, io._buf.size - assert_raise EOFError do - io._read_buf - end - assert_equal true, io.eof - assert_equal true, io.eof? +assert('IO#ungetc') do + io = IO.new(IO.sysopen($mrbtest_io_rfname)) + assert_equal 'm', io.getc + assert_nothing_raised{io.ungetc("M")} + assert_equal 'M', io.getc io.close end @@ -347,7 +358,7 @@ def io._buf assert('IO#pos=, IO#seek') do fd = IO.sysopen $mrbtest_io_rfname - io = IO.new fd + io = IO.new(fd) def io._buf @buf end @@ -360,7 +371,7 @@ def io._buf assert('IO#rewind') do fd = IO.sysopen $mrbtest_io_rfname - io = IO.new fd + io = IO.new(fd) assert_equal 'm', io.getc assert_equal 1, io.pos assert_equal 0, io.rewind @@ -370,7 +381,7 @@ def io._buf assert('IO#gets') do fd = IO.sysopen $mrbtest_io_rfname - io = IO.new fd + io = IO.new(fd) # gets without arguments assert_equal $mrbtest_io_msg, io.gets, "gets without arguments" @@ -383,6 +394,7 @@ def io._buf # gets with rs io.pos = 0 assert_equal $mrbtest_io_msg[0, 6], io.gets(' '), "gets with rs" + assert_equal $mrbtest_io_msg[6, 3], io.gets(' '), "gets with rs(2)" # gets with rs, limit io.pos = 0 @@ -392,14 +404,14 @@ def io._buf # reading many-lines file. fd = IO.sysopen $mrbtest_io_wfname, "w" - io = IO.new fd, "w" + io = IO.new(fd, "w") io.write "0123456789" * 2 + "\na" assert_equal 22 + $cr, io.pos io.close assert_equal true, io.closed? fd = IO.sysopen $mrbtest_io_wfname - io = IO.new fd + io = IO.new(fd) line = io.gets # gets first line @@ -418,7 +430,7 @@ def io._buf assert('IO#gets - paragraph mode') do fd = IO.sysopen $mrbtest_io_wfname, "w" - io = IO.new fd, "w" + io = IO.new(fd, "w") io.write "0" * 10 + "\n" io.write "1" * 10 + "\n\n" io.write "2" * 10 + "\n" @@ -426,7 +438,7 @@ def io._buf io.close fd = IO.sysopen $mrbtest_io_wfname - io = IO.new fd + io = IO.new(fd) para1 = "#{'0' * 10}\n#{'1' * 10}\n\n" text1 = io.gets("") assert_equal para1, text1 @@ -501,14 +513,14 @@ def io._buf assert('IO.read') do # empty file fd = IO.sysopen $mrbtest_io_wfname, "w" - io = IO.new fd, "w" + io = IO.new(fd, "w") io.close assert_equal "", IO.read($mrbtest_io_wfname) assert_equal nil, IO.read($mrbtest_io_wfname, 1) # one byte file fd = IO.sysopen $mrbtest_io_wfname, "w" - io = IO.new fd, "w" + io = IO.new(fd, "w") io.write "123" io.close assert_equal "123", IO.read($mrbtest_io_wfname) @@ -523,7 +535,7 @@ def io._buf assert('IO#fileno') do fd = IO.sysopen $mrbtest_io_rfname - io = IO.new fd + io = IO.new(fd) assert_equal io.fileno, fd assert_equal io.to_i, fd io.close @@ -531,7 +543,7 @@ def io._buf assert('IO#close_on_exec') do fd = IO.sysopen $mrbtest_io_wfname, "w" - io = IO.new fd, "w" + io = IO.new(fd, "w") begin # IO.sysopen opens a file descriptor with O_CLOEXEC flag. assert_true io.close_on_exec? @@ -577,7 +589,7 @@ def io._buf end assert('IO#pread') do - skip "IO#pread is not implemented on this configuration" unless MRubyIOTestUtil::MRB_WITH_IO_PREAD_PWRITE + skip "IO#pread is not implemented on this configuration" unless MRubyIOTestUtil::MRB_USE_IO_PREAD_PWRITE IO.open(IO.sysopen($mrbtest_io_rfname, 'r'), 'r') do |io| assert_equal $mrbtest_io_msg.byteslice(5, 8), io.pread(8, 5) @@ -589,7 +601,7 @@ def io._buf end assert('IO#pwrite') do - skip "IO#pwrite is not implemented on this configuration" unless MRubyIOTestUtil::MRB_WITH_IO_PREAD_PWRITE + skip "IO#pwrite is not implemented on this configuration" unless MRubyIOTestUtil::MRB_USE_IO_PREAD_PWRITE IO.open(IO.sysopen($mrbtest_io_wfname, 'w+'), 'w+') do |io| assert_equal 6, io.pwrite("Warld!", 7) @@ -646,10 +658,21 @@ def io._buf assert('`cmd`') do begin - assert_equal `#{$cmd}echo foo`, "foo#{$crlf}" + result = `#{$cmd}echo foo` + assert_equal "foo", result.chomp rescue NotImplementedError => e skip e.message end end +assert('IO#autoclose?, IO#autoclose=') do + io = IO.new(IO.sysopen($mrbtest_io_rfname), "r") + assert_true io.autoclose? + io.autoclose = false + assert_false io.autoclose? + io.autoclose = true + assert_true io.autoclose? + io.close +end + MRubyIOTestUtil.io_test_cleanup diff --git a/mrbgems/mruby-io/test/mruby_io_test.c b/mrbgems/mruby-io/test/mruby_io_test.c index cf99260e24..ea288ae5d7 100644 --- a/mrbgems/mruby-io/test/mruby_io_test.c +++ b/mrbgems/mruby-io/test/mruby_io_test.c @@ -3,7 +3,7 @@ #include #include -#if defined(_WIN32) || defined(_WIN64) +#if defined(_WIN32) #include #include @@ -60,12 +60,12 @@ mkdtemp(char *temp) #include #include -#include "mruby.h" -#include "mruby/array.h" -#include "mruby/error.h" -#include "mruby/string.h" -#include "mruby/variable.h" -#include +#include +#include +#include +#include +#include +#include static mrb_value mrb_io_test_io_setup(mrb_state *mrb, mrb_value self) @@ -79,7 +79,7 @@ mrb_io_test_io_setup(mrb_state *mrb, mrb_value self) mode_t mask; FILE *fp; int i; -#if !defined(_WIN32) && !defined(_WIN64) +#if !defined(_WIN32) struct sockaddr_un sun0 = { 0 }; /* Initialize them all because it is environment dependent */ #endif @@ -88,7 +88,7 @@ mrb_io_test_io_setup(mrb_state *mrb, mrb_value self) mask = umask(077); for (i = 0; i < IDX_COUNT; i++) { mrb_value fname = mrb_str_new_capa(mrb, 0); -#if !defined(_WIN32) && !defined(_WIN64) +#if !defined(_WIN32) /* * Workaround for not being able to bind a socket to some file systems * (e.g. vboxsf, NFS). [#4981] @@ -97,7 +97,8 @@ mrb_io_test_io_setup(mrb_state *mrb, mrb_value self) if (tmpdir && strlen(tmpdir) > 0) { mrb_str_cat_cstr(mrb, fname, tmpdir); if (*(RSTRING_END(fname)-1) != '/') mrb_str_cat_lit(mrb, fname, "/"); - } else { + } + else { mrb_str_cat_lit(mrb, fname, "/tmp/"); } #endif @@ -128,7 +129,7 @@ mrb_io_test_io_setup(mrb_state *mrb, mrb_value self) } fclose(fp); -#if !defined(_WIN32) && !defined(_WIN64) +#if !defined(_WIN32) unlink(fnames[IDX_LINK]); if (symlink(basename(fnames[IDX_READ]), fnames[IDX_LINK]) == -1) { mrb_raise(mrb, E_RUNTIME_ERROR, "can't make a symbolic link"); @@ -142,7 +143,7 @@ mrb_io_test_io_setup(mrb_state *mrb, mrb_value self) sun0.sun_family = AF_UNIX; strncpy(sun0.sun_path, fnames[IDX_SOCKET], sizeof(sun0.sun_path)-1); sun0.sun_path[sizeof(sun0.sun_path)-1] = 0; - if (bind(fds[IDX_SOCKET], (struct sockaddr *)&sun0, sizeof(sun0)) == -1) { + if (bind(fds[IDX_SOCKET], (struct sockaddr*)&sun0, sizeof(sun0)) == -1) { mrb_raisef(mrb, E_RUNTIME_ERROR, "can't bind AF_UNIX socket to %s: %d", sun0.sun_path, errno); @@ -213,7 +214,7 @@ mrb_io_test_rmdir(mrb_state *mrb, mrb_value klass) static mrb_value mrb_io_win_p(mrb_state *mrb, mrb_value klass) { -#if defined(_WIN32) || defined(_WIN64) +#if defined(_WIN32) # if defined(__CYGWIN__) || defined(__CYGWIN32__) return mrb_false_value(); # else @@ -224,10 +225,34 @@ mrb_io_win_p(mrb_state *mrb, mrb_value klass) #endif } -#ifdef MRB_WITH_IO_PREAD_PWRITE -# define MRB_WITH_IO_PREAD_PWRITE_ENABLED TRUE +#if defined(_WIN32) +#define MAXPATHLEN 1024 +#define getcwd _getcwd #else -# define MRB_WITH_IO_PREAD_PWRITE_ENABLED FALSE +#include +#include +#include +#endif + +static mrb_value +mrb_io_test_getwd(mrb_state *mrb, mrb_value klass) +{ + char buf[MAXPATHLEN]; + + mrb->c->ci->mid = 0; + if (getcwd(buf, MAXPATHLEN) == NULL) { + mrb_sys_fail(mrb, "getcwd(2)"); + } + char *utf8 = mrb_utf8_from_locale(buf, -1); + mrb_value path = mrb_str_new_cstr(mrb, utf8); + mrb_utf8_free(utf8); + return path; +} + +#ifdef MRB_USE_IO_PREAD_PWRITE +# define MRB_USE_IO_PREAD_PWRITE_ENABLED TRUE +#else +# define MRB_USE_IO_PREAD_PWRITE_ENABLED FALSE #endif void @@ -237,9 +262,32 @@ mrb_mruby_io_gem_test(mrb_state* mrb) mrb_define_class_method(mrb, io_test, "io_test_setup", mrb_io_test_io_setup, MRB_ARGS_NONE()); mrb_define_class_method(mrb, io_test, "io_test_cleanup", mrb_io_test_io_cleanup, MRB_ARGS_NONE()); + mrb_define_class_method(mrb, io_test, "getwd", mrb_io_test_getwd, MRB_ARGS_NONE()); mrb_define_class_method(mrb, io_test, "mkdtemp", mrb_io_test_mkdtemp, MRB_ARGS_REQ(1)); mrb_define_class_method(mrb, io_test, "rmdir", mrb_io_test_rmdir, MRB_ARGS_REQ(1)); mrb_define_class_method(mrb, io_test, "win?", mrb_io_win_p, MRB_ARGS_NONE()); - mrb_define_const(mrb, io_test, "MRB_WITH_IO_PREAD_PWRITE", mrb_bool_value(MRB_WITH_IO_PREAD_PWRITE_ENABLED)); + mrb_define_const(mrb, io_test, "MRB_USE_IO_PREAD_PWRITE", mrb_bool_value(MRB_USE_IO_PREAD_PWRITE_ENABLED)); + + const char *env_home = getenv("HOME"); +#ifdef _WIN32 + if (!env_home) { + env_home = getenv("USERPROFILE"); + } +#endif + if (env_home) { + char *utf8 = mrb_utf8_from_locale(env_home, -1); + mrb_value path = mrb_str_new_cstr(mrb, utf8); +#ifdef _WIN32 + char *pathp = RSTRING_PTR(path); + const char *const pathend = pathp + RSTRING_LEN(path); + for (;;) { + pathp = (char*)memchr(pathp, '\\', pathend - pathp); + if (!pathp) break; + *pathp++ = '/'; + } +#endif + mrb_define_const(mrb, io_test, "ENV_HOME", path); + mrb_utf8_free(utf8); + } } diff --git a/mrbgems/mruby-kernel-ext/README.md b/mrbgems/mruby-kernel-ext/README.md new file mode 100644 index 0000000000..30b3c58aaa --- /dev/null +++ b/mrbgems/mruby-kernel-ext/README.md @@ -0,0 +1,165 @@ +# mruby-kernel-ext + +This mrbgem extends the `Kernel` module in mruby with additional useful methods. + +## Methods + +### `fail(*args)` + +Raises a `RuntimeError`. This is an alias for `raise`. + +Example: + +```ruby +fail "Something went wrong" +# Raises RuntimeError: Something went wrong +``` + +### `caller(start=1, length=nil) -> array | nil` + +### `caller(range) -> array | nil` + +Returns the current execution stack (backtrace). + +- If `start` is provided, it indicates the number of frames to skip. +- If `length` is provided, it limits the number of frames returned. +- If a `range` is provided, it specifies the portion of the stack to return. + +Returns `nil` if `start` is greater than or equal to the number of frames in the stack. + +Example: + +```ruby +def foo + bar +end + +def bar + puts caller(0) # Show all frames starting from the current one + puts caller(1) # Skip one frame +end + +foo +``` + +### `__method__ -> symbol | nil` + +Returns the name of the current method as a `Symbol`. If called outside of a method, it returns `nil`. + +Example: + +```ruby +class MyClass + def my_method + puts __method__ + end +end + +MyClass.new.my_method +# Output: :my_method +``` + +### `__callee__ -> symbol | nil` + +Returns the called name of the current method as a `Symbol`. If called outside of a method, it returns `nil`. This can be different from `__method__` when using aliases. + +Example: + +```ruby +class MyClass + def original_method + puts __callee__ + end + + alias aliased_method original_method +end + +obj = MyClass.new +obj.original_method # Output: :original_method +obj.aliased_method # Output: :aliased_method +``` + +### `Integer(arg, base=0) -> integer` + +Converts `arg` to an `Integer`. + +- Numeric types are converted directly (floating-point numbers are truncated). +- If `arg` is a `String`, `base` (0, or between 2 and 36) is used as the base for conversion. + - If `base` is omitted or zero, radix indicators (`0`, `0b`, `0x`) in the string are honored. +- Strings must strictly conform to numeric representation, unlike `String#to_i`. +- Passing `nil` raises a `TypeError`. + +Examples: + +```ruby +Integer(123.999) #=> 123 +Integer("0x1a") #=> 26 +Integer("0930", 10) #=> 930 +Integer("111", 2) #=> 7 +# Integer(nil) #=> TypeError +# Integer("invalid") #=> ArgumentError +``` + +### `Float(arg) -> float` + +Converts `arg` to a `Float`. + +- Numeric types are converted directly. +- Other types are converted using `arg.to_f`. +- Passing `nil` raises a `TypeError`. + +Examples: + +```ruby +Float(1) #=> 1.0 +Float(123.456) #=> 123.456 +Float("123.456") #=> 123.456 +# Float(nil) #=> TypeError +# Float("invalid") #=> ArgumentError +``` + +### `String(arg) -> string` + +Converts `arg` to a `String` using its `to_s` method. + +Examples: + +```ruby +String(self) #=> "main" +String(self.class) #=> "Object" +String(123456) #=> "123456" +String(:symbol) #=> "symbol" +``` + +### `Array(arg) -> array` + +Converts `arg` to an `Array`. + +- If `arg` responds to `to_a`, it calls `to_a` to convert. +- Otherwise, it returns a new array containing `arg` as its single element. + +Examples: + +```ruby +Array(1..5) #=> [1, 2, 3, 4, 5] +Array([1, 2, 3]) #=> [1, 2, 3] +Array("hello") #=> ["hello"] # If String does not have to_a +Array({ a: 1, b: 2 }) #=> [[:a, 1], [:b, 2]] # If Hash has to_a +``` + +### `Hash(arg) -> hash` + +Converts `arg` to a `Hash`. + +- If `arg` is already a `Hash`, it is returned. +- If `arg` is `nil` or an empty `Array`, an empty `Hash` is returned. +- Otherwise, it raises a `TypeError`. + +Examples: + +```ruby +Hash({ key: :value }) #=> { key: :value } +Hash(nil) #=> {} +Hash([]) #=> {} +# Hash([1, 2, 3]) #=> TypeError +``` diff --git a/mrbgems/mruby-kernel-ext/src/kernel.c b/mrbgems/mruby-kernel-ext/src/kernel.c index 89116f63aa..1ba93a33c5 100644 --- a/mrbgems/mruby-kernel-ext/src/kernel.c +++ b/mrbgems/mruby-kernel-ext/src/kernel.c @@ -6,9 +6,30 @@ #include #include #include +#include #include -#include +/* + * call-seq: + * caller(start = 1) -> array or nil + * caller(range) -> array or nil + * caller(start, length) -> array or nil + * + * Returns the current execution stack as an array of strings in the form + * "file:line" or "file:line:in `method'". The optional start parameter + * determines the number of initial stack entries to omit from the top of the stack. + * + * def a(skip) + * caller(skip) + * end + * def b + * a(0) + * end + * def c + * b + * end + * c #=> ["prog:2:in `a'", "prog:5:in `b'", "prog:8:in `c'", "prog:10:in `
'"] + */ static mrb_value mrb_f_caller(mrb_state *mrb, mrb_value self) { @@ -59,7 +80,7 @@ mrb_f_caller(mrb_state *mrb, mrb_value self) if (n < 0) { mrb_raisef(mrb, E_ARGUMENT_ERROR, "negative size (%d)", n); } - if (n == 0 || bt_len <= lev) { + if (n == 0) { return mrb_ary_new(mrb); } if (bt_len <= n + lev) n = bt_len - lev - 1; @@ -71,7 +92,7 @@ mrb_f_caller(mrb_state *mrb, mrb_value self) * __method__ -> symbol * * Returns the called name of the current method as a Symbol. - * If called outside of a method, it returns nil. + * If called outside of a method, it returns `nil`. * */ static mrb_value @@ -92,7 +113,7 @@ mrb_f_method(mrb_state *mrb, mrb_value self) * __callee__ -> symbol * * Returns the called name of the current method as a Symbol. - * If called outside of a method, it returns nil. + * If called outside of a method, it returns `nil`. * */ static mrb_value @@ -110,16 +131,16 @@ mrb_f_callee(mrb_state *mrb, mrb_value self) * call-seq: * Integer(arg,base=0) -> integer * - * Converts arg to a Integer. + * Converts *arg* to a `Integer`. * Numeric types are converted directly (with floating-point numbers - * being truncated). base (0, or between 2 and 36) is a base for - * integer string representation. If arg is a String, - * when base is omitted or equals to zero, radix indicators - * (0, 0b, and 0x) are honored. + * being truncated). *base* (0, or between 2 and 36) is a base for + * integer string representation. If *arg* is a `String`, + * when *base* is omitted or equals to zero, radix indicators + * (`0`, `0b`, and `0x`) are honored. * In any case, strings should be strictly conformed to numeric * representation. This behavior is different from that of - * String#to_i. Non string values will be treated as integers. - * Passing nil raises a TypeError. + * `String#to_i`. Non string values will be treated as integers. + * Passing `nil` raises a TypeError. * * Integer(123.999) #=> 123 * Integer("0x1a") #=> 26 @@ -128,6 +149,12 @@ mrb_f_callee(mrb_state *mrb, mrb_value self) * Integer("111", 2) #=> 7 * Integer(nil) #=> TypeError */ +static mrb_noreturn void +arg_error(mrb_state *mrb) +{ + mrb_raise(mrb, E_ARGUMENT_ERROR, "base specified for non string value"); +} + static mrb_value mrb_f_integer(mrb_state *mrb, mrb_value self) { @@ -136,18 +163,18 @@ mrb_f_integer(mrb_state *mrb, mrb_value self) mrb_get_args(mrb, "o|i", &val, &base); if (mrb_nil_p(val)) { - if (base != 0) goto arg_error; + if (base != 0) arg_error(mrb); mrb_raise(mrb, E_TYPE_ERROR, "can't convert nil into Integer"); } switch (mrb_type(val)) { #ifndef MRB_NO_FLOAT case MRB_TT_FLOAT: - if (base != 0) goto arg_error; + if (base != 0) arg_error(mrb); return mrb_float_to_integer(mrb, val); #endif case MRB_TT_INTEGER: - if (base != 0) goto arg_error; + if (base != 0) arg_error(mrb); return val; case MRB_TT_STRING: @@ -163,8 +190,7 @@ mrb_f_integer(mrb_state *mrb, mrb_value self) val = tmp; goto string_conv; } -arg_error: - mrb_raise(mrb, E_ARGUMENT_ERROR, "base specified for non string value"); + arg_error(mrb); } /* to raise TypeError */ return mrb_ensure_integer_type(mrb, val); @@ -175,8 +201,8 @@ mrb_f_integer(mrb_state *mrb, mrb_value self) * call-seq: * Float(arg) -> float * - * Returns arg converted to a float. Numeric types are converted - * directly, the rest are converted using arg.to_f. + * Returns *arg* converted to a float. Numeric types are converted + * directly, the rest are converted using *arg*.to_f. * * Float(1) #=> 1.0 * Float(123.456) #=> 123.456 @@ -199,8 +225,8 @@ mrb_f_float(mrb_state *mrb, mrb_value self) * call-seq: * String(arg) -> string * - * Returns arg as an String. - * converted using to_s method. + * Returns *arg* as an `String`. + * converted using `to_s` method. * * String(self) #=> "main" * String(self.class) #=> "Object" @@ -210,9 +236,7 @@ static mrb_value mrb_f_string(mrb_state *mrb, mrb_value self) { mrb_value arg = mrb_get_arg1(mrb); - mrb_value tmp; - - tmp = mrb_type_convert(mrb, arg, MRB_TT_STRING, MRB_SYM(to_s)); + mrb_value tmp = mrb_type_convert(mrb, arg, MRB_TT_STRING, MRB_SYM(to_s)); return tmp; } @@ -220,7 +244,7 @@ mrb_f_string(mrb_state *mrb, mrb_value self) * call-seq: * Array(arg) -> array * - * Returns +arg+ as an Array using to_a method. + * Returns `arg` as an Array using to_a method. * * Array(1..5) #=> [1, 2, 3, 4, 5] * @@ -229,9 +253,7 @@ static mrb_value mrb_f_array(mrb_state *mrb, mrb_value self) { mrb_value arg = mrb_get_arg1(mrb); - mrb_value tmp; - - tmp = mrb_type_convert_check(mrb, arg, MRB_TT_ARRAY, MRB_SYM(to_a)); + mrb_value tmp = mrb_type_convert_check(mrb, arg, MRB_TT_ARRAY, MRB_SYM(to_a)); if (mrb_nil_p(tmp)) { return mrb_ary_new_from_values(mrb, 1, &arg); } @@ -243,9 +265,9 @@ mrb_f_array(mrb_state *mrb, mrb_value self) * call-seq: * Hash(arg) -> hash * - * Returns a Hash if arg is a Hash. - * Returns an empty Hash when arg is nil - * or []. + * Returns a `Hash` if *arg* is a `Hash`. + * Returns an empty `Hash` when *arg* is `nil` + * or `[]`. * * Hash([]) #=> {} * Hash(nil) #=> {} @@ -265,22 +287,26 @@ mrb_f_hash(mrb_state *mrb, mrb_value self) return arg; } +static const mrb_mt_entry kernel_ext_rom_entries[] = { + MRB_MT_ENTRY(mrb_f_raise, MRB_SYM(fail), MRB_ARGS_OPT(2) | MRB_MT_PRIVATE), + MRB_MT_ENTRY(mrb_f_caller, MRB_SYM(caller), MRB_ARGS_OPT(2) | MRB_MT_PRIVATE), + MRB_MT_ENTRY(mrb_f_method, MRB_SYM(__method__), MRB_ARGS_NONE() | MRB_MT_PRIVATE), + MRB_MT_ENTRY(mrb_f_callee, MRB_SYM(__callee__), MRB_ARGS_NONE() | MRB_MT_PRIVATE), + MRB_MT_ENTRY(mrb_f_integer, MRB_SYM(Integer), MRB_ARGS_ARG(1,1) | MRB_MT_PRIVATE), + MRB_MT_ENTRY(mrb_f_string, MRB_SYM(String), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), + MRB_MT_ENTRY(mrb_f_array, MRB_SYM(Array), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), +#ifndef MRB_NO_FLOAT + MRB_MT_ENTRY(mrb_f_float, MRB_SYM(Float), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), +#endif + MRB_MT_ENTRY(mrb_f_hash, MRB_SYM(Hash), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), +}; + void mrb_mruby_kernel_ext_gem_init(mrb_state *mrb) { struct RClass *krn = mrb->kernel_module; - mrb_define_module_function(mrb, krn, "fail", mrb_f_raise, MRB_ARGS_OPT(2)); - mrb_define_module_function(mrb, krn, "caller", mrb_f_caller, MRB_ARGS_OPT(2)); - mrb_define_method(mrb, krn, "__method__", mrb_f_method, MRB_ARGS_NONE()); - mrb_define_method(mrb, krn, "__callee__", mrb_f_callee, MRB_ARGS_NONE()); - mrb_define_module_function(mrb, krn, "Integer", mrb_f_integer, MRB_ARGS_ARG(1,1)); -#ifndef MRB_NO_FLOAT - mrb_define_module_function(mrb, krn, "Float", mrb_f_float, MRB_ARGS_REQ(1)); -#endif - mrb_define_module_function(mrb, krn, "String", mrb_f_string, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, krn, "Array", mrb_f_array, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, krn, "Hash", mrb_f_hash, MRB_ARGS_REQ(1)); + MRB_MT_INIT_ROM(mrb, krn, kernel_ext_rom_entries); } void diff --git a/mrbgems/mruby-kernel-ext/test/kernel.rb b/mrbgems/mruby-kernel-ext/test/kernel.rb index e5876f9762..27176f2a01 100644 --- a/mrbgems/mruby-kernel-ext/test/kernel.rb +++ b/mrbgems/mruby-kernel-ext/test/kernel.rb @@ -1,6 +1,5 @@ -assert('Kernel.fail, Kernel#fail') do +assert('Kernel#fail') do assert_raise(RuntimeError) { fail } - assert_raise(RuntimeError) { Kernel.fail } end assert('Kernel.caller, Kernel#caller') do @@ -50,17 +49,19 @@ def m1; __method__ end assert_equal(:m2, c.new.m4) end +# CRuby __callee__ always gives the calling method name +# mruby __callee__ gives the original method name for aliases assert('Kernel#__callee__') do c = Class.new do def m1; __callee__ end define_method(:m2) {__callee__} - alias m3 m1 - alias_method :m4, :m2 +# alias m3 m1 +# alias_method :m4, :m2 end assert_equal(:m1, c.new.m1) assert_equal(:m2, c.new.m2) - assert_equal(:m3, c.new.m3) - assert_equal(:m4, c.new.m4) +# assert_equal(:m3, c.new.m3) +# assert_equal(:m4, c.new.m4) end assert('Kernel#Integer') do @@ -119,10 +120,10 @@ def m1; __callee__ end end assert('Kernel#Array') do - assert_equal([1], Kernel.Array(1)) - assert_equal([1, 2, 3, 4, 5], Kernel.Array([1, 2, 3, 4, 5])) - assert_equal([1, 2, 3, 4, 5], Kernel.Array(1..5)) - assert_equal([[:a, 1], [:b, 2], [:c, 3]], Kernel.Array({a:1, b:2, c:3})) + assert_equal([1], Array(1)) + assert_equal([1, 2, 3, 4, 5], Array([1, 2, 3, 4, 5])) + assert_equal([1, 2, 3, 4, 5], Array(1..5)) + assert_equal([[:a, 1], [:b, 2], [:c, 3]], Array({a:1, b:2, c:3})) end assert('Kernel#Hash') do diff --git a/mrbgems/mruby-math/README.md b/mrbgems/mruby-math/README.md new file mode 100644 index 0000000000..b1cf18a418 --- /dev/null +++ b/mrbgems/mruby-math/README.md @@ -0,0 +1,77 @@ +# mruby-math + +This mrbgem provides a comprehensive set of mathematical functions for mruby. It allows you to perform common mathematical operations within your mruby applications. + +This gem is a standard component of mruby and provides functionalities similar to the `Math` module in standard Ruby. + +## Available Functions + +### Trigonometric Functions + +- `Math.sin(x)`: Computes the sine of x (expressed in radians). Returns -1..1. +- `Math.cos(x)`: Computes the cosine of x (expressed in radians). Returns -1..1. +- `Math.tan(x)`: Returns the tangent of x (expressed in radians). + +### Inverse Trigonometric Functions + +- `Math.asin(x)`: Computes the arc sine of x. Returns computed value between `-(PI/2)` and `(PI/2)`. +- `Math.acos(x)`: Computes the arc cosine of x. Returns 0..PI. +- `Math.atan(x)`: Computes the arc tangent of x. Returns `-(PI/2) .. (PI/2)`. +- `Math.atan2(y, x)`: Computes the arc tangent given y and x. Returns -PI..PI. + +### Hyperbolic Trigonometric Functions + +- `Math.sinh(x)`: Computes the hyperbolic sine of x (expressed in radians). +- `Math.cosh(x)`: Computes the hyperbolic cosine of x (expressed in radians). +- `Math.tanh(x)`: Computes the hyperbolic tangent of x (expressed in radians). + +### Inverse Hyperbolic Trigonometric Functions + +- `Math.asinh(x)`: Computes the inverse hyperbolic sine of x. +- `Math.acosh(x)`: Computes the inverse hyperbolic cosine of x. +- `Math.atanh(x)`: Computes the inverse hyperbolic tangent of x. + +### Exponential and Logarithmic Functions + +- `Math.exp(x)`: Returns e\*\*x. +- `Math.log(numeric)` or `Math.log(num,base)`: Returns the natural logarithm of numeric. If additional second argument is given, it will be the base of logarithm. +- `Math.log2(numeric)`: Returns the base 2 logarithm of numeric. +- `Math.log10(numeric)`: Returns the base 10 logarithm of numeric. + +### Other Functions + +- `Math.sqrt(numeric)`: Returns the square root of numeric. +- `Math.cbrt(numeric)`: Returns the cube root of numeric. +- `Math.frexp(numeric)`: Returns a two-element array containing the normalized fraction (a Float) and exponent (a Integer) of numeric. +- `Math.ldexp(flt, int)`: Returns the value of flt\*(2\*\*int). +- `Math.hypot(x, y)`: Returns sqrt(x**2 + y**2), the hypotenuse of a right-angled triangle with sides x and y. +- `Math.erf(x)`: Calculates the error function of x. +- `Math.erfc(x)`: Calculates the complementary error function of x. + +## Usage Examples + +```ruby +# Basic trigonometric functions +puts Math.sin(Math::PI / 2) # Output: 1.0 +puts Math.cos(0) # Output: 1.0 +puts Math.tan(Math::PI / 4) # Output: 1.0 (approximately, due to float precision) + +# Square root +puts Math.sqrt(16) # Output: 4.0 +puts Math.sqrt(2) # Output: 1.4142135623730951 + +# Cube root +puts Math.cbrt(8) # Output: 2.0 +puts Math.cbrt(27) # Output: 3.0 + +# Exponential and Logarithm +puts Math.exp(0) # Output: 1.0 +puts Math.exp(1) # Output: 2.718281828459045 (Math::E) +puts Math.log(Math::E) # Output: 1.0 +puts Math.log(100, 10) # Output: 2.0 +puts Math.log2(16) # Output: 4.0 +puts Math.log10(1000) # Output: 3.0 + +# Hypotenuse +puts Math.hypot(3, 4) # Output: 5.0 +``` diff --git a/mrbgems/mruby-math/src/math.c b/mrbgems/mruby-math/src/math.c index 164bfe131c..e746b7ce47 100644 --- a/mrbgems/mruby-math/src/math.c +++ b/mrbgems/mruby-math/src/math.c @@ -11,7 +11,6 @@ #endif #include -#include static void domain_error(mrb_state *mrb, const char *func) @@ -74,7 +73,7 @@ atanh(double x) } else { /* Basic formula for atanh */ - y = 0.5 * (log(1.0+x) - log(1.0-x)); + y = 0.5 * (log1p(x) - log1p(-x)); } return y; @@ -105,7 +104,7 @@ double erfc(double x); double erf(double x) { - static const double two_sqrtpi = 1.128379167095512574; + static const double two_sqrtpi = 1.128379167095512574; double sum = x; double term = x; double xsqr = x*x; @@ -116,10 +115,10 @@ erf(double x) do { term *= xsqr/j; sum -= term/(2*j+1); - ++j; + j++; term *= xsqr/j; sum += term/(2*j+1); - ++j; + j++; if (sum == 0) break; } while (fabs(term/sum) > DBL_EPSILON); return two_sqrtpi*sum; @@ -129,7 +128,7 @@ erf(double x) double erfc(double x) { - static const double one_sqrtpi= 0.564189583547756287; + static const double one_sqrtpi = 0.564189583547756287; double a = 1; double b = x; double c = x; @@ -137,15 +136,15 @@ erfc(double x) double q1; double q2 = b/d; double n = 1.0; - double t; + if (fabs(x) < 2.2) { - return 1.0 - erf(x); + return erfc(x); } if (x < 0.0) { /*signbit(x)*/ return 2.0 - erfc(-x); } do { - t = a*n+b*x; + double t = a*n+b*x; a = b; b = t; t = c*n+d*x; @@ -174,6 +173,8 @@ log2(double x) #endif +#define get_float_arg(mrb) mrb_as_float((mrb), mrb_get_arg1(mrb)) + /* TRIGONOMETRIC FUNCTIONS */ @@ -182,17 +183,13 @@ log2(double x) * call-seq: * Math.sin(x) -> float * - * Computes the sine of x (expressed in radians). Returns + * Computes the sine of *x* (expressed in radians). Returns * -1..1. */ static mrb_value math_sin(mrb_state *mrb, mrb_value obj) { - mrb_float x; - - mrb_get_args(mrb, "f", &x); - x = sin(x); - + mrb_float x = sin(get_float_arg(mrb)); return mrb_float_value(mrb, x); } @@ -200,17 +197,13 @@ math_sin(mrb_state *mrb, mrb_value obj) * call-seq: * Math.cos(x) -> float * - * Computes the cosine of x (expressed in radians). Returns + * Computes the cosine of *x* (expressed in radians). Returns * -1..1. */ static mrb_value math_cos(mrb_state *mrb, mrb_value obj) { - mrb_float x; - - mrb_get_args(mrb, "f", &x); - x = cos(x); - + mrb_float x = cos(get_float_arg(mrb)); return mrb_float_value(mrb, x); } @@ -218,16 +211,12 @@ math_cos(mrb_state *mrb, mrb_value obj) * call-seq: * Math.tan(x) -> float * - * Returns the tangent of x (expressed in radians). + * Returns the tangent of *x* (expressed in radians). */ static mrb_value math_tan(mrb_state *mrb, mrb_value obj) { - mrb_float x; - - mrb_get_args(mrb, "f", &x); - x = tan(x); - + mrb_float x = tan(get_float_arg(mrb)); return mrb_float_value(mrb, x); } @@ -239,15 +228,14 @@ math_tan(mrb_state *mrb, mrb_value obj) * call-seq: * Math.asin(x) -> float * - * Computes the arc sine of x. + * Computes the arc sine of *x*. * @return computed value between `-(PI/2)` and `(PI/2)`. */ static mrb_value math_asin(mrb_state *mrb, mrb_value obj) { - mrb_float x; + mrb_float x = get_float_arg(mrb); - mrb_get_args(mrb, "f", &x); if (x < -1.0 || x > 1.0) { domain_error(mrb, "asin"); } @@ -260,14 +248,13 @@ math_asin(mrb_state *mrb, mrb_value obj) * call-seq: * Math.acos(x) -> float * - * Computes the arc cosine of x. Returns 0..PI. + * Computes the arc cosine of *x*. Returns 0..PI. */ static mrb_value math_acos(mrb_state *mrb, mrb_value obj) { - mrb_float x; + mrb_float x = get_float_arg(mrb); - mrb_get_args(mrb, "f", &x); if (x < -1.0 || x > 1.0) { domain_error(mrb, "acos"); } @@ -280,16 +267,12 @@ math_acos(mrb_state *mrb, mrb_value obj) * call-seq: * Math.atan(x) -> float * - * Computes the arc tangent of x. Returns `-(PI/2) .. (PI/2)`. + * Computes the arc tangent of *x*. Returns `-(PI/2) .. (PI/2)`. */ static mrb_value math_atan(mrb_state *mrb, mrb_value obj) { - mrb_float x; - - mrb_get_args(mrb, "f", &x); - x = atan(x); - + mrb_float x = atan(get_float_arg(mrb)); return mrb_float_value(mrb, x); } @@ -297,7 +280,7 @@ math_atan(mrb_state *mrb, mrb_value obj) * call-seq: * Math.atan2(y, x) -> float * - * Computes the arc tangent given y and x. Returns + * Computes the arc tangent given *y* and *x*. Returns * -PI..PI. * * Math.atan2(-0.0, -1.0) #=> -3.141592653589793 @@ -330,17 +313,13 @@ math_atan2(mrb_state *mrb, mrb_value obj) * call-seq: * Math.sinh(x) -> float * - * Computes the hyperbolic sine of x (expressed in + * Computes the hyperbolic sine of *x* (expressed in * radians). */ static mrb_value math_sinh(mrb_state *mrb, mrb_value obj) { - mrb_float x; - - mrb_get_args(mrb, "f", &x); - x = sinh(x); - + mrb_float x = sinh(get_float_arg(mrb)); return mrb_float_value(mrb, x); } @@ -348,16 +327,12 @@ math_sinh(mrb_state *mrb, mrb_value obj) * call-seq: * Math.cosh(x) -> float * - * Computes the hyperbolic cosine of x (expressed in radians). + * Computes the hyperbolic cosine of *x* (expressed in radians). */ static mrb_value math_cosh(mrb_state *mrb, mrb_value obj) { - mrb_float x; - - mrb_get_args(mrb, "f", &x); - x = cosh(x); - + mrb_float x = cosh(get_float_arg(mrb)); return mrb_float_value(mrb, x); } @@ -365,17 +340,13 @@ math_cosh(mrb_state *mrb, mrb_value obj) * call-seq: * Math.tanh() -> float * - * Computes the hyperbolic tangent of x (expressed in + * Computes the hyperbolic tangent of *x* (expressed in * radians). */ static mrb_value math_tanh(mrb_state *mrb, mrb_value obj) { - mrb_float x; - - mrb_get_args(mrb, "f", &x); - x = tanh(x); - + mrb_float x = tanh(get_float_arg(mrb)); return mrb_float_value(mrb, x); } @@ -388,17 +359,12 @@ math_tanh(mrb_state *mrb, mrb_value obj) * call-seq: * Math.asinh(x) -> float * - * Computes the inverse hyperbolic sine of x. + * Computes the inverse hyperbolic sine of *x*. */ static mrb_value math_asinh(mrb_state *mrb, mrb_value obj) { - mrb_float x; - - mrb_get_args(mrb, "f", &x); - - x = asinh(x); - + mrb_float x = asinh(get_float_arg(mrb)); return mrb_float_value(mrb, x); } @@ -406,14 +372,13 @@ math_asinh(mrb_state *mrb, mrb_value obj) * call-seq: * Math.acosh(x) -> float * - * Computes the inverse hyperbolic cosine of x. + * Computes the inverse hyperbolic cosine of *x*. */ static mrb_value math_acosh(mrb_state *mrb, mrb_value obj) { - mrb_float x; + mrb_float x = get_float_arg(mrb); - mrb_get_args(mrb, "f", &x); if (x < 1.0) { domain_error(mrb, "acosh"); } @@ -426,14 +391,13 @@ math_acosh(mrb_state *mrb, mrb_value obj) * call-seq: * Math.atanh(x) -> float * - * Computes the inverse hyperbolic tangent of x. + * Computes the inverse hyperbolic tangent of *x*. */ static mrb_value math_atanh(mrb_state *mrb, mrb_value obj) { - mrb_float x; + mrb_float x = get_float_arg(mrb); - mrb_get_args(mrb, "f", &x); if (x < -1.0 || x > 1.0) { domain_error(mrb, "atanh"); } @@ -460,11 +424,37 @@ math_atanh(mrb_state *mrb, mrb_value obj) static mrb_value math_exp(mrb_state *mrb, mrb_value obj) { - mrb_float x; + mrb_float x = exp(get_float_arg(mrb)); + return mrb_float_value(mrb, x); +} - mrb_get_args(mrb, "f", &x); - x = exp(x); +/* + * call-seq: + * Math.expm1(x) -> float + * + * Returns exp(x) - 1. + */ +static mrb_value +math_expm1(mrb_state *mrb, mrb_value obj) +{ + mrb_float x = expm1(get_float_arg(mrb)); + return mrb_float_value(mrb, x); +} +/* + * call-seq: + * Math.log1p(x) -> float + * + * Returns log(1 + x). + */ +static mrb_value +math_log1p(mrb_state *mrb, mrb_value obj) +{ + mrb_float x = get_float_arg(mrb); + if (x < -1.0) { + domain_error(mrb, "log1p"); + } + x = log1p(x); return mrb_float_value(mrb, x); } @@ -473,7 +463,7 @@ math_exp(mrb_state *mrb, mrb_value obj) * Math.log(numeric) -> float * Math.log(num,base) -> float * - * Returns the natural logarithm of numeric. + * Returns the natural logarithm of *numeric*. * If additional second argument is given, it will be the base * of logarithm. * @@ -487,9 +477,8 @@ static mrb_value math_log(mrb_state *mrb, mrb_value obj) { mrb_float x, base; - mrb_int argc; + mrb_int argc = mrb_get_args(mrb, "f|f", &x, &base); - argc = mrb_get_args(mrb, "f|f", &x, &base); if (x < 0.0) { domain_error(mrb, "log"); } @@ -507,7 +496,7 @@ math_log(mrb_state *mrb, mrb_value obj) * call-seq: * Math.log2(numeric) -> float * - * Returns the base 2 logarithm of numeric. + * Returns the base 2 logarithm of *numeric*. * * Math.log2(1) #=> 0.0 * Math.log2(2) #=> 1.0 @@ -518,9 +507,8 @@ math_log(mrb_state *mrb, mrb_value obj) static mrb_value math_log2(mrb_state *mrb, mrb_value obj) { - mrb_float x; + mrb_float x = get_float_arg(mrb); - mrb_get_args(mrb, "f", &x); if (x < 0.0) { domain_error(mrb, "log2"); } @@ -533,7 +521,7 @@ math_log2(mrb_state *mrb, mrb_value obj) * call-seq: * Math.log10(numeric) -> float * - * Returns the base 10 logarithm of numeric. + * Returns the base 10 logarithm of *numeric*. * * Math.log10(1) #=> 0.0 * Math.log10(10) #=> 1.0 @@ -543,9 +531,8 @@ math_log2(mrb_state *mrb, mrb_value obj) static mrb_value math_log10(mrb_state *mrb, mrb_value obj) { - mrb_float x; + mrb_float x = get_float_arg(mrb); - mrb_get_args(mrb, "f", &x); if (x < 0.0) { domain_error(mrb, "log10"); } @@ -558,15 +545,14 @@ math_log10(mrb_state *mrb, mrb_value obj) * call-seq: * Math.sqrt(numeric) -> float * - * Returns the square root of numeric. + * Returns the square root of *numeric*. * */ static mrb_value math_sqrt(mrb_state *mrb, mrb_value obj) { - mrb_float x; + mrb_float x = get_float_arg(mrb); - mrb_get_args(mrb, "f", &x); if (x < 0.0) { domain_error(mrb, "sqrt"); } @@ -580,7 +566,7 @@ math_sqrt(mrb_state *mrb, mrb_value obj) * call-seq: * Math.cbrt(numeric) -> float * - * Returns the cube root of numeric. + * Returns the cube root of *numeric*. * * -9.upto(9) {|x| * p [x, Math.cbrt(x), Math.cbrt(x)**3] @@ -610,11 +596,7 @@ math_sqrt(mrb_state *mrb, mrb_value obj) static mrb_value math_cbrt(mrb_state *mrb, mrb_value obj) { - mrb_float x; - - mrb_get_args(mrb, "f", &x); - x = cbrt(x); - + mrb_float x = cbrt(get_float_arg(mrb)); return mrb_float_value(mrb, x); } @@ -624,8 +606,8 @@ math_cbrt(mrb_state *mrb, mrb_value obj) * Math.frexp(numeric) -> [ fraction, exponent ] * * Returns a two-element array containing the normalized fraction (a - * Float) and exponent (a Integer) of - * numeric. + * `Float`) and exponent (a `Integer`) of + * *numeric*. * * fraction, exponent = Math.frexp(1234) #=> [0.6025390625, 11] * fraction * 2**exponent #=> 1234.0 @@ -633,10 +615,9 @@ math_cbrt(mrb_state *mrb, mrb_value obj) static mrb_value math_frexp(mrb_state *mrb, mrb_value obj) { - mrb_float x; + mrb_float x = get_float_arg(mrb); int exp; - mrb_get_args(mrb, "f", &x); x = frexp(x, &exp); return mrb_assoc_new(mrb, mrb_float_value(mrb, x), mrb_fixnum_value(exp)); @@ -646,7 +627,7 @@ math_frexp(mrb_state *mrb, mrb_value obj) * call-seq: * Math.ldexp(flt, int) -> float * - * Returns the value of flt*(2**int). + * Returns the value of *flt**(2***int*). * * fraction, exponent = Math.frexp(1234) * Math.ldexp(fraction, exponent) #=> 1234.0 @@ -668,7 +649,7 @@ math_ldexp(mrb_state *mrb, mrb_value obj) * Math.hypot(x, y) -> float * * Returns sqrt(x**2 + y**2), the hypotenuse of a right-angled triangle - * with sides x and y. + * with sides *x* and *y*. * * Math.hypot(3, 4) #=> 5.0 */ @@ -692,11 +673,7 @@ math_hypot(mrb_state *mrb, mrb_value obj) static mrb_value math_erf(mrb_state *mrb, mrb_value obj) { - mrb_float x; - - mrb_get_args(mrb, "f", &x); - x = erf(x); - + mrb_float x = erf(get_float_arg(mrb)); return mrb_float_value(mrb, x); } @@ -710,11 +687,7 @@ math_erf(mrb_state *mrb, mrb_value obj) static mrb_value math_erfc(mrb_state *mrb, mrb_value obj) { - mrb_float x; - - mrb_get_args(mrb, "f", &x); - x = erfc(x); - + mrb_float x = erfc(get_float_arg(mrb)); return mrb_float_value(mrb, x); } @@ -722,54 +695,55 @@ math_erfc(mrb_state *mrb, mrb_value obj) void mrb_mruby_math_gem_init(mrb_state* mrb) { - struct RClass *mrb_math; - mrb_math = mrb_define_module(mrb, "Math"); + struct RClass *math = mrb_define_module_id(mrb, MRB_SYM(Math)); - mrb_define_class_under_id(mrb, mrb_math, MRB_SYM(DomainError), mrb->eStandardError_class); + mrb_define_class_under_id(mrb, math, MRB_SYM(DomainError), E_STANDARD_ERROR); #ifdef M_PI - mrb_define_const_id(mrb, mrb_math, MRB_SYM(PI), mrb_float_value(mrb, M_PI)); + mrb_define_const_id(mrb, math, MRB_SYM(PI), mrb_float_value(mrb, M_PI)); #else - mrb_define_const_id(mrb, mrb_math, MRB_SYM(PI), mrb_float_value(mrb, atan(1.0)*4.0)); + mrb_define_const_id(mrb, math, MRB_SYM(PI), mrb_float_value(mrb, atan(1.0)*4.0)); #endif #ifdef M_E - mrb_define_const_id(mrb, mrb_math, MRB_SYM(E), mrb_float_value(mrb, M_E)); + mrb_define_const_id(mrb, math, MRB_SYM(E), mrb_float_value(mrb, M_E)); #else - mrb_define_const_id(mrb, mrb_math, MRB_SYM(E), mrb_float_value(mrb, exp(1.0))); + mrb_define_const_id(mrb, math, MRB_SYM(E), mrb_float_value(mrb, exp(1.0))); #endif - mrb_define_module_function(mrb, mrb_math, "sin", math_sin, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, mrb_math, "cos", math_cos, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, mrb_math, "tan", math_tan, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(sin), math_sin, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(cos), math_cos, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(tan), math_tan, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, mrb_math, "asin", math_asin, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, mrb_math, "acos", math_acos, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, mrb_math, "atan", math_atan, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, mrb_math, "atan2", math_atan2, MRB_ARGS_REQ(2)); + mrb_define_module_function_id(mrb, math, MRB_SYM(asin), math_asin, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(acos), math_acos, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(atan), math_atan, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(atan2), math_atan2, MRB_ARGS_REQ(2)); - mrb_define_module_function(mrb, mrb_math, "sinh", math_sinh, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, mrb_math, "cosh", math_cosh, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, mrb_math, "tanh", math_tanh, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(sinh), math_sinh, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(cosh), math_cosh, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(tanh), math_tanh, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, mrb_math, "asinh", math_asinh, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, mrb_math, "acosh", math_acosh, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, mrb_math, "atanh", math_atanh, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(asinh), math_asinh, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(acosh), math_acosh, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(atanh), math_atanh, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, mrb_math, "exp", math_exp, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, mrb_math, "log", math_log, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); - mrb_define_module_function(mrb, mrb_math, "log2", math_log2, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, mrb_math, "log10", math_log10, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, mrb_math, "sqrt", math_sqrt, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, mrb_math, "cbrt", math_cbrt, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(exp), math_exp, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(expm1), math_expm1, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(log1p), math_log1p, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(log), math_log, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(log2), math_log2, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(log10), math_log10, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(sqrt), math_sqrt, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(cbrt), math_cbrt, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, mrb_math, "frexp", math_frexp, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, mrb_math, "ldexp", math_ldexp, MRB_ARGS_REQ(2)); + mrb_define_module_function_id(mrb, math, MRB_SYM(frexp), math_frexp, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(ldexp), math_ldexp, MRB_ARGS_REQ(2)); - mrb_define_module_function(mrb, mrb_math, "hypot", math_hypot, MRB_ARGS_REQ(2)); + mrb_define_module_function_id(mrb, math, MRB_SYM(hypot), math_hypot, MRB_ARGS_REQ(2)); - mrb_define_module_function(mrb, mrb_math, "erf", math_erf, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, mrb_math, "erfc", math_erfc, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(erf), math_erf, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, math, MRB_SYM(erfc), math_erfc, MRB_ARGS_REQ(1)); } void diff --git a/mrbgems/mruby-metaprog/README.md b/mrbgems/mruby-metaprog/README.md new file mode 100644 index 0000000000..c164fd7c88 --- /dev/null +++ b/mrbgems/mruby-metaprog/README.md @@ -0,0 +1,123 @@ +# mruby-metaprog + +This mrbgem provides a collection of methods for metaprogramming in mruby. Metaprogramming is the ability of a program to inspect, modify, and create its own code at runtime. + +## Functionality + +The `mruby-metaprog` mrbgem offers methods for: + +- **Instance Variable Manipulation:** + - `instance_variable_defined?(symbol)`: Checks if an instance variable is defined in an object. + - `instance_variable_get(symbol)`: Retrieves the value of an instance variable. + - `instance_variable_set(symbol, value)`: Sets the value of an instance variable. + - `instance_variables()`: Returns an array of instance variable names in an object. +- **Method Inspection:** + - `methods(regular=true)`: Returns a list of public and protected methods of an object. + - `private_methods(all=true)`: Returns a list of private methods accessible to an object. + - `protected_methods(all=true)`: Returns a list of protected methods accessible to an object. + - `public_methods(all=true)`: Returns a list of public methods accessible to an object. + - `singleton_methods(all=true)`: Returns an array of singleton method names for an object. +- **Singleton Method Definition:** + - `define_singleton_method(name, &block)`: Defines a singleton method for an object. +- **Class Variable Manipulation:** + - `class_variable_defined?(symbol)`: Checks if a class variable is defined in a module. + - `class_variable_get(symbol)`: Retrieves the value of a class variable. + - `class_variable_set(symbol, value)`: Sets the value of a class variable. + - `class_variables(inherit=true)`: Returns an array of class variable names in a module. + - `remove_class_variable(symbol)`: Removes a class variable from a module. +- **Module Inspection:** + - `included_modules()`: Returns a list of modules included in a module or class. + - `instance_methods(include_super=true)`: Returns an array of public and protected instance methods in a module or class. + - `public_instance_methods(include_super=true)`: Returns an array of public instance methods in a module or class. + - `private_instance_methods(include_super=true)`: Returns an array of private instance methods in a module or class. + - `protected_instance_methods(include_super=true)`: Returns an array of protected instance methods in a module or class. + - `undefined_instance_methods()`: Returns an array of undefined instance methods in a module/class. +- **Method Removal:** + - `remove_method(symbol)`: Removes a method from a class. +- **Constant Inspection:** + - `constants(inherit=true)`: Returns an array of constant names defined in a module. +- **Module Nesting:** + - `Module.nesting()`: Returns an array representing the current module nesting. +- **Message Sending:** + - `send(symbol, *args, &block)`: Invokes a method on an object. + - `public_send(symbol, *args, &block)`: Invokes a public method on an object. +- **Variable Inspection:** + - `global_variables()`: Returns an array of global variable names. + - `local_variables()`: Returns an array of local variable names in the current scope. + +## Examples + +### Working with Instance Variables + +```ruby +class MyClass + def initialize(value) + @my_var = value + end +end + +obj = MyClass.new(10) + +p obj.instance_variable_defined?(:@my_var) #=> true +p obj.instance_variable_get(:@my_var) #=> 10 +obj.instance_variable_set(:@another_var, "hello") +p obj.instance_variables #=> [:@my_var, :@another_var] +``` + +### Inspecting Methods + +```ruby +class AnotherClass + def public_method; end + protected + def protected_method; end + private + def private_method; end +end + +obj = AnotherClass.new +p obj.public_methods(false) #=> [:public_method] +p obj.protected_methods(false) #=> [:protected_method] +p obj.private_methods(false) #=> [:private_method] + +def obj.singleton_method_example; end +p obj.singleton_methods #=> [:singleton_method_example] +``` + +### Defining Singleton Methods + +```ruby +obj = Object.new +obj.define_singleton_method(:greet) do |name| + "Hello, #{name}!" +end +p obj.greet("World") #=> "Hello, World!" +``` + +### Working with Class Variables + +```ruby +class ParentClass + @@parent_cvar = 100 +end + +class ChildClass < ParentClass + @@child_cvar = 200 + + def self.get_parent_cvar + class_variable_get(:@@parent_cvar) + end +end + +p ChildClass.class_variable_defined?(:@@child_cvar) #=> true +p ChildClass.class_variable_get(:@@child_cvar) #=> 200 +ChildClass.class_variable_set(:@@new_cvar, 300) +p ChildClass.class_variables.sort #=> [:@@child_cvar, :@@new_cvar, :@@parent_cvar] (order may vary) +p ChildClass.get_parent_cvar #=> 100 +ChildClass.remove_class_variable(:@@new_cvar) +p ChildClass.class_variables.sort #=> [:@@child_cvar, :@@parent_cvar] (order may vary) +``` + +## License + +This mrbgem is licensed under the MIT License. diff --git a/mrbgems/mruby-metaprog/src/metaprog.c b/mrbgems/mruby-metaprog/src/metaprog.c index 70da56c47f..ecddd96cb6 100644 --- a/mrbgems/mruby-metaprog/src/metaprog.c +++ b/mrbgems/mruby-metaprog/src/metaprog.c @@ -1,28 +1,15 @@ -#include "mruby.h" -#include "mruby/array.h" -#include "mruby/hash.h" -#include "mruby/variable.h" -#include "mruby/proc.h" -#include "mruby/class.h" -#include "mruby/string.h" +#include +#include +#include +#include +#include +#include +#include #include -#include "mruby/presym.h" - -typedef enum { - NOEX_PUBLIC = 0x00, - NOEX_NOSUPER = 0x01, - NOEX_PRIVATE = 0x02, - NOEX_PROTECTED = 0x04, - NOEX_MASK = 0x06, - NOEX_BASIC = 0x08, - NOEX_UNDEF = NOEX_NOSUPER, - NOEX_MODFUNC = 0x12, - NOEX_SUPER = 0x20, - NOEX_VCALL = 0x40, - NOEX_RESPONDS = 0x80 -} mrb_method_flag_t; - -mrb_value mrb_proc_local_variables(mrb_state *mrb, const struct RProc *proc); +#include + +#define MT_PROTECTED MRB_METHOD_PROTECTED_FL +#define MT_NOPRIV (MRB_METHOD_PRIVATE_FL|MT_PROTECTED) static mrb_value mrb_f_nil(mrb_state *mrb, mrb_value cv) @@ -35,8 +22,8 @@ mrb_f_nil(mrb_state *mrb, mrb_value cv) * call-seq: * obj.instance_variable_defined?(symbol) -> true or false * - * Returns true if the given instance variable is - * defined in obj. + * Returns `true` if the given instance variable is + * defined in *obj*. * * class Fred * def initialize(p1, p2) @@ -64,9 +51,9 @@ mrb_obj_ivar_defined(mrb_state *mrb, mrb_value self) * obj.instance_variable_get(symbol) -> obj * * Returns the value of the given instance variable, or nil if the - * instance variable is not set. The @ part of the + * instance variable is not set. The `@` part of the * variable name should be included for regular instance - * variables. Throws a NameError exception if the + * variables. Throws a `NameError` exception if the * supplied symbol is not valid as an instance variable name. * * class Fred @@ -93,8 +80,8 @@ mrb_obj_ivar_get(mrb_state *mrb, mrb_value self) * call-seq: * obj.instance_variable_set(symbol, obj) -> obj * - * Sets the instance variable names by symbol to - * object, thereby frustrating the efforts of the class's + * Sets the instance variable names by *symbol* to + * *object*, thereby frustrating the efforts of the class's * author to attempt to provide proper encapsulation. The variable * did not have to exist prior to this call. * @@ -139,70 +126,62 @@ mrb_local_variables(mrb_state *mrb, mrb_value self) return mrb_proc_local_variables(mrb, mrb->c->ci[-1].proc); } -KHASH_DECLARE(st, mrb_sym, char, FALSE) -KHASH_DEFINE(st, mrb_sym, char, FALSE, kh_int_hash_func, kh_int_hash_equal) +KHASH_DECLARE(st, mrb_sym, char, TRUE) +KHASH_DEFINE(st, mrb_sym, char, TRUE, kh_int_hash_func, kh_int_hash_equal) struct mt_set { + unsigned int visibility; khash_t(st) *set; - khash_t(st) *undef; }; +#define vicheck(flags, visi) (((visi)==MT_NOPRIV) ? (((flags)&MRB_METHOD_VISIBILITY_MASK)!=MRB_METHOD_PRIVATE_FL) : (((flags)&MRB_METHOD_VISIBILITY_MASK)==(visi))) + static int method_entry_i(mrb_state *mrb, mrb_sym mid, mrb_method_t m, void *p) { struct mt_set *s = (struct mt_set*)p; - if (MRB_METHOD_UNDEF_P(m)) { - if (s->undef) { - kh_put(st, mrb, s->undef, mid); - } - } - else if (s->undef == NULL || - kh_get(st, mrb, s->undef, mid) == kh_end(s->undef)) { - kh_put(st, mrb, s->set, mid); + if (vicheck(m.flags, s->visibility) && kh_get(st, mrb, s->set, mid) == kh_end(s->set)) { + khint_t k = kh_put(st, mrb, s->set, mid); + kh_val(st, s->set, k) = !MRB_METHOD_UNDEF_P(m); } return 0; } static void -method_entry_loop(mrb_state *mrb, struct RClass *klass, khash_t(st) *set, khash_t(st) *undef) +method_entry_loop(mrb_state *mrb, struct RClass *klass, khash_t(st) *set, unsigned int visibility) { - struct mt_set s; + struct mt_set s = {visibility, set}; - s.set = set; - s.undef = undef; mrb_mt_foreach(mrb, klass, method_entry_i, (void*)&s); } static mrb_value -mrb_class_instance_method_list(mrb_state *mrb, mrb_bool recur, struct RClass *klass) +mrb_class_instance_method_list(mrb_state *mrb, mrb_bool recur, struct RClass *klass, unsigned int flags) { mrb_value ary; - struct RClass *oldklass; khash_t(st) *set = kh_init(st, mrb); if (!recur) { if (klass->flags & MRB_FL_CLASS_IS_PREPENDED) { MRB_CLASS_ORIGIN(klass); } - method_entry_loop(mrb, klass, set, NULL); + method_entry_loop(mrb, klass, set, flags); } else { - khash_t(st) *undef = kh_init(st, mrb); + struct RClass *oldklass = NULL; - oldklass = NULL; while (klass && (klass != oldklass)) { - method_entry_loop(mrb, klass, set, undef); + method_entry_loop(mrb, klass, set, flags); oldklass = klass; klass = klass->super; } - kh_destroy(st, mrb, undef); } ary = mrb_ary_new_capa(mrb, kh_size(set)); - for (khint_t i=0; i array + * obj.methods(regular=true) -> array * - * Returns a list of the names of methods publicly accessible in - * obj. This will include all the methods accessible in - * obj's ancestors. + * Returns a list of the names of public and protected methods of + * `obj`. This will include all the methods accessible in + * `obj`'s ancestors. + * If the optional parameter is `false`, it + * returns an array of `obj`'s public and protected singleton methods, + * the array will not include methods in modules included in `obj`. * * class Klass * def kMethod() @@ -237,9 +223,7 @@ mrb_obj_methods(mrb_state *mrb, mrb_bool recur, mrb_value obj, mrb_method_flag_t static mrb_value mrb_obj_methods_m(mrb_state *mrb, mrb_value self) { - mrb_bool recur = TRUE; - mrb_get_args(mrb, "|b", &recur); - return mrb_obj_methods(mrb, recur, self, (mrb_method_flag_t)0); /* everything but private */ + return mrb_obj_methods(mrb, self, MT_NOPRIV); } /* 15.3.1.3.36 */ @@ -247,16 +231,14 @@ mrb_obj_methods_m(mrb_state *mrb, mrb_value self) * call-seq: * obj.private_methods(all=true) -> array * - * Returns the list of private methods accessible to obj. If - * the all parameter is set to false, only those methods + * Returns the list of private methods accessible to *obj*. If + * the *all* parameter is set to `false`, only those methods * in the receiver will be listed. */ static mrb_value mrb_obj_private_methods(mrb_state *mrb, mrb_value self) { - mrb_bool recur = TRUE; - mrb_get_args(mrb, "|b", &recur); - return mrb_obj_methods(mrb, recur, self, NOEX_PRIVATE); /* private attribute not define */ + return mrb_obj_methods(mrb, self, MRB_METHOD_PRIVATE_FL); } /* 15.3.1.3.37 */ @@ -264,16 +246,14 @@ mrb_obj_private_methods(mrb_state *mrb, mrb_value self) * call-seq: * obj.protected_methods(all=true) -> array * - * Returns the list of protected methods accessible to obj. If - * the all parameter is set to false, only those methods + * Returns the list of protected methods accessible to *obj*. If + * the *all* parameter is set to `false`, only those methods * in the receiver will be listed. */ static mrb_value mrb_obj_protected_methods(mrb_state *mrb, mrb_value self) { - mrb_bool recur = TRUE; - mrb_get_args(mrb, "|b", &recur); - return mrb_obj_methods(mrb, recur, self, NOEX_PROTECTED); /* protected attribute not define */ + return mrb_obj_methods(mrb, self, MT_PROTECTED); } /* 15.3.1.3.38 */ @@ -281,48 +261,41 @@ mrb_obj_protected_methods(mrb_state *mrb, mrb_value self) * call-seq: * obj.public_methods(all=true) -> array * - * Returns the list of public methods accessible to obj. If - * the all parameter is set to false, only those methods + * Returns the list of public methods accessible to *obj*. If + * the *all* parameter is set to `false`, only those methods * in the receiver will be listed. */ static mrb_value mrb_obj_public_methods(mrb_state *mrb, mrb_value self) { - mrb_bool recur = TRUE; - mrb_get_args(mrb, "|b", &recur); - return mrb_obj_methods(mrb, recur, self, NOEX_PUBLIC); /* public attribute not define */ + return mrb_obj_methods(mrb, self, MRB_MT_PUBLIC); } static mrb_value mrb_obj_singleton_methods(mrb_state *mrb, mrb_bool recur, mrb_value obj) { - khint_t i; mrb_value ary; struct RClass *klass; khash_t(st) *set = kh_init(st, mrb); - khash_t(st) *undef = (recur ? kh_init(st, mrb) : NULL); klass = mrb_class(mrb, obj); if (klass && (klass->tt == MRB_TT_SCLASS)) { - method_entry_loop(mrb, klass, set, undef); + method_entry_loop(mrb, klass, set, MRB_MT_PUBLIC); klass = klass->super; } if (recur) { while (klass && ((klass->tt == MRB_TT_SCLASS) || (klass->tt == MRB_TT_ICLASS))) { - method_entry_loop(mrb, klass, set, undef); + method_entry_loop(mrb, klass, set, MRB_MT_PUBLIC); klass = klass->super; } } ary = mrb_ary_new(mrb); - for (i=0;i array * - * Returns an array of the names of singleton methods for obj. - * If the optional all parameter is true, the list will include - * methods in modules included in obj. + * Returns an array of the names of singleton methods for *obj*. + * If the optional *all* parameter is true, the list will include + * methods in modules included in *obj*. * Only public and protected singleton methods are returned. * * module Other @@ -398,7 +371,7 @@ check_cv_name_sym(mrb_state *mrb, mrb_sym id) * call-seq: * remove_class_variable(sym) -> obj * - * Removes the definition of the sym, returning that + * Removes the definition of the *sym*, returning that * constant's value. * * class Dummy @@ -443,8 +416,8 @@ mrb_mod_remove_cvar(mrb_state *mrb, mrb_value mod) * call-seq: * obj.class_variable_defined?(symbol) -> true or false * - * Returns true if the given class variable is defined - * in obj. + * Returns `true` if the given class variable is defined + * in *obj*. * * class Fred * @@foo = 99 @@ -469,7 +442,7 @@ mrb_mod_cvar_defined(mrb_state *mrb, mrb_value mod) * mod.class_variable_get(symbol) -> obj * * Returns the value of the given class variable (or throws a - * NameError exception). The @@ part of the + * `NameError` exception). The `@@` part of the * variable name should be included for regular class variables * * class Fred @@ -493,8 +466,8 @@ mrb_mod_cvar_get(mrb_state *mrb, mrb_value mod) * call-seq: * obj.class_variable_set(symbol, obj) -> obj * - * Sets the class variable names by symbol to - * object. + * Sets the class variable names by *symbol* to + * *object*. * * class Fred * @@foo = 99 @@ -539,6 +512,15 @@ mrb_mod_included_modules(mrb_state *mrb, mrb_value self) return result; } +static mrb_value +mod_instance_methods(mrb_state *mrb, mrb_value mod, unsigned int visibility) +{ + struct RClass *c = mrb_class_ptr(mod); + mrb_bool recur = TRUE; + mrb_get_args(mrb, "|b", &recur); + return mrb_class_instance_method_list(mrb, recur, c, visibility); +} + /* 15.2.2.4.33 */ /* * call-seq: @@ -547,9 +529,9 @@ mrb_mod_included_modules(mrb_state *mrb, mrb_value self) * Returns an array containing the names of the public and protected instance * methods in the receiver. For a module, these are the public and protected methods; * for a class, they are the instance (not singleton) methods. With no - * argument, or with an argument that is false, the - * instance methods in mod are returned, otherwise the methods - * in mod and mod's superclasses are returned. + * argument, or with an argument that is `false`, the + * instance methods in *mod* are returned, otherwise the methods + * in *mod* and *mod*'s superclasses are returned. * * module A * def method1() end @@ -570,10 +552,25 @@ mrb_mod_included_modules(mrb_state *mrb, mrb_value self) static mrb_value mrb_mod_instance_methods(mrb_state *mrb, mrb_value mod) { - struct RClass *c = mrb_class_ptr(mod); - mrb_bool recur = TRUE; - mrb_get_args(mrb, "|b", &recur); - return mrb_class_instance_method_list(mrb, recur, c); + return mod_instance_methods(mrb, mod, MT_NOPRIV); +} + +static mrb_value +mrb_mod_public_instance_methods(mrb_state *mrb, mrb_value mod) +{ + return mod_instance_methods(mrb, mod, MRB_MT_PUBLIC); +} + +static mrb_value +mrb_mod_private_instance_methods(mrb_state *mrb, mrb_value mod) +{ + return mod_instance_methods(mrb, mod, MRB_METHOD_PRIVATE_FL); +} + +static mrb_value +mrb_mod_protected_instance_methods(mrb_state *mrb, mrb_value mod) +{ + return mod_instance_methods(mrb, mod, MT_PROTECTED); } static int @@ -615,7 +612,7 @@ mrb_mod_undefined_methods(mrb_state *mrb, mrb_value mod) * remove_method(symbol) -> self * * Removes the method identified by _symbol_ from the current - * class. For an example, see Module.undef_method. + * class. For an example, see `Module.undef_method`. */ static mrb_value @@ -626,10 +623,11 @@ mrb_mod_remove_method(mrb_state *mrb, mrb_value mod) struct RClass *c = mrb_class_ptr(mod); mrb_get_args(mrb, "*", &argv, &argc); - mrb_check_frozen(mrb, mrb_obj_ptr(mod)); + mrb_check_frozen(mrb, c); + int ai = mrb_gc_arena_save(mrb); while (argc--) { mrb_remove_method(mrb, c, mrb_obj_to_sym(mrb, *argv)); - mrb_funcall_id(mrb, mod, MRB_SYM(method_removed), 1, *argv); + mrb_gc_arena_restore(mrb, ai); argv++; } return mod; @@ -638,8 +636,29 @@ mrb_mod_remove_method(mrb_state *mrb, mrb_value mod) static mrb_value mrb_mod_s_constants(mrb_state *mrb, mrb_value mod) { - mrb_raise(mrb, E_NOTIMP_ERROR, "Module.constants not implemented"); - return mrb_nil_value(); /* not reached */ + if (mrb_get_argc(mrb) > 0 || mrb_class_ptr(mod) != mrb->module_class) { + return mrb_mod_constants(mrb, mod); + } + + const struct RProc *proc = mrb->c->ci[-1].proc; + struct RClass *c = MRB_PROC_TARGET_CLASS(proc); + mrb_value ary = mrb_ary_new(mrb); + + if (!c) c = mrb->object_class; + mrb_mod_const_at(mrb, c, ary); + proc = proc->upper; + while (proc) { + struct RClass *c2 = MRB_PROC_TARGET_CLASS(proc); + if (!c2) c2 = mrb->object_class; + mrb_mod_const_at(mrb, c2, ary); + proc = proc->upper; + } + while (c) { + mrb_mod_const_at(mrb, c, ary); + c = c->super; + if (c == mrb->object_class) break; + } + return ary; } static mrb_value @@ -665,8 +684,41 @@ mrb_mod_s_nesting(mrb_state *mrb, mrb_value mod) return ary; } -/* implementation of #send method */ -mrb_value mrb_f_send(mrb_state *mrb, mrb_value self); +/* ---------------------------*/ +static const mrb_mt_entry metaprog_krn_rom_entries[] = { + MRB_MT_ENTRY(mrb_f_global_variables, MRB_SYM(global_variables), MRB_ARGS_NONE() | MRB_MT_PRIVATE), /* 15.3.1.3.14 (15.3.1.2.4) */ + MRB_MT_ENTRY(mrb_local_variables, MRB_SYM(local_variables), MRB_ARGS_NONE() | MRB_MT_PRIVATE), /* 15.3.1.3.28 (15.3.1.2.7) */ + MRB_MT_ENTRY(mrb_singleton_class, MRB_SYM(singleton_class), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_obj_ivar_defined, MRB_SYM_Q(instance_variable_defined), MRB_ARGS_REQ(1)), /* 15.3.1.3.20 */ + MRB_MT_ENTRY(mrb_obj_ivar_get, MRB_SYM(instance_variable_get), MRB_ARGS_REQ(1)), /* 15.3.1.3.21 */ + MRB_MT_ENTRY(mrb_obj_ivar_set, MRB_SYM(instance_variable_set), MRB_ARGS_REQ(2)), /* 15.3.1.3.22 */ + MRB_MT_ENTRY(mrb_obj_instance_variables, MRB_SYM(instance_variables), MRB_ARGS_NONE()), /* 15.3.1.3.23 */ + MRB_MT_ENTRY(mrb_obj_methods_m, MRB_SYM(methods), MRB_ARGS_OPT(1)), /* 15.3.1.3.31 */ + MRB_MT_ENTRY(mrb_obj_private_methods, MRB_SYM(private_methods), MRB_ARGS_OPT(1)), /* 15.3.1.3.36 */ + MRB_MT_ENTRY(mrb_obj_protected_methods, MRB_SYM(protected_methods), MRB_ARGS_OPT(1)), /* 15.3.1.3.37 */ + MRB_MT_ENTRY(mrb_obj_public_methods, MRB_SYM(public_methods), MRB_ARGS_OPT(1)), /* 15.3.1.3.38 */ + MRB_MT_ENTRY(mrb_obj_singleton_methods_m, MRB_SYM(singleton_methods), MRB_ARGS_OPT(1)), /* 15.3.1.3.45 */ + MRB_MT_ENTRY(mod_define_singleton_method, MRB_SYM(define_singleton_method), MRB_ARGS_REQ(1)|MRB_ARGS_BLOCK()), + MRB_MT_ENTRY(mrb_f_send, MRB_SYM(send), MRB_ARGS_REQ(1)|MRB_ARGS_REST()|MRB_ARGS_BLOCK()), /* 15.3.1.3.44 */ + MRB_MT_ENTRY(mrb_f_public_send, MRB_SYM(public_send), MRB_ARGS_REQ(1)|MRB_ARGS_REST()|MRB_ARGS_BLOCK()), +}; + +static const mrb_mt_entry metaprog_mod_rom_entries[] = { + MRB_MT_ENTRY(mrb_mod_class_variables, MRB_SYM(class_variables), MRB_ARGS_OPT(1)), /* 15.2.2.4.19 */ + MRB_MT_ENTRY(mrb_mod_remove_cvar, MRB_SYM(remove_class_variable), MRB_ARGS_REQ(1)), /* 15.2.2.4.39 */ + MRB_MT_ENTRY(mrb_mod_cvar_defined, MRB_SYM_Q(class_variable_defined), MRB_ARGS_REQ(1)), /* 15.2.2.4.16 */ + MRB_MT_ENTRY(mrb_mod_cvar_get, MRB_SYM(class_variable_get), MRB_ARGS_REQ(1)), /* 15.2.2.4.17 */ + MRB_MT_ENTRY(mrb_mod_cvar_set, MRB_SYM(class_variable_set), MRB_ARGS_REQ(2)), /* 15.2.2.4.18 */ + MRB_MT_ENTRY(mrb_mod_included_modules, MRB_SYM(included_modules), MRB_ARGS_NONE()), /* 15.2.2.4.30 */ + MRB_MT_ENTRY(mrb_mod_instance_methods, MRB_SYM(instance_methods), MRB_ARGS_ANY()), /* 15.2.2.4.33 */ + MRB_MT_ENTRY(mrb_mod_public_instance_methods, MRB_SYM(public_instance_methods), MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(mrb_mod_private_instance_methods, MRB_SYM(private_instance_methods), MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(mrb_mod_protected_instance_methods, MRB_SYM(protected_instance_methods), MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(mrb_mod_undefined_methods, MRB_SYM(undefined_instance_methods), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_mod_remove_method, MRB_SYM(remove_method), MRB_ARGS_ANY()), /* 15.2.2.4.41 */ + MRB_MT_ENTRY(mrb_f_nil, MRB_SYM(method_removed), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(mrb_mod_constants, MRB_SYM(constants), MRB_ARGS_OPT(1)), /* 15.2.2.4.24 */ +}; void mrb_mruby_metaprog_gem_init(mrb_state* mrb) @@ -674,35 +726,10 @@ mrb_mruby_metaprog_gem_init(mrb_state* mrb) struct RClass *krn = mrb->kernel_module; struct RClass *mod = mrb->module_class; - mrb_define_method(mrb, krn, "global_variables", mrb_f_global_variables, MRB_ARGS_NONE()); /* 15.3.1.3.14 (15.3.1.2.4) */ - mrb_define_method(mrb, krn, "local_variables", mrb_local_variables, MRB_ARGS_NONE()); /* 15.3.1.3.28 (15.3.1.2.7) */ - - mrb_define_method(mrb, krn, "singleton_class", mrb_singleton_class, MRB_ARGS_NONE()); - mrb_define_method(mrb, krn, "instance_variable_defined?", mrb_obj_ivar_defined, MRB_ARGS_REQ(1)); /* 15.3.1.3.20 */ - mrb_define_method(mrb, krn, "instance_variable_get", mrb_obj_ivar_get, MRB_ARGS_REQ(1)); /* 15.3.1.3.21 */ - mrb_define_method(mrb, krn, "instance_variable_set", mrb_obj_ivar_set, MRB_ARGS_REQ(2)); /* 15.3.1.3.22 */ - mrb_define_method(mrb, krn, "instance_variables", mrb_obj_instance_variables, MRB_ARGS_NONE()); /* 15.3.1.3.23 */ - mrb_define_method(mrb, krn, "methods", mrb_obj_methods_m, MRB_ARGS_OPT(1)); /* 15.3.1.3.31 */ - mrb_define_method(mrb, krn, "private_methods", mrb_obj_private_methods, MRB_ARGS_OPT(1)); /* 15.3.1.3.36 */ - mrb_define_method(mrb, krn, "protected_methods", mrb_obj_protected_methods, MRB_ARGS_OPT(1)); /* 15.3.1.3.37 */ - mrb_define_method(mrb, krn, "public_methods", mrb_obj_public_methods, MRB_ARGS_OPT(1)); /* 15.3.1.3.38 */ - mrb_define_method(mrb, krn, "singleton_methods", mrb_obj_singleton_methods_m, MRB_ARGS_OPT(1)); /* 15.3.1.3.45 */ - mrb_define_method(mrb, krn, "define_singleton_method", mod_define_singleton_method, MRB_ARGS_REQ(1)|MRB_ARGS_BLOCK()); - mrb_define_method(mrb, krn, "send", mrb_f_send, MRB_ARGS_REQ(1)|MRB_ARGS_REST()|MRB_ARGS_BLOCK()); /* 15.3.1.3.44 */ - - mrb_define_method(mrb, mod, "class_variables", mrb_mod_class_variables, MRB_ARGS_OPT(1)); /* 15.2.2.4.19 */ - mrb_define_method(mrb, mod, "remove_class_variable", mrb_mod_remove_cvar, MRB_ARGS_REQ(1)); /* 15.2.2.4.39 */ - mrb_define_method(mrb, mod, "class_variable_defined?", mrb_mod_cvar_defined, MRB_ARGS_REQ(1)); /* 15.2.2.4.16 */ - mrb_define_method(mrb, mod, "class_variable_get", mrb_mod_cvar_get, MRB_ARGS_REQ(1)); /* 15.2.2.4.17 */ - mrb_define_method(mrb, mod, "class_variable_set", mrb_mod_cvar_set, MRB_ARGS_REQ(2)); /* 15.2.2.4.18 */ - mrb_define_method(mrb, mod, "included_modules", mrb_mod_included_modules, MRB_ARGS_NONE()); /* 15.2.2.4.30 */ - mrb_define_method(mrb, mod, "instance_methods", mrb_mod_instance_methods, MRB_ARGS_ANY()); /* 15.2.2.4.33 */ - mrb_define_method(mrb, mod, "undefined_instance_methods", mrb_mod_undefined_methods, MRB_ARGS_NONE()); - mrb_define_method(mrb, mod, "remove_method", mrb_mod_remove_method, MRB_ARGS_ANY()); /* 15.2.2.4.41 */ - mrb_define_method(mrb, mod, "method_removed", mrb_f_nil, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, mod, "constants", mrb_mod_constants, MRB_ARGS_OPT(1)); /* 15.2.2.4.24 */ - mrb_define_class_method(mrb, mod, "constants", mrb_mod_s_constants, MRB_ARGS_ANY()); /* 15.2.2.3.1 */ - mrb_define_class_method(mrb, mod, "nesting", mrb_mod_s_nesting, MRB_ARGS_NONE()); /* 15.2.2.3.2 */ + MRB_MT_INIT_ROM(mrb, krn, metaprog_krn_rom_entries); + MRB_MT_INIT_ROM(mrb, mod, metaprog_mod_rom_entries); + mrb_define_class_method_id(mrb, mod, MRB_SYM(constants), mrb_mod_s_constants, MRB_ARGS_ANY()); /* 15.2.2.3.1 */ + mrb_define_class_method_id(mrb, mod, MRB_SYM(nesting), mrb_mod_s_nesting, MRB_ARGS_NONE()); /* 15.2.2.3.2 */ } void diff --git a/mrbgems/mruby-metaprog/test/metaprog.rb b/mrbgems/mruby-metaprog/test/metaprog.rb index 4d26517025..3cb147a013 100644 --- a/mrbgems/mruby-metaprog/test/metaprog.rb +++ b/mrbgems/mruby-metaprog/test/metaprog.rb @@ -56,7 +56,7 @@ end ivars = o.instance_variables - assert_equal Array, ivars.class, + assert_equal Array, ivars.class assert_equal(2, ivars.size) assert_true ivars.include?(:@a) assert_true ivars.include?(:@b) @@ -70,29 +70,35 @@ assert('Kernel#private_methods', '15.3.1.3.36') do assert_equal Array, private_methods.class + c = Class.new do + private def foo + end + end + assert_equal [:foo], c.new.private_methods(false) end assert('Kernel#protected_methods', '15.3.1.3.37') do assert_equal Array, protected_methods.class + c = Class.new do + protected def foo + end + end + assert_equal [:foo], c.new.protected_methods(false) end assert('Kernel#public_methods', '15.3.1.3.38') do assert_equal Array, public_methods.class - class Foo + c = Class.new do def foo end end - assert_equal [:foo], Foo.new.public_methods(false) + assert_equal [:foo], c.new.public_methods(false) end assert('Kernel#singleton_methods', '15.3.1.3.45') do assert_equal singleton_methods.class, Array end -assert('Kernel.global_variables', '15.3.1.2.4') do - assert_equal Array, Kernel.global_variables.class -end - assert('Kernel#global_variables', '15.3.1.3.14') do variables1 = global_variables assert_equal Array, variables1.class @@ -104,6 +110,8 @@ def foo assert_equal(1, variables2.size - variables1.size) end +# Kernel.global_variables is not provided by mruby. '15.3.1.2.4' + assert('Kernel#local_variables', '15.3.1.3.28') do assert_equal Array, local_variables.class @@ -115,28 +123,7 @@ def local_var_list assert_equal [:a], local_var_list end -assert('Kernel.local_variables', '15.3.1.2.7') do - a, b = 0, 1 - a += b - - vars = Kernel.local_variables.sort - assert_equal [:a, :b, :vars], vars - - assert_equal [:a, :b, :c, :vars], Proc.new { |a, b| - c = 2 - # Kernel#local_variables: 15.3.1.3.28 - local_variables.sort - }.call(-1, -2) - - a = Object.new - def a.hoge(vars, *, **) - Proc.new { - x, y = 1, 2 - local_variables.sort - } - end - assert_equal([:vars, :x, :y]) { a.hoge(0).call } -end +# Kernel.local_variables is not provided by mruby. '15.3.1.2.7' assert('Kernel#define_singleton_method') do o = Object.new @@ -193,6 +180,18 @@ class Test4ClassVariableDefined assert_true Test4ClassVariableDefined.class_variable_defined?(:@@cv) assert_false Test4ClassVariableDefined.class_variable_defined?(:@@noexisting) assert_raise(NameError) { Test4ClassVariableDefined.class_variable_defined?("@@2") } + + # shared empty iv_tbl (include) + m = Module.new + c = Class.new{include m} + m.class_variable_set(:@@cv2, 2) + assert_true c.class_variable_defined?(:@@cv2) + + # shared empty iv_tbl (prepend) + m = Module.new + c = Class.new{prepend m} + m.class_variable_set(:@@cv2, 2) + assert_true c.class_variable_defined?(:@@cv2) end assert('Module#class_variable_get', '15.2.2.4.17') do @@ -205,6 +204,26 @@ class Test4ClassVariableGet %w[@@a? @@! @a a].each do |n| assert_raise(NameError) { Test4ClassVariableGet.class_variable_get(n) } end + + # shared empty iv_tbl (include) + m = Module.new + class Test4ClassVariableGet end + Test4ClassVariableGet.include m + m.class_variable_set(:@@cv2, 2) + assert_equal 2, Test4ClassVariableGet.class_variable_get(:@@cv2) + class Test4ClassVariableGet + assert_equal 2, @@cv2 + end + + # shared empty iv_tbl (prepend) + m = Module.new + class Test4ClassVariableGet end + Test4ClassVariableGet.prepend m + m.class_variable_set(:@@cv2, 2) + assert_equal 2, Test4ClassVariableGet.class_variable_get(:@@cv2) + class Test4ClassVariableGet + assert_equal 2, @@cv2 + end end assert('Module#class_variable_set', '15.2.2.4.18') do @@ -241,6 +260,18 @@ class Test4ClassVariables2 < Test4ClassVariables1 assert_equal [:@@var1], Test4ClassVariables1.class_variables assert_equal [:@@var2, :@@var1], Test4ClassVariables2.class_variables + + # shared empty iv_tbl (include) + m = Module.new + c = Class.new{include m} + m.class_variable_set(:@@var3, 3) + assert_equal [:@@var3], c.class_variables + + # shared empty iv_tbl (prepend) + m = Module.new + c = Class.new{prepend m} + m.class_variable_set(:@@var3, 3) + assert_equal [:@@var3], c.class_variables end assert('Module#constants', '15.2.2.4.24') do @@ -256,6 +287,18 @@ class TestB assert_equal [ :C ], TestA.constants assert_equal [ :C, :C2 ], $n + + # shared empty iv_tbl (include) + m = Module.new + TestC = Class.new{include m} + m::C3 = 1 + assert_equal [ :C3 ], TestC.constants + + # shared empty iv_tbl (prepend) + m = Module.new + TestC = Class.new{prepend m} + m::C3 = 1 + assert_equal [ :C3 ], TestC.constants end assert('Module#included_modules', '15.2.2.4.30') do @@ -272,13 +315,13 @@ module Test4includedModules2 assert('Module#instance_methods', '15.2.2.4.33') do module Test4InstanceMethodsA - def method1() end + def method1() end end class Test4InstanceMethodsB - def method2() end + def method2() end end class Test4InstanceMethodsC < Test4InstanceMethodsB - def method3() end + def method3() end end r = Test4InstanceMethodsC.instance_methods(true) diff --git a/mrbgems/mruby-method/README.md b/mrbgems/mruby-method/README.md index 41130bb821..8c78b83f04 100644 --- a/mrbgems/mruby-method/README.md +++ b/mrbgems/mruby-method/README.md @@ -1,6 +1,6 @@ # mruby-method -An implementation of class **Method** and **UnboundMethod** for mruby +The `mruby-method` mrbgem provides implementations for the `Method` and `UnboundMethod` classes. These classes allow for powerful introspection and manipulation of methods in mruby. This includes obtaining information like method name, owner, receiver, parameters, and source location, as well as features like creating method objects from existing methods, binding unbound methods to objects, and unbinding methods from their receivers. ```ruby p Enumerable.instance_method(:find_all).source_location @@ -22,38 +22,45 @@ end ## Kernel -* `Kernel#method` -* `Kernel#singleton_method` +- `Kernel#method(sym)`: Returns a Method object corresponding to the method identified by `sym` for the receiver. +- `Kernel#singleton_method(sym)`: Returns a Method object corresponding to the singleton method identified by `sym` for the receiver. ## Module -* `Module#instance_method` +- `Module#instance_method(sym)`: Returns an UnboundMethod object corresponding to the instance method identified by `sym` in the module/class. ## Method class -* `Method#name` -* `Method#call` -* `Method#super_method` -* `Method#arity` -* `Method#unbind` -* `Method#[]` -* `Method#owner` -* `Method#receiver` -* `Method#parameters` -* `Method#source_location` -* `Method#to_proc` +- `Method#name`: Returns the name of the method. +- `Method#owner`: Returns the class or module that defines the method. +- `Method#receiver`: Returns the object to which the method is bound. +- `Method#arity`: Returns an integer indicating the number of arguments accepted by the method. A negative number indicates optional arguments. +- `Method#parameters`: Returns an array of arrays, each describing a parameter (e.g., `[[:req, :foo]]`). +- `Method#source_location`: Returns a two-element array `[filename, line_number]` indicating where the method was defined in Ruby code, or `nil` if not available. (Requires debug mode). +- `Method#call(*args, **opts, &block)`: Invokes the method with the given arguments and block, returning the result. +- `Method#[](*args, **opts, &block)`: Alias for `call`. Invokes the method. +- `Method#super_method`: Returns a Method object representing the superclass's version of this method. Returns `nil` if no superclass method exists. +- `Method#unbind`: Returns an UnboundMethod object based on this method. +- `Method#to_proc`: Converts the Method object into a Proc object that can be called. +- `Method#<< (other_method_or_proc)`: Returns a new Proc that represents the composition of this method and another method or proc (`self(other(...))`). +- `Method#>> (other_method_or_proc)`: Returns a new Proc that represents the composition of another method or proc and this method (`other(self(...))`). +- `Method#== (other)` / `Method#eql?(other)`: Returns `true` if the method is the same as `other` (same receiver, owner, and proc/name). +- `Method#inspect`: Returns a string representation of the Method object, including its class, name, owner, and source location if available. (Alias for `to_s`). ## UnboundMethod class -* `UnboundMethod#name` -* `UnboundMethod#bind` -* `UnboundMethod#super_method` -* `UnboundMethod#arity` -* `UnboundMethod#owner` -* `UnboundMethod#parameters` -* `UnboundMethod#source_location` +- `UnboundMethod#name`: Returns the name of the method. +- `UnboundMethod#owner`: Returns the class or module that originally defined the method. +- `UnboundMethod#arity`: Returns an integer indicating the number of arguments accepted by the method. A negative number indicates optional arguments. +- `UnboundMethod#parameters`: Returns an array of arrays, each describing a parameter (e.g., `[[:req, :foo]]`). +- `UnboundMethod#source_location`: Returns a two-element array `[filename, line_number]` indicating where the method was defined in Ruby code, or `nil` if not available. (Requires debug mode). +- `UnboundMethod#bind(obj)`: Binds the UnboundMethod to `obj` and returns a Method object. Raises `TypeError` if `obj` is not an instance of the method's owner or its descendants. +- `UnboundMethod#bind_call(obj, *args, **opts, &block)`: Binds the UnboundMethod to `obj` and then calls it with the given arguments and block. +- `UnboundMethod#super_method`: Returns an UnboundMethod object representing the superclass's version of this method. Returns `nil` if no superclass method exists. +- `UnboundMethod#== (other)` / `UnboundMethod#eql?(other)`: Returns `true` if the unbound method is the same as `other` (same owner and proc/name). +- `UnboundMethod#inspect`: Returns a string representation of the UnboundMethod object, including its class, name, owner, and source location if available. (Alias for `to_s`). # See also -* -* +- +- diff --git a/mrbgems/mruby-method/mrblib/kernel.rb b/mrbgems/mruby-method/mrblib/kernel.rb deleted file mode 100644 index eb17df5a65..0000000000 --- a/mrbgems/mruby-method/mrblib/kernel.rb +++ /dev/null @@ -1,10 +0,0 @@ -module Kernel - def singleton_method(name) - m = method(name) - sc = (class < proc + # + # Returns a Proc object corresponding to this method. + # + # class Foo + # def bar + # "baz" + # end + # end + # + # m = Foo.new.method(:bar) + # p = m.to_proc + # p.call #=> "baz" + # + # # Can be used with &: + # %w[hello world].map(&:upcase) #=> ["HELLO", "WORLD"] + # def to_proc m = self - lambda { |*args, **opts, &b| + lambda {|*args, **opts, &b| m.call(*args, **opts, &b) } end + # + # call-seq: + # meth << other_proc -> proc + # + # Returns a proc that is the composition of this method and the given + # other_proc. The returned proc takes a variable number of arguments, + # calls other_proc with them then calls this method with the result. + # + # def f(x) + # x * x + # end + # + # def g(x) + # x + x + # end + # + # # (f << g).call(2) == f(g(2)) == f(4) == 16 + # p (method(:f) << method(:g)).call(2) #=> 16 + # def <<(other) ->(*args, **opts, &block) { call(other.call(*args, **opts, &block)) } end + # + # call-seq: + # meth >> other_proc -> proc + # + # Returns a proc that is the composition of this method and the given + # other_proc. The returned proc takes a variable number of arguments, + # calls this method with them then calls other_proc with the result. + # + # def f(x) + # x * x + # end + # + # def g(x) + # x + x + # end + # + # # (f >> g).call(2) == g(f(2)) == g(4) == 8 + # p (method(:f) >> method(:g)).call(2) #=> 8 + # def >>(other) ->(*args, **opts, &block) { other.call(call(*args, **opts, &block)) } end diff --git a/mrbgems/mruby-method/src/method.c b/mrbgems/mruby-method/src/method.c index 3492963f0c..b0fc6019f0 100644 --- a/mrbgems/mruby-method/src/method.c +++ b/mrbgems/mruby-method/src/method.c @@ -1,15 +1,14 @@ -#include "mruby.h" -#include "mruby/array.h" -#include "mruby/class.h" -#include "mruby/variable.h" -#include "mruby/proc.h" -#include "mruby/string.h" -#include "mruby/internal.h" -#include "mruby/presym.h" +#include +#include +#include +#include +#include +#include +#include // Defined by mruby-proc-ext on which mruby-method depends mrb_value mrb_proc_parameters(mrb_state *mrb, mrb_value proc); -mrb_value mrb_proc_source_location(mrb_state *mrb, struct RProc *p); +mrb_value mrb_proc_source_location(mrb_state *mrb, const struct RProc *p); static mrb_value args_shift(mrb_state *mrb) @@ -63,7 +62,7 @@ args_unshift(mrb_state *mrb, mrb_value obj) mrb_ary_unshift(mrb, *argv, obj); } -static struct RProc* +static const struct RProc* method_missing_prepare(mrb_state *mrb, mrb_sym *mid, mrb_value recv, struct RClass **tc) { const mrb_sym id_method_missing = MRB_SYM(method_missing); @@ -83,10 +82,12 @@ method_missing_prepare(mrb_state *mrb, mrb_sym *mid, mrb_value recv, struct RCla goto method_missing; } - struct RProc *proc; + const struct RProc *proc; if (MRB_METHOD_FUNC_P(m)) { - proc = mrb_proc_new_cfunc(mrb, MRB_METHOD_FUNC(m)); - MRB_PROC_SET_TARGET_CLASS(proc, *tc); + struct RProc *p = mrb_proc_new_cfunc(mrb, MRB_METHOD_FUNC(m)); + mrb_proc_set_cfunc_aspec(p, MRB_MT_ASPEC(m.flags)); + MRB_PROC_SET_TARGET_CLASS(p, *tc); + proc = p; } else { proc = MRB_METHOD_PROC(m); @@ -104,7 +105,7 @@ method_object_alloc(mrb_state *mrb, struct RClass *mclass) return MRB_OBJ_ALLOC(mrb, MRB_TT_OBJECT, mclass); } -static struct RProc* +static const struct RProc* method_extract_proc(mrb_state *mrb, mrb_value self) { mrb_value obj = mrb_iv_get(mrb, self, MRB_SYM(_proc)); @@ -154,16 +155,43 @@ bind_check(mrb_state *mrb, mrb_value recv, mrb_value owner) !mrb_obj_is_kind_of(mrb, recv, mrb_class_ptr(owner))) { if (mrb_sclass_p(owner)) { mrb_raise(mrb, E_TYPE_ERROR, "singleton method called for a different object"); - } else { + } + else { mrb_raisef(mrb, E_TYPE_ERROR, "bind argument must be an instance of %v", owner); } } } +/* + * call-seq: + * unbound_method.bind(obj) -> method + * + * Bind unbound_method to obj. If Klass was the class + * from which unbound_method was obtained, + * obj.kind_of?(Klass) must be true. + * + * class A + * def test + * puts "In A" + * end + * end + * class B < A + * end + * um = B.instance_method(:test) + * bm = um.bind(B.new) + * bm.call + * bm = um.bind(A.new) + * bm.call + * + * produces: + * + * In A + * In A + */ + static mrb_value unbound_method_bind(mrb_state *mrb, mrb_value self) { - struct RObject *me; mrb_value owner = mrb_iv_get(mrb, self, MRB_SYM(_owner)); mrb_value name = mrb_iv_get(mrb, self, MRB_SYM(_name)); mrb_value proc = mrb_iv_get(mrb, self, MRB_SYM(_proc)); @@ -171,7 +199,8 @@ unbound_method_bind(mrb_state *mrb, mrb_value self) mrb_value recv = mrb_get_arg1(mrb); bind_check(mrb, recv, owner); - me = method_object_alloc(mrb, mrb_class_get_id(mrb, MRB_SYM(Method))); + + struct RObject *me = method_object_alloc(mrb, mrb_class_get_id(mrb, MRB_SYM(Method))); mrb_obj_iv_set(mrb, me, MRB_SYM(_owner), owner); mrb_obj_iv_set(mrb, me, MRB_SYM(_recv), recv); mrb_obj_iv_set(mrb, me, MRB_SYM(_name), name); @@ -181,63 +210,62 @@ unbound_method_bind(mrb_state *mrb, mrb_value self) return mrb_obj_value(me); } +static mrb_bool +method_p(mrb_state *mrb, struct RClass *c, mrb_value proc) +{ + if (mrb_type(proc) != MRB_TT_OBJECT) return FALSE; + if (!mrb_obj_is_instance_of(mrb, proc, c)) return FALSE; + + struct RObject *p = mrb_obj_ptr(proc); + if (!mrb_obj_iv_defined(mrb, p, MRB_SYM(_owner))) return FALSE; + if (!mrb_obj_iv_defined(mrb, p, MRB_SYM(_recv))) return FALSE; + if (!mrb_obj_iv_defined(mrb, p, MRB_SYM(_name))) return FALSE; + if (!mrb_obj_iv_defined(mrb, p, MRB_SYM(_proc))) return FALSE; + if (!mrb_obj_iv_defined(mrb, p, MRB_SYM(_klass))) return FALSE; + return TRUE; +} + #define IV_GET(value, name) mrb_iv_get(mrb, value, name) +/* + * call-seq: + * method == other_method -> true or false + * method.eql?(other_method) -> true or false + * + * Two method objects are equal if they are bound to the same + * object and refer to the same method definition and their owners are the + * same class or module. + * + * a = "cat" + * b = "cat" + * p a.method(:upcase) == a.method(:upcase) #=> true + * p a.method(:upcase) == b.method(:upcase) #=> false + */ + static mrb_value method_eql(mrb_state *mrb, mrb_value self) { mrb_value other = mrb_get_arg1(mrb); - mrb_value receiver, orig_proc, other_proc; - struct RClass *owner, *klass; - struct RProc *orig_rproc, *other_rproc; - - if (!mrb_obj_is_instance_of(mrb, other, mrb_class(mrb, self))) - return mrb_false_value(); - - if (mrb_class(mrb, self) != mrb_class(mrb, other)) - return mrb_false_value(); + mrb_value orig_proc, other_proc; - klass = mrb_class_ptr(IV_GET(self, MRB_SYM(_klass))); - if (klass != mrb_class_ptr(IV_GET(other, MRB_SYM(_klass)))) + if (!method_p(mrb, mrb_class(mrb, self), other)) return mrb_false_value(); - owner = mrb_class_ptr(IV_GET(self, MRB_SYM(_owner))); - if (owner != mrb_class_ptr(IV_GET(other, MRB_SYM(_owner)))) + if (mrb_class_ptr(IV_GET(self, MRB_SYM(_owner))) != mrb_class_ptr(IV_GET(other, MRB_SYM(_owner)))) return mrb_false_value(); - receiver = IV_GET(self, MRB_SYM(_recv)); - if (!mrb_obj_equal(mrb, receiver, IV_GET(other, MRB_SYM(_recv)))) + if (!mrb_obj_equal(mrb, IV_GET(self, MRB_SYM(_recv)), IV_GET(other, MRB_SYM(_recv)))) return mrb_false_value(); orig_proc = IV_GET(self, MRB_SYM(_proc)); other_proc = IV_GET(other, MRB_SYM(_proc)); - if (mrb_nil_p(orig_proc) && mrb_nil_p(other_proc)) { - if (mrb_symbol(IV_GET(self, MRB_SYM(_name))) == mrb_symbol(IV_GET(other, MRB_SYM(_name)))) - return mrb_true_value(); - else - return mrb_false_value(); + if (mrb_nil_p(orig_proc) && mrb_nil_p(other_proc) && + mrb_symbol(IV_GET(self, MRB_SYM(_name))) == mrb_symbol(IV_GET(other, MRB_SYM(_name)))) { + return mrb_true_value(); } - - if (mrb_nil_p(orig_proc)) - return mrb_false_value(); - if (mrb_nil_p(other_proc)) + if (mrb_nil_p(orig_proc) || mrb_nil_p(other_proc)) { return mrb_false_value(); - - orig_rproc = mrb_proc_ptr(orig_proc); - other_rproc = mrb_proc_ptr(other_proc); - if (MRB_PROC_CFUNC_P(orig_rproc)) { - if (!MRB_PROC_CFUNC_P(other_rproc)) - return mrb_false_value(); - if (orig_rproc->body.func != other_rproc->body.func) - return mrb_false_value(); - } - else { - if (MRB_PROC_CFUNC_P(other_rproc)) - return mrb_false_value(); - if (orig_rproc->body.irep != other_rproc->body.irep) - return mrb_false_value(); } - - return mrb_true_value(); + return mrb_bool_value(mrb_proc_eql(mrb, orig_proc, other_proc)); } #undef IV_GET @@ -245,7 +273,7 @@ method_eql(mrb_state *mrb, mrb_value self) static mrb_value mcall(mrb_state *mrb, mrb_value self, mrb_value recv) { - struct RProc *proc = method_extract_proc(mrb, self); + const struct RProc *proc = method_extract_proc(mrb, self); mrb_sym mid = method_extract_mid(mrb, self); struct RClass *tc = method_extract_owner(mrb, self); @@ -265,12 +293,48 @@ mcall(mrb_state *mrb, mrb_value self, mrb_value recv) return mrb_exec_irep(mrb, recv, proc); } +/* + * call-seq: + * method.call(args, ...) -> obj + * method[args, ...] -> obj + * + * Invokes the method with the specified arguments, returning the + * method's return value. + * + * m = 12.method("+") + * m.call(3) #=> 15 + * m.call(20) #=> 32 + */ + static mrb_value method_call(mrb_state *mrb, mrb_value self) { return mcall(mrb, self, mrb_undef_value()); } +/* + * call-seq: + * unbound_method.bind_call(obj, args, ...) -> result + * + * Bind unbound_method to obj and then invoke the method with the + * specified arguments. This is semantically equivalent to + * unbound_method.bind(obj).call(args, ...). + * + * class A + * def test + * puts "In A" + * end + * end + * class B < A + * end + * um = B.instance_method(:test) + * um.bind_call(B.new) + * + * produces: + * + * In A + */ + static mrb_value method_bcall(mrb_state *mrb, mrb_value self) { @@ -279,16 +343,38 @@ method_bcall(mrb_state *mrb, mrb_value self) return mcall(mrb, self, recv); } +/* + * call-seq: + * method.unbind -> unbound_method + * + * Dissociates method from its current receiver. The resulting + * UnboundMethod can subsequently be bound to a new object + * of the same class (see UnboundMethod). + * + * class A + * def test + * puts "In A" + * end + * end + * a = A.new + * m = a.method(:test) + * um = m.unbind + * um.bind(A.new).call + * + * produces: + * + * In A + */ + static mrb_value method_unbind(mrb_state *mrb, mrb_value self) { - struct RObject *ume; mrb_value owner = mrb_iv_get(mrb, self, MRB_SYM(_owner)); mrb_value name = mrb_iv_get(mrb, self, MRB_SYM(_name)); mrb_value proc = mrb_iv_get(mrb, self, MRB_SYM(_proc)); mrb_value klass = mrb_iv_get(mrb, self, MRB_SYM(_klass)); - ume = method_object_alloc(mrb, mrb_class_get_id(mrb, MRB_SYM(UnboundMethod))); + struct RObject *ume = method_object_alloc(mrb, mrb_class_get_id(mrb, MRB_SYM(UnboundMethod))); mrb_obj_iv_set(mrb, ume, MRB_SYM(_owner), owner); mrb_obj_iv_set(mrb, ume, MRB_SYM(_recv), mrb_nil_value()); mrb_obj_iv_set(mrb, ume, MRB_SYM(_name), name); @@ -298,7 +384,7 @@ method_unbind(mrb_state *mrb, mrb_value self) return mrb_obj_value(ume); } -static struct RProc * +static const struct RProc * method_search_vm(mrb_state *mrb, struct RClass **cp, mrb_sym mid) { mrb_method_t m = mrb_method_search_vm(mrb, cp, mid); @@ -308,12 +394,32 @@ method_search_vm(mrb_state *mrb, struct RClass **cp, mrb_sym mid) return MRB_METHOD_PROC(m); struct RProc *proc = mrb_proc_new_cfunc(mrb, MRB_METHOD_FUNC(m)); - if (MRB_METHOD_NOARG_P(m)) { - proc->flags |= MRB_PROC_NOARG; - } + mrb_proc_set_cfunc_aspec(proc, MRB_MT_ASPEC(m.flags)); return proc; } +/* + * call-seq: + * method.super_method -> method + * + * Returns a Method representing the method in the superclass + * of the method's class. Returns nil if there is no + * superclass method. + * + * class A + * def test + * puts "In A" + * end + * end + * class B < A + * def test + * puts "In B" + * end + * end + * obj = B.new + * obj.method(:test).super_method.call #=> "In A" + */ + static mrb_value method_super_method(mrb_state *mrb, mrb_value self) { @@ -322,39 +428,66 @@ method_super_method(mrb_state *mrb, mrb_value self) mrb_value owner = mrb_iv_get(mrb, self, MRB_SYM(_owner)); mrb_value name = mrb_iv_get(mrb, self, MRB_SYM(_name)); struct RClass *super, *rklass; - struct RProc *proc; - struct RObject *me; - switch (mrb_type(klass)) { - case MRB_TT_SCLASS: - super = mrb_class_ptr(klass)->super->super; - break; - case MRB_TT_ICLASS: - super = mrb_class_ptr(klass)->super; - break; - default: - super = mrb_class_ptr(owner)->super; - break; + if (mrb_type(owner) == MRB_TT_MODULE) { + struct RClass *m = mrb_class_ptr(owner); + rklass = mrb_class_ptr(klass)->super; + while (rklass && rklass->c != m) { + rklass = rklass->super; + } + if (!rklass) return mrb_nil_value(); + super = rklass->super; + } + else { + super = mrb_class_ptr(owner)->super; } - proc = method_search_vm(mrb, &super, mrb_symbol(name)); - if (!proc) - return mrb_nil_value(); + const struct RProc *proc = method_search_vm(mrb, &super, mrb_symbol(name)); + if (!proc) return mrb_nil_value(); - rklass = super; - while (super->tt == MRB_TT_ICLASS) - super = super->c; + if (!super) return mrb_nil_value(); + super = mrb_class_real(super); - me = method_object_alloc(mrb, mrb_obj_class(mrb, self)); + struct RObject *me = method_object_alloc(mrb, mrb_obj_class(mrb, self)); mrb_obj_iv_set(mrb, me, MRB_SYM(_owner), mrb_obj_value(super)); mrb_obj_iv_set(mrb, me, MRB_SYM(_recv), recv); mrb_obj_iv_set(mrb, me, MRB_SYM(_name), name); - mrb_obj_iv_set(mrb, me, MRB_SYM(_proc), mrb_obj_value(proc)); - mrb_obj_iv_set(mrb, me, MRB_SYM(_klass), mrb_obj_value(rklass)); + mrb_obj_iv_set(mrb, me, MRB_SYM(_proc), mrb_obj_value((void*)proc)); + mrb_obj_iv_set(mrb, me, MRB_SYM(_klass), mrb_obj_value(super)); return mrb_obj_value(me); } +/* + * call-seq: + * method.arity -> integer + * + * Returns an indication of the number of arguments accepted by a + * method. Returns a nonnegative integer for methods that take a fixed + * number of arguments. For Ruby methods that take a variable number of + * arguments, returns -n-1, where n is the number of required + * arguments. Keyword arguments will be considered as a single additional + * argument, that argument being mandatory if any keyword argument is + * mandatory. For methods written in C, returns -1 if the call takes a + * variable number of arguments. + * + * class C + * def one; end + * def two(a); end + * def three(*a); end + * def four(a, b); end + * def five(a, b, *c); end + * def six(a, b, *c, &d); end + * end + * c = C.new + * c.method(:one).arity #=> 0 + * c.method(:two).arity #=> 1 + * c.method(:three).arity #=> -1 + * c.method(:four).arity #=> 2 + * c.method(:five).arity #=> -3 + * c.method(:six).arity #=> -3 + */ + static mrb_value method_arity(mrb_state *mrb, mrb_value self) { @@ -363,6 +496,20 @@ method_arity(mrb_state *mrb, mrb_value self) return mrb_fixnum_value(arity); } +/* + * call-seq: + * method.source_location -> [String, Integer] or nil + * + * Returns the Ruby source filename and line number containing this method + * or nil if this method was not defined in Ruby (i.e. native). + * + * def foo; end + * method(:foo).source_location #=> ["test.rb", 1] + * + * Note: You need to enable debug option in your build configuration to use + * this method. + */ + static mrb_value method_source_location(mrb_state *mrb, mrb_value self) { @@ -374,6 +521,22 @@ method_source_location(mrb_state *mrb, mrb_value self) return mrb_proc_source_location(mrb, mrb_proc_ptr(proc)); } +/* + * call-seq: + * method.parameters -> array + * + * Returns the parameter information of this method. + * + * def foo(bar); end + * method(:foo).parameters #=> [[:req, :bar]] + * + * def foo(bar, baz, *qux); end + * method(:foo).parameters #=> [[:req, :bar], [:req, :baz], [:rest, :qux]] + * + * def foo(bar, baz, qux: 42); end + * method(:foo).parameters #=> [[:req, :bar], [:req, :baz], [:keyreq, :qux]] + */ + static mrb_value method_parameters(mrb_state *mrb, mrb_value self) { @@ -388,6 +551,16 @@ method_parameters(mrb_state *mrb, mrb_value self) return mrb_proc_parameters(mrb, proc); } +/* + * call-seq: + * method.to_s -> string + * method.inspect -> string + * + * Returns the name of the underlying method. + * + * "cat".method(:count).inspect #=> "#" + */ + static mrb_value method_to_s(mrb_state *mrb, mrb_value self) { @@ -395,158 +568,318 @@ method_to_s(mrb_state *mrb, mrb_value self) mrb_value klass = mrb_iv_get(mrb, self, MRB_SYM(_klass)); mrb_value name = mrb_iv_get(mrb, self, MRB_SYM(_name)); mrb_value str = mrb_str_new_lit(mrb, "#<"); - struct RClass *rklass; + mrb_value proc = mrb_iv_get(mrb, self, MRB_SYM(_proc)); mrb_str_cat_cstr(mrb, str, mrb_obj_classname(mrb, self)); mrb_str_cat_lit(mrb, str, ": "); - rklass = mrb_class_ptr(klass); - if (mrb_class_ptr(owner) == rklass) { - mrb_str_concat(mrb, str, owner); - mrb_str_cat_lit(mrb, str, "#"); - mrb_str_concat(mrb, str, name); + if (mrb_type(owner) == MRB_TT_SCLASS) { + mrb_value recv = mrb_iv_get(mrb, self, MRB_SYM(_recv)); + if (!mrb_nil_p(recv)) { + mrb_str_concat(mrb, str, recv); + mrb_str_cat_lit(mrb, str, "."); + mrb_str_concat(mrb, str, name); + goto finish; + } } - else { - mrb_str_cat_cstr(mrb, str, mrb_class_name(mrb, rklass)); - mrb_str_cat_lit(mrb, str, "("); - mrb_str_concat(mrb, str, owner); - mrb_str_cat_lit(mrb, str, ")#"); - mrb_str_concat(mrb, str, name); + { + struct RClass *ok = mrb_class_ptr(owner); + struct RClass *rk = mrb_class_ptr(klass); + struct RClass *rklass = mrb_class_real(rk); /* skip internal class */ + if (ok == rk || ok == rklass) { + mrb_str_concat(mrb, str, owner); + mrb_str_cat_lit(mrb, str, "#"); + mrb_str_concat(mrb, str, name); + } + else { + mrb_str_concat(mrb, str, mrb_obj_value(rklass)); + mrb_str_cat_lit(mrb, str, "("); + mrb_str_concat(mrb, str, owner); + mrb_str_cat_lit(mrb, str, ")#"); + mrb_str_concat(mrb, str, name); + } + } + finish:; + if (!mrb_nil_p(proc)) { + const struct RProc *p = mrb_proc_ptr(proc); + if (MRB_PROC_ALIAS_P(p)) { + mrb_sym mid; + while (MRB_PROC_ALIAS_P(p)) { + mid = p->body.mid; + p = p->upper; + } + mrb_str_cat_lit(mrb, str, "("); + mrb_str_concat(mrb, str, mrb_symbol_value(mid)); + mrb_str_cat_lit(mrb, str, ")"); + } + } + mrb_value loc = method_source_location(mrb, self); + if (mrb_array_p(loc) && RARRAY_LEN(loc) == 2) { + mrb_str_cat_lit(mrb, str, " "); + mrb_str_concat(mrb, str, RARRAY_PTR(loc)[0]); + mrb_str_cat_lit(mrb, str, ":"); + mrb_str_concat(mrb, str, RARRAY_PTR(loc)[1]); } mrb_str_cat_lit(mrb, str, ">"); return str; } -static void -mrb_search_method_owner(mrb_state *mrb, struct RClass *c, mrb_value obj, mrb_sym name, struct RClass **owner, struct RProc **proc, mrb_bool unbound) +static mrb_bool +search_method_owner(mrb_state *mrb, struct RClass *c, mrb_value obj, mrb_sym name, struct RClass **owner, const struct RProc **proc, mrb_bool unbound) { - mrb_value ret; - *owner = c; *proc = method_search_vm(mrb, owner, name); if (!*proc) { if (unbound) { - goto name_error; + return FALSE; } if (!mrb_respond_to(mrb, obj, MRB_SYM_Q(respond_to_missing))) { - goto name_error; + return FALSE; } - ret = mrb_funcall_id(mrb, obj, MRB_SYM_Q(respond_to_missing), 2, mrb_symbol_value(name), mrb_true_value()); + mrb_value ret = mrb_funcall_argv2(mrb, obj, MRB_SYM_Q(respond_to_missing), mrb_symbol_value(name), mrb_true_value()); if (!mrb_test(ret)) { - goto name_error; + return FALSE; } *owner = c; } + return TRUE; +} - while ((*owner)->tt == MRB_TT_ICLASS) - *owner = (*owner)->c; - - return; - -name_error: - mrb_raisef(mrb, E_NAME_ERROR, "undefined method '%n' for class '%C'", name, c); +static mrb_noreturn void +singleton_method_error(mrb_state *mrb, mrb_sym name, mrb_value obj) +{ + mrb_raisef(mrb, E_NAME_ERROR, "undefined singleton method '%n' for '%!v'", name, obj); } static mrb_value -mrb_kernel_method(mrb_state *mrb, mrb_value self) +method_alloc(mrb_state *mrb, struct RClass *c, mrb_value obj, mrb_sym name, mrb_bool unbound, mrb_bool singleton) { struct RClass *owner; - struct RProc *proc; - struct RObject *me; - mrb_sym name; + const struct RProc *proc; - mrb_get_args(mrb, "n", &name); - - mrb_search_method_owner(mrb, mrb_class(mrb, self), self, name, &owner, &proc, FALSE); + if (!search_method_owner(mrb, c, obj, name, &owner, &proc, unbound)) { + if (singleton) { + singleton_method_error(mrb, name, obj); + } + else { + mrb_raisef(mrb, E_NAME_ERROR, "undefined method '%n' for class '%C'", name, c); + } + } + if (singleton && (owner->tt != MRB_TT_SCLASS && owner->tt != MRB_TT_ICLASS)) { + singleton_method_error(mrb, name, obj); + } + while ((owner)->tt == MRB_TT_ICLASS) + owner = (owner)->c; - me = method_object_alloc(mrb, mrb_class_get_id(mrb, MRB_SYM(Method))); + struct RObject *me = method_object_alloc(mrb, mrb_class_get_id(mrb, unbound ? MRB_SYM(UnboundMethod) : MRB_SYM(Method))); mrb_obj_iv_set(mrb, me, MRB_SYM(_owner), mrb_obj_value(owner)); - mrb_obj_iv_set(mrb, me, MRB_SYM(_recv), self); + mrb_obj_iv_set(mrb, me, MRB_SYM(_recv), unbound ? mrb_nil_value() : obj); mrb_obj_iv_set(mrb, me, MRB_SYM(_name), mrb_symbol_value(name)); - mrb_obj_iv_set(mrb, me, MRB_SYM(_proc), proc ? mrb_obj_value(proc) : mrb_nil_value()); - mrb_obj_iv_set(mrb, me, MRB_SYM(_klass), mrb_obj_value(mrb_class(mrb, self))); + mrb_obj_iv_set(mrb, me, MRB_SYM(_proc), proc ? mrb_obj_value((void*)proc) : mrb_nil_value()); + mrb_obj_iv_set(mrb, me, MRB_SYM(_klass), mrb_obj_value(c)); return mrb_obj_value(me); } +/* + * call-seq: + * obj.method(sym) -> method + * + * Looks up the named method as a receiver in obj, returning a + * Method object (or raising NameError). The + * Method object acts as a closure in obj's object + * instance, so instance variables and the value of self + * remain available. + * + * class Demo + * def initialize(n) + * @iv = n + * end + * def hello() + * "Hello, @iv = #{@iv}" + * end + * end + * + * k = Demo.new(99) + * m = k.method(:hello) + * m.call #=> "Hello, @iv = 99" + * + * l = Demo.new('Fred') + * m = l.method("hello") + * m.call #=> "Hello, @iv = Fred" + */ + static mrb_value -mrb_module_instance_method(mrb_state *mrb, mrb_value self) +mrb_kernel_method(mrb_state *mrb, mrb_value self) { - struct RClass *owner; - struct RProc *proc; - struct RObject *ume; mrb_sym name; mrb_get_args(mrb, "n", &name); + return method_alloc(mrb, mrb_class(mrb, self), self, name, FALSE, FALSE); +} - mrb_search_method_owner(mrb, mrb_class_ptr(self), self, name, &owner, &proc, TRUE); +/* + * call-seq: + * obj.singleton_method(sym) -> method + * + * Similar to method, searches singleton method only. + * + * class Demo + * def initialize(n) + * @iv = n + * end + * def hello() + * "Hello, @iv = #{@iv}" + * end + * end + * + * k = Demo.new(99) + * def k.hi + * "Hi, @iv = #{@iv}" + * end + * m = k.singleton_method(:hi) + * m.call #=> "Hi, @iv = 99" + * m = k.singleton_method(:hello) #=> NameError + */ - ume = method_object_alloc(mrb, mrb_class_get_id(mrb, MRB_SYM(UnboundMethod))); - mrb_obj_iv_set(mrb, ume, MRB_SYM(_owner), mrb_obj_value(owner)); - mrb_obj_iv_set(mrb, ume, MRB_SYM(_recv), mrb_nil_value()); - mrb_obj_iv_set(mrb, ume, MRB_SYM(_name), mrb_symbol_value(name)); - mrb_obj_iv_set(mrb, ume, MRB_SYM(_proc), proc ? mrb_obj_value(proc) : mrb_nil_value()); - mrb_obj_iv_set(mrb, ume, MRB_SYM(_klass), self); +static mrb_value +mrb_kernel_singleton_method(mrb_state *mrb, mrb_value self) +{ + mrb_sym name; - return mrb_obj_value(ume); + mrb_get_args(mrb, "n", &name); + + struct RClass *c = mrb_class(mrb, self); + return method_alloc(mrb, c, self, name, FALSE, TRUE); } +/* + * call-seq: + * mod.instance_method(symbol) -> unbound_method + * + * Returns an UnboundMethod representing the given + * instance method in mod. + * + * class Interpreter + * def do_a() print "there, "; end + * def do_d() print "Hello "; end + * def do_e() print "!\n"; end + * def do_v() print "world"; end + * end + * Interpreter.instance_method(:do_a).bind(Interpreter.new).call + * Interpreter.instance_method(:do_d).bind(Interpreter.new).call + * Interpreter.instance_method(:do_v).bind(Interpreter.new).call + * Interpreter.instance_method(:do_e).bind(Interpreter.new).call + * + * produces: + * + * there, Hello world! + */ + +static mrb_value +mrb_module_instance_method(mrb_state *mrb, mrb_value self) +{ + mrb_sym name; + + mrb_get_args(mrb, "n", &name); + return method_alloc(mrb, mrb_class_ptr(self), self, name, TRUE, FALSE); +} + +/* + * call-seq: + * method.owner -> class_or_module + * + * Returns the class or module that defines the method. + * + * (1..3).method(:map).owner #=> Enumerable + */ + static mrb_value method_owner(mrb_state *mrb, mrb_value self) { return mrb_iv_get(mrb, self, MRB_SYM(_owner)); } +/* + * call-seq: + * method.receiver -> object + * + * Returns the bound receiver of the method. + * + * "hello".method(:upcase).receiver #=> "hello" + */ + static mrb_value method_receiver(mrb_state *mrb, mrb_value self) { return mrb_iv_get(mrb, self, MRB_SYM(_recv)); } +/* + * call-seq: + * method.name -> symbol + * + * Returns the name of the method. + * + * "hello".method(:upcase).name #=> :upcase + */ + static mrb_value method_name(mrb_state *mrb, mrb_value self) { return mrb_iv_get(mrb, self, MRB_SYM(_name)); } +/* ---------------------------*/ +static const mrb_mt_entry method_ubm_rom_entries[] = { + MRB_MT_ENTRY(unbound_method_bind, MRB_SYM(bind), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(method_super_method, MRB_SYM(super_method), MRB_ARGS_NONE()), + MRB_MT_ENTRY(method_eql, MRB_OPSYM(eq), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(method_eql, MRB_SYM_Q(eql), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(method_to_s, MRB_SYM(to_s), MRB_ARGS_NONE()), + MRB_MT_ENTRY(method_to_s, MRB_SYM(inspect), MRB_ARGS_NONE()), + MRB_MT_ENTRY(method_arity, MRB_SYM(arity), MRB_ARGS_NONE()), + MRB_MT_ENTRY(method_source_location, MRB_SYM(source_location), MRB_ARGS_NONE()), + MRB_MT_ENTRY(method_parameters, MRB_SYM(parameters), MRB_ARGS_NONE()), + MRB_MT_ENTRY(method_bcall, MRB_SYM(bind_call), MRB_ARGS_REQ(1)|MRB_ARGS_ANY()), + MRB_MT_ENTRY(method_owner, MRB_SYM(owner), MRB_ARGS_NONE()), + MRB_MT_ENTRY(method_name, MRB_SYM(name), MRB_ARGS_NONE()), +}; + +static const mrb_mt_entry method_mtd_rom_entries[] = { + MRB_MT_ENTRY(method_eql, MRB_OPSYM(eq), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(method_eql, MRB_SYM_Q(eql), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(method_to_s, MRB_SYM(to_s), MRB_ARGS_NONE()), + MRB_MT_ENTRY(method_to_s, MRB_SYM(inspect), MRB_ARGS_NONE()), + MRB_MT_ENTRY(method_call, MRB_SYM(call), MRB_ARGS_ANY()), + MRB_MT_ENTRY(method_call, MRB_OPSYM(aref), MRB_ARGS_ANY()), + MRB_MT_ENTRY(method_unbind, MRB_SYM(unbind), MRB_ARGS_NONE()), + MRB_MT_ENTRY(method_super_method, MRB_SYM(super_method), MRB_ARGS_NONE()), + MRB_MT_ENTRY(method_arity, MRB_SYM(arity), MRB_ARGS_NONE()), + MRB_MT_ENTRY(method_source_location, MRB_SYM(source_location), MRB_ARGS_NONE()), + MRB_MT_ENTRY(method_parameters, MRB_SYM(parameters), MRB_ARGS_NONE()), + MRB_MT_ENTRY(method_owner, MRB_SYM(owner), MRB_ARGS_NONE()), + MRB_MT_ENTRY(method_receiver, MRB_SYM(receiver), MRB_ARGS_NONE()), + MRB_MT_ENTRY(method_name, MRB_SYM(name), MRB_ARGS_NONE()), +}; + void mrb_mruby_method_gem_init(mrb_state* mrb) { struct RClass *unbound_method = mrb_define_class_id(mrb, MRB_SYM(UnboundMethod), mrb->object_class); struct RClass *method = mrb_define_class_id(mrb, MRB_SYM(Method), mrb->object_class); - mrb_undef_class_method(mrb, unbound_method, "new"); - mrb_define_method(mrb, unbound_method, "bind", unbound_method_bind, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, unbound_method, "super_method", method_super_method, MRB_ARGS_NONE()); - mrb_define_method(mrb, unbound_method, "==", method_eql, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, unbound_method, "eql?", method_eql, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, unbound_method, "to_s", method_to_s, MRB_ARGS_NONE()); - mrb_define_method(mrb, unbound_method, "inspect", method_to_s, MRB_ARGS_NONE()); - mrb_define_method(mrb, unbound_method, "arity", method_arity, MRB_ARGS_NONE()); - mrb_define_method(mrb, unbound_method, "source_location", method_source_location, MRB_ARGS_NONE()); - mrb_define_method(mrb, unbound_method, "parameters", method_parameters, MRB_ARGS_NONE()); - mrb_define_method(mrb, unbound_method, "bind_call", method_bcall, MRB_ARGS_REQ(1)|MRB_ARGS_ANY()); - mrb_define_method(mrb, unbound_method, "owner", method_owner, MRB_ARGS_NONE()); - mrb_define_method(mrb, unbound_method, "name", method_name, MRB_ARGS_NONE()); - - mrb_undef_class_method(mrb, method, "new"); - mrb_define_method(mrb, method, "==", method_eql, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, method, "eql?", method_eql, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, method, "to_s", method_to_s, MRB_ARGS_NONE()); - mrb_define_method(mrb, method, "inspect", method_to_s, MRB_ARGS_NONE()); - mrb_define_method(mrb, method, "call", method_call, MRB_ARGS_ANY()); - mrb_define_method(mrb, method, "[]", method_call, MRB_ARGS_ANY()); - mrb_define_method(mrb, method, "unbind", method_unbind, MRB_ARGS_NONE()); - mrb_define_method(mrb, method, "super_method", method_super_method, MRB_ARGS_NONE()); - mrb_define_method(mrb, method, "arity", method_arity, MRB_ARGS_NONE()); - mrb_define_method(mrb, method, "source_location", method_source_location, MRB_ARGS_NONE()); - mrb_define_method(mrb, method, "parameters", method_parameters, MRB_ARGS_NONE()); - mrb_define_method(mrb, method, "owner", method_owner, MRB_ARGS_NONE()); - mrb_define_method(mrb, method, "receiver", method_receiver, MRB_ARGS_NONE()); - mrb_define_method(mrb, method, "name", method_name, MRB_ARGS_NONE()); - - mrb_define_method(mrb, mrb->kernel_module, "method", mrb_kernel_method, MRB_ARGS_REQ(1)); - - mrb_define_method(mrb, mrb->module_class, "instance_method", mrb_module_instance_method, MRB_ARGS_REQ(1)); + MRB_SET_INSTANCE_TT(unbound_method, MRB_TT_OBJECT); + MRB_UNDEF_ALLOCATOR(unbound_method); + mrb_undef_class_method_id(mrb, unbound_method, MRB_SYM(new)); + MRB_MT_INIT_ROM(mrb, unbound_method, method_ubm_rom_entries); + + MRB_SET_INSTANCE_TT(method, MRB_TT_OBJECT); + MRB_UNDEF_ALLOCATOR(method); + mrb_undef_class_method_id(mrb, method, MRB_SYM(new)); + MRB_MT_INIT_ROM(mrb, method, method_mtd_rom_entries); + mrb_define_method_id(mrb, mrb->kernel_module, MRB_SYM(method), mrb_kernel_method, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, mrb->kernel_module, MRB_SYM(singleton_method), mrb_kernel_singleton_method, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, mrb->module_class, MRB_SYM(instance_method), mrb_module_instance_method, MRB_ARGS_REQ(1)); } void diff --git a/mrbgems/mruby-method/test/method.rb b/mrbgems/mruby-method/test/method.rb index 9089c96d6b..93ac30e600 100644 --- a/mrbgems/mruby-method/test/method.rb +++ b/mrbgems/mruby-method/test/method.rb @@ -60,7 +60,7 @@ def run assert_equal(-3, method(:mo7).arity) assert_equal(1, method(:ma1).arity) - assert_equal(-1, method(:__send__).arity) + assert_equal(-2, method(:__send__).arity) assert_equal(-1, method(:nothing).arity) end @@ -138,6 +138,36 @@ def foo assert_raise(NoMethodError) { m.call(:arg1, :arg2) } end +assert 'Method#call with undefined method -- only kwargs' do + c = Class.new { + attr_accessor :m, :argv, :kwargs + def respond_to_missing?(m, b) + m == :foo + end + + def method_missing(m, *argv, **kwargs) + @m = m + @argv = argv + @kwargs = kwargs + super + end + } + cc = c.new + assert_kind_of Method, cc.method(:foo) + + # Calling cc.method(:foo) works + assert_raise(NoMethodError) { cc.method(:foo).call(kwarg1: :val1, kwarg2: :val2) } + assert_equal :foo, cc.m + assert_equal [], cc.argv + assert_equal({ kwarg1: :val1, kwarg2: :val2 }, cc.kwargs) + + # calling cc.foo fails + assert_raise(NoMethodError) { cc.foo(kwarg1: :val1, kwarg2: :val2) } + assert_equal :foo, cc.m + assert_equal [], cc.argv + assert_equal({ kwarg1: :val1, kwarg2: :val2 }, cc.kwargs) +end + assert 'Method#source_location' do skip if proc{}.source_location.nil? @@ -230,14 +260,14 @@ def o.baz(x, y, z, *w, u:, v:, **opts, &blk) o = Object.new def o.foo; end m = o.method(:foo) - assert_equal("#", m.unbind.inspect) + assert_match("#", m.unbind.inspect) c = Class.new c.class_eval { def foo; end; } m = c.new.method(:foo) - assert_equal("#", m.inspect) + assert_match("#", m.inspect) m = c.instance_method(:foo) - assert_equal("#", m.inspect) + assert_match("#", m.inspect) end assert 'owner' do @@ -482,3 +512,131 @@ def m(x, y, z, *w, u:, v:, **opts, &blk) assert_raise(ArgumentError) { BasicObject.instance_method(:__id__).bind_call nil, 1 } assert_raise(ArgumentError) { BasicObject.instance_method(:__id__).bind_call nil, opts: 1 } end + +assert 'Method#parameters and #arity on aliased methods' do + # Regression: an alias proc carries the original method's name in body.mid + # (not an irep), with `upper` pointing at the original proc. Both + # mrb_proc_parameters (mruby-proc-ext) and mrb_proc_arity (core src/proc.c) + # used to fall into their irep branch for alias procs and dereference body.mid + # as an mrb_irep* -> SEGV / misaligned read. Both must resolve through `upper`. + # + # The literals below match CRuby for these positional/optional/rest/block + # signatures (where mruby and CRuby agree), so this is independent ground + # truth, not merely "alias == original". + c = Class.new { + def f0; end + def f1(a); end + def fopt(a, b = 1); end + def frest(a, *b); end + def fblk(a, &b); end + def fmix(a, b = 1, *c, &d); end + alias_method :a0, :f0 + alias_method :a1, :f1 + alias_method :aopt, :fopt + alias_method :arest, :frest + alias_method :ablk, :fblk + alias_method :amix, :fmix + } + cases = [ + # name, parameters, arity + [:a0, [], 0], + [:a1, [[:req, :a]], 1], + [:aopt, [[:req, :a], [:opt, :b]], -2], + [:arest, [[:req, :a], [:rest, :b]], -2], + [:ablk, [[:req, :a], [:block, :b]], 1], + [:amix, [[:req, :a], [:opt, :b], [:rest, :c], [:block, :d]], -2], + ] + cases.each do |name, params, arity| + u = c.instance_method(name) + assert_equal params, u.parameters + assert_equal arity, u.arity + # the bound Method goes through the same proc paths and must agree + b = c.new.method(name) + assert_equal params, b.parameters + assert_equal arity, b.arity + end + # alias must equal the original it points at (parameters and arity) + { a0: :f0, a1: :f1, aopt: :fopt, arest: :frest, ablk: :fblk, amix: :fmix }.each do |al, orig| + assert_equal c.instance_method(orig).parameters, c.instance_method(al).parameters + assert_equal c.instance_method(orig).arity, c.instance_method(al).arity + end + + # Alias-of-an-alias collapses to one proc at creation; must still resolve. + chain = Class.new { + def orig(x, y) end + alias_method :a1, :orig + alias_method :a2, :a1 + } + assert_equal [[:req, :x], [:req, :y]], chain.instance_method(:a2).parameters + assert_equal 2, chain.instance_method(:a2).arity + + # Aliasing a C method does NOT create an alias proc (it reuses the original + # cfunc method), so it must behave exactly like the original and never crash. + c2 = Class.new(String) { alias_method :up2, :upcase } + assert_equal String.instance_method(:upcase).parameters, + c2.instance_method(:up2).parameters + assert_equal String.instance_method(:upcase).arity, + c2.instance_method(:up2).arity +end + +assert 'Method/UnboundMethod on C-defined (native) methods' do + # C methods have no irep: arity/parameters come from the packed argument spec + # (caspec) and source_location is always nil. This exercises the + # MRB_PROC_CFUNC_P branches of mrb_proc_arity / mrb_proc_parameters and the + # cfunc path of method_search_vm -- none of which the suite covered before. + + # source_location is nil for genuinely C-defined methods (not mrblib ones). + assert_nil "x".method(:upcase).source_location + assert_nil 1.method(:+).source_location + assert_nil [].method(:push).source_location + assert_nil String.instance_method(:upcase).source_location + + # arity: documented values for these core C methods. + assert_equal 0, "x".method(:upcase).arity # no args + assert_equal 1, 1.method(:+).arity # one required + assert_equal(-1, [].method(:push).arity) # variadic (rest) + assert_equal(-1, [].method(:first).arity) # optional + + # parameters: always an Array of Arrays, never crashes; a no-arg C method + # gives []. C-method parameter *kinds* are an approximation (names are absent, + # and required args surface as :opt for non-strict procs), so we assert the + # stable shape rather than pinning exact kind labels -- except :rest, which is + # meaningful: a variadic C method must expose a rest parameter. + assert_equal [], "x".method(:upcase).parameters + [1.method(:+), [].method(:push), {}.method(:[]), [].method(:first)].each do |m| + ps = m.parameters + assert_true ps.is_a?(Array) + ps.each { |p| assert_true p.is_a?(Array) } + end + assert_true [].method(:push).parameters.any? { |entry| entry[0] == :rest } + + # identity / metadata on a C method. + m = "abc".method(:upcase) + assert_equal String, m.owner + assert_equal :upcase, m.name + assert_equal "abc", m.receiver + assert_equal "#", m.to_s + assert_equal "#", String.instance_method(:upcase).to_s + + # behaviour: the C function actually runs via call / [] / bind / bind_call / + # unbind+rebind. + assert_equal 5, 2.method(:+).call(3) + assert_equal 5, 2.method(:+)[3] + assert_equal 5, Integer.instance_method(:+).bind_call(2, 3) + assert_equal 5, Integer.instance_method(:+).bind(2).call(3) + assert_equal 11, 5.method(:+).unbind.bind(10).call(1) + + # eql?: equal only when bound to the SAME receiver object and same definition. + s = "cat" + assert_true s.method(:upcase) == s.method(:upcase) + assert_false s.method(:upcase) == "cat".method(:upcase) # distinct receivers + assert_false s.method(:upcase) == s.method(:downcase) + + # super_method resolves across C methods (Integer#to_s -> BasicObject#to_s). + sm = 5.method(:to_s).super_method + assert_false sm.nil? + assert_equal :to_s, sm.name + + # binding a C UnboundMethod to an incompatible receiver still raises cleanly. + assert_raise(TypeError) { String.instance_method(:upcase).bind(42) } +end diff --git a/mrbgems/mruby-numeric-ext/README.md b/mrbgems/mruby-numeric-ext/README.md new file mode 100644 index 0000000000..c5fcc22793 --- /dev/null +++ b/mrbgems/mruby-numeric-ext/README.md @@ -0,0 +1,78 @@ +# mruby-numeric-ext + +## Purpose + +This mrbgem extends the `Numeric` and `Integer` classes in mruby with additional methods for common numerical operations and checks. + +## Functionality + +This gem adds the following methods: + +### Numeric Class + +- `zero?`: Returns `true` if the number is zero, `false` otherwise. +- `nonzero?`: Returns the number itself if it's not zero, `nil` otherwise. +- `positive?`: Returns `true` if the number is greater than 0, `false` otherwise. +- `negative?`: Returns `true` if the number is less than 0, `false` otherwise. +- `integer?`: Returns `true` if the number is an `Integer` (this is overridden in the `Integer` class). + +### Integer Class + +- `allbits?(mask)`: Returns `true` if all bits of `self & mask` are 1. +- `anybits?(mask)`: Returns `true` if any bits of `self & mask` are 1. +- `nobits?(mask)`: Returns `true` if no bits of `self & mask` are 1. +- `bit_length`: Returns the number of bits of the absolute value of `self` in binary. `0.bit_length #=> 0`; `(-1).bit_length #=> 0`; `2.bit_length #=> 2`. +- `ceildiv(other)`: Returns the result of `self` divided by `other`, rounded up to the nearest integer. +- `integer?`: Returns `true` (overrides `Numeric#integer?`). +- `remainder(numeric)`: Returns the remainder of `self` divided by `numeric`. Equivalent to `x - y * (x / y).truncate`. +- `pow(numeric)`: Returns `self` raised to the power of `numeric`. +- `pow(integer, integer)`: Returns modular exponentiation (`(self ** exponent) % modulus`). +- `digits(base=10)`: Returns an array of integers representing the base-radix digits of `self`. The first element is the least significant digit. +- `size`: Returns the number of bytes in the machine representation of the integer. +- `odd?`: Returns `true` if the integer is odd, `false` otherwise. +- `even?`: Returns `true` if the integer is even, `false` otherwise. +- `Integer.sqrt(integer)`: (Class method) Returns the integer square root of the given non-negative integer. + +## How to use + +To use this mrbgem, add it to your `build_config.rb`: + +```ruby +MRuby::Build.new do |conf| + # ... (other configurations) + conf.gem :core => 'mruby-numeric-ext' +end +``` + +Then you can use the extended methods: + +```ruby +p 5.positive? # => true +p (-3).negative? # => true +p 0.zero? # => true +p 5.nonzero? # => 5 +p 0.nonzero? # => nil + +p 7.allbits?(3) # => false (binary 111 & 011 is 011, not 011) +p 7.anybits?(3) # => true +p 7.nobits?(8) # => true (binary 111 & 1000 is 000) + +p 10.ceildiv(3) # => 4 +p (-10).ceildiv(3) # => -3 + +p 10.remainder(3) # => 1 +p (-10).remainder(3) # => -1 + +p 2.pow(3) # => 8 +p 2.pow(3, 5) # => 3 ( (2**3) % 5 ) + +p 12345.digits # => [5, 4, 3, 2, 1] +p 12345.digits(16) # => [9, 3, 0, 3] + +p 42.size # => (depends on machine, e.g., 4 or 8) +p 5.bit_length # => 3 +p 5.odd? # => true +p 4.even? # => true +p Integer.sqrt(16) # => 4 +p Integer.sqrt(17) # => 4 +``` diff --git a/mrbgems/mruby-numeric-ext/mrblib/numeric_ext.rb b/mrbgems/mruby-numeric-ext/mrblib/numeric_ext.rb index ee205b85ae..c121fdec23 100644 --- a/mrbgems/mruby-numeric-ext/mrblib/numeric_ext.rb +++ b/mrbgems/mruby-numeric-ext/mrblib/numeric_ext.rb @@ -3,7 +3,7 @@ class Numeric # call-seq: # zero? -> true or false # - # Returns +true+ if +zero+ has a zero value, +false+ otherwise. + # Returns `true` if `zero` has a zero value, `false` otherwise. # # Of the Core and Standard Library classes, # only Rational and Complex use this implementation. @@ -16,8 +16,8 @@ def zero? # call-seq: # nonzero? -> self or nil # - # Returns +self+ if +self+ is not a zero value, +nil+ otherwise; - # uses method zero? for the evaluation. + # Returns `self` if `self` is not a zero value, `nil` otherwise; + # uses method `zero?` for the evaluation. # def nonzero? if self == 0 @@ -31,7 +31,7 @@ def nonzero? # call-seq: # positive? -> true or false # - # Returns +true+ if +self+ is greater than 0, +false+ otherwise. + # Returns `true` if `self` is greater than 0, `false` otherwise. # def positive? self > 0 @@ -41,17 +41,32 @@ def positive? # call-seq: # negative? -> true or false # - # Returns +true+ if +self+ is less than 0, +false+ otherwise. + # Returns `true` if `self` is less than 0, `false` otherwise. # def negative? self < 0 end + ## + # call-seq: + # num.integer? -> true or false + # + # Returns true if num is an Integer. + # + # 1.0.integer? #=> false + # 1.integer? #=> true + # + def integer? + false + end +end + +class Integer ## # call-seq: # int.allbits?(mask) -> true or false # - # Returns +true+ if all bits of +int+ & +mask+ are 1. + # Returns `true` if all bits of ``int` & `mask`` are 1. # def allbits?(mask) (self & mask) == mask @@ -61,7 +76,7 @@ def allbits?(mask) # call-seq: # int.anybits?(mask) -> true or false # - # Returns +true+ if any bits of +int+ & +mask+ are 1. + # Returns `true` if any bits of ``int` & `mask`` are 1. # def anybits?(mask) (self & mask) != 0 @@ -71,18 +86,16 @@ def anybits?(mask) # call-seq: # int.nobits?(mask) -> true or false # - # Returns +true+ if no bits of +int+ & +mask+ are 1. + # Returns `true` if no bits of ``int` & `mask`` are 1. # def nobits?(mask) (self & mask) == 0 end -end -class Integer # call-seq: # ceildiv(other) -> integer # - # Returns the result of division +self+ by +other+. The + # Returns the result of division `self` by `other`. The # result is rounded up to the nearest integer. # # 3.ceildiv(3) # => 1 @@ -96,4 +109,17 @@ class Integer def ceildiv(other) -div(-other) end + + ## + # call-seq: + # int.integer? -> true + # + # Returns true since this is an Integer. + # + # 1.integer? #=> true + # 42.integer? #=> true + # + def integer? + true + end end diff --git a/mrbgems/mruby-numeric-ext/src/numeric_ext.c b/mrbgems/mruby-numeric-ext/src/numeric_ext.c index 5fa4e5fb1d..13159b75df 100644 --- a/mrbgems/mruby-numeric-ext/src/numeric_ext.c +++ b/mrbgems/mruby-numeric-ext/src/numeric_ext.c @@ -1,18 +1,53 @@ #include #include #include +#include +#include #include -#include #ifndef MRB_NO_FLOAT static mrb_value flo_remainder(mrb_state *mrb, mrb_value self); #endif +/* + * call-seq: + * int.bit_length -> integer + * + * Returns the number of bits of the absolute value of self in binary representation. + * For zero, returns 0. For negative integers, behaves as (~self).bit_length + * (e.g., (-1).bit_length => 0, (-2).bit_length => 1). + */ +static mrb_value +int_bit_length(mrb_state *mrb, mrb_value self) +{ +#ifdef MRB_USE_BIGINT + if (mrb_bigint_p(self)) { + mrb_int sign = mrb_bint_sign(mrb, self); + if (sign == 0) return mrb_fixnum_value(0); + mrb_value v = self; + if (sign < 0) v = mrb_bint_rev(mrb, self); /* ~self = -self-1 */ + mrb_value s = mrb_bint_to_s(mrb, v, 2); + return mrb_int_value(mrb, (mrb_int)RSTRING_LEN(s)); + } +#endif + mrb_int x = mrb_integer(self); + if (x == 0) return mrb_fixnum_value(0); + + /* for negative fixnums, use ~x */ + mrb_uint ux = (mrb_uint)(x < 0 ? ~x : x); + mrb_int bits = 0; + while (ux) { + bits++; + ux >>= 1; + } + return mrb_int_value(mrb, bits); +} + /* * call-seq: * num.remainder(numeric) -> real * - * x.remainder(y) means x-y*(x/y).truncate. + * `x.remainder(y)` means `x-y*(x/y).truncate`. * * See Numeric#divmod. */ @@ -46,6 +81,106 @@ int_remainder(mrb_state *mrb, mrb_value x) mrb_value mrb_int_pow(mrb_state *mrb, mrb_value x, mrb_value y); +static mrb_int +mrb_int_gcd(mrb_int x, mrb_int y) +{ + /* Negate via unsigned so MRB_INT_MIN doesn't overflow. + The cast back at the end produces MRB_INT_MIN only when the + true result is 2^63 (gcd of MRB_INT_MIN with itself or 0); + callers detect that case from the negative return value. */ + mrb_uint ux = (x < 0) ? -(mrb_uint)x : (mrb_uint)x; + mrb_uint uy = (y < 0) ? -(mrb_uint)y : (mrb_uint)y; + + while (uy != 0) { + mrb_uint temp = uy; + uy = ux % uy; + ux = temp; + } + + return (mrb_int)ux; +} + +/* + * call-seq: + * int.gcd(other_int) -> integer + * + * Returns the greatest common divisor of the two integers. + * The result is always positive. + */ +static mrb_value +int_gcd(mrb_state *mrb, mrb_value x) +{ + mrb_value y = mrb_get_arg1(mrb); + +#ifdef MRB_USE_BIGINT + if (mrb_bigint_p(x) || mrb_bigint_p(y)) { + if (!mrb_integer_p(y) && !mrb_bigint_p(y)) { + mrb_raisef(mrb, E_TYPE_ERROR, "can't convert %Y into Integer", y); + } + if (!mrb_bigint_p(x)) x = mrb_bint_new_int(mrb, mrb_integer(x)); + if (!mrb_bigint_p(y)) y = mrb_bint_new_int(mrb, mrb_integer(y)); + return mrb_bint_gcd(mrb, x, y); + } +#endif + + if (!mrb_integer_p(y)) { + mrb_raisef(mrb, E_TYPE_ERROR, "can't convert %Y into Integer", y); + } + mrb_int g = mrb_int_gcd(mrb_integer(x), mrb_integer(y)); + /* g < 0 only when the mathematical result is 2^63 (= |MRB_INT_MIN|), + which does not fit in mrb_int. */ + if (g < 0) mrb_int_overflow(mrb, "gcd"); + return mrb_int_value(mrb, g); +} + +/* + * call-seq: + * int.lcm(other_int) -> integer + * + * Returns the least common multiple of the two integers. + * The result is always positive. + */ +static mrb_value +int_lcm(mrb_state *mrb, mrb_value x) +{ + mrb_value y = mrb_get_arg1(mrb); + mrb_int a, b, gcd_val; + +#ifdef MRB_USE_BIGINT + if (mrb_bigint_p(x) || mrb_bigint_p(y)) { + if (!mrb_integer_p(y) && !mrb_bigint_p(y)) { + mrb_raisef(mrb, E_TYPE_ERROR, "can't convert %Y into Integer", y); + } + if (!mrb_bigint_p(x)) x = mrb_bint_new_int(mrb, mrb_integer(x)); + if (!mrb_bigint_p(y)) y = mrb_bint_new_int(mrb, mrb_integer(y)); + return mrb_bint_lcm(mrb, x, y); + } +#endif + + if (!mrb_integer_p(y)) { + mrb_raisef(mrb, E_TYPE_ERROR, "can't convert %Y into Integer", y); + } + + a = mrb_integer(x); + b = mrb_integer(y); + + if (a == 0 || b == 0) return mrb_int_value(mrb, 0); + + /* Negation of MRB_INT_MIN is UB and the lcm with any non-zero + operand would not fit in mrb_int anyway. */ + if (a == MRB_INT_MIN || b == MRB_INT_MIN) mrb_int_overflow(mrb, "lcm"); + + gcd_val = mrb_int_gcd(a, b); + if (a < 0) a = -a; + if (b < 0) b = -b; + + mrb_int lcm_val; + if (mrb_int_mul_overflow(a / gcd_val, b, &lcm_val)) { + mrb_int_overflow(mrb, "lcm"); + } + return mrb_int_value(mrb, lcm_val); +} + /* * call-seq: * integer.pow(numeric) -> numeric @@ -59,29 +194,42 @@ mrb_value mrb_int_pow(mrb_state *mrb, mrb_value x, mrb_value y); static mrb_value int_powm(mrb_state *mrb, mrb_value x) { - mrb_value m; - mrb_int e, exp, mod, result = 1; + mrb_value m, e; + mrb_int exp, mod, result = 1; + mrb_bool neg_mod = FALSE; if (mrb_get_argc(mrb) == 1) { return mrb_int_pow(mrb, x, mrb_get_arg1(mrb)); } - mrb_get_args(mrb, "io", &e, &m); - if (e < 0) mrb_raise(mrb, E_ARGUMENT_ERROR, "int.pow(n,m): n must be positive"); + mrb_get_args(mrb, "oo", &e, &m); + if (!mrb_integer_p(e) && !mrb_bigint_p(e)) { + mrb_raise(mrb, E_TYPE_ERROR, "int.pow(n,m): 2nd argument not allowed unless 1st argument is an integer"); + } #ifdef MRB_USE_BIGINT if (mrb_bigint_p(x)) { return mrb_bint_powm(mrb, x, e, m); } - if (mrb_bigint_p(m)) { + if (mrb_bigint_p(e) || mrb_bigint_p(m)) { return mrb_bint_powm(mrb, mrb_bint_new_int(mrb, mrb_integer(x)), e, m); } #endif + exp = mrb_integer(e); + if (exp < 0) mrb_raise(mrb, E_ARGUMENT_ERROR, "int.pow(n,m): n must be positive"); if (!mrb_integer_p(m)) mrb_raise(mrb, E_TYPE_ERROR, "int.pow(n,m): m must be integer"); mod = mrb_integer(m); - if (mod < 0) mrb_raise(mrb, E_ARGUMENT_ERROR, "int.pow(n,m): m must be positive when 2nd argument specified"); if (mod == 0) mrb_int_zerodiv(mrb); + if (mod < 0) { + neg_mod = TRUE; + mod = -mod; + } if (mod == 1) return mrb_fixnum_value(0); + + /* Early return for zero base with positive exponent */ mrb_int base = mrb_integer(x); - exp = e; + if (base == 0 && exp > 0) { + return mrb_fixnum_value(0); + } + for (;;) { mrb_int tmp; if (exp & 1) { @@ -111,6 +259,12 @@ int_powm(mrb_state *mrb, mrb_value x) } base = tmp % mod; } + + /* Apply signed modulo adjustment for negative modulus */ + /* Ruby: result + m for non-zero result when m is negative */ + if (neg_mod && result != 0) { + result = result - mod; /* result - |m| = result + m (since m is negative) */ + } return mrb_int_value(mrb, result); } @@ -118,15 +272,15 @@ int_powm(mrb_state *mrb, mrb_value x) * call-seq: * digits(base = 10) -> array_of_integers * - * Returns an array of integers representing the +base+-radix - * digits of +self+; + * Returns an array of integers representing the `base`-radix + * digits of `self`; * the first element of the array represents the least significant digit: * * 12345.digits # => [5, 4, 3, 2, 1] * 12345.digits(7) # => [4, 6, 6, 0, 5] * 12345.digits(100) # => [45, 23, 1] * - * Raises an exception if +self+ is negative or +base+ is less than 2. + * Raises an exception if `self` is negative or `base` is less than 2. * */ @@ -158,8 +312,7 @@ int_digits(mrb_state *mrb, mrb_value self) } while (mrb_bint_cmp(mrb, x, zero) > 0) { - mrb_value q = mrb_bint_mod(mrb, x, bv); - mrb_ary_push(mrb, digits, q); + mrb_ary_push(mrb, digits, mrb_bint_mod(mrb, x, bv)); x = mrb_bint_div(mrb, x, bv); if (!mrb_bigint_p(x)) { mrb_int n = mrb_integer(x); @@ -194,7 +347,75 @@ int_digits(mrb_state *mrb, mrb_value self) return digits; } +/* + * call-seq: + * int.size -> int + * + * Returns the number of bytes in the machine representation of int + * (machine dependent). + * + * 1.size #=> 8 + * -1.size #=> 8 + * 2147483647.size #=> 8 + * (256**10 - 1).size #=> 12 + * (256**20 - 1).size #=> 20 + * (256**40 - 1).size #=> 40 + */ + +static mrb_value +int_size(mrb_state *mrb, mrb_value self) +{ + size_t size = sizeof(mrb_int); +#ifdef MRB_USE_BIGINT + if (mrb_bigint_p(self)) { + size = mrb_bint_memsize(self); + } +#endif + return mrb_fixnum_value((mrb_int)size); +} + +/* + * call-seq: + * int.even? -> true or false + * + * Returns `true` if `int` is an even number. + */ +static mrb_value +int_even(mrb_state *mrb, mrb_value self) +{ +#ifdef MRB_USE_BIGINT + if (mrb_bigint_p(self)) { + mrb_value and1 = mrb_bint_and(mrb, self, mrb_fixnum_value(1)); + if (mrb_integer(and1) == 0) return mrb_true_value(); + return mrb_false_value(); + } +#endif + return mrb_bool_value(mrb_integer(self) % 2 == 0); +} + +/* + * call-seq: + * int.odd? -> true or false + * + * Returns `true` if `int` is an odd number. + */ +static mrb_value +int_odd(mrb_state *mrb, mrb_value self) +{ + mrb_value even = int_even(mrb, self); + mrb_bool odd = !mrb_test(even); + return mrb_bool_value(odd); +} + #ifndef MRB_NO_FLOAT +/* + * call-seq: + * num.remainder(numeric) -> real + * + * `x.remainder(y)` means `x-y*(x/y).truncate`. + * + * See Numeric#divmod. + */ static mrb_value flo_remainder(mrb_state *mrb, mrb_value self) { @@ -208,33 +429,102 @@ flo_remainder(mrb_state *mrb, mrb_value self) } #endif +/* + * Integer square root implementation using the Babylonian method. + * This is an efficient integer-only algorithm to find the largest + * integer `x` such that `x*x <= n`. + */ +static mrb_int +isqrt(mrb_int n) +{ + mrb_assert(n >= 0); + if (n < 2) return n; + + mrb_int x = n; + mrb_int y = (x + 1) / 2; + + // Babylonian method (integer version) + while (y < x) { + x = y; + y = (x + n / x) / 2; + } + + return x; +} + +/* + * call-seq: + * Integer.sqrt(n) -> integer + * + * Returns the integer square root of the non-negative integer `n`, + * which is the largest integer `i` such that `i*i <= n`. + * + * Integer.sqrt(0) # => 0 + * Integer.sqrt(1) # => 1 + * Integer.sqrt(24) # => 4 + * Integer.sqrt(25) # => 5 + * Integer.sqrt(10**40) # => 10**20 + */ +static mrb_value +int_sqrt(mrb_state *mrb, mrb_value self) +{ + mrb_value arg = mrb_get_arg1(mrb); + + if (mrb_integer_p(arg)) { + mrb_int n = mrb_integer(arg); + if (n < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "non-negative integer required"); + } + return mrb_int_value(mrb, isqrt(n)); + } +#ifdef MRB_USE_BIGINT + else if (mrb_bigint_p(arg)) { + return mrb_bint_sqrt(mrb, arg); + } +#endif + else { + mrb_raise(mrb, E_TYPE_ERROR, "expected Integer"); + } +} + +static const mrb_mt_entry integer_ext_rom_entries[] = { + MRB_MT_ENTRY(int_remainder, MRB_SYM(remainder), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(int_powm, MRB_SYM(pow), MRB_ARGS_ARG(1,1)), + MRB_MT_ENTRY(int_digits, MRB_SYM(digits), MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(int_size, MRB_SYM(size), MRB_ARGS_NONE()), + MRB_MT_ENTRY(int_bit_length, MRB_SYM(bit_length), MRB_ARGS_NONE()), + MRB_MT_ENTRY(int_odd, MRB_SYM_Q(odd), MRB_ARGS_NONE()), + MRB_MT_ENTRY(int_even, MRB_SYM_Q(even), MRB_ARGS_NONE()), + MRB_MT_ENTRY(int_gcd, MRB_SYM(gcd), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(int_lcm, MRB_SYM(lcm), MRB_ARGS_REQ(1)), +}; + + void mrb_mruby_numeric_ext_gem_init(mrb_state* mrb) { - struct RClass *i = mrb->integer_class; - - mrb_define_alias(mrb, i, "modulo", "%"); - mrb_define_method(mrb, i, "remainder", int_remainder, MRB_ARGS_REQ(1)); + struct RClass *ic = mrb->integer_class; - mrb_define_method_id(mrb, i, MRB_SYM(pow), int_powm, MRB_ARGS_ARG(1,1)); - mrb_define_method_id(mrb, i, MRB_SYM(digits), int_digits, MRB_ARGS_OPT(1)); + mrb_define_alias_id(mrb, ic, MRB_SYM(modulo), MRB_OPSYM(mod)); + MRB_MT_INIT_ROM(mrb, ic, integer_ext_rom_entries); + mrb_define_class_method_id(mrb, ic, MRB_SYM(sqrt), int_sqrt, MRB_ARGS_REQ(1)); #ifndef MRB_NO_FLOAT - struct RClass *f = mrb->float_class; - - mrb_define_alias(mrb, f, "modulo", "%"); - mrb_define_method(mrb, f, "remainder", flo_remainder, MRB_ARGS_REQ(1)); - - mrb_define_const_id(mrb, mrb->float_class, MRB_SYM(RADIX), mrb_fixnum_value(MRB_FLT_RADIX)); - mrb_define_const_id(mrb, mrb->float_class, MRB_SYM(MANT_DIG), mrb_fixnum_value(MRB_FLT_MANT_DIG)); - mrb_define_const_id(mrb, mrb->float_class, MRB_SYM(EPSILON), mrb_float_value(mrb, MRB_FLT_EPSILON)); - mrb_define_const_id(mrb, mrb->float_class, MRB_SYM(DIG), mrb_fixnum_value(MRB_FLT_DIG)); - mrb_define_const_id(mrb, mrb->float_class, MRB_SYM(MIN_EXP), mrb_fixnum_value(MRB_FLT_MIN_EXP)); - mrb_define_const_id(mrb, mrb->float_class, MRB_SYM(MIN), mrb_float_value(mrb, MRB_FLT_MIN)); - mrb_define_const_id(mrb, mrb->float_class, MRB_SYM(MIN_10_EXP), mrb_fixnum_value(MRB_FLT_MIN_10_EXP)); - mrb_define_const_id(mrb, mrb->float_class, MRB_SYM(MAX_EXP), mrb_fixnum_value(MRB_FLT_MAX_EXP)); - mrb_define_const_id(mrb, mrb->float_class, MRB_SYM(MAX), mrb_float_value(mrb, MRB_FLT_MAX)); - mrb_define_const_id(mrb, mrb->float_class, MRB_SYM(MAX_10_EXP), mrb_fixnum_value(MRB_FLT_MAX_10_EXP)); + struct RClass *fc = mrb->float_class; + + mrb_define_alias_id(mrb, fc, MRB_SYM(modulo), MRB_OPSYM(mod)); + mrb_define_method_id(mrb, fc, MRB_SYM(remainder), flo_remainder, MRB_ARGS_REQ(1)); + + mrb_define_const_id(mrb, fc, MRB_SYM(RADIX), mrb_fixnum_value(MRB_FLT_RADIX)); + mrb_define_const_id(mrb, fc, MRB_SYM(MANT_DIG), mrb_fixnum_value(MRB_FLT_MANT_DIG)); + mrb_define_const_id(mrb, fc, MRB_SYM(EPSILON), mrb_float_value(mrb, MRB_FLT_EPSILON)); + mrb_define_const_id(mrb, fc, MRB_SYM(DIG), mrb_fixnum_value(MRB_FLT_DIG)); + mrb_define_const_id(mrb, fc, MRB_SYM(MIN_EXP), mrb_fixnum_value(MRB_FLT_MIN_EXP)); + mrb_define_const_id(mrb, fc, MRB_SYM(MIN), mrb_float_value(mrb, MRB_FLT_MIN)); + mrb_define_const_id(mrb, fc, MRB_SYM(MIN_10_EXP), mrb_fixnum_value(MRB_FLT_MIN_10_EXP)); + mrb_define_const_id(mrb, fc, MRB_SYM(MAX_EXP), mrb_fixnum_value(MRB_FLT_MAX_EXP)); + mrb_define_const_id(mrb, fc, MRB_SYM(MAX), mrb_float_value(mrb, MRB_FLT_MAX)); + mrb_define_const_id(mrb, fc, MRB_SYM(MAX_10_EXP), mrb_fixnum_value(MRB_FLT_MAX_10_EXP)); #endif /* MRB_NO_FLOAT */ } diff --git a/mrbgems/mruby-numeric-ext/test/numeric.rb b/mrbgems/mruby-numeric-ext/test/numeric.rb index 0da071f264..08eb4e9d0f 100644 --- a/mrbgems/mruby-numeric-ext/test/numeric.rb +++ b/mrbgems/mruby-numeric-ext/test/numeric.rb @@ -26,6 +26,30 @@ assert_equal(361, 9.pow(1024,1000)) end +assert('Integer#gcd') do + assert_equal(1, 2.gcd(3)) + assert_equal(5, 10.gcd(15)) + assert_equal(6, 24.gcd(18)) + assert_equal(7, 7.gcd(0)) + assert_equal(7, 0.gcd(7)) + assert_equal(0, 0.gcd(0)) + assert_equal(5, (-10).gcd(15)) + assert_equal(5, 10.gcd(-15)) + assert_equal(5, (-10).gcd(-15)) +end + +assert('Integer#lcm') do + assert_equal(6, 2.lcm(3)) + assert_equal(30, 10.lcm(15)) + assert_equal(72, 24.lcm(18)) + assert_equal(0, 7.lcm(0)) + assert_equal(0, 0.lcm(7)) + assert_equal(0, 0.lcm(0)) + assert_equal(30, (-10).lcm(15)) + assert_equal(30, 10.lcm(-15)) + assert_equal(30, (-10).lcm(-15)) +end + assert('Integer#ceildiv') do assert_equal(0, 0.ceildiv(3)) assert_equal(1, 1.ceildiv(3)) @@ -49,3 +73,65 @@ assert_equal(-8, (-2).pow(3)) # assert_equal(361, 9.pow(1024,1000)) end + +assert('Integer#even?') do + assert_true(0.even?) + assert_true(2.even?) + assert_true(-2.even?) + assert_false(1.even?) + assert_false(-1.even?) + + # assert_true((10**100).even?) + # assert_true((-10**100).even?) + # assert_false((10**100+1).even?) + # assert_false((-10**100-1).even?) +end + +assert('Integer#odd?') do + assert_false(0.odd?) + assert_false(2.odd?) + assert_false(-2.odd?) + assert_true(1.odd?) + assert_true(-1.odd?) + + # assert_false((10**100).odd?) + # assert_false((-10**100).odd?) + # assert_true((10**100+1).odd?) + # assert_true((-10**100-1).odd?) +end + +assert('Integer#digits') do + assert_equal([5, 4, 3, 2, 1], 12345.digits) + assert_equal([4, 6, 6, 0, 5], 12345.digits(7)) + assert_equal([45, 23, 1], 12345.digits(100)) +end + +assert('Integer.sqrt') do + assert_equal(4, Integer.sqrt(16)) + assert_equal(10, Integer.sqrt(100)) + assert_equal(85, Integer.sqrt(7244)) +end + +assert('Integer#bit_length') do + # zero + assert_equal 0, 0.bit_length + + # positives + assert_equal 1, 1.bit_length + assert_equal 2, 2.bit_length + assert_equal 2, 3.bit_length + assert_equal 3, 4.bit_length + assert_equal 3, 5.bit_length + + # negatives (use ~n semantics) + assert_equal 0, (-1).bit_length + assert_equal 1, (-2).bit_length + assert_equal 2, (-3).bit_length + assert_equal 2, (-4).bit_length + assert_equal 3, (-5).bit_length + + # bigint cases may be enabled depending on config + # assert_equal 100, (2**100 - 1).bit_length + # assert_equal 101, (2**100).bit_length + # assert_equal 0, (-1).bit_length +end diff --git a/mrbgems/mruby-object-ext/README.md b/mrbgems/mruby-object-ext/README.md new file mode 100644 index 0000000000..078e4741d2 --- /dev/null +++ b/mrbgems/mruby-object-ext/README.md @@ -0,0 +1,144 @@ +# mruby-object-ext + +This mrbgem provides several extension methods to core objects in mruby, enhancing their functionality and providing useful utilities for common programming patterns. + +## Core Extension Methods + +This gem extends core mruby objects with the following methods: + +### `Kernel#yield_self` (aliased as `then`) + +Yields the receiver (self) to the given block and returns the result of the block. This is useful for chaining operations in a readable way. + +**Signature:** + +```ruby +obj.yield_self {|current_obj| ... } > an_object +obj.then {|current_obj| ... } -> an_object +``` + +**Example:** + +```ruby +result = "hello" + .yield_self {|s| s.upcase } + .then {|s| s + " WORLD" } + +puts result #=> "HELLO WORLD" + +# Another example with a different data type +(1..5) + .yield_self {|range| range.to_a } + .then {|arr| arr.select(&:even?) } + .then {|evens| evens.map {|n| n * n } } + .then {|squares| puts squares.inspect } #=> [4, 16] +``` + +### `Kernel#tap` + +Yields the receiver (self) to the given block and then returns the receiver itself. This method is primarily used for "tapping into" a method chain to perform operations on intermediate results without affecting the final result of the chain. + +**Signature:** + +```ruby +obj.tap {|current_obj| ... } -> obj +``` + +**Example:** + +```ruby +(1..10) + .to_a + .tap {|arr| puts "Array: #{arr.inspect}" } + .select(&:even?) + .tap {|evens| puts "Evens: #{evens.inspect}" } + .map {|n| n * n } + .tap {|squares| puts "Squares: #{squares.inspect}" } +# Output: +# Array: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] +# Evens: [2, 4, 6, 8, 10] +# Squares: [4, 16, 36, 64, 100] +``` + +### `Kernel#itself` + +Returns the receiver (self). While simple, it can be useful in certain chaining or meta-programming scenarios. + +**Signature:** + +```ruby +obj.itself -> obj +``` + +**Example:** + +```ruby +string = "my string" +puts string.itself.object_id == string.object_id #=> true + +# Can be useful with methods expecting a callable +data = { a: 1, b: 2 } +transformed_data = data.transform_values(&:itself) # No change if values are already what you want +puts transformed_data #=> {:a=>1, :b=>2} (in mruby hash output format) +``` + +### `BasicObject#instance_exec` + +Executes the given block within the context of the receiver (obj). This means that inside the block, `self` is set to `obj`, giving the code access to `obj`'s instance variables and private methods. Arguments passed to `instance_exec` are passed as block parameters. + +**Signature:** + +```ruby +obj.instance_exec(arg...) {|var...| block } -> result_of_block +``` + +**Example:** + +```ruby +class MyClass + def initialize(value) + @secret = value + end + + def reveal_secret(multiplier, &block) + # self here is MyClass instance + instance_exec(multiplier, &block) + end +end + +instance = MyClass.new(10) +result = instance.reveal_secret(5) {|m| @secret * m } +puts result #=> 50 + +# Example without a class context +num = 42 +num.instance_exec("Number is: ") {|prefix| puts prefix + self.to_s } #=> Number is: 42 +``` + +## `NilClass` Extensions + +This gem also adds convenient type conversion methods to `NilClass`: + +- **`nil.to_a`**: Returns an empty Array (`[]`). + + ```ruby + p nil.to_a #=> [] + ``` + +- **`nil.to_f`**: Returns `0.0`. + + ```ruby + p nil.to_f #=> 0.0 + ``` + +- **`nil.to_h`**: Returns an empty Hash (`{}`). + + ```ruby + p nil.to_h #=> {} + ``` + +- **`nil.to_i`**: Returns `0`. + + ```ruby + p nil.to_i #=> 0 + ``` diff --git a/mrbgems/mruby-object-ext/mrblib/object.rb b/mrbgems/mruby-object-ext/mrblib/object.rb index f014df469f..065fb22aef 100644 --- a/mrbgems/mruby-object-ext/mrblib/object.rb +++ b/mrbgems/mruby-object-ext/mrblib/object.rb @@ -3,12 +3,12 @@ module Kernel # obj.yield_self {|_obj|...} -> an_object # obj.then {|_obj|...} -> an_object # - # Yields obj and returns the result. + # Yields *obj* and returns the result. # # 'my string'.yield_self {|s|s.upcase} #=> "MY STRING" # def yield_self(&block) - return to_enum :yield_self unless block + return to_enum(:yield_self) unless block block.call(self) end alias then yield_self @@ -17,7 +17,7 @@ def yield_self(&block) # call-seq: # obj.tap{|x|...} -> obj # - # Yields x to the block, and then returns x. + # Yields `x` to the block, and then returns `x`. # The primary purpose of this method is to "tap into" a method chain, # in order to perform operations on intermediate results within the chain. # diff --git a/mrbgems/mruby-object-ext/src/object.c b/mrbgems/mruby-object-ext/src/object.c index 69f1763cef..7ffd8215f0 100644 --- a/mrbgems/mruby-object-ext/src/object.c +++ b/mrbgems/mruby-object-ext/src/object.c @@ -3,7 +3,7 @@ #include #include #include -#include +#include /* * call-seq: @@ -60,27 +60,24 @@ nil_to_i(mrb_state *mrb, mrb_value obj) } /* + * Document-method: Kernel#itself + * * call-seq: * obj.itself -> an_object * - * Returns obj. + * Returns *obj*. * * string = 'my string' #=> "my string" * string.itself.object_id == string.object_id #=> true * */ -static mrb_value -mrb_f_itself(mrb_state *mrb, mrb_value self) -{ - return self; -} /* * call-seq: * obj.instance_exec(arg...) {|var...| block } -> obj * * Executes the given block within the context of the receiver - * (_obj_). In order to set the context, the variable +self+ is set + * (_obj_). In order to set the context, the variable `self` is set * to _obj_ while the code is executing, giving the code access to * _obj_'s instance variables. Arguments are passed as block parameters. * @@ -94,37 +91,34 @@ mrb_f_itself(mrb_state *mrb, mrb_value self) */ static mrb_value -mrb_obj_instance_exec(mrb_state *mrb, mrb_value self) +obj_instance_exec(mrb_state *mrb, mrb_value self) { - const mrb_value *argv; - mrb_int argc; - mrb_value blk; - struct RClass *c; - - mrb_get_args(mrb, "*&!", &argv, &argc, &blk); - c = mrb_singleton_class_ptr(mrb, self); - if (mrb->c->ci->cci > 0) { - return mrb_yield_with_class(mrb, blk, argc, argv, self, c); - } - mrb_vm_ci_target_class_set(mrb->c->ci, c); - return mrb_yield_cont(mrb, blk, self, argc, argv); + return mrb_object_exec(mrb, self, mrb_singleton_class_ptr(mrb, self)); } +static const mrb_mt_entry nil_ext_rom_entries[] = { + MRB_MT_ENTRY(nil_to_a, MRB_SYM(to_a), MRB_ARGS_NONE()), + MRB_MT_ENTRY(nil_to_h, MRB_SYM(to_h), MRB_ARGS_NONE()), + MRB_MT_ENTRY(nil_to_i, MRB_SYM(to_i), MRB_ARGS_NONE()), +#ifndef MRB_NO_FLOAT + MRB_MT_ENTRY(nil_to_f, MRB_SYM(to_f), MRB_ARGS_NONE()), +#endif +}; + +static const mrb_mt_entry bob_ext_rom_entries[] = { + MRB_MT_ENTRY(obj_instance_exec, MRB_SYM(instance_exec), MRB_ARGS_ANY()|MRB_ARGS_BLOCK()), +}; + void mrb_mruby_object_ext_gem_init(mrb_state* mrb) { struct RClass * n = mrb->nil_class; - mrb_define_method(mrb, n, "to_a", nil_to_a, MRB_ARGS_NONE()); -#ifndef MRB_NO_FLOAT - mrb_define_method(mrb, n, "to_f", nil_to_f, MRB_ARGS_NONE()); -#endif - mrb_define_method(mrb, n, "to_h", nil_to_h, MRB_ARGS_NONE()); - mrb_define_method(mrb, n, "to_i", nil_to_i, MRB_ARGS_NONE()); + MRB_MT_INIT_ROM(mrb, n, nil_ext_rom_entries); - mrb_define_method(mrb, mrb->kernel_module, "itself", mrb_f_itself, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, mrb->kernel_module, MRB_SYM(itself), mrb_obj_itself, MRB_ARGS_NONE()); - mrb_define_method(mrb, mrb_class_get_id(mrb, MRB_SYM(BasicObject)), "instance_exec", mrb_obj_instance_exec, MRB_ARGS_ANY() | MRB_ARGS_BLOCK()); + MRB_MT_INIT_ROM(mrb, mrb_class_get_id(mrb, MRB_SYM(BasicObject)), bob_ext_rom_entries); } void diff --git a/mrbgems/mruby-object-ext/test/object.rb b/mrbgems/mruby-object-ext/test/object.rb index f0742f8ce4..3b7dd9c4a0 100644 --- a/mrbgems/mruby-object-ext/test/object.rb +++ b/mrbgems/mruby-object-ext/test/object.rb @@ -35,7 +35,7 @@ class B assert_kind_of Class, A::B ensure - Object.remove_const :A + Object.__send__(:remove_const,:A) end begin @@ -48,6 +48,20 @@ module B assert_kind_of Module, A::B ensure - Object.remove_const :A + Object.__send__(:remove_const,:A) end end + +assert('argument forwarding via instance_exec') do + assert_equal [[], {}, nil], instance_exec { |*args, **kw, &blk| [args, kw, blk] } + assert_equal [[1, 2, 3], {}, nil], instance_exec(1, 2, 3) { |*args, **kw, &blk| [args, kw, blk] } + assert_equal [[], { a: 1 }, nil], instance_exec(a: 1) { |*args, **kw, &blk| [args, kw, blk] } +end + +assert('argument forwarding via instance_exec from c') do + assert_equal [[], {}, nil], instance_exec_from_c { |*args, **kw, &blk| [args, kw, blk] } + assert_equal [[1, 2, 3], {}, nil], instance_exec_from_c(1, 2, 3) { |*args, **kw, &blk| [args, kw, blk] } + + # currently there is no easy way to call a method from C passing keyword arguments + #assert_equal [[], { a: 1 }, nil], instance_exec_from_c(a: 1) { |*args, **kw, &blk| [args, kw, blk] } +end diff --git a/mrbgems/mruby-object-ext/test/object_ext.c b/mrbgems/mruby-object-ext/test/object_ext.c new file mode 100644 index 0000000000..46274541d7 --- /dev/null +++ b/mrbgems/mruby-object-ext/test/object_ext.c @@ -0,0 +1,17 @@ +#include + +static mrb_value +obj_instance_exec_from_c(mrb_state *mrb, mrb_value self) +{ + mrb_int argc; + const mrb_value *argv; + mrb_value blk; + mrb_get_args(mrb, "*&!", &argv, &argc, &blk); + return mrb_funcall_with_block(mrb, self, mrb_intern_lit(mrb, "instance_exec"), argc, argv, blk); +} + +void +mrb_mruby_object_ext_gem_test(mrb_state *mrb) +{ + mrb_define_method(mrb, mrb->kernel_module, "instance_exec_from_c", obj_instance_exec_from_c, MRB_ARGS_ANY()); +} diff --git a/mrbgems/mruby-objectspace/README.md b/mrbgems/mruby-objectspace/README.md new file mode 100644 index 0000000000..f5b52dacc6 --- /dev/null +++ b/mrbgems/mruby-objectspace/README.md @@ -0,0 +1,59 @@ +# mruby-objectspace + +This mrbgem provides the `ObjectSpace` module for mruby, allowing introspection of live objects within the mruby environment. + +## Purpose + +The `ObjectSpace` module offers methods to count and iterate over objects currently allocated by the mruby interpreter. This can be useful for debugging, memory profiling, or understanding the internal state of your mruby application. + +## Functionality + +### `ObjectSpace.count_objects([result_hash]) -> Hash` + +Counts all objects currently alive in the mruby process, categorized by their types. + +- **`result_hash` (optional `Hash`):** If provided, this hash will be cleared and populated with the results. This can be useful to avoid allocating a new hash during sensitive operations like memory profiling, potentially reducing probe effect. + +- **Returns:** A `Hash` where keys are symbols representing object types (e.g., `:T_OBJECT`, `:T_CLASS`, `:T_STRING`) or, in some cases, internal type integers. The values are the counts of objects for each type. The hash also includes special keys: + - `:TOTAL`: The total number of objects (including free slots). + - `:FREE`: The number of free (unallocated) object slots. + +**Example of the returned hash structure:** + +```ruby +{ + :TOTAL=>10000, + :FREE=>3011, + :T_OBJECT=>6, + :T_CLASS=>404, + :T_STRING=>500, + # ... other types +} +``` + +### `ObjectSpace.each_object([module]) {|obj| ... } -> Fixnum` + +Iterates over each live object in the mruby process, calling the provided block once for each object. + +- **`module` (optional `Module` or `Class`):** If this argument is provided, the block will only be called for objects that are instances of the given `module` or one of its subclasses. + +- **Block (`{|obj| ... }`):** A block that will be executed with each object (`obj`) found. + +- **Returns:** A `Fixnum` representing the total number of objects iterated over. + +**Example Usage:** + +```ruby +# Iterate over all objects +total_objects = ObjectSpace.each_object do |obj| + # Do something with obj + p obj +end +puts "Total objects found: #{total_objects}" + +# Iterate only over Array objects +array_count = ObjectSpace.each_object(Array) do |arr| + p arr +end +puts "Found #{array_count} Arrays." +``` diff --git a/mrbgems/mruby-objectspace/src/mruby_objectspace.c b/mrbgems/mruby-objectspace/src/mruby_objectspace.c index 4c7708111d..eeb23cc020 100644 --- a/mrbgems/mruby-objectspace/src/mruby_objectspace.c +++ b/mrbgems/mruby-objectspace/src/mruby_objectspace.c @@ -10,7 +10,6 @@ #include #include #include -#include struct os_count_struct { mrb_int total; @@ -50,7 +49,7 @@ os_count_object_type(mrb_state *mrb, struct RBasic *obj, void *data) * # ... * } * - * If the optional argument +result_hash+ is given, + * If the optional argument `result_hash` is given, * it is overwritten and returned. This is intended to avoid probe effect. * */ @@ -157,9 +156,9 @@ os_each_object_cb(mrb_state *mrb, struct RBasic *obj, void *ud) * * Calls the block once for each object in this Ruby process. * Returns the number of objects found. - * If the optional argument +module+ is given, + * If the optional argument `module` is given, * calls the block for only those classes or modules - * that match (or are a subclass of) +module+. + * that match (or are a subclass of) `module`. * * If no block is given, ArgumentError is raised. * diff --git a/mrbgems/mruby-os-memsize/README.md b/mrbgems/mruby-os-memsize/README.md new file mode 100644 index 0000000000..766ca694ec --- /dev/null +++ b/mrbgems/mruby-os-memsize/README.md @@ -0,0 +1,75 @@ +# mruby-os-memsize + +## Description + +The `mruby-os-memsize` mrbgem extends the `ObjectSpace` module in mruby, providing tools to inspect the approximate amount of heap memory allocated for objects. This can be useful for debugging memory usage and understanding the memory footprint of your mruby application. + +## Usage / API + +This gem adds two class methods to the `ObjectSpace` module: + +### `ObjectSpace.memsize_of(obj) -> Numeric` + +Returns the approximate amount of heap memory allocated for the given `obj` in bytes. + +- The returned value is platform-dependent, as it's based on the `size_t` type of the underlying C implementation. +- Immediate values (e.g., integers, booleans, symbols) and some small, embedded objects (like short strings or small arrays that fit directly into an object pointer) will typically report a size of 0 or a very small, fixed size representing the object pointer itself. +- The accuracy of the reported size can vary depending on the object's type and internal structure. + +**Example:** + +```ruby +str = "This is a test string" +array = [1, 2, 3, 4, 5] + +puts "Size of string: #{ObjectSpace.memsize_of(str)} bytes" +puts "Size of array: #{ObjectSpace.memsize_of(array)} bytes" +puts "Size of 123: #{ObjectSpace.memsize_of(123)}" # Likely 0 +puts "Size of :symbol: #{ObjectSpace.memsize_of(:symbol)}" # Likely 0 + +class MyClass + def initialize + @data = "some internal data" + end +end +instance = MyClass.new +puts "Size of MyClass instance: #{ObjectSpace.memsize_of(instance)}" +``` + +### `ObjectSpace.memsize_of_all([klass]) -> Numeric` + +Returns the total approximate heap memory allocated for all living objects in the mruby environment. + +- If an optional `klass` argument (a Class object) is provided, it returns the total memory size only for instances of that specific class. + +**Example:** + +```ruby +# Get total memory size of all objects +total_memory = ObjectSpace.memsize_of_all +puts "Total heap memory used by all objects: #{total_memory} bytes" + +# Get total memory size for all String objects +total_string_memory = ObjectSpace.memsize_of_all(String) +puts "Total heap memory used by Strings: #{total_string_memory} bytes" + +class Person + def initialize(name) + @name = name + end +end + +p1 = Person.new("Alice") +p2 = Person.new("Bob") + +total_person_memory = ObjectSpace.memsize_of_all(Person) +puts "Total heap memory used by Person instances: #{total_person_memory} bytes" +``` + +## License + +This mrbgem is released under the MIT License. (See `mrbgem.rake` for details within the mruby distribution). + +## Author + +mruby developers diff --git a/mrbgems/mruby-os-memsize/mrbgem.rake b/mrbgems/mruby-os-memsize/mrbgem.rake index b5c163bba6..8bd04d9515 100644 --- a/mrbgems/mruby-os-memsize/mrbgem.rake +++ b/mrbgems/mruby-os-memsize/mrbgem.rake @@ -3,8 +3,8 @@ MRuby::Gem::Specification.new('mruby-os-memsize') do |spec| spec.author = 'mruby developers' spec.summary = 'ObjectSpace memsize_of method' - spec.add_dependency('mruby-objectspace') - spec.add_test_dependency('mruby-metaprog') - spec.add_test_dependency('mruby-method') - spec.add_test_dependency('mruby-fiber') + spec.add_dependency('mruby-objectspace', :core => 'mruby-objectspace') + spec.add_test_dependency('mruby-metaprog', :core => 'mruby-metaprog') + spec.add_test_dependency('mruby-method', :core => 'mruby-method') + spec.add_test_dependency('mruby-fiber', :core => 'mruby-fiber') end diff --git a/mrbgems/mruby-os-memsize/src/memsize.c b/mrbgems/mruby-os-memsize/src/memsize.c index c35a380064..5420dbde4f 100644 --- a/mrbgems/mruby-os-memsize/src/memsize.c +++ b/mrbgems/mruby-os-memsize/src/memsize.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -15,14 +16,23 @@ static size_t os_memsize_of_irep(mrb_state* state, const struct mrb_irep *irep) { - size_t size; - int i; + size_t size = (irep->slen * sizeof(mrb_sym)) + + (irep->plen * sizeof(mrb_irep_pool)) + + (irep->ilen * sizeof(mrb_code)) + + (irep->rlen * sizeof(struct mrb_irep*)); - size = (irep->slen * sizeof(mrb_sym)) + - (irep->plen * sizeof(mrb_code)) + - (irep->ilen * sizeof(mrb_code)); + for (int i = 0; i < irep->plen; i++) { + const mrb_irep_pool *p = &irep->pool[i]; + if ((p->tt & IREP_TT_NFLAG) == 0) { /* string pool value */ + size += (p->tt>>2); + } + else if (p->tt == IREP_TT_BIGINT) { /* bigint pool value */ + size += p->u.str[0]; + } + } - for(i = 0; i < irep->rlen; i++) { + for (int i = 0; i < irep->rlen; i++) { + size += sizeof(struct mrb_irep); /* size of irep structure */ size += os_memsize_of_irep(state, irep->reps[i]); } return size; @@ -31,13 +41,12 @@ os_memsize_of_irep(mrb_state* state, const struct mrb_irep *irep) static size_t os_memsize_of_method(mrb_state* mrb, mrb_value method_obj) { - size_t size; mrb_value proc_value = mrb_obj_iv_get(mrb, mrb_obj_ptr(method_obj), mrb_intern_lit(mrb, "_proc")); if (mrb_nil_p(proc_value)) return 0; struct RProc *proc = mrb_proc_ptr(proc_value); - size = sizeof(struct RProc); + size_t size = sizeof(struct RProc); if (!MRB_PROC_CFUNC_P(proc)) size += os_memsize_of_irep(mrb, proc->body.irep); return size; } @@ -72,7 +81,7 @@ os_memsize_of_object(mrb_state* mrb, mrb_value obj) case MRB_TT_MODULE: case MRB_TT_SCLASS: case MRB_TT_ICLASS: - size += mrb_gc_mark_mt_size(mrb, mrb_class_ptr(obj)) * sizeof(mrb_method_t); + size += mrb_class_mt_memsize(mrb, mrb_class_ptr(obj)); /* fall through */ case MRB_TT_EXCEPTION: case MRB_TT_OBJECT: { @@ -95,14 +104,16 @@ os_memsize_of_object(mrb_state* mrb, mrb_value obj) /* Arrays that do not fit within an RArray perform a heap allocation * storing an array of pointers to the original objects*/ size += mrb_objspace_page_slot_size(); - if(len > MRB_ARY_EMBED_LEN_MAX) size += sizeof(mrb_value *) * len; + if (len > MRB_ARY_EMBED_LEN_MAX) + size += sizeof(mrb_value*) * len; break; } case MRB_TT_PROC: { struct RProc* proc = mrb_proc_ptr(obj); size += mrb_objspace_page_slot_size(); size += MRB_ENV_LEN(proc->e.env) * sizeof(mrb_value); - if(!MRB_PROC_CFUNC_P(proc)) size += os_memsize_of_irep(mrb, proc->body.irep); + if (!MRB_PROC_CFUNC_P(proc)) + size += os_memsize_of_irep(mrb, proc->body.irep); break; } case MRB_TT_RANGE: @@ -112,7 +123,7 @@ os_memsize_of_object(mrb_state* mrb, mrb_value obj) #endif break; case MRB_TT_FIBER: { - struct RFiber* f = (struct RFiber *)mrb_ptr(obj); + struct RFiber* f = (struct RFiber*)mrb_ptr(obj); ptrdiff_t stack_size = f->cxt->stend - f->cxt->stbase; ptrdiff_t ci_size = f->cxt->ciend - f->cxt->cibase; @@ -145,15 +156,23 @@ os_memsize_of_object(mrb_state* mrb, mrb_value obj) size += mrb_objspace_page_slot_size(); #endif break; +#if defined(MRB_USE_SET) + case MRB_TT_SET: + size += mrb_set_memsize(obj); + break; +#endif case MRB_TT_BIGINT: #if defined(MRB_USE_BIGINT) size += mrb_bint_memsize(obj); /* fall through */ #endif - case MRB_TT_DATA: + case MRB_TT_CDATA: case MRB_TT_ISTRUCT: size += mrb_objspace_page_slot_size(); break; + case MRB_TT_BACKTRACE: + size += ((struct RBacktrace*)mrb_obj_ptr(obj))->len * sizeof(struct mrb_backtrace_location); + break; /* zero heap size types. * immediate VM stack values, contained within mrb_state, or on C stack */ case MRB_TT_TRUE: @@ -191,12 +210,9 @@ os_memsize_of_object(mrb_state* mrb, mrb_value obj) static mrb_value os_memsize_of(mrb_state *mrb, mrb_value self) { - size_t total; - mrb_value obj; - - mrb_get_args(mrb, "o", &obj); + mrb_value obj = mrb_get_arg1(mrb); - total = os_memsize_of_object(mrb, obj); + size_t total = os_memsize_of_object(mrb, obj); return mrb_fixnum_value((mrb_int)total); } @@ -208,7 +224,7 @@ struct os_memsize_of_all_cb_data { static int os_memsize_of_all_cb(mrb_state *mrb, struct RBasic *obj, void *d) { - struct os_memsize_of_all_cb_data *data = (struct os_memsize_of_all_cb_data *)d; + struct os_memsize_of_all_cb_data *data = (struct os_memsize_of_all_cb_data*)d; switch (obj->tt) { case MRB_TT_FREE: case MRB_TT_ENV: case MRB_TT_BREAK: case MRB_TT_ICLASS: diff --git a/mrbgems/mruby-pack/README.md b/mrbgems/mruby-pack/README.md index 712cf8e1c7..9ecd2e0805 100644 --- a/mrbgems/mruby-pack/README.md +++ b/mrbgems/mruby-pack/README.md @@ -1,47 +1,120 @@ -# mruby-pack (pack / unpack) +# mruby-pack -mruby-pack provides `Array#pack` and `String#unpack` for mruby. +The `mruby-pack` mrbgem enhances mruby by providing `Array#pack` and `String#unpack` methods. These methods facilitate the conversion between Ruby data types (specifically arrays and strings) and binary data representations, offering a powerful way to handle binary data. ## Installation Add the line below into your build configuration: -``` - conf.gem :core => 'mruby-pack' +```ruby +conf.gem :core => 'mruby-pack' ``` There is no dependency on other mrbgems. -## Supported template string - -- A : arbitrary binary string (space padded, count is width) -- a : arbitrary binary string (null padded, count is width) -- C : 8-bit unsigned (unsigned char) -- c : 8-bit signed (signed char) -- D, d: 64-bit float, native format -- E : 64-bit float, little endian byte order -- e : 32-bit float, little endian byte order -- F, f: 32-bit float, native format -- G : 64-bit float, network (big-endian) byte order -- g : 32-bit float, network (big-endian) byte order -- H : hex string (high nibble first) -- h : hex string (low nibble first) -- I : unsigned integer, native endian (`unsigned int` in C) -- i : signed integer, native endian (`int` in C) -- L : 32-bit unsigned, native endian (`uint32_t`) -- l : 32-bit signed, native endian (`int32_t`) -- m : base64 encoded string (see RFC 2045, count is width) -- N : 32-bit unsigned, network (big-endian) byte order -- n : 16-bit unsigned, network (big-endian) byte order -- Q : 64-bit unsigned, native endian (`uint64_t`) -- q : 64-bit signed, native endian (`int64_t`) -- S : 16-bit unsigned, native endian (`uint16_t`) -- s : 16-bit signed, native endian (`int16_t`) -- U : UTF-8 character -- V : 32-bit unsigned, VAX (little-endian) byte order -- v : 16-bit unsigned, VAX (little-endian) byte order -- x : null byte -- Z : same as "a", except that null is added with * +## `Array#pack` + +The `Array#pack` method converts an array into a binary string. It takes a template string as an argument, which dictates how each element in the array should be converted and packed into the resulting binary string. The template string consists of directives, where each directive specifies the type and format of the data to be packed. + +**Example:** + +```ruby +arr = [65, 66, 67] +binary_string = arr.pack("ccc") # Packs three 8-bit signed integers +# binary_string will be "ABC" + +arr2 = [72, 101, 108, 108, 111] +binary_string2 = arr2.pack("C*") # Packs all elements as 8-bit unsigned integers +# binary_string2 will be "Hello" + +arr3 = [0x1234, 0x5678] +binary_string3 = arr3.pack("S>2") # Packs two 16-bit unsigned integers, big-endian +# binary_string3 will be "\x12\x34\x56\x78" (depending on native endianness if S is used without > or <) +# Using "n2" would be more explicit for network (big-endian) byte order: +# binary_string3 = arr3.pack("n2") # Packs two 16-bit unsigned, network (big-endian) byte order +# binary_string3 will be "\x12\x34\x56\x78" +``` + +## `String#unpack` + +The `String#unpack` method performs the reverse operation of `Array#pack`. It takes a binary string and a template string as input. It extracts data from the binary string according to the directives in the template and converts it into an array of Ruby objects. + +**Example:** + +```ruby +binary_data = "test" +arr = binary_data.unpack("aaaa") +# arr will be ["t", "e", "s", "t"] + +binary_data2 = "\x01\x02\x03" +arr2 = binary_data2.unpack("C3") # Unpacks three 8-bit unsigned integers +# arr2 will be [1, 2, 3] + +binary_data3 = "Hello\x00World" +arr3 = binary_data3.unpack("Z*Z*") # Unpacks two null-terminated strings +# arr3 will be ["Hello", "World"] + +# BER (Basic Encoding Rules) example +arr4 = [127, 128, 16383] +binary_data4 = arr4.pack("w*") # BER-encodes integers of varying sizes +# binary_data4 will contain BER-compressed data +arr4_unpacked = binary_data4.unpack("w*") # Decodes BER data back to integers +# arr4_unpacked will be [127, 128, 16383] +``` + +## Supported Template Directives + +The template string is a sequence of characters that specify the type and format of the data to be packed or unpacked. Each directive can be followed by a count (e.g., "C4" for four 8-bit unsigned integers) or `*` to consume all remaining items/bytes. + +Here is a list of supported template characters and their meanings: + +| Directive | Description | +| --------- | ------------------------------------------------------------------------------------------------------- | +| `A` | Arbitrary binary string (space padded, count is width) | +| `a` | Arbitrary binary string (null padded, count is width) | +| `B` | Bit string (descending bit order) | +| `b` | Bit string (ascending bit order) | +| `C` | 8-bit unsigned integer (unsigned char) | +| `c` | 8-bit signed integer (signed char) | +| `D`, `d` | Double-precision float, native format (64-bit) | +| `E` | Double-precision float, little-endian (64-bit) | +| `e` | Single-precision float, little-endian (32-bit) | +| `F`, `f` | Single-precision float, native format (32-bit) | +| `G` | Double-precision float, network (big-endian) byte order (64-bit) | +| `g` | Single-precision float, network (big-endian) byte order (32-bit) | +| `H` | Hex string (high nibble first) | +| `h` | Hex string (low nibble first) | +| `I` | Unsigned integer, native endian (`unsigned int` in C) | +| `i` | Signed integer, native endian (`int` in C) | +| `J` | Unsigned integer, native endian (`uintptr_t` in C) | +| `j` | Signed integer, native endian (`intptr_t` in C) | +| `L` | 32-bit unsigned integer, native endian (`uint32_t`) | +| `l` | 32-bit signed integer, native endian (`int32_t`) | +| `m` | Base64 encoded string (see RFC 2045, count is input bytes for pack, output chars for unpack) | +| `N` | 32-bit unsigned integer, network (big-endian) byte order | +| `n` | 16-bit unsigned integer, network (big-endian) byte order | +| `Q` | 64-bit unsigned integer, native endian (`uint64_t`) | +| `q` | 64-bit signed integer, native endian (`int64_t`) | +| `S` | 16-bit unsigned integer, native endian (`uint16_t`) (Use `S>` for big-endian, `S<` for little-endian) | +| `s` | 16-bit signed integer, native endian (`int16_t`) (Use `s>` for big-endian, `s<` for little-endian) | +| `u` | UU-encoded string (Unix-to-Unix encoding) | +| `U` | UTF-8 character | +| `V` | 32-bit unsigned integer, VAX (little-endian) byte order | +| `v` | 16-bit unsigned integer, VAX (little-endian) byte order | +| `w` | BER-compressed integer (variable length encoding) | +| `x` | Null byte (skip forward one byte) | +| `X` | Back up one byte | +| `Z` | Null-terminated string (when unpacking, reads until NULL; when packing, appends a NULL if count is `*`) | +| `@` | Moves to absolute position (offset from the beginning of the string) | + +**Modifiers:** + +Some directives can be followed by modifiers that affect their behavior: + +- `>`: Big-endian (for `S`, `s`, `L`, `l`, `Q`, `q`, etc., when native order is not desired). +- `<`: Little-endian (for `S`, `s`, `L`, `l`, `Q`, `q`, etc., when native order is not desired). + +These modifiers are typically used with integer and float types to specify byte order explicitly. For network byte order (big-endian), directives like `n` (16-bit) and `N` (32-bit) are commonly used. ## License diff --git a/mrbgems/mruby-pack/src/pack.c b/mrbgems/mruby-pack/src/pack.c index eb00c41b23..5f466924d5 100644 --- a/mrbgems/mruby-pack/src/pack.c +++ b/mrbgems/mruby-pack/src/pack.c @@ -3,16 +3,16 @@ */ #include -#include "mruby/error.h" -#include "mruby/array.h" -#include "mruby/class.h" -#include "mruby/numeric.h" -#include "mruby/string.h" -#include "mruby/variable.h" -#include "mruby/endian.h" +#include +#include +#include +#include +#include +#include +#include +#include #include -#include #include #define INT_OVERFLOW_P(n) ((n) < MRB_INT_MIN || (n) > MRB_INT_MAX) @@ -36,17 +36,18 @@ enum pack_dir { //PACK_DIR_VAX, PACK_DIR_BER, /* w */ PACK_DIR_UTF8, /* U */ - //PACK_DIR_BER, PACK_DIR_DOUBLE, /* E */ PACK_DIR_FLOAT, /* f */ PACK_DIR_STR, /* A */ PACK_DIR_HEX, /* h */ + PACK_DIR_BSTR, /* b */ PACK_DIR_BASE64, /* m */ + PACK_DIR_UU, /* u */ PACK_DIR_QENC, /* M */ PACK_DIR_NUL, /* x */ PACK_DIR_BACK, /* X */ PACK_DIR_ABS, /* @ */ - PACK_DIR_INVALID + PACK_DIR_NONE, /* - */ }; enum pack_type { @@ -69,30 +70,283 @@ enum pack_type { #define PACK_BASE64_IGNORE 0xff #define PACK_BASE64_PADDING 0xfe +#define IGN PACK_BASE64_IGNORE +#define PAD PACK_BASE64_PADDING + +static const unsigned char base64chars[64] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', + 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', + 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', +}; +static const unsigned char base64_dec_tab[128] = { + IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, + IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, + IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, IGN, 62, IGN, IGN, IGN, 63, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, IGN, IGN, IGN, PAD, IGN, IGN, + IGN, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, IGN, IGN, IGN, IGN, IGN, + IGN, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, IGN, IGN, IGN, IGN, IGN, +}; + +/* lookup table for hex character to integer conversion */ +static const signed char hex_lookup[256] = { + /* 0x00-0x0F */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 0x10-0x1F */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 0x20-0x2F */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 0x30-0x3F */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + /* 0x40-0x4F */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 0x50-0x5F */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 0x60-0x6F */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 0x70-0x7F */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 0x80-0x8F */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 0x90-0x9F */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 0xA0-0xAF */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 0xB0-0xBF */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 0xC0-0xCF */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 0xD0-0xDF */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 0xE0-0xEF */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 0xF0-0xFF */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 +}; +/* byte index arrays for endianness optimization */ +static const int be_idx16[2] = {1, 0}; /* big-endian 16-bit: MSB, LSB */ +static const int le_idx16[2] = {0, 1}; /* little-endian 16-bit: LSB, MSB */ +static const int be_idx32[4] = {3, 2, 1, 0}; /* big-endian 32-bit: MSB...LSB */ +static const int le_idx32[4] = {0, 1, 2, 3}; /* little-endian 32-bit: LSB...MSB */ +static const int be_idx64[8] = {7, 6, 5, 4, 3, 2, 1, 0}; /* big-endian 64-bit: MSB...LSB */ +static const int le_idx64[8] = {0, 1, 2, 3, 4, 5, 6, 7}; /* little-endian 64-bit: LSB...MSB */ +/* lookup tables for binary string optimization */ +/* Convert character to bit value (0 or 1) */ +static inline uint8_t char_to_bit(unsigned char c) { + switch (c) { + case '0': return 0; + case '1': return 1; + default: return 0; + } +} + +static const char bit_to_char[2] = {'0', '1'}; + +/* 8-bit batch processing functions for binary strings */ +static inline uint8_t +pack_8_bits_msb(const char *src) +{ + uint8_t result = 0; + result |= char_to_bit((uint8_t)src[0]) << 7; + result |= char_to_bit((uint8_t)src[1]) << 6; + result |= char_to_bit((uint8_t)src[2]) << 5; + result |= char_to_bit((uint8_t)src[3]) << 4; + result |= char_to_bit((uint8_t)src[4]) << 3; + result |= char_to_bit((uint8_t)src[5]) << 2; + result |= char_to_bit((uint8_t)src[6]) << 1; + result |= char_to_bit((uint8_t)src[7]); + return result; +} + +static inline uint8_t +pack_8_bits_lsb(const char *src) +{ + uint8_t result = 0; + result |= char_to_bit((uint8_t)src[0]); + result |= char_to_bit((uint8_t)src[1]) << 1; + result |= char_to_bit((uint8_t)src[2]) << 2; + result |= char_to_bit((uint8_t)src[3]) << 3; + result |= char_to_bit((uint8_t)src[4]) << 4; + result |= char_to_bit((uint8_t)src[5]) << 5; + result |= char_to_bit((uint8_t)src[6]) << 6; + result |= char_to_bit((uint8_t)src[7]) << 7; + return result; +} + +static inline void +unpack_8_bits_msb(uint8_t byte, char *dst) +{ + dst[0] = bit_to_char[(byte >> 7) & 1]; + dst[1] = bit_to_char[(byte >> 6) & 1]; + dst[2] = bit_to_char[(byte >> 5) & 1]; + dst[3] = bit_to_char[(byte >> 4) & 1]; + dst[4] = bit_to_char[(byte >> 3) & 1]; + dst[5] = bit_to_char[(byte >> 2) & 1]; + dst[6] = bit_to_char[(byte >> 1) & 1]; + dst[7] = bit_to_char[byte & 1]; +} + +static inline void +unpack_8_bits_lsb(uint8_t byte, char *dst) +{ + dst[0] = bit_to_char[byte & 1]; + dst[1] = bit_to_char[(byte >> 1) & 1]; + dst[2] = bit_to_char[(byte >> 2) & 1]; + dst[3] = bit_to_char[(byte >> 3) & 1]; + dst[4] = bit_to_char[(byte >> 4) & 1]; + dst[5] = bit_to_char[(byte >> 5) & 1]; + dst[6] = bit_to_char[(byte >> 6) & 1]; + dst[7] = bit_to_char[(byte >> 7) & 1]; +} + +/* character classification for string format optimization */ +#define CHAR_NULL 0x01 +#define CHAR_SPACE 0x02 + +/* Character classification function */ +static inline uint8_t char_class(unsigned char c) { + switch (c) { + case '\0': return CHAR_NULL; + case ' ': + case '\t': + case '\n': + case '\v': + case '\f': + case '\r': return CHAR_SPACE; + default: return 0; + } +} +/* UTF-8 optimization lookup tables */ +/* UTF-8 sequence length lookup table for non-ASCII bytes (0x80-0xFF) */ +/* Index = byte_value - 0x80, so table[0] = info for byte 0x80 */ +static const uint8_t utf8_seq_len_high[128] = { + /* 0x80-0xBF: Invalid start bytes (continuation bytes) - return 0 for error */ + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + + /* 0xC0-0xDF: 2-byte sequences */ + 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, + + /* 0xE0-0xEF: 3-byte sequences */ + 3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, + + /* 0xF0-0xF7: 4-byte sequences */ + 4,4,4,4,4,4,4,4, + + /* 0xF8-0xFF: Invalid start bytes - return 0 for error */ + 0,0,0,0,0,0,0,0 +}; + +/* Fast validation for UTF-8 continuation bytes (0x80-0xBF) */ +/* Index = byte_value - 0x80 */ +static const uint8_t utf8_is_continuation[128] = { + /* 0x80-0xBF: Valid continuation bytes */ + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + + /* 0xC0-0xFF: Invalid continuation bytes */ + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0 +}; + +/* Helper macro for continuation byte validation */ +#define IS_UTF8_CONTINUATION(byte) \ + ((byte) >= 0x80 && utf8_is_continuation[(byte) - 0x80]) + +/* Quoted-printable optimization lookup table */ +/* 0=literal, 1=encode, 2=special (newline) */ +static const uint8_t qprint_encode_type[256] = { + /* 0x00-0x1F: Control characters - encode */ + 1,1,1,1,1,1,1,1, 1,0,2,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + /* \t \n */ + + /* 0x20-0x3F: Printable ASCII */ + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,1,0,0, + /* = */ + + /* 0x40-0x5F: Printable ASCII */ + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + + /* 0x60-0x7F: Printable ASCII */ + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,1, + /* DEL*/ + + /* 0x80-0xFF: High-bit characters - encode */ + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1 +}; + +/* UU-encoding optimization lookup tables */ +/* UU-encoding uses 6-bit values mapped to ASCII 32-95, but space (32) -> backtick (96) */ +static const char uu_encode_table[64] = { + '`', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', + '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '_' +}; + +/* UU-decoding lookup table for ASCII 32-127 */ +static const int8_t uu_decode_table[96] = { + /* ASCII 32-47: ` ! " # $ % & ' ( ) * + , - . / */ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + /* ASCII 48-63: 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */ + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + /* ASCII 64-79: @ A B C D E F G H I J K L M N O */ + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + /* ASCII 80-95: P Q R S T U V W X Y Z [ \ ] ^ _ */ + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + /* ASCII 96-127: ` a b c... (invalid for UU, but ` = 0 for decoding) */ + 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 +}; + +/* template parsing optimization structures */ +typedef struct { + enum pack_dir dir; + enum pack_type type; + int size; + unsigned int base_flags; +} format_info_t; + +/* Format character lookup function */ +static inline format_info_t pack_format_info(unsigned char c) { + switch (c) { + case 'A': return (format_info_t){PACK_DIR_STR, PACK_TYPE_STRING, 0, PACK_FLAG_WIDTH | PACK_FLAG_COUNT2}; + case 'a': return (format_info_t){PACK_DIR_STR, PACK_TYPE_STRING, 0, PACK_FLAG_WIDTH | PACK_FLAG_COUNT2 | PACK_FLAG_a}; + case 'B': return (format_info_t){PACK_DIR_BSTR, PACK_TYPE_STRING, 0, PACK_FLAG_COUNT2}; + case 'b': return (format_info_t){PACK_DIR_BSTR, PACK_TYPE_STRING, 0, PACK_FLAG_COUNT2 | PACK_FLAG_LSB}; + case 'C': return (format_info_t){PACK_DIR_CHAR, PACK_TYPE_INTEGER, 1, 0}; + case 'c': return (format_info_t){PACK_DIR_CHAR, PACK_TYPE_INTEGER, 1, PACK_FLAG_SIGNED}; + case 'D': return (format_info_t){PACK_DIR_DOUBLE, PACK_TYPE_FLOAT, 8, PACK_FLAG_SIGNED}; + case 'd': return (format_info_t){PACK_DIR_DOUBLE, PACK_TYPE_FLOAT, 8, PACK_FLAG_SIGNED}; + case 'E': return (format_info_t){PACK_DIR_DOUBLE, PACK_TYPE_FLOAT, 8, PACK_FLAG_SIGNED | PACK_FLAG_LT}; + case 'e': return (format_info_t){PACK_DIR_FLOAT, PACK_TYPE_FLOAT, 4, PACK_FLAG_SIGNED | PACK_FLAG_LT}; + case 'F': return (format_info_t){PACK_DIR_FLOAT, PACK_TYPE_FLOAT, 4, PACK_FLAG_SIGNED}; + case 'f': return (format_info_t){PACK_DIR_FLOAT, PACK_TYPE_FLOAT, 4, PACK_FLAG_SIGNED}; + case 'G': return (format_info_t){PACK_DIR_DOUBLE, PACK_TYPE_FLOAT, 8, PACK_FLAG_SIGNED | PACK_FLAG_GT}; + case 'g': return (format_info_t){PACK_DIR_FLOAT, PACK_TYPE_FLOAT, 4, PACK_FLAG_SIGNED | PACK_FLAG_GT}; + case 'H': return (format_info_t){PACK_DIR_HEX, PACK_TYPE_STRING, 0, PACK_FLAG_COUNT2}; + case 'h': return (format_info_t){PACK_DIR_HEX, PACK_TYPE_STRING, 0, PACK_FLAG_COUNT2 | PACK_FLAG_LSB}; + /* I, i, J, j are handled specially based on sizeof() */ + case 'L': return (format_info_t){PACK_DIR_LONG, PACK_TYPE_INTEGER, 4, 0}; + case 'l': return (format_info_t){PACK_DIR_LONG, PACK_TYPE_INTEGER, 4, PACK_FLAG_SIGNED}; + case 'M': return (format_info_t){PACK_DIR_QENC, PACK_TYPE_STRING, 0, PACK_FLAG_WIDTH | PACK_FLAG_COUNT2}; + case 'm': return (format_info_t){PACK_DIR_BASE64, PACK_TYPE_STRING, 0, PACK_FLAG_WIDTH | PACK_FLAG_COUNT2}; + case 'N': return (format_info_t){PACK_DIR_LONG, PACK_TYPE_INTEGER, 4, PACK_FLAG_GT}; + case 'n': return (format_info_t){PACK_DIR_SHORT, PACK_TYPE_INTEGER, 2, PACK_FLAG_GT}; + case 'Q': return (format_info_t){PACK_DIR_QUAD, PACK_TYPE_INTEGER, 8, 0}; + case 'q': return (format_info_t){PACK_DIR_QUAD, PACK_TYPE_INTEGER, 8, PACK_FLAG_SIGNED}; + case 'S': return (format_info_t){PACK_DIR_SHORT, PACK_TYPE_INTEGER, 2, 0}; + case 's': return (format_info_t){PACK_DIR_SHORT, PACK_TYPE_INTEGER, 2, PACK_FLAG_SIGNED}; + case 'u': return (format_info_t){PACK_DIR_UU, PACK_TYPE_STRING, 0, PACK_FLAG_WIDTH | PACK_FLAG_COUNT2}; + case 'U': return (format_info_t){PACK_DIR_UTF8, PACK_TYPE_INTEGER, 0, 0}; + case 'V': return (format_info_t){PACK_DIR_LONG, PACK_TYPE_INTEGER, 4, PACK_FLAG_LT}; + case 'v': return (format_info_t){PACK_DIR_SHORT, PACK_TYPE_INTEGER, 2, PACK_FLAG_LT}; + case 'w': return (format_info_t){PACK_DIR_BER, PACK_TYPE_INTEGER, 0, PACK_FLAG_SIGNED}; + case 'x': return (format_info_t){PACK_DIR_NUL, PACK_TYPE_NONE, 0, 0}; + case 'X': return (format_info_t){PACK_DIR_BACK, PACK_TYPE_NONE, 0, 0}; + case '@': return (format_info_t){PACK_DIR_ABS, PACK_TYPE_NONE, 0, 0}; + case 'Z': return (format_info_t){PACK_DIR_STR, PACK_TYPE_STRING, 0, PACK_FLAG_WIDTH | PACK_FLAG_COUNT2 | PACK_FLAG_Z}; + default: return (format_info_t){PACK_DIR_NONE, PACK_TYPE_NONE, 0, 0}; + } +} -const static unsigned char base64chars[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; -const static unsigned char base64_dec_tab[] = - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x3e\xff\xff\xff\x3f" - "\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\xff\xff\xff\xfe\xff\xff" - "\xff\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e" - "\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\xff\xff\xff\xff\xff" - "\xff\x1a\x1b\x1c\x1d\x1e\x1f\x20\x21\x22\x23\x24\x25\x26\x27\x28" - "\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\xff\xff\xff\xff\xff"; + +#define IS_PADDING_CHAR_A(c) (char_class((unsigned char)(c)) & (CHAR_NULL | CHAR_SPACE)) +#define IS_PADDING_CHAR_Z(c) (char_class((unsigned char)(c)) & CHAR_NULL) static int hex2int(unsigned char ch) { - if (ch >= '0' && ch <= '9') - return ch - '0'; - else if (ch >= 'A' && ch <= 'F') - return 10 + (ch - 'A'); - else if (ch >= 'a' && ch <= 'f') - return 10 + (ch - 'a'); - else - return -1; + return hex_lookup[ch]; } static mrb_value @@ -121,42 +375,37 @@ pack_char(mrb_state *mrb, mrb_value o, mrb_value str, mrb_int sidx, unsigned int } static int -unpack_char(mrb_state *mrb, const void *src, int srclen, mrb_value ary, unsigned int flags) +unpack_char(mrb_state *mrb, const void *src, mrb_int srclen, mrb_value ary, unsigned int flags) { if (flags & PACK_FLAG_SIGNED) - mrb_ary_push(mrb, ary, mrb_fixnum_value(*(signed char *)src)); + mrb_ary_push(mrb, ary, mrb_fixnum_value(*(signed char*)src)); else - mrb_ary_push(mrb, ary, mrb_fixnum_value(*(unsigned char *)src)); + mrb_ary_push(mrb, ary, mrb_fixnum_value(*(unsigned char*)src)); return 1; } static int pack_short(mrb_state *mrb, mrb_value o, mrb_value str, mrb_int sidx, unsigned int flags) { - uint16_t n; - str = str_len_ensure(mrb, str, sidx + 2); - n = (uint16_t)mrb_integer(o); - if (flags & PACK_FLAG_LITTLEENDIAN) { - RSTRING_PTR(str)[sidx+0] = n % 256; - RSTRING_PTR(str)[sidx+1] = n / 256; - } else { - RSTRING_PTR(str)[sidx+0] = n / 256; - RSTRING_PTR(str)[sidx+1] = n % 256; - } + uint16_t n = (uint16_t)mrb_integer(o); + char *dptr = RSTRING_PTR(str) + sidx; + + /* use lookup tables to eliminate branching */ + const int *idx = (flags & PACK_FLAG_LITTLEENDIAN) ? le_idx16 : be_idx16; + dptr[idx[0]] = (char)(n & 0xff); + dptr[idx[1]] = (char)(n >> 8); + return 2; } static int -unpack_short(mrb_state *mrb, const unsigned char *src, int srclen, mrb_value ary, unsigned int flags) +unpack_short(mrb_state *mrb, const unsigned char *src, mrb_int srclen, mrb_value ary, unsigned int flags) { - int n; + /* use lookup tables to eliminate branching */ + const int *idx = (flags & PACK_FLAG_LITTLEENDIAN) ? le_idx16 : be_idx16; + int n = (src[idx[1]] << 8) | src[idx[0]]; - if (flags & PACK_FLAG_LITTLEENDIAN) { - n = src[1] * 256 + src[0]; - } else { - n = src[0] * 256 + src[1]; - } if ((flags & PACK_FLAG_SIGNED) && (n >= 0x8000)) { n -= 0x10000; } @@ -167,21 +416,17 @@ unpack_short(mrb_state *mrb, const unsigned char *src, int srclen, mrb_value ary static int pack_long(mrb_state *mrb, mrb_value o, mrb_value str, mrb_int sidx, unsigned int flags) { - uint32_t n; - str = str_len_ensure(mrb, str, sidx + 4); - n = (uint32_t)mrb_integer(o); - if (flags & PACK_FLAG_LITTLEENDIAN) { - RSTRING_PTR(str)[sidx+0] = (char)(n & 0xff); - RSTRING_PTR(str)[sidx+1] = (char)(n >> 8); - RSTRING_PTR(str)[sidx+2] = (char)(n >> 16); - RSTRING_PTR(str)[sidx+3] = (char)(n >> 24); - } else { - RSTRING_PTR(str)[sidx+0] = (char)(n >> 24); - RSTRING_PTR(str)[sidx+1] = (char)(n >> 16); - RSTRING_PTR(str)[sidx+2] = (char)(n >> 8); - RSTRING_PTR(str)[sidx+3] = (char)(n & 0xff); - } + uint32_t n = (uint32_t)mrb_integer(o); + char *dptr = RSTRING_PTR(str) + sidx; + + /* use lookup tables to eliminate branching */ + const int *idx = (flags & PACK_FLAG_LITTLEENDIAN) ? le_idx32 : be_idx32; + dptr[idx[0]] = (char)(n & 0xff); + dptr[idx[1]] = (char)(n >> 8); + dptr[idx[2]] = (char)(n >> 16); + dptr[idx[3]] = (char)(n >> 24); + return 4; } @@ -197,18 +442,18 @@ u32tostr(char *buf, size_t len, uint32_t n) return; } - *p -- = '\0'; - len --; + *p-- = '\0'; + len--; if (n > 0) { for (; len > 0 && n > 0; len --, n /= 10) { *p -- = '0' + (n % 10); } - p ++; + p++; } else if (len > 0) { *p = '0'; - len --; + len--; } memmove(buf, p, bufend - p); @@ -219,7 +464,7 @@ u32tostr(char *buf, size_t len, uint32_t n) #endif /* MRB_INT64 */ static int -unpack_long(mrb_state *mrb, const unsigned char *src, int srclen, mrb_value ary, unsigned int flags) +unpack_long(mrb_state *mrb, const unsigned char *src, mrb_int srclen, mrb_value ary, unsigned int flags) { #ifndef MRB_INT64 char msg[60]; @@ -227,20 +472,17 @@ unpack_long(mrb_state *mrb, const unsigned char *src, int srclen, mrb_value ary, uint32_t ul; mrb_int n; - if (flags & PACK_FLAG_LITTLEENDIAN) { - ul = (uint32_t)src[3] * 256*256*256; - ul += (uint32_t)src[2] *256*256; - ul += (uint32_t)src[1] *256; - ul += (uint32_t)src[0]; - } else { - ul = (uint32_t)src[0] * 256*256*256; - ul += (uint32_t)src[1] *256*256; - ul += (uint32_t)src[2] *256; - ul += (uint32_t)src[3]; - } + /* use lookup tables to eliminate branching */ + const int *idx = (flags & PACK_FLAG_LITTLEENDIAN) ? le_idx32 : be_idx32; + ul = ((uint32_t)src[idx[3]] << 24) | + ((uint32_t)src[idx[2]] << 16) | + ((uint32_t)src[idx[1]] << 8) | + (uint32_t)src[idx[0]]; + if (flags & PACK_FLAG_SIGNED) { n = (int32_t)ul; - } else { + } + else { #ifndef MRB_INT64 if (UINT_OVERFLOW_P(ul)) { u32tostr(msg, sizeof(msg), ul); @@ -256,29 +498,21 @@ unpack_long(mrb_state *mrb, const unsigned char *src, int srclen, mrb_value ary, static int pack_quad(mrb_state *mrb, mrb_value o, mrb_value str, mrb_int sidx, unsigned int flags) { - uint64_t n; - str = str_len_ensure(mrb, str, sidx + 8); - n = (uint64_t)mrb_integer(o); - if (flags & PACK_FLAG_LITTLEENDIAN) { - RSTRING_PTR(str)[sidx+0] = (char)(n & 0xff); - RSTRING_PTR(str)[sidx+1] = (char)(n >> 8); - RSTRING_PTR(str)[sidx+2] = (char)(n >> 16); - RSTRING_PTR(str)[sidx+3] = (char)(n >> 24); - RSTRING_PTR(str)[sidx+4] = (char)(n >> 32); - RSTRING_PTR(str)[sidx+5] = (char)(n >> 40); - RSTRING_PTR(str)[sidx+6] = (char)(n >> 48); - RSTRING_PTR(str)[sidx+7] = (char)(n >> 56); - } else { - RSTRING_PTR(str)[sidx+0] = (char)(n >> 56); - RSTRING_PTR(str)[sidx+1] = (char)(n >> 48); - RSTRING_PTR(str)[sidx+2] = (char)(n >> 40); - RSTRING_PTR(str)[sidx+3] = (char)(n >> 32); - RSTRING_PTR(str)[sidx+4] = (char)(n >> 24); - RSTRING_PTR(str)[sidx+5] = (char)(n >> 16); - RSTRING_PTR(str)[sidx+6] = (char)(n >> 8); - RSTRING_PTR(str)[sidx+7] = (char)(n & 0xff); - } + uint64_t n = (uint64_t)mrb_integer(o); + char *dptr = RSTRING_PTR(str) + sidx; + + /* use lookup tables to eliminate branching */ + const int *idx = (flags & PACK_FLAG_LITTLEENDIAN) ? le_idx64 : be_idx64; + dptr[idx[0]] = (char)(n & 0xff); + dptr[idx[1]] = (char)(n >> 8); + dptr[idx[2]] = (char)(n >> 16); + dptr[idx[3]] = (char)(n >> 24); + dptr[idx[4]] = (char)(n >> 32); + dptr[idx[5]] = (char)(n >> 40); + dptr[idx[6]] = (char)(n >> 48); + dptr[idx[7]] = (char)(n >> 56); + return 8; } @@ -286,27 +520,24 @@ static void u64tostr(char *buf, size_t len, uint64_t n) { #ifdef MRB_NO_STDIO - char *bufend = buf + len; - char *p = bufend - 1; + mrb_assert(len > 0); - if (len < 1) { + if (n < 10) { + buf[0] = '0' + n; + buf[1] = '\0'; return; } - *p -- = '\0'; - len --; + char *bufend = buf + len; + char *p = bufend - 1; - if (n > 0) { - for (; len > 0 && n > 0; len --, n /= 10) { - *p -- = '0' + (n % 10); - } - p ++; - } - else if (len > 0) { - *p = '0'; - len --; - } + *p-- = '\0'; + len--; + for (; len > 0 && n > 0; len--, n /= 10) { + *p-- = '0' + (n % 10); + } + p++; memmove(buf, p, bufend - p); #else snprintf(buf, len, "%" PRIu64, n); @@ -318,13 +549,11 @@ static void i64tostr(char *buf, size_t len, int64_t n) { #ifdef MRB_NO_STDIO - if (len < 1) { - return; - } + mrb_assert(len > 0); if (n < 0) { - *buf ++ = '-'; - len --; + *buf++ = '-'; + len--; n = -n; } @@ -336,25 +565,22 @@ i64tostr(char *buf, size_t len, int64_t n) #endif /* MRB_INT64 */ static int -unpack_quad(mrb_state *mrb, const unsigned char *src, int srclen, mrb_value ary, unsigned int flags) +unpack_quad(mrb_state *mrb, const unsigned char *src, mrb_int srclen, mrb_value ary, unsigned int flags) { char msg[60]; - uint64_t ull; - int i, pos, step; mrb_int n; - if (flags & PACK_FLAG_LITTLEENDIAN) { - pos = 7; - step = -1; - } else { - pos = 0; - step = 1; - } - ull = 0; - for (i = 0; i < 8; i++) { - ull = ull * 256 + (uint64_t)src[pos]; - pos += step; - } + /* use lookup tables to eliminate branching */ + const int *idx = (flags & PACK_FLAG_LITTLEENDIAN) ? le_idx64 : be_idx64; + uint64_t ull = ((uint64_t)src[idx[7]] << 56) | + ((uint64_t)src[idx[6]] << 48) | + ((uint64_t)src[idx[5]] << 40) | + ((uint64_t)src[idx[4]] << 32) | + ((uint64_t)src[idx[3]] << 24) | + ((uint64_t)src[idx[2]] << 16) | + ((uint64_t)src[idx[1]] << 8) | + (uint64_t)src[idx[0]]; + if (flags & PACK_FLAG_SIGNED) { int64_t sll = ull; #ifndef MRB_INT64 @@ -364,7 +590,8 @@ unpack_quad(mrb_state *mrb, const unsigned char *src, int srclen, mrb_value ary, } #endif n = (mrb_int)sll; - } else { + } + else { if (UINT_OVERFLOW_P(ull)) { u64tostr(msg, sizeof(msg), ull); mrb_raisef(mrb, E_RANGE_ERROR, "cannot unpack to Integer: %s", msg); @@ -379,43 +606,70 @@ static int pack_BER(mrb_state *mrb, mrb_value o, mrb_value str, mrb_int sidx, unsigned int flags) { mrb_int n = mrb_integer(o); - int i; - char *p; if (n < 0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "can't compress negative numbers"); } - for (i=1; i<(int)sizeof(mrb_int)+1; i++) { - mrb_int mask = ~((1L<<(7*i))-1); + + /* fast path for 1-byte values (0-127) */ + if (n < 128) { + str = str_len_ensure(mrb, str, sidx + 1); + RSTRING_PTR(str)[sidx] = (char)n; + return 1; + } + + /* fast path for 2-byte values (128-16383) */ + if (n < 16384) { + str = str_len_ensure(mrb, str, sidx + 2); + char *p = RSTRING_PTR(str) + sidx; + *p++ = (char)((n >> 7) | 0x80); + *p = (char)(n & 0x7f); + return 2; + } + + /* original algorithm for larger values */ + int i; + for (i = 1; i < (int)sizeof(mrb_int) + 1; i++) { + mrb_int mask = ~((1L << (7 * i)) - 1); if ((n & mask) == 0) break; } + str = str_len_ensure(mrb, str, sidx + i); - p = RSTRING_PTR(str)+sidx; - for (size_t j=i; j>0; p++,j--) { - mrb_int x = (n>>(7*(j-1)))&0x7f; + char *p = RSTRING_PTR(str) + sidx; + + for (size_t j = i; j > 0; p++, j--) { + mrb_int x = (n >> (7 * (j - 1))) & 0x7f; *p = (char)x; if (j > 1) *p |= 0x80; } + return i; } static int -unpack_BER(mrb_state *mrb, const unsigned char *src, int srclen, mrb_value ary, unsigned int flags) +unpack_BER(mrb_state *mrb, const unsigned char *src, mrb_int srclen, mrb_value ary, unsigned int flags) { - int i; mrb_int n = 0; const unsigned char *p = src; const unsigned char *e = p + srclen; if (srclen == 0) return 0; - for (i=1; p (MRB_INT_MAX>>7)) { + + /* calculate maximum safe bytes before potential overflow */ + const int max_safe_bytes = (sizeof(mrb_int) * 8 - 1) / 7; /* conservative estimate */ + + int i; + for (i = 1; p < e; p++, i++) { + /* check overflow before we might exceed safe limits */ + if (i > max_safe_bytes || n > (MRB_INT_MAX >> 7)) { mrb_raise(mrb, E_RANGE_ERROR, "BER unpacking 'w' overflow"); } + n <<= 7; n |= *p & 0x7f; if ((*p & 0x80) == 0) break; } + mrb_ary_push(mrb, ary, mrb_int_value(mrb, n)); return i; } @@ -424,62 +678,50 @@ unpack_BER(mrb_state *mrb, const unsigned char *src, int srclen, mrb_value ary, static int pack_double(mrb_state *mrb, mrb_value o, mrb_value str, mrb_int sidx, unsigned int flags) { - int i; - double d; - uint8_t *buffer = (uint8_t *)&d; - str = str_len_ensure(mrb, str, sidx + 8); - d = mrb_float(o); + union { + double d; + uint64_t u; + } converter; - if (flags & PACK_FLAG_LITTLEENDIAN) { - if (littleendian) { - memcpy(RSTRING_PTR(str) + sidx, buffer, 8); - } - else { - for (i = 0; i < 8; ++i) { - RSTRING_PTR(str)[sidx + i] = buffer[8 - i - 1]; - } - } - } else { - if (littleendian) { - for (i = 0; i < 8; ++i) { - RSTRING_PTR(str)[sidx + i] = buffer[8 - i - 1]; - } - } - else { - memcpy(RSTRING_PTR(str) + sidx, buffer, 8); - } - } + str = str_len_ensure(mrb, str, sidx + 8); + converter.d = mrb_float(o); + char *dptr = RSTRING_PTR(str) + sidx; + + /* Use bit shifts for endian-independent byte extraction (same as pack_quad) */ + uint64_t n = converter.u; + const int *idx = (flags & PACK_FLAG_LITTLEENDIAN) ? le_idx64 : be_idx64; + dptr[idx[0]] = (char)(n & 0xff); + dptr[idx[1]] = (char)((n >> 8) & 0xff); + dptr[idx[2]] = (char)((n >> 16) & 0xff); + dptr[idx[3]] = (char)((n >> 24) & 0xff); + dptr[idx[4]] = (char)((n >> 32) & 0xff); + dptr[idx[5]] = (char)((n >> 40) & 0xff); + dptr[idx[6]] = (char)((n >> 48) & 0xff); + dptr[idx[7]] = (char)((n >> 56) & 0xff); return 8; } static int -unpack_double(mrb_state *mrb, const unsigned char * src, int srclen, mrb_value ary, unsigned int flags) +unpack_double(mrb_state *mrb, const unsigned char * src, mrb_int srclen, mrb_value ary, unsigned int flags) { - int i; - double d; - uint8_t *buffer = (uint8_t *)&d; - - if (flags & PACK_FLAG_LITTLEENDIAN) { - if (littleendian) { - memcpy(buffer, src, 8); - } - else { - for (i = 0; i < 8; ++i) { - buffer[8 - i - 1] = src[i]; - } - } - } else { - if (littleendian) { - for (i = 0; i < 8; ++i) { - buffer[8 - i - 1] = src[i]; - } - } - else { - memcpy(buffer, src, 8); - } - } - mrb_ary_push(mrb, ary, mrb_float_value(mrb, d)); + union { + double d; + uint64_t u; + } converter; + + /* Use bit shifts for endian-independent byte assembly (same as unpack_quad) */ + const int *idx = (flags & PACK_FLAG_LITTLEENDIAN) ? le_idx64 : be_idx64; + converter.u = ((uint64_t)src[idx[7]] << 56) | + ((uint64_t)src[idx[6]] << 48) | + ((uint64_t)src[idx[5]] << 40) | + ((uint64_t)src[idx[4]] << 32) | + ((uint64_t)src[idx[3]] << 24) | + ((uint64_t)src[idx[2]] << 16) | + ((uint64_t)src[idx[1]] << 8) | + ((uint64_t)src[idx[0]]); + + mrb_ary_push(mrb, ary, mrb_float_value(mrb, converter.d)); return 8; } @@ -487,62 +729,42 @@ unpack_double(mrb_state *mrb, const unsigned char * src, int srclen, mrb_value a static int pack_float(mrb_state *mrb, mrb_value o, mrb_value str, mrb_int sidx, unsigned int flags) { - int i; - float f; - uint8_t *buffer = (uint8_t *)&f; + union { + float f; + uint32_t u; + } converter; + str = str_len_ensure(mrb, str, sidx + 4); - f = (float)mrb_float(o); + converter.f = (float)mrb_float(o); + char *dptr = RSTRING_PTR(str) + sidx; - if (flags & PACK_FLAG_LITTLEENDIAN) { - if (littleendian) { - memcpy(RSTRING_PTR(str) + sidx, buffer, 4); - } - else { - for (i = 0; i < 4; ++i) { - RSTRING_PTR(str)[sidx + i] = buffer[4 - i - 1]; - } - } - } else { - if (littleendian) { - for (i = 0; i < 4; ++i) { - RSTRING_PTR(str)[sidx + i] = buffer[4 - i - 1]; - } - } - else { - memcpy(RSTRING_PTR(str) + sidx, buffer, 4); - } - } + /* Use bit shifts for endian-independent byte extraction (same as pack_long) */ + uint32_t n = converter.u; + const int *idx = (flags & PACK_FLAG_LITTLEENDIAN) ? le_idx32 : be_idx32; + dptr[idx[0]] = (char)(n & 0xff); + dptr[idx[1]] = (char)((n >> 8) & 0xff); + dptr[idx[2]] = (char)((n >> 16) & 0xff); + dptr[idx[3]] = (char)((n >> 24) & 0xff); return 4; } static int -unpack_float(mrb_state *mrb, const unsigned char * src, int srclen, mrb_value ary, unsigned int flags) +unpack_float(mrb_state *mrb, const unsigned char * src, mrb_int srclen, mrb_value ary, unsigned int flags) { - int i; - float f; - uint8_t *buffer = (uint8_t *)&f; + union { + float f; + uint32_t u; + } converter; - if (flags & PACK_FLAG_LITTLEENDIAN) { - if (littleendian) { - memcpy(buffer, src, 4); - } - else { - for (i = 0; i < 4; ++i) { - buffer[4 - i - 1] = src[i]; - } - } - } else { - if (littleendian) { - for (i = 0; i < 4; ++i) { - buffer[4 - i - 1] = src[i]; - } - } - else { - memcpy(buffer, src, 4); - } - } - mrb_ary_push(mrb, ary, mrb_float_value(mrb, f)); + /* Use bit shifts for endian-independent byte assembly (same as unpack_long) */ + const int *idx = (flags & PACK_FLAG_LITTLEENDIAN) ? le_idx32 : be_idx32; + converter.u = ((uint32_t)src[idx[3]] << 24) | + ((uint32_t)src[idx[2]] << 16) | + ((uint32_t)src[idx[1]] << 8) | + ((uint32_t)src[idx[0]]); + + mrb_ary_push(mrb, ary, mrb_float_value(mrb, converter.f)); return 4; } @@ -552,36 +774,11 @@ static int pack_utf8(mrb_state *mrb, mrb_value o, mrb_value str, mrb_int sidx, int count, unsigned int flags) { char utf8[4]; - int len = 0; - uint32_t c = 0; - - c = (uint32_t)mrb_integer(o); - - /* Unicode character */ - /* from mruby-compiler gem */ - if (c < 0x80) { - utf8[0] = (char)c; - len = 1; - } - else if (c < 0x800) { - utf8[0] = (char)(0xC0 | (c >> 6)); - utf8[1] = (char)(0x80 | (c & 0x3F)); - len = 2; - } - else if (c < 0x10000) { - utf8[0] = (char)(0xE0 | (c >> 12) ); - utf8[1] = (char)(0x80 | ((c >> 6) & 0x3F)); - utf8[2] = (char)(0x80 | ( c & 0x3F)); - len = 3; - } - else if (c < 0x200000) { - utf8[0] = (char)(0xF0 | (c >> 18) ); - utf8[1] = (char)(0x80 | ((c >> 12) & 0x3F)); - utf8[2] = (char)(0x80 | ((c >> 6) & 0x3F)); - utf8[3] = (char)(0x80 | ( c & 0x3F)); - len = 4; - } - else { + int len; + uint32_t c = (uint32_t)mrb_integer(o); + + len = (int)mrb_utf8_to_buf(utf8, c); + if (len == 0) { mrb_raise(mrb, E_RANGE_ERROR, "pack(U): value out of range"); } @@ -591,90 +788,94 @@ pack_utf8(mrb_state *mrb, mrb_value o, mrb_value str, mrb_int sidx, int count, u return len; } -static const unsigned long utf8_limits[] = { - 0x0, /* 1 */ - 0x80, /* 2 */ - 0x800, /* 3 */ - 0x10000, /* 4 */ - 0x200000, /* 5 */ - 0x4000000, /* 6 */ - 0x80000000, /* 7 */ -}; -static unsigned long -utf8_to_uv(mrb_state *mrb, const char *p, long *lenp) -{ - int c = *p++ & 0xff; - unsigned long uv = c; - long n = 1; - if (!(uv & 0x80)) { - *lenp = 1; - return uv; +static int +unpack_utf8(mrb_state *mrb, const unsigned char * src, mrb_int srclen, mrb_value ary, unsigned int flags) +{ + if (srclen == 0) { + return 1; } - if (!(uv & 0x40)) { - *lenp = 1; - malformed: - mrb_raise(mrb, E_ARGUMENT_ERROR, "malformed UTF-8 character"); + + const unsigned char *p = src; + uint8_t first_byte = *p; + + /* ASCII fast path - most common case */ + if (first_byte < 0x80) { + mrb_ary_push(mrb, ary, mrb_fixnum_value(first_byte)); + return 1; } - if (!(uv & 0x20)) { n = 2; uv &= 0x1f; } - else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; } - else if (!(uv & 0x08)) { n = 4; uv &= 0x07; } - else if (!(uv & 0x04)) { n = 5; uv &= 0x03; } - else if (!(uv & 0x02)) { n = 6; uv &= 0x01; } - else { - *lenp = 1; + /* Multi-byte UTF-8 with optimized lookup table */ + uint8_t seq_len = utf8_seq_len_high[first_byte - 0x80]; + if (seq_len == 0 || seq_len > srclen) { goto malformed; } - if (n > *lenp) { - mrb_raisef(mrb, E_ARGUMENT_ERROR, "malformed UTF-8 character (expected %d bytes, given %d bytes)", - n, *lenp); - } - *lenp = n--; - if (n != 0) { - while (n--) { - c = *p++ & 0xff; - if ((c & 0xc0) != 0x80) { - *lenp -= n + 1; - goto malformed; - } - else { - c &= 0x3f; - uv = uv << 6 | c; - } + + /* Inline 2-byte sequence optimization - common case */ + if (seq_len == 2) { + if (!IS_UTF8_CONTINUATION(p[1])) { + goto malformed; } + uint32_t uv = ((first_byte & 0x1F) << 6) | (p[1] & 0x3F); + /* validate minimum value for 2-byte sequence */ + if (uv >= 0x80) { + mrb_ary_push(mrb, ary, mrb_fixnum_value((mrb_int)uv)); + return 2; + } + goto redundant; } - n = *lenp - 1; - if (uv < utf8_limits[n]) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "redundant UTF-8 sequence"); - } - return uv; -} -static int -unpack_utf8(mrb_state *mrb, const unsigned char * src, int srclen, mrb_value ary, unsigned int flags) -{ - unsigned long uv; - long lenp = srclen; + /* Inline 3-byte sequence optimization */ + if (seq_len == 3) { + if (!IS_UTF8_CONTINUATION(p[1]) || !IS_UTF8_CONTINUATION(p[2])) { + goto malformed; + } + uint32_t uv = ((first_byte & 0x0F) << 12) | + ((p[1] & 0x3F) << 6) | + (p[2] & 0x3F); + /* validate minimum value for 3-byte sequence */ + if (uv >= 0x800) { + mrb_ary_push(mrb, ary, mrb_fixnum_value((mrb_int)uv)); + return 3; + } + goto redundant; + } - if (srclen == 0) { - return 1; + /* 4-byte sequence - less common, use original implementation */ + if (seq_len == 4) { + if (!IS_UTF8_CONTINUATION(p[1]) || !IS_UTF8_CONTINUATION(p[2]) || !IS_UTF8_CONTINUATION(p[3])) { + goto malformed; + } + uint32_t uv = ((first_byte & 0x07) << 18) | + ((p[1] & 0x3F) << 12) | + ((p[2] & 0x3F) << 6) | + (p[3] & 0x3F); + /* validate minimum value and maximum valid Unicode */ + if (uv >= 0x10000 && uv <= 0x10FFFF) { + mrb_ary_push(mrb, ary, mrb_fixnum_value((mrb_int)uv)); + return 4; + } + if (uv < 0x10000) goto redundant; + goto malformed; } - uv = utf8_to_uv(mrb, (const char *)src, &lenp); - mrb_ary_push(mrb, ary, mrb_fixnum_value((mrb_int)uv)); - return (int)lenp; + +malformed: + mrb_raise(mrb, E_ARGUMENT_ERROR, "malformed UTF-8 character"); + +redundant: + mrb_raise(mrb, E_ARGUMENT_ERROR, "redundant UTF-8 sequence"); } static int pack_str(mrb_state *mrb, mrb_value src, mrb_value dst, mrb_int didx, int count, unsigned int flags) { - mrb_int copylen, slen, padlen; - char *dptr, *dptr0, pad, *sptr; - - sptr = RSTRING_PTR(src); - slen = RSTRING_LEN(src); + const char *sptr = RSTRING_PTR(src); + mrb_int slen = RSTRING_LEN(src); + mrb_int copylen, padlen; + char pad; + /* determine padding character based on format */ if ((flags & PACK_FLAG_a) || (flags & PACK_FLAG_Z)) pad = '\0'; else @@ -682,151 +883,266 @@ pack_str(mrb_state *mrb, mrb_value src, mrb_value dst, mrb_int didx, int count, if (count == 0) { return 0; - } else if (count == -1) { + } + else if (count == -1) { copylen = slen; padlen = (flags & PACK_FLAG_Z) ? 1 : 0; - } else if (count < slen) { + } + else if (count < slen) { copylen = count; padlen = 0; - } else { + } + else { copylen = slen; padlen = count - slen; } + /* pre-allocate exact buffer size */ dst = str_len_ensure(mrb, dst, didx + copylen + padlen); - dptr0 = dptr = RSTRING_PTR(dst) + didx; - memcpy(dptr, sptr, copylen); - dptr += copylen; - while (padlen-- > 0) { - *dptr++ = pad; + char *dptr = RSTRING_PTR(dst) + didx; + char *dptr0 = dptr; + + /* copy string data */ + if (copylen > 0) { + memcpy(dptr, sptr, copylen); + dptr += copylen; + } + + /* bulk padding using memset instead of loop */ + if (padlen > 0) { + memset(dptr, pad, padlen); + dptr += padlen; } return (int)(dptr - dptr0); } +#define CHECK_UNPACK_LEN(mrb, slen, ary) do {\ + if ((slen) <= 0) {\ + mrb_ary_push(mrb, ary, mrb_str_new(mrb, 0, 0));\ + return 0;\ + }\ +} while (0) + static int -unpack_str(mrb_state *mrb, const void *src, int slen, mrb_value ary, int count, unsigned int flags) +unpack_str(mrb_state *mrb, const void *src, mrb_int slen, mrb_value ary, int count, unsigned int flags) { - mrb_value dst; - const char *cp, *sptr; + CHECK_UNPACK_LEN(mrb, slen, ary); + + const char *sptr = (const char*)src; int copylen; - sptr = (const char *)src; - if (count != -1 && count < slen) { + if (count != -1 && count < slen) { slen = count; } - copylen = slen; + copylen = (int)slen; - if (slen >= 0 && flags & PACK_FLAG_Z) { /* "Z" */ - if ((cp = (const char *)memchr(sptr, '\0', slen)) != NULL) { + if (slen >= 0 && flags & PACK_FLAG_Z) { /* "Z" format */ + const char *cp = (const char*)memchr(sptr, '\0', slen); + if (cp != NULL) { copylen = (int)(cp - sptr); if (count == -1) { slen = copylen + 1; } } } - else if (!(flags & PACK_FLAG_a)) { /* "A" */ - while (copylen > 0 && (sptr[copylen - 1] == '\0' || ISSPACE(sptr[copylen - 1]))) { + else if (!(flags & PACK_FLAG_a)) { /* "A" format - trim spaces and nulls */ + /* optimized reverse trimming using lookup table */ + while (copylen > 0 && IS_PADDING_CHAR_A(sptr[copylen - 1])) { copylen--; } } + /* "a" format does no trimming */ if (copylen < 0) copylen = 0; - dst = mrb_str_new(mrb, sptr, (mrb_int)copylen); + + mrb_value dst = mrb_str_new(mrb, sptr, (mrb_int)copylen); mrb_ary_push(mrb, ary, dst); - return slen; + return (int)slen; } static int pack_hex(mrb_state *mrb, mrb_value src, mrb_value dst, mrb_int didx, int count, unsigned int flags) { - int a, b; - unsigned int ashift, bshift; - long slen; - char *dptr, *dptr0, *sptr; - - sptr = RSTRING_PTR(src); - slen = (long)RSTRING_LEN(src); + char *sptr = RSTRING_PTR(src); + long slen = (long)RSTRING_LEN(src); + unsigned int ashift, bshift; if (flags & PACK_FLAG_LSB) { ashift = 0; bshift = 4; - } else { + } + else { ashift = 4; bshift = 0; } if (count == -1) { count = slen; - } else if (slen > count) { + } + else if (slen > count) { slen = count; } - dst = str_len_ensure(mrb, dst, didx + count); - dptr = RSTRING_PTR(dst) + didx; - - dptr0 = dptr; - for (; count > 0; count -= 2) { - a = b = 0; - if (slen > 0) { - a = hex2int(*sptr++); - if (a < 0) break; - slen--; - } - if (slen > 0) { - b = hex2int(*sptr++); - if (b < 0) break; - slen--; - } + /* calculate output buffer size needed - one byte per two hex chars */ + /* use count/2 + (count&1) to avoid overflow when count == INT_MAX */ + int output_bytes = count / 2 + (count & 1); + dst = str_len_ensure(mrb, dst, didx + output_bytes); + char *dptr = RSTRING_PTR(dst) + didx; + char *dptr0 = dptr; + + /* process pairs of hex characters */ + while (slen >= 2) { + int a = hex2int((unsigned char)*sptr++); + if (a < 0) break; + int b = hex2int((unsigned char)*sptr++); + if (b < 0) break; + *dptr++ = (a << ashift) + (b << bshift); + slen -= 2; + } + + /* handle odd remaining character */ + if (slen > 0) { + int a = hex2int((unsigned char)*sptr); + if (a >= 0) { + *dptr++ = (a << ashift); + } } return (int)(dptr - dptr0); } static int -unpack_hex(mrb_state *mrb, const void *src, int slen, mrb_value ary, int count, unsigned int flags) +unpack_hex(mrb_state *mrb, const void *src, mrb_int slen, mrb_value ary, int count, unsigned int flags) { - mrb_value dst; - int a, ashift, b, bshift; - const char *sptr, *sptr0; - char *dptr, *dptr0; - const char hexadecimal[] = "0123456789abcdef"; + CHECK_UNPACK_LEN(mrb, slen, ary); + int ashift, bshift; if (flags & PACK_FLAG_LSB) { ashift = 0; bshift = 4; - } else { + } + else { ashift = 4; bshift = 0; } - sptr = (const char *)src; + const char *sptr = (const char*)src; + const char *sptr0 = sptr; if (count == -1) - count = slen * 2; + count = (int)(slen * 2); - dst = mrb_str_new(mrb, NULL, count); - dptr = RSTRING_PTR(dst); + mrb_value dst = mrb_str_new(mrb, NULL, count); + char *dptr = RSTRING_PTR(dst); + char *dptr0 = dptr; - sptr0 = sptr; - dptr0 = dptr; - while (slen > 0 && count > 0) { - a = (*sptr >> ashift) & 0x0f; - b = (*sptr >> bshift) & 0x0f; - sptr++; + const char hexadecimal[] = "0123456789abcdef"; + + /* process full bytes when we need pairs of hex characters */ + while (slen > 0 && count >= 2) { + unsigned char byte = *sptr++; slen--; - *dptr++ = hexadecimal[a]; - count--; + *dptr++ = hexadecimal[(byte >> ashift) & 0x0f]; + *dptr++ = hexadecimal[(byte >> bshift) & 0x0f]; + count -= 2; + } + + /* handle remaining single character if count is odd */ + if (slen > 0 && count > 0) { + unsigned char byte = *sptr++; + *dptr++ = hexadecimal[(byte >> ashift) & 0x0f]; + } - if (count > 0) { - *dptr++ = hexadecimal[b]; - count--; + dst = mrb_str_resize(mrb, dst, (mrb_int)(dptr - dptr0)); + mrb_ary_push(mrb, ary, dst); + return (int)(sptr - sptr0); +} + +static int +pack_bstr(mrb_state *mrb, mrb_value src, mrb_value dst, mrb_int didx, int count, unsigned int flags) +{ + const char *sptr = RSTRING_PTR(src); + int slen = (int)RSTRING_LEN(src); + + if (count == -1) { + count = slen; + } + else if (slen > count) { + slen = count; + } + + /* calculate exact output size: (slen + 7) / 8 */ + int output_bytes = (slen + 7) / 8; + dst = str_len_ensure(mrb, dst, didx + output_bytes); + char *dptr = RSTRING_PTR(dst) + didx; + char *dptr0 = dptr; + + /* select batch processing function based on bit order */ + uint8_t (*pack_func)(const char *) = (flags & PACK_FLAG_LSB) ? pack_8_bits_lsb : pack_8_bits_msb; + + /* process 8 characters at a time */ + int full_bytes = slen / 8; + for (int i = 0; i < full_bytes; i++) { + *dptr++ = (char)pack_func(sptr); + sptr += 8; + } + + /* handle remaining bits (partial byte) */ + int remaining_bits = slen % 8; + if (remaining_bits > 0) { + char temp_chars[8] = {'0', '0', '0', '0', '0', '0', '0', '0'}; + /* copy remaining characters, padding with '0' */ + for (int i = 0; i < remaining_bits; i++) { + temp_chars[i] = sptr[i]; + } + *dptr++ = (char)pack_func(temp_chars); + } + + return (int)(dptr - dptr0); +} + +static int +unpack_bstr(mrb_state *mrb, const void *src, mrb_int slen, mrb_value ary, int count, unsigned int flags) +{ + CHECK_UNPACK_LEN(mrb, slen, ary); + + const char *sptr0 = (const char*)src; + const char *sptr = sptr0; + + if (count == -1 || count > slen * 8) + count = (int)(slen * 8); + + /* pre-allocate exact output size */ + mrb_value dst = mrb_str_new(mrb, NULL, count); + char *dptr = RSTRING_PTR(dst); + char *dptr0 = dptr; + + /* select batch processing function based on bit order */ + void (*unpack_func)(uint8_t, char *) = (flags & PACK_FLAG_LSB) ? unpack_8_bits_lsb : unpack_8_bits_msb; + + /* process 8 bits (1 byte) at a time */ + int full_bytes = count / 8; + for (int i = 0; i < full_bytes; i++) { + unpack_func((uint8_t)*sptr++, dptr); + dptr += 8; + } + + /* handle remaining bits (partial byte) */ + int remaining_bits = count % 8; + if (remaining_bits > 0) { + char temp_chars[8]; + unpack_func((uint8_t)*sptr++, temp_chars); + /* copy only the required number of characters */ + for (int i = 0; i < remaining_bits; i++) { + *dptr++ = temp_chars[i]; } } + /* ensure string is properly sized */ dst = mrb_str_resize(mrb, dst, (mrb_int)(dptr - dptr0)); mrb_ary_push(mrb, ary, dst); return (int)(sptr - sptr0); @@ -835,89 +1151,130 @@ unpack_hex(mrb_state *mrb, const void *src, int slen, mrb_value ary, int count, static int pack_base64(mrb_state *mrb, mrb_value src, mrb_value dst, mrb_int didx, int count) { - mrb_int dstlen; - unsigned long l; - mrb_int column, srclen; - char *srcptr, *dstptr, *dstptr0; - - srcptr = RSTRING_PTR(src); - srclen = RSTRING_LEN(src); + char *srcptr = RSTRING_PTR(src); + mrb_int srclen = RSTRING_LEN(src); - if (srclen == 0) /* easy case */ + if (srclen == 0) { return 0; + } - if (count != 0 && count < 3) { /* -1, 1 or 2 */ + if (count != 0 && count < 3) { count = 45; - } else if (count >= 3) { + } + else if (count >= 3) { count -= count % 3; } - dstlen = (srclen+2) / 3 * 4; + /* precise memory allocation */ + mrb_int dstlen = (srclen + 2) / 3 * 4; if (count > 0) { dstlen += (srclen / count) + ((srclen % count) == 0 ? 0 : 1); } dst = str_len_ensure(mrb, dst, didx + dstlen); - dstptr = RSTRING_PTR(dst) + didx; + char *dstptr = RSTRING_PTR(dst) + didx; + char *dstptr0 = dstptr; - dstptr0 = dstptr; - for (column = 3; srclen >= 3; srclen -= 3, column += 3) { - l = (unsigned char)*srcptr++ << 16; - l += (unsigned char)*srcptr++ << 8; - l += (unsigned char)*srcptr++; + if (count == 0) { + /* fast path: no line wrapping */ + while (srclen >= 3) { + unsigned long l = (unsigned char)*srcptr++ << 16; + l += (unsigned char)*srcptr++ << 8; + l += (unsigned char)*srcptr++; + srclen -= 3; + + *dstptr++ = base64chars[(l >> 18) & 0x3f]; + *dstptr++ = base64chars[(l >> 12) & 0x3f]; + *dstptr++ = base64chars[(l >> 6) & 0x3f]; + *dstptr++ = base64chars[l & 0x3f]; + } + } + else { + /* line wrapping path */ + mrb_int column = 3; + while (srclen >= 3) { + unsigned long l = (unsigned char)*srcptr++ << 16; + l += (unsigned char)*srcptr++ << 8; + l += (unsigned char)*srcptr++; + srclen -= 3; + + *dstptr++ = base64chars[(l >> 18) & 0x3f]; + *dstptr++ = base64chars[(l >> 12) & 0x3f]; + *dstptr++ = base64chars[(l >> 6) & 0x3f]; + *dstptr++ = base64chars[l & 0x3f]; + + if (column == count) { + *dstptr++ = '\n'; + column = 0; + } + column += 3; + } - *dstptr++ = base64chars[(l >> 18) & 0x3f]; - *dstptr++ = base64chars[(l >> 12) & 0x3f]; - *dstptr++ = base64chars[(l >> 6) & 0x3f]; - *dstptr++ = base64chars[ l & 0x3f]; + /* handle remaining 1-2 bytes */ + if (srclen == 1) { + unsigned long l = (unsigned char)*srcptr << 16; + *dstptr++ = base64chars[(l >> 18) & 0x3f]; + *dstptr++ = base64chars[(l >> 12) & 0x3f]; + *dstptr++ = '='; + *dstptr++ = '='; + column += 3; + } + else if (srclen == 2) { + unsigned long l = (unsigned char)*srcptr++ << 16; + l += (unsigned char)*srcptr << 8; + *dstptr++ = base64chars[(l >> 18) & 0x3f]; + *dstptr++ = base64chars[(l >> 12) & 0x3f]; + *dstptr++ = base64chars[(l >> 6) & 0x3f]; + *dstptr++ = '='; + column += 3; + } - if (column == count) { + if (column > 0) { *dstptr++ = '\n'; - column = 0; } + return (int)(dstptr - dstptr0); } + + /* handle remaining 1-2 bytes for fast path */ if (srclen == 1) { - l = (unsigned char)*srcptr++ << 16; + unsigned long l = (unsigned char)*srcptr << 16; *dstptr++ = base64chars[(l >> 18) & 0x3f]; *dstptr++ = base64chars[(l >> 12) & 0x3f]; *dstptr++ = '='; *dstptr++ = '='; - column += 3; - } else if (srclen == 2) { - l = (unsigned char)*srcptr++ << 16; - l += (unsigned char)*srcptr++ << 8; + } + else if (srclen == 2) { + unsigned long l = (unsigned char)*srcptr++ << 16; + l += (unsigned char)*srcptr << 8; *dstptr++ = base64chars[(l >> 18) & 0x3f]; *dstptr++ = base64chars[(l >> 12) & 0x3f]; - *dstptr++ = base64chars[(l >> 6) & 0x3f]; + *dstptr++ = base64chars[(l >> 6) & 0x3f]; *dstptr++ = '='; - column += 3; - } - if (column > 0 && count > 0) { - *dstptr++ = '\n'; } return (int)(dstptr - dstptr0); } static int -unpack_base64(mrb_state *mrb, const void *src, int slen, mrb_value ary) +unpack_base64(mrb_state *mrb, const void *src, mrb_int slen, mrb_value ary) { - mrb_value dst; - int dlen; - unsigned long l; - int i, padding; - unsigned char c, ch[4]; - const char *sptr, *sptr0; - char *dptr, *dptr0; + CHECK_UNPACK_LEN(mrb, slen, ary); - sptr0 = sptr = (const char *)src; + const char *sptr0 = (const char*)src; + const char *sptr = sptr0; - dlen = slen / 4 * 3; /* an estimated value - may be shorter */ - dst = mrb_str_new(mrb, NULL, dlen); - dptr0 = dptr = RSTRING_PTR(dst); + /* estimate buffer size - may be shorter due to padding/whitespace */ + int dlen = (int)(slen / 4 * 3); + mrb_value dst = mrb_str_new(mrb, NULL, dlen); + char *dptr0 = RSTRING_PTR(dst); + char *dptr = dptr0; - padding = 0; + int padding = 0; while (slen >= 4) { - for (i = 0; i < 4; i++) { + unsigned char ch[4]; + + /* collect 4 valid base64 characters */ + for (int i = 0; i < 4; i++) { + unsigned char c; do { if (slen-- == 0) goto done; @@ -932,17 +1289,20 @@ unpack_base64(mrb_state *mrb, const void *src, int slen, mrb_value ary) } while (c >= sizeof(base64_dec_tab) || ch[i] == PACK_BASE64_IGNORE); } - l = (ch[0] << 18) + (ch[1] << 12) + (ch[2] << 6) + ch[3]; + /* decode 4 characters to 3 bytes */ + unsigned long l = (ch[0] << 18) + (ch[1] << 12) + (ch[2] << 6) + ch[3]; if (padding == 0) { *dptr++ = (l >> 16) & 0xff; *dptr++ = (l >> 8) & 0xff; *dptr++ = l & 0xff; - } else if (padding == 1) { + } + else if (padding == 1) { *dptr++ = (l >> 16) & 0xff; *dptr++ = (l >> 8) & 0xff; break; - } else { + } + else { *dptr++ = (l >> 16) & 0xff; break; } @@ -965,36 +1325,57 @@ pack_qenc(mrb_state *mrb, mrb_value src, mrb_value dst, mrb_int didx, int count) int dlen = 0; if (count <= 1) count = 72; + while (s < send) { - if ((*s > 126) || - (*s < 32 && *s != '\n' && *s != '\t') || - (*s == '=')) { - buff[i++] = '='; - buff[i++] = hex_table[(*s & 0xf0) >> 4]; - buff[i++] = hex_table[*s & 0x0f]; - n += 3; - prev = EOF; + unsigned char byte = (unsigned char)*s; + uint8_t encode_type = qprint_encode_type[byte]; + + /* ASCII printable fast path - most common case */ + if (encode_type == 0) { + /* Handle space/tab at line end special case */ + if ((byte == ' ' || byte == '\t') && s + 1 < send && *(s + 1) == '\n') { + /* Space/tab before newline needs encoding */ + buff[i++] = '='; + buff[i++] = hex_table[(byte & 0xf0) >> 4]; + buff[i++] = hex_table[byte & 0x0f]; + n += 3; + prev = EOF; + } + else { + /* Regular printable character - direct copy */ + buff[i++] = byte; + n++; + prev = byte; + } } - else if (*s == '\n') { + /* Newline special handling */ + else if (encode_type == 2) { if (prev == ' ' || prev == '\t') { buff[i++] = '='; - buff[i++] = *s; + buff[i++] = byte; } - buff[i++] = *s; + buff[i++] = byte; n = 0; - prev = *s; + prev = byte; } + /* Character needs encoding */ else { - buff[i++] = *s; - n++; - prev = *s; + buff[i++] = '='; + buff[i++] = hex_table[(byte & 0xf0) >> 4]; + buff[i++] = hex_table[byte & 0x0f]; + n += 3; + prev = EOF; } + + /* Check line length limit */ if (n > count) { buff[i++] = '='; buff[i++] = '\n'; n = 0; prev = '\n'; } + + /* Flush buffer if getting full */ if (i > 1024 - 5) { str_len_ensure(mrb, dst, didx+dlen+i); memcpy(RSTRING_PTR(dst)+didx+dlen, buff, i); @@ -1003,49 +1384,215 @@ pack_qenc(mrb_state *mrb, mrb_value src, mrb_value dst, mrb_int didx, int count) } s++; } + + /* Add final soft line break if needed */ if (n > 0) { buff[i++] = '='; buff[i++] = '\n'; } + + /* Flush remaining buffer */ if (i > 0) { str_len_ensure(mrb, dst, didx+dlen+i); memcpy(RSTRING_PTR(dst)+didx+dlen, buff, i); dlen += i; } + return dlen; } static int -unpack_qenc(mrb_state *mrb, const void *src, int slen, mrb_value ary) +unpack_qenc(mrb_state *mrb, const void *src, mrb_int slen, mrb_value ary) { + CHECK_UNPACK_LEN(mrb, slen, ary); + mrb_value buf = mrb_str_new(mrb, 0, slen); - const char *s = (const char*)src, *ss = s; + const char *s = (const char*)src; const char *send = s + slen; char *ptr = RSTRING_PTR(buf); int c1, c2; while (s < send) { - if (*s == '=') { - if (++s == send) break; - if (s+1 < send && *s == '\r' && *(s+1) == '\n') - s++; - if (*s != '\n') { - if ((c1 = hex2int(*s)) == -1) break; - if (++s == send) break; - if ((c2 = hex2int(*s)) == -1) break; - *ptr++ = (char)(c1 << 4 | c2); - } - } - else { + /* Fast path for non-encoded characters - most common case */ + if (*s != '=') { *ptr++ = *s; + s++; + continue; + } + + /* Handle =XX encoded sequences */ + if (++s == send) break; + + /* Handle soft line breaks: =\r\n or =\n */ + if (s + 1 < send && *s == '\r' && *(s + 1) == '\n') { + s += 2; /* Skip \r\n */ + continue; + } + if (*s == '\n') { + s++; /* Skip \n */ + continue; } + + /* Decode =XX hex sequence */ + if ((c1 = hex2int(*s)) == -1) break; + if (++s == send) break; + if ((c2 = hex2int(*s)) == -1) break; + *ptr++ = (char)(c1 << 4 | c2); s++; - ss = s; } + buf = mrb_str_resize(mrb, buf, (mrb_int)(ptr - RSTRING_PTR(buf))); - mrb_str_cat(mrb, buf, ss, send-ss); mrb_ary_push(mrb, ary, buf); - return slen; + return (int)slen; +} + +static int +pack_uu(mrb_state *mrb, mrb_value src, mrb_value dst, mrb_int didx, int count) +{ + char *s = RSTRING_PTR(src); + int slen = (int)RSTRING_LEN(src); + int lines_written = 0; + int dlen = 0; + + if (count <= 1) count = 45; /* default line length for UU-encoding */ + + /* Calculate buffer size by accounting for per-line encoding + * Each line encodes separately, so padding happens per line, not globally + */ + mrb_int num_lines = (slen + count - 1) / count; /* Number of lines */ + mrb_int total_encoded = 0; + mrb_int temp_slen = slen; + + /* Calculate actual encoded size line by line */ + while (temp_slen > 0) { + mrb_int line_len = (temp_slen > count) ? count : temp_slen; + total_encoded += ((line_len + 2) / 3) * 4; /* Each line's encoded size */ + temp_slen -= line_len; + } + + /* Total buffer = encoded data + (length char + newline) per line + terminating line */ + mrb_int buffer_size = total_encoded + num_lines * 2 + 2; + str_len_ensure(mrb, dst, didx + buffer_size); + char *dptr = RSTRING_PTR(dst) + didx; + + while (slen > 0) { + int line_len = (slen > count) ? count : slen; + + /* Write line length character */ + *dptr++ = uu_encode_table[line_len & 0x3F]; + dlen++; + + int processed = 0; + while (processed < line_len) { + /* Process groups of 3 bytes -> 4 characters */ + uint32_t group = 0; + int bytes_in_group = 0; + + /* Read up to 3 bytes */ + for (int i = 0; i < 3 && processed < line_len; i++) { + group = (group << 8) | (unsigned char)s[processed++]; + bytes_in_group++; + } + + /* Pad incomplete group with zeros */ + group <<= (3 - bytes_in_group) * 8; + + /* Extract 4 groups of 6 bits and encode */ + *dptr++ = uu_encode_table[(group >> 18) & 0x3F]; + *dptr++ = uu_encode_table[(group >> 12) & 0x3F]; + *dptr++ = uu_encode_table[(group >> 6) & 0x3F]; + *dptr++ = uu_encode_table[group & 0x3F]; + dlen += 4; + } + + /* Add newline */ + *dptr++ = '\n'; + dlen++; + + s += line_len; + slen -= line_len; + lines_written++; + } + + /* Add terminating line if data was processed */ + if (lines_written > 0) { + *dptr++ = uu_encode_table[0]; /* length 0 */ + *dptr++ = '\n'; + dlen += 2; + } + + return dlen; +} + +static int +unpack_uu(mrb_state *mrb, const void *src, mrb_int slen, mrb_value ary) +{ + const unsigned char *s = (const unsigned char*)src; + const unsigned char *send = s + slen; + mrb_value result = mrb_str_new(mrb, 0, slen * 3 / 4); /* estimate result size */ + char *dptr = RSTRING_PTR(result); + char *dptr_start = dptr; + + while (s < send) { + /* Skip empty lines and whitespace */ + while (s < send && (*s == '\n' || *s == '\r' || *s == ' ' || *s == '\t')) { + s++; + } + if (s >= send) break; + + /* Read line length */ + int line_len = 0; + if (*s >= 32 && *s < 128 && uu_decode_table[*s - 32] >= 0) { + line_len = uu_decode_table[*s - 32]; + s++; + } + else { + break; /* Invalid length character */ + } + + /* Empty line indicates end */ + if (line_len == 0) { + break; + } + + /* Decode line data */ + int bytes_decoded = 0; + while (bytes_decoded < line_len && s + 3 < send) { + /* Decode 4 characters to 3 bytes */ + int c[4]; + int valid = 1; + + for (int i = 0; i < 4; i++) { + if (s >= send || *s < 32 || *s >= 128 || uu_decode_table[*s - 32] < 0) { + valid = 0; + break; + } + c[i] = uu_decode_table[*s++ - 32]; + } + + if (!valid) break; + + /* Combine 4 x 6-bit values into 3 x 8-bit values */ + uint32_t group = (c[0] << 18) | (c[1] << 12) | (c[2] << 6) | c[3]; + + /* Extract up to 3 bytes, don't exceed line length */ + int bytes_to_extract = (line_len - bytes_decoded > 3) ? 3 : (line_len - bytes_decoded); + + for (int i = 0; i < bytes_to_extract; i++) { + *dptr++ = (group >> (16 - i * 8)) & 0xFF; + bytes_decoded++; + } + } + + /* Skip to end of line */ + while (s < send && *s != '\n' && *s != '\r') { + s++; + } + } + + result = mrb_str_resize(mrb, result, (mrb_int)(dptr - dptr_start)); + mrb_ary_push(mrb, ary, result); + return (int)slen; } static int @@ -1061,7 +1608,7 @@ pack_nul(mrb_state *mrb, mrb_value dst, mrb_int didx, int count) } static void -check_x(mrb_state *mrb, int a, int count, char c) +check_x(mrb_state *mrb, mrb_int a, mrb_int count, char c) { if (a < count) { mrb_raisef(mrb, E_ARGUMENT_ERROR, "%c outside of string", c); @@ -1081,90 +1628,32 @@ has_tmpl(const struct tmpl *tmpl) return (tmpl->idx < RSTRING_LEN(tmpl->str)); } -static void -read_tmpl(mrb_state *mrb, struct tmpl *tmpl, enum pack_dir *dirp, enum pack_type *typep, int *sizep, int *countp, unsigned int *flagsp) +static enum pack_dir +read_tmpl(mrb_state *mrb, struct tmpl *tmpl, enum pack_type *typep, mrb_int *sizep, mrb_int *countp, unsigned int *flagsp) { mrb_int t, tlen; int ch, size = 0; enum pack_dir dir; enum pack_type type; - int count = 1; + mrb_int count = 1; unsigned int flags = 0; const char *tptr; tptr = RSTRING_PTR(tmpl->str); tlen = RSTRING_LEN(tmpl->str); + restart: + if (tmpl->idx >= tlen) return PACK_DIR_NONE; t = tptr[tmpl->idx++]; -alias: + alias: + + /* Handle whitespace - skip and restart */ + if (ISSPACE((char)t)) { + goto restart; + } + + /* Special handling for runtime-dependent formats and special characters */ switch (t) { - case 'A': - dir = PACK_DIR_STR; - type = PACK_TYPE_STRING; - flags |= PACK_FLAG_WIDTH | PACK_FLAG_COUNT2; - break; - case 'a': - dir = PACK_DIR_STR; - type = PACK_TYPE_STRING; - flags |= PACK_FLAG_WIDTH | PACK_FLAG_COUNT2 | PACK_FLAG_a; - break; - case 'C': - dir = PACK_DIR_CHAR; - type = PACK_TYPE_INTEGER; - size = 1; - break; - case 'c': - dir = PACK_DIR_CHAR; - type = PACK_TYPE_INTEGER; - size = 1; - flags |= PACK_FLAG_SIGNED; - break; - case 'D': case 'd': - dir = PACK_DIR_DOUBLE; - type = PACK_TYPE_FLOAT; - size = 8; - flags |= PACK_FLAG_SIGNED; - break; - case 'F': case 'f': - dir = PACK_DIR_FLOAT; - type = PACK_TYPE_FLOAT; - size = 4; - flags |= PACK_FLAG_SIGNED; - break; - case 'E': - dir = PACK_DIR_DOUBLE; - type = PACK_TYPE_FLOAT; - size = 8; - flags |= PACK_FLAG_SIGNED | PACK_FLAG_LT; - break; - case 'e': - dir = PACK_DIR_FLOAT; - type = PACK_TYPE_FLOAT; - size = 4; - flags |= PACK_FLAG_SIGNED | PACK_FLAG_LT; - break; - case 'G': - dir = PACK_DIR_DOUBLE; - type = PACK_TYPE_FLOAT; - size = 8; - flags |= PACK_FLAG_SIGNED | PACK_FLAG_GT; - break; - case 'g': - dir = PACK_DIR_FLOAT; - type = PACK_TYPE_FLOAT; - size = 4; - flags |= PACK_FLAG_SIGNED | PACK_FLAG_GT; - break; - case 'H': - dir = PACK_DIR_HEX; - type = PACK_TYPE_STRING; - flags |= PACK_FLAG_COUNT2; - break; - case 'h': - dir = PACK_DIR_HEX; - type = PACK_TYPE_STRING; - flags |= PACK_FLAG_COUNT2 | PACK_FLAG_LSB; - break; case 'I': switch (sizeof(int)) { case 2: t = 'S'; goto alias; @@ -1183,107 +1672,49 @@ read_tmpl(mrb_state *mrb, struct tmpl *tmpl, enum pack_dir *dirp, enum pack_type mrb_raisef(mrb, E_RUNTIME_ERROR, "mruby-pack does not support sizeof(int) == %d", (int)sizeof(int)); } break; - case 'L': - dir = PACK_DIR_LONG; - type = PACK_TYPE_INTEGER; - size = 4; - break; - case 'l': - dir = PACK_DIR_LONG; - type = PACK_TYPE_INTEGER; - size = 4; - flags |= PACK_FLAG_SIGNED; - break; - case 'w': - dir = PACK_DIR_BER; - type = PACK_TYPE_INTEGER; - flags |= PACK_FLAG_SIGNED; - break; - case 'm': - dir = PACK_DIR_BASE64; - type = PACK_TYPE_STRING; - flags |= PACK_FLAG_WIDTH | PACK_FLAG_COUNT2; - break; - case 'M': - dir = PACK_DIR_QENC; - type = PACK_TYPE_STRING; - flags |= PACK_FLAG_WIDTH | PACK_FLAG_COUNT2; - break; - case 'N': /* = "L>" */ - dir = PACK_DIR_LONG; - type = PACK_TYPE_INTEGER; - size = 4; - flags |= PACK_FLAG_GT; - break; - case 'n': /* = "S>" */ - dir = PACK_DIR_SHORT; - type = PACK_TYPE_INTEGER; - size = 2; - flags |= PACK_FLAG_GT; - break; - case 'Q': - dir = PACK_DIR_QUAD; - type = PACK_TYPE_INTEGER; - size = 8; - break; - case 'q': - dir = PACK_DIR_QUAD; - type = PACK_TYPE_INTEGER; - size = 8; - flags |= PACK_FLAG_SIGNED; - break; - case 'S': - dir = PACK_DIR_SHORT; - type = PACK_TYPE_INTEGER; - size = 2; - break; - case 's': - dir = PACK_DIR_SHORT; - type = PACK_TYPE_INTEGER; - size = 2; - flags |= PACK_FLAG_SIGNED; - break; - case 'U': - dir = PACK_DIR_UTF8; - type = PACK_TYPE_INTEGER; - break; - case 'V': /* = "L<" */ - dir = PACK_DIR_LONG; - type = PACK_TYPE_INTEGER; - size = 4; - flags |= PACK_FLAG_LT; - break; - case 'v': /* = "S<" */ - dir = PACK_DIR_SHORT; - type = PACK_TYPE_INTEGER; - size = 2; - flags |= PACK_FLAG_LT; - break; - case 'x': - dir = PACK_DIR_NUL; - type = PACK_TYPE_NONE; - break; - case 'X': - dir = PACK_DIR_BACK; - type = PACK_TYPE_NONE; - break; - case '@': - dir = PACK_DIR_ABS; - type = PACK_TYPE_NONE; + case 'J': + switch (sizeof(intptr_t)) { + case 4: t = 'L'; goto alias; + case 8: t = 'Q'; goto alias; + default: + mrb_raisef(mrb, E_RUNTIME_ERROR, "mruby-pack does not support sizeof(uintptr_t) == %d", (int)sizeof(uintptr_t)); + } break; - case 'Z': - dir = PACK_DIR_STR; - type = PACK_TYPE_STRING; - flags |= PACK_FLAG_WIDTH | PACK_FLAG_COUNT2 | PACK_FLAG_Z; + case 'j': + switch (sizeof(intptr_t)) { + case 4: t = 'l'; goto alias; + case 8: t = 'q'; goto alias; + default: + mrb_raisef(mrb, E_RUNTIME_ERROR, "mruby-pack does not support sizeof(intptr_t) == %d", (int)sizeof(intptr_t)); + } break; + case '#': + while (++tmpl->idx < tlen && tptr[tmpl->idx] != '\n') + ; + goto restart; case 'p': case 'P': case '%': mrb_raisef(mrb, E_ARGUMENT_ERROR, "%c is not supported", (char)t); break; default: - dir = PACK_DIR_INVALID; - type = PACK_TYPE_NONE; - break; + /* Use O(1) lookup table for standard format characters */ + if (t >= 0 && t < 256) { + format_info_t info_val = pack_format_info((unsigned char)t); + const format_info_t *info = &info_val; + if (info->dir != PACK_DIR_NONE) { + /* Valid format character found in lookup table */ + dir = info->dir; + type = info->type; + size = info->size; + flags = info->base_flags; + break; + } + } + + /* Handle invalid characters */ + char c = (char)t; + mrb_value s = mrb_str_new(mrb, &c, 1); + mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown unpack directive %!v", s); } /* read suffix [0-9*_!<>] */ @@ -1291,27 +1722,31 @@ read_tmpl(mrb_state *mrb, struct tmpl *tmpl, enum pack_dir *dirp, enum pack_type ch = tptr[tmpl->idx]; if (ISDIGIT(ch)) { char *e; - mrb_int n = mrb_int_read(tptr+tmpl->idx, tptr+tlen, &e); - if (e == NULL || n > INT_MAX) { + mrb_int n; + if (!mrb_read_int(tptr+tmpl->idx, tptr+tlen, &e, &n) || INT_MAX < n) { mrb_raise(mrb, E_RUNTIME_ERROR, "too big template length"); } - count = (int)n; + count = n; tmpl->idx = (int)(e - tptr); continue; - } else if (ch == '*') { + } + else if (ch == '*') { if (type == PACK_TYPE_NONE) count = 0; else count = -1; - } else if (ch == '_' || ch == '!' || ch == '<' || ch == '>') { + } + else if (ch == '_' || ch == '!' || ch == '<' || ch == '>') { if (strchr("sSiIlLqQ", (int)t) == NULL) { mrb_raisef(mrb, E_ARGUMENT_ERROR, "'%c' allowed only after types sSiIlLqQ", ch); } if (ch == '_' || ch == '!') { flags |= PACK_FLAG_s; - } else if (ch == '<') { + } + else if (ch == '<') { flags |= PACK_FLAG_LT; - } else if (ch == '>') { + } + else if (ch == '>') { flags |= PACK_FLAG_GT; } } @@ -1325,39 +1760,74 @@ read_tmpl(mrb_state *mrb, struct tmpl *tmpl, enum pack_dir *dirp, enum pack_type flags |= PACK_FLAG_LITTLEENDIAN; } - *dirp = dir; *typep = type; *sizep = size; *countp = count; *flagsp = flags; + return dir; } +/* + * call-seq: + * array.pack(template) -> string + * + * Packs the contents of array into a binary string according to the + * directives in template. Directives are single characters from the + * table below. Each directive may be followed by a number indicating + * the number of times to repeat the directive. + * + * Template string directives: + * C - 8-bit unsigned integer (unsigned char) + * c - 8-bit signed integer (signed char) + * S - 16-bit unsigned integer, native endian (uint16_t) + * s - 16-bit signed integer, native endian (int16_t) + * L - 32-bit unsigned integer, native endian (uint32_t) + * l - 32-bit signed integer, native endian (int32_t) + * Q - 64-bit unsigned integer, native endian (uint64_t) + * q - 64-bit signed integer, native endian (int64_t) + * n - 16-bit unsigned integer, network byte order + * N - 32-bit unsigned integer, network byte order + * v - 16-bit unsigned integer, little endian + * V - 32-bit unsigned integer, little endian + * f - single precision float, native format + * d - double precision float, native format + * A - ASCII string, space padded + * a - ASCII string, null padded + * Z - null-terminated string + * H - hex string, high nibble first + * h - hex string, low nibble first + * x - null byte + * X - back up one byte + * @ - null fill to absolute position + * + * [1, 2, 3].pack("CCC") #=> "\x01\x02\x03" + * [1, 2].pack("S*") #=> "\x01\x00\x02\x00" (little endian) + * ["hello"].pack("A10") #=> "hello " + */ static mrb_value mrb_pack_pack(mrb_state *mrb, mrb_value ary) { - mrb_value o, result; - mrb_int aidx; + mrb_value o; struct tmpl tmpl; - int count; + enum pack_type type; + mrb_int count; + mrb_int size; unsigned int flags; enum pack_dir dir; - enum pack_type type; - int ridx, size; prepare_tmpl(mrb, &tmpl); - result = mrb_str_new(mrb, NULL, 128); /* allocate initial buffer */ - aidx = 0; - ridx = 0; + mrb_value result = mrb_str_new(mrb, NULL, 128); /* allocate initial buffer */ + mrb_int aidx = 0; + mrb_int ridx = 0; while (has_tmpl(&tmpl)) { - read_tmpl(mrb, &tmpl, &dir, &type, &size, &count, &flags); + dir = read_tmpl(mrb, &tmpl, &type, &size, &count, &flags); - if (dir == PACK_DIR_INVALID) - continue; - else if (dir == PACK_DIR_NUL) { + if (dir == PACK_DIR_NONE) break; + if (dir == PACK_DIR_NUL) { grow: if (ridx > INT_MAX - count) goto overflow; - ridx += pack_nul(mrb, result, ridx, count); + ridx += pack_nul(mrb, result, ridx, (int)count); continue; } else if (dir == PACK_DIR_BACK) { @@ -1381,7 +1851,7 @@ mrb_pack_pack(mrb_state *mrb, mrb_value ary) if (count == 0 && !(flags & PACK_FLAG_WIDTH)) break; - o = mrb_ary_ref(mrb, ary, aidx); + o = RARRAY_PTR(ary)[aidx]; if (type == PACK_TYPE_INTEGER) { o = mrb_ensure_int_type(mrb, o); } @@ -1398,7 +1868,9 @@ mrb_pack_pack(mrb_state *mrb, mrb_value ary) } } + /* Optimized dispatch using grouped format handling for better branch prediction */ switch (dir) { + /* Integer formats - all use (mrb, o, result, ridx, flags) signature */ case PACK_DIR_CHAR: ridx += pack_char(mrb, o, result, ridx, flags); break; @@ -1414,19 +1886,9 @@ mrb_pack_pack(mrb_state *mrb, mrb_value ary) case PACK_DIR_BER: ridx += pack_BER(mrb, o, result, ridx, flags); break; - case PACK_DIR_BASE64: - ridx += pack_base64(mrb, o, result, ridx, count); - break; - case PACK_DIR_QENC: - ridx += pack_qenc(mrb, o, result, ridx, count); - break; - case PACK_DIR_HEX: - ridx += pack_hex(mrb, o, result, ridx, count, flags); - break; - case PACK_DIR_STR: - ridx += pack_str(mrb, o, result, ridx, count, flags); - break; + #ifndef MRB_NO_FLOAT + /* Float formats - all use (mrb, o, result, ridx, flags) signature */ case PACK_DIR_DOUBLE: ridx += pack_double(mrb, o, result, ridx, flags); break; @@ -1434,9 +1896,34 @@ mrb_pack_pack(mrb_state *mrb, mrb_value ary) ridx += pack_float(mrb, o, result, ridx, flags); break; #endif + + /* String formats with count - use (mrb, o, result, ridx, count, flags) signature */ + case PACK_DIR_HEX: + ridx += pack_hex(mrb, o, result, ridx, (int)count, flags); + break; + case PACK_DIR_BSTR: + ridx += pack_bstr(mrb, o, result, ridx, (int)count, flags); + break; + case PACK_DIR_STR: + ridx += pack_str(mrb, o, result, ridx, (int)count, flags); + break; + + /* String formats with count only - use (mrb, o, result, ridx, count) signature */ + case PACK_DIR_BASE64: + ridx += pack_base64(mrb, o, result, ridx, (int)count); + break; + case PACK_DIR_UU: + ridx += pack_uu(mrb, o, result, ridx, (int)count); + break; + case PACK_DIR_QENC: + ridx += pack_qenc(mrb, o, result, ridx, (int)count); + break; + + /* UTF8 format - special signature (mrb, o, result, ridx, count, flags) */ case PACK_DIR_UTF8: - ridx += pack_utf8(mrb, o, result, ridx, count, flags); + ridx += pack_utf8(mrb, o, result, ridx, (int)count, flags); break; + default: break; } @@ -1460,30 +1947,26 @@ mrb_pack_pack(mrb_state *mrb, mrb_value ary) } static mrb_value -pack_unpack(mrb_state *mrb, mrb_value str, int single) +pack_unpack(mrb_state *mrb, mrb_value str, mrb_bool single) { - mrb_value result; struct tmpl tmpl; - int count; + mrb_int count; unsigned int flags; - enum pack_dir dir; enum pack_type type; - int size; - int srcidx, srclen; + mrb_int size; const unsigned char *sptr; prepare_tmpl(mrb, &tmpl); - srcidx = 0; - srclen = (int)RSTRING_LEN(str); + mrb_int srcidx = 0; + mrb_int srclen = RSTRING_LEN(str); - result = mrb_ary_new(mrb); + mrb_value result = mrb_ary_new(mrb); while (has_tmpl(&tmpl)) { - read_tmpl(mrb, &tmpl, &dir, &type, &size, &count, &flags); + enum pack_dir dir = read_tmpl(mrb, &tmpl, &type, &size, &count, &flags); - if (dir == PACK_DIR_INVALID) - continue; - else if (dir == PACK_DIR_NUL) { + if (dir == PACK_DIR_NONE) break; + if (dir == PACK_DIR_NUL) { check_x(mrb, srclen-srcidx, count, 'x'); srcidx += count; continue; @@ -1499,21 +1982,37 @@ pack_unpack(mrb_state *mrb, mrb_value str, int single) continue; } - /* PACK_FLAG_COUNT2 directions */ - sptr = (const unsigned char *)RSTRING_PTR(str) + srcidx; + /* Optimized dispatch for PACK_FLAG_COUNT2 formats - grouped by signature */ + sptr = (const unsigned char*)RSTRING_PTR(str) + srcidx; switch (dir) { + /* String formats with count and flags - (mrb, sptr, len, result, count, flags) */ case PACK_DIR_HEX: - srcidx += unpack_hex(mrb, sptr, srclen - srcidx, result, count, flags); + srcidx += unpack_hex(mrb, sptr, srclen - srcidx, result, (int)count, flags); + if (single) goto single_return; + continue; + case PACK_DIR_BSTR: + srcidx += unpack_bstr(mrb, sptr, srclen - srcidx, result, (int)count, flags); + if (single) goto single_return; continue; case PACK_DIR_STR: - srcidx += unpack_str(mrb, sptr, srclen - srcidx, result, count, flags); + srcidx += unpack_str(mrb, sptr, srclen - srcidx, result, (int)count, flags); + if (single) goto single_return; continue; + + /* String formats without flags - (mrb, sptr, len, result) */ case PACK_DIR_BASE64: srcidx += unpack_base64(mrb, sptr, srclen - srcidx, result); + if (single) goto single_return; + continue; + case PACK_DIR_UU: + srcidx += unpack_uu(mrb, sptr, srclen - srcidx, result); + if (single) goto single_return; continue; case PACK_DIR_QENC: srcidx += unpack_qenc(mrb, sptr, srclen - srcidx, result); + if (single) goto single_return; continue; + default: break; } @@ -1527,7 +2026,9 @@ pack_unpack(mrb_state *mrb, mrb_value str, int single) } sptr = (const unsigned char*)RSTRING_PTR(str) + srcidx; + /* Optimized dispatch for element-by-element formats - grouped by signature */ switch (dir) { + /* Integer formats - all use (mrb, sptr, len, result, flags) signature */ case PACK_DIR_CHAR: srcidx += unpack_char(mrb, sptr, srclen - srcidx, result, flags); break; @@ -1543,7 +2044,9 @@ pack_unpack(mrb_state *mrb, mrb_value str, int single) case PACK_DIR_BER: srcidx += unpack_BER(mrb, sptr, srclen - srcidx, result, flags); break; + #ifndef MRB_NO_FLOAT + /* Float formats - all use (mrb, sptr, len, result, flags) signature */ case PACK_DIR_FLOAT: srcidx += unpack_float(mrb, sptr, srclen - srcidx, result, flags); break; @@ -1551,9 +2054,12 @@ pack_unpack(mrb_state *mrb, mrb_value str, int single) srcidx += unpack_double(mrb, sptr, srclen - srcidx, result, flags); break; #endif + + /* UTF8 format - uses (mrb, sptr, len, result, flags) signature */ case PACK_DIR_UTF8: srcidx += unpack_utf8(mrb, sptr, srclen - srcidx, result, flags); break; + default: mrb_raise(mrb, E_RUNTIME_ERROR, "mruby-pack's bug"); } @@ -1561,34 +2067,59 @@ pack_unpack(mrb_state *mrb, mrb_value str, int single) count--; } } - if (single) { - if (RARRAY_LEN(result) > 0) { - return RARRAY_PTR(result)[0]; - } - return mrb_nil_value(); + } + if (single) { + single_return: + if (RARRAY_LEN(result) > 0) { + return RARRAY_PTR(result)[0]; } + return mrb_nil_value(); } return result; } +/* + * call-seq: + * string.unpack(template) -> array + * + * Unpacks the contents of string according to the template string, + * returning an array of values. Template uses the same format as + * Array#pack. See Array#pack for template string format. + * + * "\x01\x02\x03".unpack("CCC") #=> [1, 2, 3] + * "\x01\x00\x02\x00".unpack("S*") #=> [1, 2] (little endian) + * "hello ".unpack("A10") #=> ["hello"] + */ static mrb_value mrb_pack_unpack(mrb_state *mrb, mrb_value str) { - return pack_unpack(mrb, str, 0); + return pack_unpack(mrb, str, FALSE); } +/* + * call-seq: + * string.unpack1(template) -> object + * + * Unpacks the first value from string according to the template string. + * This is equivalent to string.unpack(template)[0] but more efficient + * when only the first value is needed. + * + * "\x01\x02\x03".unpack1("C") #=> 1 + * "\x01\x00\x02\x00".unpack1("S") #=> 1 (little endian) + * "hello ".unpack1("A10") #=> "hello" + */ static mrb_value mrb_pack_unpack1(mrb_state *mrb, mrb_value str) { - return pack_unpack(mrb, str, 1); + return pack_unpack(mrb, str, TRUE); } void mrb_mruby_pack_gem_init(mrb_state *mrb) { - mrb_define_method(mrb, mrb->array_class, "pack", mrb_pack_pack, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, mrb->string_class, "unpack", mrb_pack_unpack, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, mrb->string_class, "unpack1", mrb_pack_unpack1, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, mrb->array_class, MRB_SYM(pack), mrb_pack_pack, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, mrb->string_class, MRB_SYM(unpack), mrb_pack_unpack, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, mrb->string_class, MRB_SYM(unpack1), mrb_pack_unpack1, MRB_ARGS_REQ(1)); } void diff --git a/mrbgems/mruby-pack/test/pack.rb b/mrbgems/mruby-pack/test/pack.rb index 16db6607b2..61750e0b90 100644 --- a/mrbgems/mruby-pack/test/pack.rb +++ b/mrbgems/mruby-pack/test/pack.rb @@ -1,4 +1,3 @@ -# coding: utf-8 PACK_IS_LITTLE_ENDIAN = "\x01\00".unpack('S')[0] == 0x01 def assert_pack tmpl, packed, unpacked @@ -39,6 +38,45 @@ def assert_pack tmpl, packed, unpacked assert_equal ["あ"], "=E3=81=82=\n".unpack("M") end +# pack & unpack 'u' (UU-encode) +assert('pack("u")') do + # Basic string test with known good values + assert_pack "u", "-2&5L;&\\L(%=O - -#ifdef MRB_NO_STDIO -# error print conflicts 'MRB_NO_STDIO' in your build configuration -#endif - -#include -#include -#if defined(_WIN32) -# include -# include -#ifdef _MSC_VER -# define isatty(x) _isatty(x) -# define fileno(x) _fileno(x) -#endif -#endif - -static void -printstr(mrb_state *mrb, const char *p, mrb_int len) -{ -#if defined(_WIN32) - if (isatty(fileno(stdout))) { - DWORD written; - int wlen = MultiByteToWideChar(CP_UTF8, 0, p, (int)len, NULL, 0); - wchar_t* utf16 = (wchar_t*)mrb_malloc(mrb, (wlen+1) * sizeof(wchar_t)); - if (MultiByteToWideChar(CP_UTF8, 0, p, (int)len, utf16, wlen) > 0) { - utf16[wlen] = 0; - WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE), - utf16, (DWORD)wlen, &written, NULL); - } - mrb_free(mrb, utf16); - } else -#endif - fwrite(p, (size_t)len, 1, stdout); - fflush(stdout); -} - -static mrb_value -mrb_printstr(mrb_state *mrb, mrb_value self) -{ - mrb_value s = mrb_get_arg1(mrb); - - if (mrb_string_p(s)) { - printstr(mrb, RSTRING_PTR(s), RSTRING_LEN(s)); - } - return mrb_nil_value(); -} - -void -mrb_mruby_print_gem_init(mrb_state* mrb) -{ - struct RClass *krn; - krn = mrb->kernel_module; - mrb_define_method(mrb, krn, "__printstr__", mrb_printstr, MRB_ARGS_REQ(1)); -} - -void -mrb_mruby_print_gem_final(mrb_state* mrb) -{ -} diff --git a/mrbgems/mruby-proc-binding/README.md b/mrbgems/mruby-proc-binding/README.md new file mode 100644 index 0000000000..0f6051c150 --- /dev/null +++ b/mrbgems/mruby-proc-binding/README.md @@ -0,0 +1,140 @@ +# mruby-proc-binding + +This mrbgem provides a `binding` method for `Proc` objects in mruby. This method returns a `Binding` object, which allows you to inspect and manipulate the lexical scope (local variables and `self`) of the `Proc` from which it was obtained. + +## Usage Examples + +Here's how you can use `mruby-proc-binding`: + +### Getting a Binding + +First, obtain a `Binding` object from a `Proc`: + +```ruby +def create_proc + a = 10 + b = 20 + ->(c) { a + b + c } # A sample proc +end + +my_proc = create_proc +# Create a proc that takes one argument (c) and closes over a and b. +# Note: For the binding to capture local variables like 'a' and 'b', +# they must be referenced by a Proc defined in the same scope. +# A simple `proc {}` or `->{}` might not capture them as expected +# unless they are used within that proc. +# To ensure 'a' and 'b' are part of the environment captured by the binding +# from `my_proc.binding`, we ensure `my_proc` itself uses them or is defined +# in a scope where they are present and then `binding` is called on a proc +# that has access to this environment. + +# Let's define a new proc specifically for getting a binding +# that captures the desired local variables. +local_var_scope_proc = nil +a_val = 1 +b_val = 2 +local_var_scope_proc = -> { + # 'a_val' and 'b_val' are now in this proc's lexical scope +} +bind = local_var_scope_proc.binding +``` + +### Inspecting Local Variables + +You can list the names of local variables within the proc's scope: + +```ruby +# Continuing from the previous example: +# Variables 'a_val', 'b_val', and 'local_var_scope_proc' are expected. +p bind.local_variables +# Expected output (order may vary): +# => [:a_val, :b_val, :local_var_scope_proc, :bind] +# Note: The binding itself and other variables defined in the +# same scope might also be listed. +``` + +### Getting Local Variable Values + +Retrieve the value of a specific local variable: + +```ruby +p bind.local_variable_get(:a_val) # => 1 +p bind.local_variable_get(:b_val) # => 2 +``` + +### Setting Local Variable Values + +You can also modify the values of local variables within the binding's scope: + +```ruby +bind.local_variable_set(:a_val, 100) +p bind.local_variable_get(:a_val) # => 100 + +# This also affects evaluation within the binding +p bind.eval("a_val + b_val") # => 102 (100 + 2) +``` + +### Evaluating Code + +Execute arbitrary Ruby code within the binding's context: + +```ruby +# Define some variables in a scope +def get_binding_for_eval + x = 5 + y = 10 + proc {}.binding # Create a binding in this scope +end + +eval_bind = get_binding_for_eval +p eval_bind.eval("x * y") # => 50 +p eval_bind.eval("self") # Shows the receiver of the proc +``` + +### Getting Source Location + +If `mruby-proc-ext` is also available, you can get the source location of a proc: + +```ruby +# Assuming __FILE__ is "test.rb" and this line is line 50 +my_lambda = -> { } # This proc is defined at [__FILE__, __LINE__] +location_binding = my_lambda.binding + +# This requires mruby-proc-ext +if location_binding.respond_to?(:source_location) + p location_binding.source_location # => ["test.rb", 50] (approximately) +else + puts "Binding#source_location not available. Is mruby-proc-ext included?" +end +``` + +A more robust example for capturing variables for `Proc#binding`: + +```ruby +def execution_binding + name = "Ruby" + # The proc must be created in the scope where variables exist + # and it doesn't necessarily need to *use* them for them to be available in the binding. + # However, the most reliable way to ensure they are part of the environment + # is if the proc is created in that environment. + binding_proc = proc {} # This proc is created in the current scope + binding_proc.binding # Returns a binding for this scope +end + +b = execution_binding +p b.local_variables.sort # => [:b, :binding_proc, :name] (or similar) +p b.local_variable_get(:name) # => "Ruby" +b.local_variable_set(:name, "mruby") +p b.eval("name") # => "mruby" +``` + +## Dependencies + +This mrbgem has the following dependencies: + +- **`mruby-binding`**: Provides the core `Binding` object functionality. This gem is a prerequisite. +- **`mruby-proc-ext`**: Required for the `Binding#source_location` method. If `mruby-proc-ext` is not included in your mruby build, the `source_location` method may not be available on binding objects, or it might return `nil`. + +## License + +MIT diff --git a/mrbgems/mruby-proc-binding/mrbgem.rake b/mrbgems/mruby-proc-binding/mrbgem.rake index 425aac8479..53e6faaef0 100644 --- a/mrbgems/mruby-proc-binding/mrbgem.rake +++ b/mrbgems/mruby-proc-binding/mrbgem.rake @@ -3,7 +3,8 @@ MRuby::Gem::Specification.new('mruby-proc-binding') do |spec| spec.author = 'mruby developers' spec.summary = 'Proc#binding method' - spec.add_dependency('mruby-binding-core', :core => 'mruby-binding-core') - spec.add_test_dependency('mruby-binding', :core => 'mruby-binding') + spec.add_dependency('mruby-binding', :core => 'mruby-binding') + spec.add_dependency('mruby-proc-ext', :core => 'mruby-proc-ext') + spec.add_test_dependency('mruby-eval', :core => 'mruby-eval') spec.add_test_dependency('mruby-compiler', :core => 'mruby-compiler') end diff --git a/mrbgems/mruby-proc-binding/src/proc-binding.c b/mrbgems/mruby-proc-binding/src/proc-binding.c deleted file mode 100644 index d176034d2d..0000000000 --- a/mrbgems/mruby-proc-binding/src/proc-binding.c +++ /dev/null @@ -1,50 +0,0 @@ -#include -#include -#include -#include - -/* provided by mruby-proc-ext */ -mrb_value mrb_proc_source_location(mrb_state *mrb, struct RProc *p); - -/* provided by mruby-binding-core */ -mrb_value mrb_binding_alloc(mrb_state *mrb); -struct RProc *mrb_binding_wrap_lvspace(mrb_state *mrb, const struct RProc *proc, struct REnv **envp); - -static mrb_value -mrb_proc_binding(mrb_state *mrb, mrb_value procval) -{ - mrb_value binding = mrb_binding_alloc(mrb); - const struct RProc *proc = mrb_proc_ptr(procval); - struct REnv *env; - - mrb_value receiver; - if (!proc || MRB_PROC_CFUNC_P(proc) || !proc->upper || MRB_PROC_CFUNC_P(proc->upper)) { - env = NULL; - proc = NULL; - receiver = mrb_nil_value(); - } - else { - env = MRB_PROC_ENV(proc); - mrb_assert(env); - proc = proc->upper; - receiver = MRB_ENV_LEN(env) > 0 ? env->stack[0] : mrb_nil_value(); - } - - proc = mrb_binding_wrap_lvspace(mrb, proc, &env); - mrb_iv_set(mrb, binding, MRB_SYM(proc), mrb_obj_value((void *)proc)); - mrb_iv_set(mrb, binding, MRB_SYM(recv), receiver); - mrb_iv_set(mrb, binding, MRB_SYM(env), mrb_obj_value(env)); - mrb_iv_set(mrb, binding, MRB_SYM(source_location), mrb_proc_source_location(mrb, mrb_proc_ptr(procval))); - return binding; -} - -void -mrb_mruby_proc_binding_gem_init(mrb_state *mrb) -{ - mrb_define_method(mrb, mrb->proc_class, "binding", mrb_proc_binding, MRB_ARGS_NONE()); -} - -void -mrb_mruby_proc_binding_gem_final(mrb_state *mrb) -{ -} diff --git a/mrbgems/mruby-proc-binding/src/proc_binding.c b/mrbgems/mruby-proc-binding/src/proc_binding.c new file mode 100644 index 0000000000..770237a688 --- /dev/null +++ b/mrbgems/mruby-proc-binding/src/proc_binding.c @@ -0,0 +1,75 @@ +#include +#include +#include + +/* provided by mruby-proc-ext */ +mrb_value mrb_proc_source_location(mrb_state *mrb, const struct RProc *p); + +/* provided by mruby-binding */ +mrb_value mrb_binding_new(mrb_state *mrb, const struct RProc *proc, mrb_value recv, struct REnv *env); + +/* + * call-seq: + * prc.binding -> binding + * + * Returns a Binding object, which is the execution context that the proc + * was defined in. The returned binding retains the context in which the + * proc was created, including local variables, methods, and constants. + * + * def fred(param) + * proc {} + * end + * + * b = fred(99).binding + * b.eval("param") #=> 99 + * b.eval("param = 1") + * b.eval("param") #=> 1 + * + * # Local variables in the binding's scope + * p = proc { |x| x + 1 } + * a, b, c = 1, 2, 3 + * bind = p.binding + * bind.local_variables #=> [:a, :b, :bind, :c, :p] + */ +static mrb_value +mrb_proc_binding(mrb_state *mrb, mrb_value procval) +{ + const struct RProc *proc = mrb_proc_ptr(procval); + struct REnv *env; + mrb_value receiver; + + if (!proc || MRB_PROC_CFUNC_P(proc) || !proc->upper || MRB_PROC_CFUNC_P(proc->upper)) { + env = NULL; + proc = NULL; + receiver = mrb_nil_value(); + } + else { + env = MRB_PROC_ENV(proc); + mrb_assert(env); + proc = proc->upper; + receiver = MRB_ENV_LEN(env) > 0 ? env->stack[0] : mrb_nil_value(); + } + + mrb_value binding = mrb_binding_new(mrb, proc, receiver, env); + mrb_iv_set(mrb, binding, MRB_SYM(source_location), mrb_proc_source_location(mrb, mrb_proc_ptr(procval))); + return binding; +} + +/* + * Initializes the mruby-proc-binding gem by adding the binding method + * to the Proc class. This allows any proc object to return its execution + * context as a Binding object. + * + * The binding method takes no arguments and returns a Binding object + * that captures the lexical environment where the proc was created. + */ +void +mrb_mruby_proc_binding_gem_init(mrb_state *mrb) +{ + mrb_define_method_id(mrb, mrb->proc_class, MRB_SYM(binding), mrb_proc_binding, MRB_ARGS_NONE()); +} + +void +mrb_mruby_proc_binding_gem_final(mrb_state *mrb) +{ +} diff --git a/mrbgems/mruby-proc-binding/test/proc-binding.c b/mrbgems/mruby-proc-binding/test/proc_binding.c similarity index 100% rename from mrbgems/mruby-proc-binding/test/proc-binding.c rename to mrbgems/mruby-proc-binding/test/proc_binding.c diff --git a/mrbgems/mruby-proc-binding/test/proc-binding.rb b/mrbgems/mruby-proc-binding/test/proc_binding.rb similarity index 100% rename from mrbgems/mruby-proc-binding/test/proc-binding.rb rename to mrbgems/mruby-proc-binding/test/proc_binding.rb diff --git a/mrbgems/mruby-proc-ext/README.md b/mrbgems/mruby-proc-ext/README.md new file mode 100644 index 0000000000..0f4fa82001 --- /dev/null +++ b/mrbgems/mruby-proc-ext/README.md @@ -0,0 +1,107 @@ +# mruby-proc-ext + +This mrbgem extends the functionality of the `Proc` class in mruby, providing additional methods for common operations and introspection. + +## Features + +This gem adds the following methods to the `Proc` class: + +- **`===`**: Alias for `Proc#call`. Allows procs to be used in `case` statements. + + ```ruby + upcase_proc = ->(s) { s.upcase } + case "hello" + when upcase_proc + # this won't match directly, but demonstrates usage if proc returned boolean + end + # More practically: + is_even = ->(n) { n % 2 == 0 } + puts "4 is even" if is_even === 4 # => true + ``` + +- **`yield`**: Alias for `Proc#call`. + + ```ruby + add_one = ->(x) { x + 1 } + puts add_one.yield(5) # => 6 + ``` + +- **`to_proc`**: Returns `self`. Useful for methods expecting a proc. + + ```ruby + p = Proc.new { |x| x * 2 } + [1, 2, 3].map(&p.to_proc) # => [2, 4, 6] + ``` + +- **`curry(arity=self.arity)`**: Returns a curried proc. + + ```ruby + adder = ->(a, b, c) { a + b + c } + curried_adder = adder.curry + add5 = curried_adder.call(2, 3) + puts add5.call(4) # => 9 + + mul = ->(x,y) { x * y } + mul_by_5 = mul.curry.call(5) + puts mul_by_5.call(3) # => 15 + ``` + +- **`<< (composition)`**: Returns a new proc that represents the composition of two procs (g << f is g(f(x))). + + ```ruby + add_one = ->(x) { x + 1 } + double_it = ->(x) { x * 2 } + composed_proc = double_it << add_one # double_it(add_one(x)) + puts composed_proc.call(5) # => double_it(6) => 12 + ``` + +- **`>> (composition)`**: Returns a new proc that represents the composition of two procs (f >> g is g(f(x))). + + ```ruby + add_one = ->(x) { x + 1 } + double_it = ->(x) { x * 2 } + composed_proc = add_one >> double_it # double_it(add_one(x)) + puts composed_proc.call(5) # => double_it(6) => 12 + ``` + +- **`lambda?`**: Returns `true` if the proc is a lambda, `false` otherwise. + + ```ruby + my_lambda = -> {} + my_proc = Proc.new {} + puts my_lambda.lambda? # => true + puts my_proc.lambda? # => false + ``` + +- **`source_location`**: Returns an array containing the source filename and line number where the proc was defined, or `nil` if this information is not available. + + ```ruby + # Assuming this code is in "test.rb" at line 5 + my_proc = Proc.new {} + p my_proc.source_location # => ["test.rb", 5] (if debug info enabled) + ``` + +- **`inspect`**: Returns a string containing a human-readable representation of the proc, including its source location (if available) and whether it's a lambda. + + ```ruby + my_lambda = ->(x) { x * 2 } + # Might output: # + puts my_lambda.inspect + ``` + +- **`parameters`**: Returns an array of arrays, describing the parameters accepted by the proc. Each inner array contains the parameter type (`:req`, `:opt`, `:rest`, `:key`, `:keyrest`, `:block`) and its name. + + ```ruby + prc = ->(a, b=20, *rest, keyparam:, &blk) { } + p prc.parameters + # => [[:req, :a], [:opt, :b], [:rest, :rest], [:keyreq, :keyparam], [:block, :blk]] + ``` + +This gem also adds: + +- **`Kernel#proc`**: Creates a new proc from a block. This is similar to `Proc.new` but ensures the created proc is not a lambda. + + ```ruby + p1 = proc { |a| a } + puts p1.lambda? # => false + ``` diff --git a/mrbgems/mruby-proc-ext/mrblib/proc.rb b/mrbgems/mruby-proc-ext/mrblib/proc.rb index 0c0df205ec..320549ce70 100644 --- a/mrbgems/mruby-proc-ext/mrblib/proc.rb +++ b/mrbgems/mruby-proc-ext/mrblib/proc.rb @@ -1,17 +1,73 @@ class Proc + # + # call-seq: + # prc === obj -> result_of_proc + # + # Invokes the block with obj as the parameter like Proc#call. + # This allows a proc object to be the target of a when clause + # in a case statement. + # + # def the_answer + # 42 + # end + # + # case the_answer + # when proc { |x| x > 40 } + # "correct" + # else + # "incorrect" + # end + # #=> "correct" + # def ===(*args) call(*args) end + # + # call-seq: + # prc.yield(params,...) -> obj + # + # Invokes the block with the given arguments. This method is provided + # for compatibility and is equivalent to Proc#call. + # + # prc = proc { |x| x * 2 } + # prc.yield(5) #=> 10 + # def yield(*args) call(*args) end + # + # call-seq: + # prc.to_proc -> prc + # + # Part of the protocol for converting objects to Proc objects. + # Instances of class Proc simply return themselves. + # + # prc = proc { "hello" } + # prc.to_proc #=> # + # def to_proc self end + # + # call-seq: + # prc.curry -> curried_proc + # prc.curry(arity) -> curried_proc + # + # Returns a curried proc. If the optional arity argument is given, it + # determines the number of arguments. A curried proc receives some + # arguments. If a sufficient number of arguments are supplied, it passes + # the supplied arguments to the original proc and returns the result. + # Otherwise, returns another curried proc that takes the rest of arguments. + # + # b = proc {|x, y, z| (x||0) + (y||0) + (z||0) } + # p b.curry[1][2][3] #=> 6 + # p b.curry[1, 2][3, 4] #=> 6 + # p b.curry(5)[1][2][3][4][5] #=> 6 + # def curry(arity=self.arity) type = :proc abs = lambda {|a| a < 0 ? -a - 1 : a} @@ -19,9 +75,22 @@ def curry(arity=self.arity) if lambda? type = :lambda self_arity = self.arity - if (self_arity >= 0 && arity != self_arity) || - (self_arity < 0 && abs[self_arity] > arity) - raise ArgumentError, "wrong number of arguments (given #{arity}, expected #{abs[self_arity]})" + min_req = abs[self_arity] + if self_arity < 0 + max_arity = 0 + has_rest = false + self.parameters.each do |p| + case p[0] + when :rest, :keyrest then has_rest = true + when :req, :opt then max_arity += 1 + end + end + if arity < min_req || (!has_rest && arity > max_arity) + expected = (!has_rest && max_arity != min_req) ? "#{min_req}..#{max_arity}" : min_req.to_s + raise ArgumentError, "wrong number of arguments (given #{arity}, expected #{expected})" + end + elsif arity != self_arity + raise ArgumentError, "wrong number of arguments (given #{arity}, expected #{self_arity})" end end @@ -39,10 +108,34 @@ def curry(arity=self.arity) make_curry.call end + # + # call-seq: + # prc << other_proc -> new_proc + # + # Returns a new Proc which is the composition of this proc and the given + # other_proc. The returned proc takes a variable number of arguments, calls + # other_proc with them then calls this proc with the result. + # + # f = proc {|x| x * x } + # g = proc {|x| x + x } + # p (f << g).call(2) #=> 16 + # def <<(other) ->(*args, **opts, &block) { call(other.call(*args, **opts, &block)) } end + # + # call-seq: + # prc >> other_proc -> new_proc + # + # Returns a new Proc which is the composition of this proc and the given + # other_proc. The returned proc takes a variable number of arguments, calls + # this proc with them then calls other_proc with the result. + # + # f = proc {|x| x * x } + # g = proc {|x| x + x } + # p (f >> g).call(2) #=> 8 + # def >>(other) ->(*args, **opts, &block) { other.call(call(*args, **opts, &block)) } end diff --git a/mrbgems/mruby-proc-ext/src/proc.c b/mrbgems/mruby-proc-ext/src/proc.c index ac89db193a..4149e0304b 100644 --- a/mrbgems/mruby-proc-ext/src/proc.c +++ b/mrbgems/mruby-proc-ext/src/proc.c @@ -1,66 +1,125 @@ #include +#include #include #include #include #include #include -#include +#include + +/* + * call-seq: + * prc.lambda? -> true or false + * + * Returns `true` if `prc` is a lambda, `false` if it is a proc. + * The difference is how they react to a `return` statement. In a lambda, + * `return` makes the lambda return. In a proc, `return` makes the method + * that called the proc return. + * + * def gen_times(factor) + * return proc {|n| n*factor } # return from the proc + * end + * + * times3 = gen_times(3) + * times5 = gen_times(5) + * + * times3.lambda? #=> false + * times5.lambda? #=> false + * + * def gen_times(factor) + * return lambda {|n| n*factor } # return from the lambda + * end + * + * times3 = gen_times(3) + * times5 = gen_times(5) + * + * times3.lambda? #=> true + * times5.lambda? #=> true + */ static mrb_value -mrb_proc_lambda_p(mrb_state *mrb, mrb_value self) +proc_lambda_p(mrb_state *mrb, mrb_value self) { struct RProc *p = mrb_proc_ptr(self); return mrb_bool_value(MRB_PROC_STRICT_P(p)); } +/* Internal helper function to extract source location from a proc */ mrb_value -mrb_proc_source_location(mrb_state *mrb, struct RProc *p) +mrb_proc_source_location(mrb_state *mrb, const struct RProc *p) { if (MRB_PROC_CFUNC_P(p)) { return mrb_nil_value(); } - else { - const mrb_irep *irep = p->body.irep; - int32_t line; - const char *filename; - filename = mrb_debug_get_filename(mrb, irep, 0); - line = mrb_debug_get_line(mrb, irep, 0); + /* handle alias */ + if (MRB_PROC_ALIAS_P(p)) { + p = p->upper; + } + + const mrb_irep *irep = p->body.irep; + int32_t line; + const char *filename; - return (!filename && line == -1)? mrb_nil_value() - : mrb_assoc_new(mrb, mrb_str_new_cstr(mrb, filename), mrb_fixnum_value(line)); + if (!mrb_debug_get_position(mrb, irep, 0, &line, &filename)) { + return mrb_nil_value(); } + return mrb_assoc_new(mrb, mrb_str_new_cstr(mrb, filename), mrb_fixnum_value(line)); } +/* + * call-seq: + * prc.source_location -> [filename, line] or nil + * + * Returns the Ruby source filename and line number containing this proc + * or `nil` if this proc was not defined in Ruby (i.e. native). + * + * p = proc { puts "hello" } + * p.source_location #=> ["prog.rb", 1] + */ + static mrb_value -mrb_proc_source_location_m(mrb_state *mrb, mrb_value self) +proc_source_location(mrb_state *mrb, mrb_value self) { return mrb_proc_source_location(mrb, mrb_proc_ptr(self)); } +/* + * call-seq: + * prc.to_s -> string + * prc.inspect -> string + * + * Returns the unique identifier for this proc, along with + * an indication of where the proc was defined. + * + * p = proc { puts "hello" } + * p.inspect #=> "#" + * p.to_s #=> "#" + * + * l = lambda { puts "hello" } + * l.inspect #=> "#" + */ + static mrb_value -mrb_proc_inspect(mrb_state *mrb, mrb_value self) +proc_inspect(mrb_state *mrb, mrb_value self) { struct RProc *p = mrb_proc_ptr(self); mrb_value str = mrb_str_new_lit(mrb, "#body.irep; const char *filename; int32_t line; - mrb_str_cat_lit(mrb, str, "@"); + mrb_str_cat_lit(mrb, str, " "); - filename = mrb_debug_get_filename(mrb, irep, 0); - mrb_str_cat_cstr(mrb, str, filename ? filename : "-"); - mrb_str_cat_lit(mrb, str, ":"); - - line = mrb_debug_get_line(mrb, irep, 0); - if (line != -1) { + if (mrb_debug_get_position(mrb, irep, 0, &line, &filename)) { + mrb_str_cat_cstr(mrb, str, filename); + mrb_str_cat_lit(mrb, str, ":"); mrb_str_concat(mrb, str, mrb_fixnum_value(line)); } else { - mrb_str_cat_lit(mrb, str, "-"); + mrb_str_cat_lit(mrb, str, "-:-"); } } @@ -72,8 +131,21 @@ mrb_proc_inspect(mrb_state *mrb, mrb_value self) return str; } +/* + * call-seq: + * proc { |...| block } -> a_proc + * + * Equivalent to `Proc.new`. + * + * def proc(&block) + * block + * end + * + * proc { puts "Hello world" } #=> # + */ + static mrb_value -mrb_kernel_proc(mrb_state *mrb, mrb_value self) +kernel_proc(mrb_state *mrb, mrb_value self) { mrb_value blk; @@ -99,37 +171,49 @@ mrb_proc_parameters(mrb_state *mrb, mrb_value self) mrb_sym name; int size; } *p, parameters_list [] = { - {MRB_SYM(req), 0}, - {MRB_SYM(opt), 0}, - {MRB_SYM(rest), 0}, - {MRB_SYM(req), 0}, - {MRB_SYM(keyrest), 0}, - {MRB_SYM(block), 0}, - {MRB_SYM(key), 0}, + {MRB_SYM(req), 0}, + {MRB_SYM(opt), 0}, + {MRB_SYM(rest), 0}, + {MRB_SYM(req), 0}, + {MRB_SYM(keyrest),0}, + {MRB_SYM(block), 0}, + {MRB_SYM(key), 0}, {0, 0} }; + int i; const struct RProc *proc = mrb_proc_ptr(self); - const struct mrb_irep *irep; + /* An alias proc carries no irep of its own: body.mid holds the aliased + method's name and `upper` points at the original proc (see mrb_alias_method + in class.c). Without this, the else branch below reads body.mid as an + mrb_irep* and dereferences it -> misaligned read / SEGV. Resolve to the + underlying proc, exactly as method_to_s already does, so an aliased method + reports the original's parameters. Alias chains are collapsed at creation, + but loop (as mruby-method does) and bail to empty on a broken chain. */ + while (MRB_PROC_ALIAS_P(proc)) { + proc = proc->upper; + if (!proc) return mrb_ary_new(mrb); + } mrb_aspec aspec; - mrb_value parameters; - mrb_value krest = mrb_nil_value(); - mrb_value block = mrb_nil_value(); - int i, j; - int max = -1; - + mrb_bool has_lv = TRUE; if (MRB_PROC_CFUNC_P(proc)) { - // TODO cfunc aspec is not implemented yet - return mrb_ary_new(mrb); - } - irep = proc->body.irep; - if (!irep) { - return mrb_ary_new(mrb); - } - if (!irep->lv) { - return mrb_ary_new(mrb); + uint32_t caspec_bits = proc->flags & MRB_PROC_CASPEC_MASK; + if (caspec_bits != 0) { + aspec = mrb_proc_decompress_caspec(caspec_bits); + } + else if (MRB_PROC_NOARG_P(proc)) { + aspec = 0; + } + else { + return mrb_ary_new(mrb); + } + has_lv = FALSE; } - if (*irep->iseq != OP_ENTER) { - return mrb_ary_new(mrb); + else { + const struct mrb_irep *irep = proc->body.irep; + if (!irep || !irep->lv || *irep->iseq != OP_ENTER) { + return mrb_ary_new(mrb); + } + aspec = PEEK_W(irep->iseq+1); } if (!MRB_PROC_STRICT_P(proc)) { @@ -137,7 +221,6 @@ mrb_proc_parameters(mrb_state *mrb, mrb_value self) parameters_list[3].name = MRB_SYM(opt); } - aspec = PEEK_W(irep->iseq+1); parameters_list[0].size = MRB_ASPEC_REQ(aspec); parameters_list[1].size = MRB_ASPEC_OPT(aspec); parameters_list[2].size = MRB_ASPEC_REST(aspec); @@ -146,29 +229,29 @@ mrb_proc_parameters(mrb_state *mrb, mrb_value self) parameters_list[5].size = MRB_ASPEC_BLOCK(aspec); parameters_list[6].size = MRB_ASPEC_KEY(aspec); - parameters = mrb_ary_new_capa(mrb, irep->nlocals-1); + int max = 0; + for (i = 0; parameters_list[i].name; i++) { + max += parameters_list[i].size; + } + + mrb_value parameters = mrb_ary_new_capa(mrb, max); + mrb_value krest = mrb_nil_value(); + mrb_value block = mrb_nil_value(); - max = irep->nlocals-1; + const mrb_sym *lv = has_lv ? proc->body.irep->lv : NULL; for (i = 0, p = parameters_list; p->name; p++) { mrb_value sname = mrb_symbol_value(p->name); - for (j = 0; j < p->size; i++, j++) { - mrb_value a; - - a = mrb_ary_new(mrb); + for (int j = 0; j < p->size; i++, j++) { + mrb_value a = mrb_ary_new(mrb); mrb_ary_push(mrb, a, sname); - if (i < max && irep->lv[i]) { - mrb_sym sym = irep->lv[i]; - const char *name = mrb_sym_name(mrb, sym); - switch (name[0]) { - case '*': case '&': - break; - default: - mrb_ary_push(mrb, a, mrb_symbol_value(sym)); - break; - } + if (lv && i < max && lv[i]) { + mrb_ary_push(mrb, a, mrb_symbol_value(lv[i])); } if (p->name == MRB_SYM(block)) { + if (lv && lv[i+1]) { + mrb_ary_push(mrb, a, mrb_symbol_value(lv[i+1])); + } block = a; continue; } if (p->name == MRB_SYM(keyrest)) { @@ -176,24 +259,30 @@ mrb_proc_parameters(mrb_state *mrb, mrb_value self) } mrb_ary_push(mrb, parameters, a); } + /* need to skip empty block slot */ + if (p->size == 0 && p->name == MRB_SYM(block)) i++; } if (!mrb_nil_p(krest)) mrb_ary_push(mrb, parameters, krest); if (!mrb_nil_p(block)) mrb_ary_push(mrb, parameters, block); return parameters; } +/* ---------------------------*/ +static const mrb_mt_entry proc_ext_rom_entries[] = { + MRB_MT_ENTRY(proc_inspect, MRB_SYM(inspect), MRB_ARGS_NONE()), + MRB_MT_ENTRY(proc_lambda_p, MRB_SYM_Q(lambda), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_proc_parameters, MRB_SYM(parameters), MRB_ARGS_NONE()), + MRB_MT_ENTRY(proc_source_location, MRB_SYM(source_location), MRB_ARGS_NONE()), + MRB_MT_ENTRY(proc_inspect, MRB_SYM(to_s), MRB_ARGS_NONE()), +}; + void mrb_mruby_proc_ext_gem_init(mrb_state* mrb) { struct RClass *p = mrb->proc_class; - mrb_define_method(mrb, p, "lambda?", mrb_proc_lambda_p, MRB_ARGS_NONE()); - mrb_define_method(mrb, p, "source_location", mrb_proc_source_location_m, MRB_ARGS_NONE()); - mrb_define_method(mrb, p, "to_s", mrb_proc_inspect, MRB_ARGS_NONE()); - mrb_define_method(mrb, p, "inspect", mrb_proc_inspect, MRB_ARGS_NONE()); - mrb_define_method(mrb, p, "parameters", mrb_proc_parameters, MRB_ARGS_NONE()); - - mrb_define_class_method(mrb, mrb->kernel_module, "proc", mrb_kernel_proc, MRB_ARGS_NONE()|MRB_ARGS_BLOCK()); - mrb_define_method(mrb, mrb->kernel_module, "proc", mrb_kernel_proc, MRB_ARGS_NONE()|MRB_ARGS_BLOCK()); + + MRB_MT_INIT_ROM(mrb, p, proc_ext_rom_entries); + mrb_define_private_method_id(mrb, mrb->kernel_module, MRB_SYM(proc), kernel_proc, MRB_ARGS_BLOCK()); } void diff --git a/mrbgems/mruby-proc-ext/test/proc.c b/mrbgems/mruby-proc-ext/test/proc.c index 0253f2b608..9a6bae3caf 100644 --- a/mrbgems/mruby-proc-ext/test/proc.c +++ b/mrbgems/mruby-proc-ext/test/proc.c @@ -35,11 +35,11 @@ static mrb_value cfunc_env_get(mrb_state *mrb, mrb_value self) { mrb_sym n; - const mrb_value *argv; mrb_int argc; - mrb_method_t m; - struct RProc *p; + const mrb_value *argv; + mrb_int argc; mrb_get_args(mrb, "na", &n, &argv, &argc); - p = mrb_proc_new_cfunc_with_env(mrb, return_env, argc, argv); + struct RProc *p = mrb_proc_new_cfunc_with_env(mrb, return_env, argc, argv); + mrb_method_t m; MRB_METHOD_FROM_PROC(m, p); mrb_define_method_raw(mrb, mrb_class_ptr(self), n, m); return self; @@ -56,7 +56,7 @@ void mrb_mruby_proc_ext_gem_test(mrb_state *mrb) struct RClass *cls; cls = mrb_define_class(mrb, "ProcExtTest", mrb->object_class); - mrb_define_module_function(mrb, cls, "mrb_proc_new_cfunc_with_env", proc_new_cfunc_with_env, MRB_ARGS_REQ(1)); - mrb_define_module_function(mrb, cls, "mrb_cfunc_env_get", cfunc_env_get, MRB_ARGS_REQ(2)); - mrb_define_module_function(mrb, cls, "cfunc_without_env", cfunc_without_env, MRB_ARGS_NONE()); + mrb_define_class_method(mrb, cls, "mrb_proc_new_cfunc_with_env", proc_new_cfunc_with_env, MRB_ARGS_REQ(1)); + mrb_define_class_method(mrb, cls, "mrb_cfunc_env_get", cfunc_env_get, MRB_ARGS_REQ(2)); + mrb_define_method(mrb, cls, "cfunc_without_env", cfunc_without_env, MRB_ARGS_NONE()); } diff --git a/mrbgems/mruby-proc-ext/test/proc.rb b/mrbgems/mruby-proc-ext/test/proc.rb index 06365b7d98..fcbeb49624 100644 --- a/mrbgems/mruby-proc-ext/test/proc.rb +++ b/mrbgems/mruby-proc-ext/test/proc.rb @@ -7,8 +7,8 @@ def enable_debug_info? raise rescue => e @enable_debug_info = !e.backtrace.empty? - if(@enable_debug_info && e.backtrace[0].include?("(unknown)")) - @enable_debug_info = false + if @enable_debug_info && e.backtrace[0].include?("(unknown)") + @enable_debug_info = false end return @enable_debug_info end @@ -30,12 +30,7 @@ def enable_debug_info? else file = line = "-" end - assert_match "#", ins -end - -assert('Proc#parameters') do - parameters = Proc.new{|x,y=42,*other|}.parameters - assert_equal [[:opt, :x], [:opt, :y], [:rest, :other]], parameters + assert_match "#", ins end assert('Proc#lambda?') do @@ -69,6 +64,13 @@ def enable_debug_info? assert_false(proc{}.curry.lambda?) assert_true(lambda{}.curry.lambda?) + + # #2855: lambda with optional param: curry must validate against max arity + l = lambda {|a, b=nil|} + assert_raise(ArgumentError) { l.curry(3) } # over max + assert_kind_of Proc, l.curry(2) # within range + assert_kind_of Proc, l.curry(1) # at min + assert_kind_of Proc, lambda {|a, *b|}.curry(99) # rest -> unbounded end assert('Proc#parameters') do @@ -78,8 +80,9 @@ def enable_debug_info? assert_equal([[:req, :a]], lambda {|a|}.parameters) assert_equal([[:opt, :a]], lambda {|a=nil|}.parameters) assert_equal([[:req, :a]], ->(a){}.parameters) - assert_equal([[:rest]], lambda { |*| }.parameters) + assert_equal([[:rest, :*]], lambda { |*| }.parameters) assert_equal([[:rest, :a]], Proc.new {|*a|}.parameters) + assert_equal([[:opt, :x], [:opt, :y], [:rest, :other]], Proc.new{|x,y=42,*other|}.parameters) assert_equal([[:opt, :a], [:opt, :b], [:opt, :c], [:opt, :d], [:rest, :e], [:opt, :f], [:opt, :g], [:block, :h]], Proc.new {|a,b,c=:c,d=:d,*e,f,g,&h|}.parameters) assert_equal([[:req, :a], [:req, :b], [:opt, :c], [:opt, :d], [:rest, :e], [:req, :f], [:req, :g], [:block, :h]], lambda {|a,b,c=:c,d=:d,*e,f,g,&h|}.parameters) end diff --git a/mrbgems/mruby-random/README.md b/mrbgems/mruby-random/README.md new file mode 100644 index 0000000000..0eff8cd3c3 --- /dev/null +++ b/mrbgems/mruby-random/README.md @@ -0,0 +1,272 @@ +# mruby-random + +mruby-random is an mrbgem that provides pseudo-random number generation facilities for mruby. + +## Features + +- Provides `Kernel#rand` method for generating pseudo-random numbers. +- Supports generating random numbers within a specific range. +- Allows setting a seed for reproducible random number sequences using `Kernel#srand`. + +## Global Random Number Generation + +### Generating a random number + +To generate a pseudo-random floating-point number between 0.0 (inclusive) and 1.0 (exclusive): + +```ruby +r = rand +p r # => 0.31415926535 +``` + +To generate a pseudo-random integer number between 0 (inclusive) and a given maximum integer (exclusive): + +```ruby +r = rand(100) +p r # => 42 +``` + +To generate a pseudo-random integer within a given `Range`: + +```ruby +r = rand(10..20) # or rand(10...20) +p r # => 15 (e.g., between 10 and 20, or 10 and 19) +``` + +### Seeding the random number generator + +To initialize the pseudo-random number generator with a specific seed: + +```ruby +srand(12345) +p rand(100) # => 81 +p rand(100) # => 81 (if you re-seed with srand(12345) again) + +# Using the same seed will produce the same sequence of random numbers +srand(12345) +p rand(100) # => 81 +srand(12345) +p rand(100) # => 81 +``` + +## The `Random` Class + +Besides the global `Kernel#rand` and `Kernel#srand` methods, `mruby-random` also provides a `Random` class for managing separate random number generators. + +### Creating an Instance + +You can create a new instance of the `Random` class with a system-generated seed: + +```ruby +rng = Random.new +p rng.rand(100) +``` + +Or you can provide a specific seed: + +```ruby +rng = Random.new(12345) +p rng.rand(100) # => 81 +``` + +### Instance Methods + +#### `rand` + +The `rand` instance method behaves similarly to `Kernel#rand`, but operates on the specific `Random` instance. + +- Called with no arguments, it returns a pseudo-random floating-point number between 0.0 (inclusive) and 1.0 (exclusive). + + ```ruby + rng = Random.new + p rng.rand # => 0.123456789 + ``` + +- Called with an integer `max` argument, it returns a pseudo-random integer between 0 (inclusive) and `max` (exclusive). + + ```ruby + rng = Random.new + p rng.rand(50) # => 23 + ``` + +- Called with a `Range` argument (`min..max` or `min...max`), it returns a pseudo-random integer within that range (inclusive of `min`, and inclusive or exclusive of `max` depending on the range type). + + ```ruby + rng = Random.new + p rng.rand(10..20) # => 15 (between 10 and 20, inclusive) + p rng.rand(10...20) # => 12 (between 10 and 19, inclusive) + ``` + +#### `srand` + +The `srand` instance method is used to seed the specific `Random` instance. It allows you to re-initialize the random number generator for that instance with a specific seed, making its sequence of generated numbers predictable. + +```ruby +rng = Random.new(111) +p rng.rand(1000) # => 100 +p rng.rand(1000) # => 283 + +rng.srand(111) # Re-seed the same instance +p rng.rand(1000) # => 100 (sequence repeats for this instance) + +rng2 = Random.new(111) # A different instance with the same seed +p rng2.rand(1000) # => 100 +``` + +It is important to note that `Random#srand` is an alias for `Random#initialize`. Re-seeding an existing `Random` object will reset its internal state. + +#### `bytes(n)` + +The `bytes` method returns a string containing `n` pseudo-random bytes. + +```ruby +rng = Random.new +p rng.bytes(5) # => "\xAB\xCD\xEF\x12\x34" (example output) +``` + +### Class Methods (Using the Default Generator) + +The `Random` class also provides class methods that operate on a global, default random number generator. This is the same generator used by `Kernel#rand` and `Kernel#srand`. + +#### `Random.rand` + +This method is equivalent to `Kernel.rand` (or simply `rand`). + +- With no arguments, returns a float between 0.0 and 1.0 (exclusive of 1.0): + + ```ruby + p Random.rand # => 0.7654321 + ``` + +- With an integer `max` argument, returns an integer between 0 and `max` (exclusive of `max`): + + ```ruby + p Random.rand(10) # => 7 + ``` + +- With a `Range` argument, returns an integer within the range: + + ```ruby + p Random.rand(50..60) # => 53 + ``` + +#### `Random.srand(seed)` + +This method is equivalent to `Kernel.srand(seed)` (or simply `srand(seed)`). It seeds the global default random number generator. + +```ruby +Random.srand(123) +p Random.rand(100) # => 13 +p rand(100) # => 80 (uses the same seeded generator) + +Random.srand(123) +p Random.rand(100) # => 13 (sequence repeats) +``` + +#### `Random.bytes(n)` + +This method returns a string containing `n` pseudo-random bytes, generated by the global default random number generator. + +```ruby +p Random.bytes(3) # => "\xDE\xAD\xBE" (example output) +``` + +## Array Methods + +`mruby-random` extends the `Array` class with methods for shuffling elements and sampling random elements. These methods can optionally accept a `Random` instance to use a specific random number generator. + +### `shuffle` + +The `shuffle` method returns a _new_ array with the elements of the original array in a random order. + +```ruby +a = [1, 2, 3, 4, 5] +p a.shuffle # => [3, 1, 5, 2, 4] (example output) +p a # => [1, 2, 3, 4, 5] (original array is unchanged) +``` + +You can provide a specific `Random` instance using the `random:` keyword argument. This is useful for reproducible shuffling. + +```ruby +rng = Random.new(123) +a = [1, 2, 3, 4, 5] +p a.shuffle(random: rng) # => [1, 5, 3, 2, 4] (example output, will be consistent with seed 123) + +rng2 = Random.new(123) # Same seed +p a.shuffle(random: rng2) # => [1, 5, 3, 2, 4] (same shuffled order) +``` + +### `shuffle!` + +The `shuffle!` method shuffles the elements of the array in-place. It modifies the original array. + +```ruby +a = [1, 2, 3, 4, 5] +p a.shuffle! # => [4, 2, 1, 5, 3] (example output) +p a # => [4, 2, 1, 5, 3] (original array is modified) +``` + +Similarly to `shuffle`, you can provide a specific `Random` instance using the `random:` keyword argument. + +```ruby +rng = Random.new(456) +a = [:a, :b, :c, :d, :e] +p a.shuffle!(random: rng) # => [:c, :a, :e, :d, :b] (example output, consistent with seed 456) +p a # => [:c, :a, :e, :d, :b] (original array is modified) +``` + +### `sample` + +The `sample` method chooses one or more random elements from the array. + +- When called with no arguments, it returns a single random element from the array. If the array is empty, it returns `nil`. + + ```ruby + a = ["apple", "banana", "cherry", "date"] + p a.sample # => "cherry" (example output) + + empty_array = [] + p empty_array.sample # => nil + ``` + +- When called with an integer `n` as an argument, it returns a new array containing `n` unique random elements from the original array. If the array does not have enough unique elements, it returns all elements in a shuffled order. If the array is empty, it returns an empty array. + + ```ruby + a = ["apple", "banana", "cherry", "date", "elderberry"] + p a.sample(3) # => ["date", "apple", "banana"] (example output) + p a.sample(10) # => ["banana", "elderberry", "apple", "date", "cherry"] (all elements, shuffled) + + + empty_array = [] + p empty_array.sample(3) # => [] + ``` + +- You can provide a specific `Random` instance using the `random:` keyword argument for both forms of `sample`. + + ```ruby + rng = Random.new(789) + a = [10, 20, 30, 40, 50] + p a.sample(random: rng) # => 30 (example output, consistent with seed 789) + + rng_b = Random.new(789) # Re-initialize with the same seed for predictable multi-sampling + p a.sample(2, random: rng_b) # => [30, 50] (example output) + ``` + +## Algorithm + +The `mruby-random` mrbgem uses the **PCG-XSH-RR** (Permuted Congruential Generator - XorShift High, Random Rotate) algorithm for pseudo-random number generation. + +### Key Features + +- **Compact State**: Uses only 64 bits of state (compared to 128 bits in the previous xoshiro128++ implementation), reducing memory footprint by 50% +- **Excellent Statistical Quality**: Passes rigorous statistical test suites (TestU01, PractRand) +- **Platform-Optimized**: Automatically adapts to platform characteristics for optimal performance + - On **32-bit platforms** (`MRB_32BIT`): Uses an optimized 32-bit multiplier that requires only 2 multiply operations instead of 3 + - On **64-bit platforms**: Uses the standard 64-bit multiplier for maximum statistical quality +- **Fast Performance**: Competitive speed with modern PRNG algorithms while maintaining smaller memory footprint + +The PCG family of algorithms was developed by Melissa O'Neill and is widely used in production systems. For more details, see . + +## License + +The `mruby-random` mrbgem is released under the MIT License. See the LICENSE file for details. diff --git a/mrbgems/mruby-random/src/random.c b/mrbgems/mruby-random/src/random.c index 45b069b951..e77f137fc1 100644 --- a/mrbgems/mruby-random/src/random.c +++ b/mrbgems/mruby-random/src/random.c @@ -7,56 +7,65 @@ #include #include #include +#include #include #include #include -#include +#include #include +#include #include -/* Written in 2019 by David Blackman and Sebastiano Vigna (vigna@acm.org) +/* PCG Random Number Generation + Based on the PCG family by Melissa O'Neill -To the extent possible under law, the author has dedicated all copyright -and related and neighboring rights to this software to the public domain -worldwide. This software is distributed without any warranty. - -See . */ - -/* This is xoshiro128++ 1.0, one of our 32-bit all-purpose, rock-solid - generators. It has excellent speed, a state size (128 bits) that is - large enough for mild parallelism, and it passes all tests we are aware - of. - - For generating just single-precision (i.e., 32-bit) floating-point - numbers, xoshiro128+ is even faster. - - The state must be seeded so that it is not everywhere zero. */ + This implements PCG-XSH-RR with 64-bit state and 32-bit output. + On 32-bit platforms, uses an optimized 32-bit multiplier for better + performance. On 64-bit platforms, uses the standard 64-bit multiplier + for maximum statistical quality. + See for details. */ +/* Platform-adaptive multiplier selection: + - 32-bit platforms: 0xf13283ad requires only 2 multiplies instead of 3 + - 64-bit platforms: standard multiplier for best statistical quality */ #ifdef MRB_32BIT -# define XORSHIFT96 -# define NSEEDS 3 -# define SEEDPOS 2 +# define PCG_MULTIPLIER 0xf13283adULL #else -# define NSEEDS 4 -# define SEEDPOS 0 +# define PCG_MULTIPLIER 6364136223846793005ULL #endif -#define LASTSEED (NSEEDS-1) +#define PCG_INCREMENT 1442695040888963407ULL typedef struct rand_state { - uint32_t seed[NSEEDS]; +#ifdef MRB_32BIT + /* On 32-bit platforms, split state to avoid alignment padding */ + uint32_t state_lo; + uint32_t state_hi; +#else + uint64_t state; +#endif + uint32_t seed_value; /* Track last seed for srand compatibility */ } rand_state; +/* Helper macros for 64-bit state access */ +#ifdef MRB_32BIT +# define GET_STATE(t) (((uint64_t)(t)->state_hi << 32) | (t)->state_lo) +# define SET_STATE(t, val) do { \ + uint64_t v_ = (val); \ + (t)->state_lo = (uint32_t)v_; \ + (t)->state_hi = (uint32_t)(v_ >> 32); \ + } while (0) +#else +# define GET_STATE(t) ((t)->state) +# define SET_STATE(t, val) ((t)->state = (val)) +#endif + static void rand_init(rand_state *t) { - t->seed[0] = 123456789; - t->seed[1] = 362436069; - t->seed[2] = 521288629; -#ifndef XORSHIFT96 - t->seed[3] = 88675123; -#endif + SET_STATE(t, 0x853c49e6748fea9bULL); + t->seed_value = 521288629; } static uint32_t rand_uint32(rand_state *state); @@ -64,55 +73,36 @@ static uint32_t rand_uint32(rand_state *state); static uint32_t rand_seed(rand_state *t, uint32_t seed) { - uint32_t old_seed = t->seed[SEEDPOS]; - rand_init(t); - t->seed[SEEDPOS] = seed; + uint32_t old_seed = t->seed_value; + + /* PCG initialization: state=0, step, add seed, step, then mix */ + SET_STATE(t, 0); + rand_uint32(t); + SET_STATE(t, GET_STATE(t) + seed); for (int i = 0; i < 10; i++) { rand_uint32(t); } - return old_seed; -} -#ifndef XORSHIFT96 -static inline uint32_t -rotl(const uint32_t x, int k) { - return (x << k) | (x >> (32 - k)); + t->seed_value = seed; + return old_seed; } -#endif static uint32_t -rand_uint32(rand_state *state) +rand_uint32(rand_state *rng) { -#ifdef XORSHIFT96 - uint32_t *seed = state->seed; - uint32_t x = seed[0]; - uint32_t y = seed[1]; - uint32_t z = seed[2]; - uint32_t t; - - t = (x ^ (x << 3)) ^ (y ^ (y >> 19)) ^ (z ^ (z << 6)); - x = y; y = z; z = t; - seed[0] = x; - seed[1] = y; - seed[2] = z; - - return z; -#else - uint32_t *s = state->seed; - const uint32_t result = rotl(s[0] + s[3], 7) + s[0]; - const uint32_t t = s[1] << 9; + /* PCG-XSH-RR: XorShift High (xorshift), then Random Rotate */ + uint64_t oldstate = GET_STATE(rng); - s[2] ^= s[0]; - s[3] ^= s[1]; - s[1] ^= s[2]; - s[0] ^= s[3]; + /* LCG step: advance internal state */ + SET_STATE(rng, oldstate * PCG_MULTIPLIER + PCG_INCREMENT); - s[2] ^= t; - s[3] = rotl(s[3], 11); + /* Output function: xorshift, then rotate by top bits */ + uint32_t xorshifted = (uint32_t)(((oldstate >> 18u) ^ oldstate) >> 27u); + uint32_t rot = (uint32_t)(oldstate >> 59u); - return result; -#endif /* XORSHIFT96 */ - } + /* Rotate right by rot bits (handles rot=0 case correctly) */ + return (xorshifted >> rot) | (xorshifted << ((32 - rot) & 31)); +} #ifndef MRB_NO_FLOAT static double @@ -139,20 +129,147 @@ random_rand(mrb_state *mrb, rand_state *t, mrb_int max) static mrb_int rand_i(rand_state *t, mrb_int max) { - return rand_uint32(t) % max; + /* return uniform integer in [0, max) without modulo bias */ + if (max <= 0) return 0; + +#ifdef MRB_INT64 + /* For large ranges that exceed 32-bit, use 64-bit random */ + if (max > (mrb_int)UINT32_MAX) { + uint64_t umax = (uint64_t)max; + uint64_t threshold = (uint64_t)(-(int64_t)umax) % umax; + uint64_t r; + do { + /* combine two 32-bit randoms into one 64-bit */ + r = ((uint64_t)rand_uint32(t) << 32) | rand_uint32(t); + } while (r < threshold); + return (mrb_int)(r % umax); + } +#endif + + uint32_t threshold = (uint32_t)(-max) % (uint32_t)max; /* power-of-two fast path => 0 */ + uint32_t r; + do { + r = rand_uint32(t); + } while (r < threshold); + return (mrb_int)(r % (uint32_t)max); } -static mrb_int -get_opt(mrb_state* mrb) +/* Full-width unsigned random value in [0, 2**MRB_INT_BIT). */ +static mrb_uint +rand_uint(rand_state *t) +{ +#ifdef MRB_INT64 + return ((mrb_uint)rand_uint32(t) << 32) | rand_uint32(t); +#else + return (mrb_uint)rand_uint32(t); +#endif +} + +/* Uniform unsigned value in [0, span) without modulo bias. + span == 0 selects the entire domain [0, 2**MRB_INT_BIT). */ +static mrb_uint +rand_u(rand_state *t, mrb_uint span) +{ + if (span == 0) return rand_uint(t); + mrb_uint threshold = (mrb_uint)(-span) % span; /* == 2**MRB_INT_BIT % span */ + mrb_uint r; + do { + r = rand_uint(t); + } while (r < threshold); + return r % span; +} + +static mrb_value +rand_range_int(mrb_state *mrb, rand_state *t, mrb_int begin, + mrb_int end, mrb_bool excl) { + /* Reversed or empty range -> nil (as CRuby does). Compare before + subtracting so extreme bounds cannot overflow mrb_int. */ + if (begin > end || (excl && begin == end)) + return mrb_nil_value(); + + /* Candidate count in unsigned arithmetic to avoid signed overflow. + An inclusive full-width range wraps to 0, which rand_u reads as + "the entire domain". */ + mrb_uint span = (mrb_uint)end - (mrb_uint)begin + (excl ? 0 : 1); + mrb_uint r = rand_u(t, span); + return mrb_int_value(mrb, (mrb_int)((mrb_uint)begin + r)); +} + +#ifndef MRB_NO_FLOAT +static mrb_value +rand_range_float(mrb_state *mrb, rand_state *t, + mrb_float begin, mrb_float end, + mrb_bool excl) { + mrb_float span = end - begin; + if (span <= 0.0) + return mrb_nil_value(); + + return mrb_float_value(mrb, rand_real(t) * span + begin); +} +#endif + +static mrb_noreturn void +range_error(mrb_state *mrb, mrb_value v) { - mrb_int arg; + mrb_raisef(mrb, E_TYPE_ERROR, "no implicit conversion of %Y into Integer", v); +} - arg = 0; - mrb_get_args(mrb, "|i", &arg); - if (arg < 0) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid argument"); +static mrb_value +random_range(mrb_state *mrb, rand_state *t, mrb_value rv) +{ + struct RRange *r = mrb_range_ptr(mrb, rv); + if (mrb_integer_p(RANGE_BEG(r)) && mrb_integer_p(RANGE_END(r))) { + return rand_range_int(mrb, t, mrb_integer(RANGE_BEG(r)), + mrb_integer(RANGE_END(r)), RANGE_EXCL(r)); } - return arg; + +#define cast_to_float(v) \ + (mrb_float_p(v) ? mrb_float(v) \ + : mrb_integer_p(v) ? (mrb_float)mrb_integer(v) \ + : (range_error(mrb, v), 0.0)) + + return rand_range_float(mrb, t, cast_to_float(RANGE_BEG(r)), + cast_to_float(RANGE_END(r)), RANGE_EXCL(r)); +#undef cast_to_float +} + +static mrb_value +random_rand_impl(mrb_state *mrb, rand_state *t, mrb_value self) +{ + mrb_value arg; + if (mrb_get_args(mrb, "|o", &arg) == 0) { + return random_rand(mrb, t, 0); + } + + if (mrb_float_p(arg)) { + return random_rand(mrb, t, (mrb_int)mrb_float(arg)); + } + + if (mrb_integer_p(arg)) { + return random_rand(mrb, t, mrb_integer(arg)); + } + + if (mrb_range_p(arg)) { + return random_range(mrb, t, arg); + } + +#ifdef MRB_USE_BIGINT + if (mrb_bigint_p(arg)) { + if (mrb_bint_sign(mrb, arg) < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "negative value as random limit"); + } + mrb_int size = mrb_bint_size(mrb, arg); + mrb_value bytes = mrb_str_new(mrb, NULL, size); + uint8_t *p = (uint8_t*)RSTRING_PTR(bytes); + for (mrb_int i = 0; i < size; i++) { + p[i] = (uint8_t)rand_uint32(t); + } + mrb_value rand_bint = mrb_bint_from_bytes(mrb, p, size); + return mrb_bint_mod(mrb, rand_bint, arg); + } +#endif + + range_error(mrb, arg); } #define ID_RANDOM MRB_SYM(mruby_Random) @@ -171,13 +288,23 @@ random_default(mrb_state *mrb) #define random_ptr(v) (rand_state*)mrb_istruct_ptr(v) #define random_default_state(mrb) random_ptr(random_default(mrb)) +/* + * call-seq: + * Random.new(seed = nil) -> random + * + * Creates a new random number generator. If seed is omitted or nil, + * the generator is initialized with a default seed. Otherwise, + * the generator is initialized with the given seed. + * + * Random.new #=> # + * Random.new(1234) #=> # + */ static mrb_value random_m_init(mrb_state *mrb, mrb_value self) { mrb_int seed; - rand_state *t; + rand_state *t = random_ptr(self); - t = random_ptr(self); if (mrb_get_args(mrb, "|i", &seed) == 0) { rand_init(t); } @@ -188,21 +315,45 @@ random_m_init(mrb_state *mrb, mrb_value self) return self; } +/* + * call-seq: + * random.rand -> float + * random.rand(max) -> number + * random.rand(range) -> number + * + * Returns a random number. When called without arguments, returns a + * random float between 0.0 and 1.0. When called with a positive integer, + * returns a random integer between 0 and max-1. When called with a range, + * returns a random number within that range. + * + * prng = Random.new + * prng.rand #=> 0.2725926052826416 + * prng.rand(10) #=> 7 + * prng.rand(1..6) #=> 4 + */ static mrb_value random_m_rand(mrb_state *mrb, mrb_value self) { - mrb_int max; rand_state *t = random_ptr(self); - - max = get_opt(mrb); - return random_rand(mrb, t, max); + return random_rand_impl(mrb, t, self); } +/* + * call-seq: + * random.srand(seed = nil) -> old_seed + * + * Seeds the random number generator with the given seed. If seed is + * omitted or nil, uses a combination of current time and internal state. + * Returns the previous seed value. + * + * prng = Random.new + * prng.srand(1234) #=> (previous seed) + * prng.srand #=> 1234 + */ static mrb_value random_m_srand(mrb_state *mrb, mrb_value self) { uint32_t seed; - uint32_t old_seed; mrb_int i; rand_state *t = random_ptr(self); @@ -212,23 +363,46 @@ random_m_srand(mrb_state *mrb, mrb_value self) else { seed = (uint32_t)i; } - old_seed = rand_seed(t, seed); + uint32_t old_seed = rand_seed(t, seed); return mrb_int_value(mrb, (mrb_int)old_seed); } +/* + * call-seq: + * random.bytes(size) -> string + * + * Returns a string of random bytes of the specified size. + * + * prng = Random.new + * prng.bytes(4) #=> "\x8F\x12\xA3\x7C" + * prng.bytes(10).length #=> 10 + */ static mrb_value random_m_bytes(mrb_state *mrb, mrb_value self) { rand_state *t = random_ptr(self); - - mrb_int i; - mrb_get_args(mrb, "i", &i); - + mrb_int i = mrb_as_int(mrb, mrb_get_arg1(mrb)); + if (i < 0) mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size"); mrb_value bytes = mrb_str_new(mrb, NULL, i); uint8_t *p = (uint8_t*)RSTRING_PTR(bytes); - for (; i > 0; i--, p++) { - *p = (uint8_t)rand_uint32(t); + + /* write 4 bytes per PRNG call */ + while (i >= 4) { + uint32_t x = rand_uint32(t); + p[0] = (uint8_t)(x); + p[1] = (uint8_t)(x >> 8); + p[2] = (uint8_t)(x >> 16); + p[3] = (uint8_t)(x >> 24); + p += 4; + i -= 4; + } + if (i > 0) { + uint32_t x = rand_uint32(t); + while (i-- > 0) { + *p++ = (uint8_t)x; + x >>= 8; + } } return bytes; @@ -262,24 +436,18 @@ static mrb_value mrb_ary_shuffle_bang(mrb_state *mrb, mrb_value ary) { if (RARRAY_LEN(ary) > 1) { - mrb_int i, max; - rand_state *random; - mrb_sym knames[3] = {MRB_SYM(random)}; + mrb_sym kname = MRB_SYM(random); mrb_value r; - const mrb_kwargs kw = {1, 0, knames, &r, NULL}; + const mrb_kwargs kw = {1, 0, &kname, &r, NULL}; mrb_get_args(mrb, ":", &kw); - random = check_random_arg(mrb, r); + rand_state *random = check_random_arg(mrb, r); mrb_ary_modify(mrb, mrb_ary_ptr(ary)); - max = RARRAY_LEN(ary); - for (i = RARRAY_LEN(ary) - 1; i > 0; i--) { - mrb_int j; - mrb_value *ptr = RARRAY_PTR(ary); - mrb_value tmp; - - j = rand_i(random, max); - - tmp = ptr[i]; + mrb_int len = RARRAY_LEN(ary); + mrb_value *ptr = RARRAY_PTR(ary); + for (mrb_int i = len - 1; i > 0; i--) { + mrb_int j = rand_i(random, i + 1); + mrb_value tmp = ptr[i]; ptr[i] = ptr[j]; ptr[j] = tmp; } @@ -298,7 +466,7 @@ mrb_ary_shuffle_bang(mrb_state *mrb, mrb_value ary) static mrb_value mrb_ary_shuffle(mrb_state *mrb, mrb_value ary) { - mrb_value new_ary = mrb_ary_new_from_values(mrb, RARRAY_LEN(ary), RARRAY_PTR(ary)); + mrb_value new_ary = mrb_ary_dup(mrb, ary); mrb_ary_shuffle_bang(mrb, new_ary); return new_ary; @@ -309,13 +477,13 @@ mrb_ary_shuffle(mrb_state *mrb, mrb_value ary) * ary.sample -> obj * ary.sample(n) -> new_ary * - * Choose a random element or +n+ random elements from the array. + * Choose a random element or `n` random elements from the array. * * The elements are chosen by using random and unique indices into the array * in order to ensure that an element doesn't repeat itself unless the array * already contained duplicate elements. * - * If the array is empty the first form returns +nil+ and the second form + * If the array is empty the first form returns `nil` and the second form * returns an empty array. */ @@ -324,15 +492,13 @@ mrb_ary_sample(mrb_state *mrb, mrb_value ary) { mrb_int n = 0; mrb_bool given; - rand_state *random; - mrb_int len; - mrb_sym knames[3] = {MRB_SYM(random)}; + mrb_sym kname = MRB_SYM(random); mrb_value r; - const mrb_kwargs kw = {1, 0, knames, &r, NULL}; + const mrb_kwargs kw = {1, 0, &kname, &r, NULL}; mrb_get_args(mrb, "|i?:", &n, &given, &kw); - random = check_random_arg(mrb, r); - len = RARRAY_LEN(ary); + rand_state *random = check_random_arg(mrb, r); + mrb_int len = RARRAY_LEN(ary); if (!given) { /* pick one element */ switch (len) { case 0: @@ -344,44 +510,69 @@ mrb_ary_sample(mrb_state *mrb, mrb_value ary) } } else { - mrb_value result; - mrb_int i, j; - if (n < 0) mrb_raise(mrb, E_ARGUMENT_ERROR, "negative sample number"); if (n > len) n = len; - result = mrb_ary_new_capa(mrb, n); - for (i=0; i 0 ? n : 1)); + for (mrb_int i = 0; i < n; i++) { + mrb_int v; for (;;) { retry: - r = rand_i(random, len); - - for (j=0; j float + * Random.rand(max) -> number + * Random.rand(range) -> number + * rand -> float + * rand(max) -> number + * rand(range) -> number + * + * Returns a random number using the default random number generator. + * Equivalent to Random.new.rand. When called without arguments, returns + * a random float between 0.0 and 1.0. When called with a positive integer, + * returns a random integer between 0 and max-1. When called with a range, + * returns a random number within that range. + * + * Random.rand #=> 0.8444218515250481 + * Random.rand(10) #=> 5 + * rand(1..6) #=> 3 + */ static mrb_value random_f_rand(mrb_state *mrb, mrb_value self) { rand_state *t = random_default_state(mrb); - return random_rand(mrb, t, get_opt(mrb)); + return random_rand_impl(mrb, t, self); } +/* + * call-seq: + * Random.srand(seed = nil) -> old_seed + * srand(seed = nil) -> old_seed + * + * Seeds the default random number generator with the given seed. + * If seed is omitted or nil, uses current time and internal state. + * Returns the previous seed value. + * + * Random.srand(1234) #=> (previous seed) + * srand #=> 1234 + */ static mrb_value random_f_srand(mrb_state *mrb, mrb_value self) { @@ -389,6 +580,16 @@ random_f_srand(mrb_state *mrb, mrb_value self) return random_m_srand(mrb, random); } +/* + * call-seq: + * Random.bytes(size) -> string + * + * Returns a string of random bytes of the specified size using + * the default random number generator. + * + * Random.bytes(4) #=> "\x8F\x12\xA3\x7C" + * Random.bytes(10).length #=> 10 + */ static mrb_value random_f_bytes(mrb_state *mrb, mrb_value self) { @@ -397,31 +598,32 @@ random_f_bytes(mrb_state *mrb, mrb_value self) } +static const mrb_mt_entry random_rom_entries[] = { + MRB_MT_ENTRY(random_m_init, MRB_SYM(initialize), MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(random_m_rand, MRB_SYM(rand), MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(random_m_srand, MRB_SYM(srand), MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(random_m_bytes, MRB_SYM(bytes), MRB_ARGS_REQ(1)), +}; + void mrb_mruby_random_gem_init(mrb_state *mrb) { - struct RClass *random; struct RClass *array = mrb->array_class; mrb_static_assert(sizeof(rand_state) <= ISTRUCT_DATA_SIZE); - mrb_define_method(mrb, mrb->kernel_module, "rand", random_f_rand, MRB_ARGS_OPT(1)); - mrb_define_method(mrb, mrb->kernel_module, "srand", random_f_srand, MRB_ARGS_OPT(1)); - - random = mrb_define_class(mrb, "Random", mrb->object_class); - mrb_const_set(mrb, mrb_obj_value(mrb->object_class), ID_RANDOM, mrb_obj_value(random)); // for class check + struct RClass *random = mrb_define_class_id(mrb, MRB_SYM(Random), mrb->object_class); + mrb_const_set(mrb, mrb_obj_value(mrb->object_class), ID_RANDOM, mrb_obj_value(random)); MRB_SET_INSTANCE_TT(random, MRB_TT_ISTRUCT); - mrb_define_class_method(mrb, random, "rand", random_f_rand, MRB_ARGS_OPT(1)); - mrb_define_class_method(mrb, random, "srand", random_f_srand, MRB_ARGS_OPT(1)); - mrb_define_class_method(mrb, random, "bytes", random_f_bytes, MRB_ARGS_REQ(1)); - - mrb_define_method(mrb, random, "initialize", random_m_init, MRB_ARGS_OPT(1)); - mrb_define_method(mrb, random, "rand", random_m_rand, MRB_ARGS_OPT(1)); - mrb_define_method(mrb, random, "srand", random_m_srand, MRB_ARGS_OPT(1)); - mrb_define_method(mrb, random, "bytes", random_m_bytes, MRB_ARGS_REQ(1)); - - mrb_define_method(mrb, array, "shuffle", mrb_ary_shuffle, MRB_ARGS_OPT(1)); - mrb_define_method(mrb, array, "shuffle!", mrb_ary_shuffle_bang, MRB_ARGS_OPT(1)); - mrb_define_method(mrb, array, "sample", mrb_ary_sample, MRB_ARGS_OPT(2)); + mrb_define_class_method_id(mrb, random, MRB_SYM(rand), random_f_rand, MRB_ARGS_OPT(1)); + mrb_define_class_method_id(mrb, random, MRB_SYM(srand), random_f_srand, MRB_ARGS_OPT(1)); + mrb_define_class_method_id(mrb, random, MRB_SYM(bytes), random_f_bytes, MRB_ARGS_REQ(1)); + + mrb_define_private_method_id(mrb, mrb->kernel_module, MRB_SYM(rand), random_f_rand, MRB_ARGS_OPT(1)); + mrb_define_private_method_id(mrb, mrb->kernel_module, MRB_SYM(srand), random_f_srand, MRB_ARGS_OPT(1)); + MRB_MT_INIT_ROM(mrb, random, random_rom_entries); + mrb_define_method_id(mrb, array, MRB_SYM(shuffle), mrb_ary_shuffle, MRB_ARGS_OPT(1)); + mrb_define_method_id(mrb, array, MRB_SYM_B(shuffle), mrb_ary_shuffle_bang, MRB_ARGS_OPT(1)); + mrb_define_method_id(mrb, array, MRB_SYM(sample), mrb_ary_sample, MRB_ARGS_OPT(2)); mrb_value d = mrb_obj_new(mrb, random, 0, NULL); rand_state *t = random_ptr(d); diff --git a/mrbgems/mruby-random/test/random.rb b/mrbgems/mruby-random/test/random.rb index 7426aad205..7f007aaa5b 100644 --- a/mrbgems/mruby-random/test/random.rb +++ b/mrbgems/mruby-random/test/random.rb @@ -9,7 +9,7 @@ assert_not_equal(r1.rand, r3.rand) end -assert("Kernel.srand") do +assert("Kernel#srand") do srand(234) r1 = rand srand(234) @@ -45,7 +45,7 @@ assert_equal num / 2, b.bytesize end -assert("return class of Kernel.rand") do +assert("return class of Kernel#rand") do assert_kind_of(Integer, rand(3)) assert_kind_of(Integer, rand(1.5)) skip unless Object.const_defined?(:Float) @@ -135,3 +135,47 @@ assert_equal(samples1, samples2) assert_not_equal(samples1, samples3) end + +assert("Kernel#rand()") do + 100.times { + assert_include(0.0..1.0, rand) + assert_include(0...100, rand(0...100)) + assert_include(0...100, rand(100)) + } + + assert_equal(rand(0...0), nil) + assert_equal(rand(0.0...0), nil) + assert_equal(rand(0...0.0), nil) + assert_equal(rand(0.0...0.0), nil) + assert_equal(rand(1..0), nil) +end + +assert("Kernel#rand integer range overflow") do + # Width-independent guard behaviour for reversed/empty/single ranges. + assert_equal(5, rand(5..5)) # single-element inclusive range + assert_nil(rand(5...5)) # empty exclusive range + assert_nil(rand(10..3)) # reversed inclusive range + assert_nil(rand(10...3)) # reversed exclusive range + 100.times { assert_include(3..7, rand(3..7)) } + + # Wide fixnum ranges whose span exceeds the mrb_int range only exist on + # 64-bit mrb_int builds. Such bounds used to overflow `end - begin` in C + # (UndefinedBehaviorSanitizer signed-integer-overflow), found via a + # minimized mruby_fuzzer testcase. On 32-bit mrb_int (or when bigint + # promotes the bounds) these take a different path, so probe the + # integer-range path first and skip if absent. + hi = ((1 << 62) + (1 << 61)) rescue nil + if hi && (Integer === (rand(-hi..hi) rescue nil)) + lo = -hi + # reversed wide range -> nil; old code overflowed `end - begin`. + assert_nil(rand(hi..lo)) + assert_nil(rand(hi...lo)) + # valid wide range -> in-range Integer; old code overflowed and wrongly + # returned nil instead of a uniform value. + 100.times do + v = rand(lo..hi) + assert_kind_of(Integer, v) + assert_true(lo <= v && v <= hi) + end + end +end diff --git a/mrbgems/mruby-range-ext/README.md b/mrbgems/mruby-range-ext/README.md new file mode 100644 index 0000000000..7b713bba4d --- /dev/null +++ b/mrbgems/mruby-range-ext/README.md @@ -0,0 +1,93 @@ +# mruby-range-ext + +This gem extends the functionality of Ruby's `Range` class in mruby. It provides several new methods to make working with ranges more convenient and powerful. + +## Methods + +### `first` / `first(n)` + +Returns the first object in the range, or an array of the first `n` elements. + +**Example:** + +```ruby +(10..20).first #=> 10 +(10..20).first(3) #=> [10, 11, 12] +``` + +### `last` / `last(n)` + +Returns the last object in the range, or an array of the last `n` elements. Note that with no arguments `last` will return the object that defines the end of the range even if `exclude_end?` is true. + +**Example:** + +```ruby +(10..20).last #=> 20 +(10...20).last #=> 20 +(10..20).last(3) #=> [18, 19, 20] +(10...20).last(3) #=> [17, 18, 19] +``` + +### `max` + +Returns the maximum value in the range. Returns `nil` if the range is empty or excludes its end and the end is not an Integer. For non-numeric ranges or when a block is given, it delegates to `Enumerable#max`. + +**Example:** + +```ruby +(10..20).max #=> 20 +(10...20).max #=> 19 +('a'..'z').max #=> "z" +``` + +### `min` + +Returns the minimum value in the range. For non-numeric ranges or when a block is given, it delegates to `Enumerable#min`. + +**Example:** + +```ruby +(10..20).min #=> 10 +('a'..'z').min #=> "a" +``` + +### `overlap?(other_range)` + +Returns `true` if `self` and `other_range` have at least one element in common. + +**Example:** + +```ruby +(1..5).overlap?(4..6) #=> true +(1..5).overlap?(7..9) #=> false +``` + +### `cover?(obj_or_range)` + +Returns `true` if the given object or all elements of the given range are within `self`. + +**Example:** + +```ruby +("a".."z").cover?("c") #=> true +("a".."z").cover?("5") #=> false +("a".."z").cover?("cc") #=> true # Note: This behavior might be surprising for strings. +(1..5).cover?(2..4) #=> true +(1..5).cover?(2..7) #=> false +``` + +### `size` + +Returns the number of elements in the range. Both the begin and the end of the Range must be Numeric, otherwise `nil` is returned. For endless ranges with an Integer beginning, it returns `Infinity`. + +**Example:** + +```ruby +(10..20).size #=> 11 +('a'..'z').size #=> nil +(1..).size #=> Infinity +``` + +## License + +This gem is released under the MIT License. diff --git a/mrbgems/mruby-range-ext/mrblib/range.rb b/mrbgems/mruby-range-ext/mrblib/range.rb index 8b670afeec..b79ed41fc5 100644 --- a/mrbgems/mruby-range-ext/mrblib/range.rb +++ b/mrbgems/mruby-range-ext/mrblib/range.rb @@ -4,7 +4,7 @@ class Range # rng.first -> obj # rng.first(n) -> an_array # - # Returns the first object in the range, or an array of the first +n+ + # Returns the first object in the range, or an array of the first `n` # elements. # # (10..20).first #=> 10 @@ -33,10 +33,10 @@ def first(*args) # rng.last(n) -> an_array # # Returns the last object in the range, - # or an array of the last +n+ elements. + # or an array of the last `n` elements. # - # Note that with no arguments +last+ will return the object that defines - # the end of the range even if #exclude_end? is +true+. + # Note that with no arguments `last` will return the object that defines + # the end of the range even if #exclude_end? is `true`. # # (10..20).last #=> 20 # (10...20).last #=> 20 @@ -53,6 +53,19 @@ def last(*args) return self.to_a.last(nv) end + ## + # call-seq: + # rng.max -> obj + # rng.max {|a,b| block } -> obj + # + # Returns the maximum value in the range. Returns nil if the range is empty + # or excludes its end and the end is not an Integer. For non-numeric ranges + # or when a block is given, it delegates to Enumerable#max. + # + # (10..20).max #=> 20 + # (10...20).max #=> 19 + # ('a'..'z').max #=> "z" + # def max(&block) val = self.begin last = self.end @@ -75,6 +88,17 @@ def max(&block) super() end + ## + # call-seq: + # rng.min -> obj + # rng.min {|a,b| block } -> obj + # + # Returns the minimum value in the range. For non-numeric ranges or when + # a block is given, it delegates to Enumerable#min. + # + # (10..20).min #=> 10 + # ('a'..'z').min #=> "a" + # def min(&block) val = self.begin last = self.end @@ -96,4 +120,36 @@ def min(&block) # delegate to Enumerable super() end + + ## + # call-seq: + # rng.overlap?(other_range) -> true or false + # + # Returns true if self and other_range have at least one element in common, + # false otherwise. + # + # (1..5).overlap?(4..6) #=> true + # (1..5).overlap?(7..9) #=> false + # + def overlap?(other) + raise TypeError, "argument must be a range" unless other.kind_of?(Range) + + self_begin = self.begin + other_end = other.end + other_excl = other.exclude_end? + + return false if __empty_range?(self_begin, other_end, other_excl) + + other_begin = other.begin + self_end = self.end + self_excl = self.exclude_end? + + return false if __empty_range?(other_begin, self_end, self_excl) + return true if self_begin == other_begin + + return false if __empty_range?(self_begin, self_end, self_excl) + return false if __empty_range?(other_begin, other_end, other_excl) + + true + end end diff --git a/mrbgems/mruby-range-ext/src/range.c b/mrbgems/mruby-range-ext/src/range.c index 9aaeac810a..2a88e321af 100644 --- a/mrbgems/mruby-range-ext/src/range.c +++ b/mrbgems/mruby-range-ext/src/range.c @@ -1,5 +1,7 @@ #include #include +#include +#include static mrb_bool r_less(mrb_state *mrb, mrb_value a, mrb_value b, mrb_bool excl) @@ -18,14 +20,14 @@ r_less(mrb_state *mrb, mrb_value a, mrb_value b, mrb_bool excl) /* * call-seq: - * rng.cover?(obj) -> true or false + * rng.cover?(obj) -> true or false * rng.cover?(range) -> true or false * - * Returns +true+ if the given argument is within +self+, +false+ otherwise. + * Returns true if the given argument is within self, false otherwise. * - * With non-range argument +object+, evaluates with <= and <. + * With non-range argument object, evaluates with <= and <. * - * For range +self+ with included end value (#exclude_end? == false), + * For range self with included end value (exclude_end? == false), * evaluates thus: * * self.begin <= object <= self.end @@ -39,10 +41,8 @@ range_cover(mrb_state *mrb, mrb_value range) { struct RRange *r = mrb_range_ptr(mrb, range); mrb_value val = mrb_get_arg1(mrb); - mrb_value beg, end; - - beg = RANGE_BEG(r); - end = RANGE_END(r); + mrb_value beg = RANGE_BEG(r); + mrb_value end = RANGE_END(r); if (mrb_nil_p(beg) && mrb_nil_p(end)) return mrb_true_value(); @@ -110,18 +110,23 @@ static mrb_value range_size(mrb_state *mrb, mrb_value range) { struct RRange *r = mrb_range_ptr(mrb, range); - mrb_value beg, end; - mrb_float beg_f, end_f; - mrb_bool num_p = TRUE; - mrb_bool excl; + mrb_value beg = RANGE_BEG(r); + mrb_value end = RANGE_END(r); - beg = RANGE_BEG(r); - end = RANGE_END(r); - if ((mrb_integer_p(beg) || mrb_float_p(beg)) && mrb_nil_p(end)) { + if (mrb_float_p(beg)) { + mrb_raise(mrb, E_TYPE_ERROR, "can't iterate from Float"); + } + if (mrb_nil_p(beg)) { + mrb_raise(mrb, E_TYPE_ERROR, "can't iterate from nil"); + } + if (mrb_integer_p(beg) && mrb_nil_p(end)) { return mrb_float_value(mrb, INFINITY); } - excl = RANGE_EXCL(r); + mrb_bool excl = RANGE_EXCL(r); + mrb_float beg_f, end_f; + mrb_bool num_p = TRUE; + if (mrb_integer_p(beg)) { beg_f = (mrb_float)mrb_integer(beg); } @@ -167,17 +172,17 @@ static mrb_value range_size(mrb_state *mrb, mrb_value range) { struct RRange *r = mrb_range_ptr(mrb, range); - mrb_value beg, end; - mrb_int excl; - beg = RANGE_BEG(r); - end = RANGE_END(r); + mrb_value beg = RANGE_BEG(r); + mrb_value end = RANGE_END(r); + if (mrb_nil_p(beg)) { + mrb_raise(mrb, E_TYPE_ERROR, "can't iterate from nil"); + } if (mrb_integer_p(beg) && mrb_nil_p(end)) { return mrb_nil_value(); } - excl = RANGE_EXCL(r) ? 0 : 1; - + mrb_int excl = RANGE_EXCL(r) ? 0 : 1; if (mrb_integer_p(beg) && mrb_integer_p(end)) { mrb_int a = mrb_integer(beg); mrb_int b = mrb_integer(end); @@ -189,13 +194,39 @@ range_size(mrb_state *mrb, mrb_value range) } #endif /* MRB_NO_FLOAT */ +/* + * Internal helper method to check if a range would be empty given + * the specified begin, end, and exclude_end parameters. + * Returns true if the range would be empty, false otherwise. + * Used internally by overlap? and other range methods. + */ + +static mrb_value +range_empty_p(mrb_state *mrb, mrb_value range) +{ + mrb_value b, e; + mrb_bool excl; + + mrb_get_args(mrb, "oob", &b, &e, &excl); + if (mrb_nil_p(b) || mrb_nil_p(e)) + return mrb_false_value(); + + mrb_int comp = mrb_cmp(mrb, b, e); + return mrb_bool_value(comp == -2 || comp > 0 || (comp == 0 && excl)); +} + +static const mrb_mt_entry range_ext_rom_entries[] = { + MRB_MT_ENTRY(range_cover, MRB_SYM_Q(cover), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(range_size, MRB_SYM(size), MRB_ARGS_NONE()), + MRB_MT_ENTRY(range_empty_p, MRB_SYM_Q(__empty_range), MRB_ARGS_REQ(3)), +}; + void mrb_mruby_range_ext_gem_init(mrb_state* mrb) { - struct RClass * s = mrb->range_class; + struct RClass *s = mrb->range_class; - mrb_define_method(mrb, s, "cover?", range_cover, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, s, "size", range_size, MRB_ARGS_NONE()); + MRB_MT_INIT_ROM(mrb, s, range_ext_rom_entries); } void diff --git a/mrbgems/mruby-range-ext/test/range.rb b/mrbgems/mruby-range-ext/test/range.rb index 4ae575669a..96ced593e6 100644 --- a/mrbgems/mruby-range-ext/test/range.rb +++ b/mrbgems/mruby-range-ext/test/range.rb @@ -55,13 +55,10 @@ skip unless Object.const_defined?(:Float) assert_equal 6, (1...6.3).size assert_equal 5, (1...6.0).size - assert_equal 5, (1.1...6).size - assert_equal 15, (1.0..15.9).size assert_equal Float::INFINITY, (0..Float::INFINITY).size assert_equal Float::INFINITY, (1..).size assert_equal Float::INFINITY, (1...).size - assert_equal Float::INFINITY, (1.0..).size end assert('Range#max') do @@ -178,3 +175,37 @@ assert_equal nil, ((100..10).min { |x, y| x <=> y }) assert_equal nil, ((5...5).min { |x, y| x <=> y }) end + +assert('Range#overlap?') do + assert_false((0..2).overlap?(-2..-1)) + assert_false((0..2).overlap?(-2...0)) + assert_true((0..2).overlap?(-1..0)) + assert_true((0..2).overlap?(1..2)) + assert_true((0..2).overlap?(2..3)) + assert_false((0..2).overlap?(3...4)) + assert_false((0...2).overlap?(2..3)) + + assert_true((..0).overlap?(-1..0)) + assert_true((...0).overlap?(-1..0)) + assert_true((..0).overlap?(0..1)) + assert_true((..0).overlap?(..1)) + assert_false((..0).overlap?(1..2)) + assert_false((...0).overlap?(0..1)) + + assert_false((0..).overlap?(-2..-1)) + assert_false((0..).overlap?(...0)) + assert_true((0..).overlap?(..0)) + assert_true((0..).overlap?(0..1)) + assert_true((0..).overlap?(1..2)) + assert_true((0..).overlap?(-1..0)) + assert_true((0..).overlap?(1..)) + + assert_true((0..).overlap?(-1..0)) + assert_true((0..).overlap?(..0)) + assert_true((0..).overlap?(0..1)) + assert_true((0..).overlap?(1..2)) + assert_true((0..).overlap?(1..)) + + assert_raise(TypeError) { (0..).overlap?(1) } + assert_raise(TypeError) { (0..).overlap?(nil) } +end diff --git a/mrbgems/mruby-rational/README.md b/mrbgems/mruby-rational/README.md new file mode 100644 index 0000000000..537c2fdaaf --- /dev/null +++ b/mrbgems/mruby-rational/README.md @@ -0,0 +1,61 @@ +# mruby-rational + +This mrbgem provides a `Rational` class for mruby, allowing you to work with rational numbers (fractions). + +## Usage + +To use the `Rational` class, you first need to include the mrbgem in your mruby build. + +### Include in build_config.rb + +Add the following line to your `build_config.rb` file: + +```ruby +conf.gem :core => 'mruby-rational' +``` + +### Creating Rational Objects + +You can create `Rational` objects using the `Rational()` method: + +```ruby +r1 = Rational(1, 2) # Represents 1/2 +r2 = Rational(3, 4) # Represents 3/4 +``` + +### Arithmetic Operations + +The `Rational` class supports standard arithmetic operations: + +```ruby +r_add = Rational(1, 2) + Rational(1, 3) # (5/6) +r_sub = Rational(1, 2) - Rational(1, 3) # (1/6) +r_mul = Rational(1, 2) * Rational(1, 3) # (1/6) +r_div = Rational(1, 2) / Rational(1, 3) # (3/2) +``` + +## Available Methods + +The `Rational` class provides the following important methods: + +- `numerator`: Returns the numerator of the rational number. +- `denominator`: Returns the denominator of the rational number. +- `to_f`: Converts the rational number to a `Float`. +- `to_i`: Converts the rational number to an `Integer` (truncates towards zero). +- `to_s`: Returns a string representation of the rational number (e.g., "1/2"). +- `inspect`: Returns a string representation suitable for debugging (e.g., "(1/2)"). +- `==`: Checks for equality with another number. +- `<=>`: Compares the rational number with another number. +- `positive?`: Returns `true` if the rational number is greater than zero. +- `negative?`: Returns `true` if the rational number is less than zero. + +## Error Handling + +The mrbgem handles common errors such as: + +- **DivisionByZeroError**: Raised when attempting to create a rational number with a denominator of zero. +- **RangeError**: Raised in case of integer overflow during calculations. + +## License + +mruby-rational is licensed under the MIT License. See LICENSE for details. diff --git a/mrbgems/mruby-rational/mrbgem.rake b/mrbgems/mruby-rational/mrbgem.rake index 5341148628..2cd4816f9a 100644 --- a/mrbgems/mruby-rational/mrbgem.rake +++ b/mrbgems/mruby-rational/mrbgem.rake @@ -3,5 +3,5 @@ MRuby::Gem::Specification.new('mruby-rational') do |spec| spec.author = 'mruby developers' spec.summary = 'Rational class' spec.build.defines << "MRB_USE_RATIONAL" - spec.add_test_dependency('mruby-complex') + spec.add_test_dependency('mruby-complex', :core => 'mruby-complex') end diff --git a/mrbgems/mruby-rational/mrblib/rational.rb b/mrbgems/mruby-rational/mrblib/rational.rb index ebe6829b01..153dd48f66 100644 --- a/mrbgems/mruby-rational/mrblib/rational.rb +++ b/mrbgems/mruby-rational/mrblib/rational.rb @@ -1,14 +1,65 @@ class Rational < Numeric + # + # call-seq: + # rat.inspect -> string + # + # Returns the value as a string for inspection. + # + # Rational(2).inspect #=> "(2/1)" + # Rational(-8, 6).inspect #=> "(-4/3)" + # Rational(1, 2).inspect #=> "(1/2)" + # def inspect "(#{to_s})" end + # + # call-seq: + # rat.to_s -> string + # + # Returns the value as a string. + # + # Rational(2).to_s #=> "2/1" + # Rational(-8, 6).to_s #=> "-4/3" + # Rational(1, 2).to_s #=> "1/2" + # def to_s "#{numerator}/#{denominator}" end + + # + # call-seq: + # rat <=> numeric -> -1, 0, +1, or nil + # + # Returns -1, 0, or +1 depending on whether rat is less than, equal to, + # or greater than numeric. This is the basis for the tests in the Comparable module. + # Returns nil if the two values are incomparable. + # + # Rational(2, 3) <=> Rational(2, 3) #=> 0 + # Rational(5) <=> 5 #=> 0 + # Rational(2, 3) <=> Rational(1, 3) #=> 1 + # Rational(1, 3) <=> 1 #=> -1 + # Rational(1, 3) <=> 0.3 #=> 1 + # + def <=>(other) + return nil unless other.kind_of?(Numeric) + self.to_f <=> other.to_f + rescue + nil + end end class Numeric + # + # call-seq: + # num.to_r -> rational + # + # Returns the value as a rational. + # + # 1.to_r #=> (1/1) + # (1+2i).to_r #=> (1+2i)/1) + # nil.to_r #=> TypeError + # def to_r Rational(self, 1) end diff --git a/mrbgems/mruby-rational/src/rational.c b/mrbgems/mruby-rational/src/rational.c index 164ae9f2ff..4d265647fb 100644 --- a/mrbgems/mruby-rational/src/rational.c +++ b/mrbgems/mruby-rational/src/rational.c @@ -2,7 +2,6 @@ #include #include #include -#include #ifndef MRB_NO_FLOAT #include @@ -13,11 +12,35 @@ mrb_value mrb_complex_add(mrb_state *mrb, mrb_value, mrb_value); mrb_value mrb_complex_sub(mrb_state *mrb, mrb_value, mrb_value); mrb_value mrb_complex_mul(mrb_state *mrb, mrb_value, mrb_value); mrb_value mrb_complex_div(mrb_state *mrb, mrb_value, mrb_value); +mrb_value mrb_bint_mul_n(mrb_state *mrb, mrb_value x, mrb_value y); +void mrb_bint_reduce(mrb_state *mrb, mrb_value *x, mrb_value *y); +#ifdef MRB_USE_BIGINT +struct mrb_rational { + union { + struct { + mrb_int num; + mrb_int den; + } i; + struct { + struct RBasic *num; + struct RBasic *den; + } b; + }; +}; +#define numerator i.num +#define denominator i.den +#define RAT_BIGINT 1 +#define RAT_BIGINT_P(obj) (mrb_obj_ptr(obj)->flags & RAT_BIGINT) +#else struct mrb_rational { mrb_int numerator; mrb_int denominator; }; +#endif + +#define ONE mrb_fixnum_value(1) +#define ZERO mrb_fixnum_value(0) #if defined(MRB_INT64) && defined(MRB_32BIT) struct RRational { @@ -26,7 +49,7 @@ struct RRational { }; static struct mrb_rational* -rational_ptr(mrb_state *mrb, mrb_value v) +rat_ptr(mrb_state *mrb, mrb_value v) { struct RRational *r = (struct RRational*)mrb_obj_ptr(v); @@ -41,81 +64,141 @@ struct RRational { MRB_OBJECT_HEADER; struct mrb_rational r; }; -#define rational_ptr(mrb, v) (&((struct RRational*)mrb_obj_ptr(v))->r) +#define rat_ptr(mrb, v) (&((struct RRational*)mrb_obj_ptr(v))->r) #endif mrb_static_assert_object_size(struct RRational); -static struct RBasic* -rational_alloc(mrb_state *mrb, struct RClass *c, struct mrb_rational **p) +static struct mrb_rational* +rat_alloc(mrb_state *mrb, struct RClass *c, struct RBasic **obj) { - struct RRational *s; - s = MRB_OBJ_ALLOC(mrb, MRB_TT_RATIONAL, c); + struct RRational *s = MRB_OBJ_ALLOC(mrb, MRB_TT_RATIONAL, c); + struct mrb_rational *p; #ifdef RATIONAL_INLINE - *p = &s->r; + p = &s->r; #else - *p = s->p = (struct mrb_rational*)mrb_malloc(mrb, sizeof(struct mrb_rational)); + p = s->p = (struct mrb_rational*)mrb_malloc(mrb, sizeof(struct mrb_rational)); #endif - return (struct RBasic*)s; + *obj = (struct RBasic*)s; + return p; } +#ifdef RAT_BIGINT +int +mrb_rational_mark(mrb_state *mrb, struct RBasic *rat) +{ + if (!(rat->flags & RAT_BIGINT)) return 0; + + mrb_value self = mrb_obj_value(rat); + struct mrb_rational *p = rat_ptr(mrb, self); + mrb_gc_mark(mrb, p->b.num); + mrb_gc_mark(mrb, p->b.den); + return 2; +} +#endif + static mrb_value -rational_numerator(mrb_state *mrb, mrb_value self) +rat_numerator(mrb_state *mrb, mrb_value self) { - struct mrb_rational *p = rational_ptr(mrb, self); + struct mrb_rational *p = rat_ptr(mrb, self); +#ifdef RAT_BIGINT + if (RAT_BIGINT_P(self)) { + return mrb_obj_value(p->b.num); + } +#endif return mrb_int_value(mrb, p->numerator); } +/* + * call-seq: + * rational.numerator -> integer + * + * Returns the numerator of the rational number. + * + * Rational(3, 4).numerator #=> 3 + * Rational(-2, 5).numerator #=> -2 + * Rational(6, 8).numerator #=> 3 (reduced form) + */ +/* normalized version of rat_numerator() */ static mrb_value -rational_denominator(mrb_state *mrb, mrb_value self) +rational_numerator(mrb_state *mrb, mrb_value self) +{ + mrb_value n = rat_numerator(mrb, self); + if (mrb_bigint_p(n)) { + /* normalize bigint */ + return mrb_bint_mul(mrb, n, ONE); + } + return n; +} + +static mrb_value +rat_denominator(mrb_state *mrb, mrb_value self) { - struct mrb_rational *p = rational_ptr(mrb, self); + struct mrb_rational *p = rat_ptr(mrb, self); +#ifdef RAT_BIGINT + if (RAT_BIGINT_P(self)) { + return mrb_obj_value(p->b.den); + } +#endif return mrb_int_value(mrb, p->denominator); } -static void +/* + * call-seq: + * rational.denominator -> integer + * + * Returns the denominator of the rational number. + * The denominator is always positive. + * + * Rational(3, 4).denominator #=> 4 + * Rational(-2, 5).denominator #=> 5 + * Rational(6, 8).denominator #=> 4 (reduced form) + */ +/* normalized version of rat_denominator() */ +static mrb_value +rational_denominator(mrb_state *mrb, mrb_value self) +{ + mrb_value n = rat_denominator(mrb, self); + if (mrb_bigint_p(n)) { + /* normalize bigint */ + return mrb_bint_mul(mrb, n, ONE); + } + return n; +} + +static mrb_noreturn void rat_overflow(mrb_state *mrb) { mrb_raise(mrb, E_RANGE_ERROR, "integer overflow in rational"); } -static void +static mrb_noreturn void rat_zerodiv(mrb_state *mrb) { mrb_raise(mrb, E_ZERODIV_ERROR, "divided by 0 in rational"); } -mrb_value -mrb_rational_new(mrb_state *mrb, mrb_int numerator, mrb_int denominator) +static mrb_noreturn void +rat_type_error(mrb_state *mrb, mrb_value x) { - struct RClass *c = mrb_class_get_id(mrb, MRB_SYM(Rational)); - struct mrb_rational *p; - struct RBasic *rat; - - if (denominator == 0) { - rat_zerodiv(mrb); - } - if (denominator < 0) { - if (numerator == MRB_INT_MIN || denominator == MRB_INT_MIN) { - rat_overflow(mrb); - } - numerator *= -1; - denominator *= -1; - } - rat = rational_alloc(mrb, c, &p); - p->numerator = numerator; - p->denominator = denominator; - MRB_SET_FROZEN_FLAG(rat); - return mrb_obj_value(rat); + mrb_raisef(mrb, E_TYPE_ERROR, "%T cannot be converted to Rational", x); } -#define rational_new(mrb,n,d) mrb_rational_new(mrb, n, d) - void mrb_rational_copy(mrb_state *mrb, mrb_value x, mrb_value y) { - struct mrb_rational *p1 = rational_ptr(mrb, x); - struct mrb_rational *p2 = rational_ptr(mrb, y); + struct mrb_rational *p1 = rat_ptr(mrb, x); + struct mrb_rational *p2 = rat_ptr(mrb, y); +#ifdef RAT_BIGINT + struct RRational *r = (struct RRational*)mrb_obj_ptr(x); + if (RAT_BIGINT_P(y)) { + p1->b.num = p2->b.num; + p1->b.den = p2->b.den; + r->flags |= RAT_BIGINT; + return; + } + r->flags &= ~RAT_BIGINT; +#endif p1->numerator = p2->numerator; p1->denominator = p2->denominator; } @@ -138,7 +221,7 @@ i_gcd(mrb_int x, mrb_int y) u = (mrb_uint)x; v = (mrb_uint)y; - for (shift = 0; ((u | v) & 1) == 0; ++shift) { + for (shift = 0; ((u | v) & 1) == 0; shift++) { u >>= 1; v >>= 1; } @@ -161,21 +244,70 @@ i_gcd(mrb_int x, mrb_int y) return (mrb_int)(u << shift); } +#ifdef RAT_BIGINT static mrb_value -rational_new_i(mrb_state *mrb, mrb_int n, mrb_int d) +rational_new_b(mrb_state *mrb, mrb_value n, mrb_value d) { - mrb_int a; + /* bigint check */ + mrb_assert(mrb_bigint_p(n)); + d = mrb_as_bint(mrb, d); + mrb_int cmp = mrb_bint_cmp(mrb, d, ZERO); + if (cmp == 0) { + rat_zerodiv(mrb); + } + /* negative */ + if (cmp < 0) { + n = mrb_bint_neg(mrb, n); + d = mrb_bint_neg(mrb, d); + } + /* normalize (n/gcd, d/gcd) */ + mrb_bint_reduce(mrb, &n, &d); + struct RClass *c = mrb_class_get_id(mrb, MRB_SYM(Rational)); + struct RBasic *rat; + struct mrb_rational *p = rat_alloc(mrb, c, &rat); + rat->flags |= RAT_BIGINT; + p->b.num = (struct RBasic*)mrb_obj_ptr(n); + p->b.den = (struct RBasic*)mrb_obj_ptr(d); + rat->frozen = 1; + return mrb_obj_value(rat); +} +#endif - if (d == 0) { +mrb_value +mrb_rational_new(mrb_state *mrb, mrb_int nume, mrb_int deno) +{ + if (deno == 0) { rat_zerodiv(mrb); } - if (n == MRB_INT_MIN || d == MRB_INT_MIN) { + if (nume == MRB_INT_MIN || deno == MRB_INT_MIN) { +#ifdef RAT_BIGINT + mrb_value num = mrb_as_bint(mrb, mrb_int_value(mrb, nume)); + mrb_value den = mrb_as_bint(mrb, mrb_int_value(mrb, deno)); + return rational_new_b(mrb, num, den); +#else rat_overflow(mrb); +#endif } - a = i_gcd(n, d); - return rational_new(mrb, n/a, d/a); + if (deno < 0) { + nume *= -1; + deno *= -1; + } + + mrb_int a = i_gcd(nume, deno); + nume /= a; + deno /= a; + + struct RClass *c = mrb_class_get_id(mrb, MRB_SYM(Rational)); + struct RBasic *rat; + struct mrb_rational *p = rat_alloc(mrb, c, &rat); + p->numerator = nume; + p->denominator = deno; + rat->frozen = 1; + return mrb_obj_value(rat); } +#define rational_new_i(mrb,n,d) mrb_rational_new(mrb, n, d) + #ifndef MRB_NO_FLOAT #if defined(MRB_INT32) || defined(MRB_USE_FLOAT32) @@ -192,180 +324,210 @@ rational_new_i(mrb_state *mrb, mrb_int n, mrb_int d) #define RAT_HUGE_VAL HUGE_VAL #endif -static void -float_decode_internal(mrb_state *mrb, mrb_float f, mrb_float *rf, int *n) -{ - f = (mrb_float)frexp_rat(f, n); - if (isinf(f)) rat_overflow(mrb); - f = (mrb_float)ldexp_rat(f, RAT_MANT_DIG); - *n -= RAT_MANT_DIG; - *rf = f; -} - -void mrb_check_num_exact(mrb_state *mrb, mrb_float num); - static mrb_value -rational_new_f(mrb_state *mrb, mrb_float f0) -{ - mrb_float f; - int n; - - mrb_check_num_exact(mrb, f0); - float_decode_internal(mrb, f0, &f, &n); -#if FLT_RADIX == 2 - if (n == 0) - return rational_new(mrb, (mrb_int)f, 1); - if (n > 0) { - f = ldexp_rat(f, n); - if (f == RAT_HUGE_VAL || f > (mrb_float)MRB_INT_MAX) { - rat_overflow(mrb); - } - return rational_new(mrb, (mrb_uint)f, 1); - } - if (n < -RAT_INT_LIMIT) { - f = ldexp_rat(f, n+RAT_INT_LIMIT); - n = RAT_INT_LIMIT; - } - else { - n = -n; +rational_new_f(mrb_state *mrb, mrb_float f) +{ + mrb_check_num_exact(mrb, f); + if (f == 0.0) { + return rational_new_i(mrb, 0, 1); } - return rational_new_i(mrb, (mrb_int)f, ((mrb_int)1)< 0) { + mrb_int temp; + /* Check exp < MRB_INT_BIT-1 to avoid undefined behavior from shifting into sign bit */ + if (exp >= MRB_INT_BIT - 1 || mrb_int_mul_overflow(nume, ((mrb_int)1)< RAT_INT_LIMIT) { - f /= 2; - n--; - } - while (n--) { - pow *= FLT_RADIX; - } - return rational_new_i(mrb, f, pow); - } - else { - while (n--) { - if (MRB_INT_MAX/FLT_RADIX < pow) { - rat_overflow(mrb); - } - pow *= FLT_RADIX; - } - return rational_new(mrb, (mrb_int)f*pow, 1); - } -#endif -} + mrb_value n = mrb_bint_lshift(mrb, mrb_bint_new_int(mrb, nume), exp); + return rational_new_b(mrb, n, mrb_int_value(mrb, deno)); #endif - -static mrb_value -rational_s_new(mrb_state *mrb, mrb_value self) -{ - mrb_int numerator, denominator; - -#ifdef MRB_NO_FLOAT - mrb_get_args(mrb, "ii", &numerator, &denominator); -#else - - mrb_value numv, denomv; - - mrb_get_args(mrb, "oo", &numv, &denomv); - if (mrb_integer_p(numv)) { - numerator = mrb_integer(numv); - - if (mrb_integer_p(denomv)) { - denominator = mrb_integer(denomv); - } - else { - mrb_float numf = (mrb_float)numerator; - mrb_float denomf = mrb_as_float(mrb, denomv); - - return rational_new_f(mrb, numf/denomf); } + nume = temp; } - else { - mrb_float numf = mrb_as_float(mrb, numv); - mrb_float denomf; - - if (mrb_integer_p(denomv)) { - denomf = (mrb_float)mrb_integer(denomv); - } - else { - denomf = mrb_as_float(mrb, denomv); + else if (exp < 0) { + /* exp is negative, so we need to multiply denominator by 2^(-exp) */ + int neg_exp = -exp; + if (neg_exp >= MRB_INT_BIT - 1 || mrb_int_mul_overflow(deno, ((mrb_int)1)<denominator == 0.0) { - f = INFINITY; - } - else { - f = (mrb_float)p->numerator / (mrb_float)p->denominator; +#ifdef RAT_BIGINT + if (RAT_BIGINT_P(x)) { + return mrb_bint_as_float(mrb, mrb_obj_value(p->b.num)) / mrb_bint_as_float(mrb, mrb_obj_value(p->b.den)); } - - return f; +#endif + return (mrb_float)p->numerator / (mrb_float)p->denominator; } mrb_value mrb_rational_to_f(mrb_state *mrb, mrb_value self) { - struct mrb_rational *p = rational_ptr(mrb, self); - return mrb_float_value(mrb, rat_float(p)); + mrb_float f = rat_float(mrb, self); + return mrb_float_value(mrb, f); } #endif +/* + * call-seq: + * rational.to_i -> integer + * + * Returns the rational number truncated to an integer. + * + * Rational(3, 4).to_i #=> 0 + * Rational(7, 3).to_i #=> 2 + * Rational(-5, 2).to_i #=> -2 + */ mrb_value mrb_rational_to_i(mrb_state *mrb, mrb_value self) { - struct mrb_rational *p = rational_ptr(mrb, self); - if (p->denominator == 0) { - rat_zerodiv(mrb); + struct mrb_rational *p = rat_ptr(mrb, self); +#ifdef RAT_BIGINT + if (RAT_BIGINT_P(self)) { + return mrb_bint_div(mrb, mrb_obj_value(p->b.num), mrb_obj_value(p->b.den)); } +#endif return mrb_int_value(mrb, p->numerator / p->denominator); } -static mrb_value -rational_to_r(mrb_state *mrb, mrb_value self) +mrb_value +mrb_as_rational(mrb_state *mrb, mrb_value x) { - return self; + switch(mrb_type(x)) { + case MRB_TT_INTEGER: + return rational_new_i(mrb, mrb_integer(x), 1); +#ifdef RAT_BIGINT + case MRB_TT_BIGINT: + return rational_new_b(mrb, x, ONE); +#endif + case MRB_TT_RATIONAL: + return x; +#ifndef MRB_NO_FLOAT +#ifdef MRB_USE_COMPLEX + case MRB_TT_COMPLEX: +#endif + case MRB_TT_FLOAT: + return rational_new_f(mrb, mrb_as_float(mrb, x)); +#endif + default: + rat_type_error(mrb, x); + } } +/* + * call-seq: + * rational.negative? -> true or false + * + * Returns true if the rational number is negative, false otherwise. + * + * Rational(-1, 2).negative? #=> true + * Rational(1, 2).negative? #=> false + * Rational(0, 1).negative? #=> false + */ static mrb_value rational_negative_p(mrb_state *mrb, mrb_value self) { - struct mrb_rational *p = rational_ptr(mrb, self); - if (p->numerator < 0) { - return mrb_true_value(); + struct mrb_rational *p = rat_ptr(mrb, self); +#ifdef RAT_BIGINT + if (RAT_BIGINT_P(self)) { + mrb_int cmp = mrb_bint_cmp(mrb, mrb_obj_value(p->b.num), ZERO); + return mrb_bool_value(cmp < 0); + } +#endif + return mrb_bool_value(p->numerator < 0); +} + +#ifndef MRB_NO_FLOAT +/* + * call-seq: + * float.to_r -> rational + * + * Converts the float to a rational number. The conversion preserves + * the exact value of the float as a fraction. + * + * 0.5.to_r #=> Rational(1, 2) + * 0.25.to_r #=> Rational(1, 4) + * 1.5.to_r #=> Rational(3, 2) + */ +static mrb_value +float_to_r(mrb_state *mrb, mrb_value self) +{ + return rational_new_f(mrb, mrb_float(self)); +} +#endif + +/* + * call-seq: + * integer.to_r -> rational + * + * Converts the integer to a rational number with denominator 1. + * + * 5.to_r #=> Rational(5, 1) + * (-3).to_r #=> Rational(-3, 1) + * 0.to_r #=> Rational(0, 1) + */ +static mrb_value +int_to_r(mrb_state *mrb, mrb_value self) +{ +#ifdef RAT_BIGINT + if (mrb_bigint_p(self)) { + return rational_new_b(mrb, self, ONE); } - return mrb_false_value(); +#endif + return rational_new_i(mrb, mrb_integer(self), 1); } +/* + * call-seq: + * nil.to_r -> rational + * + * Converts nil to Rational(0, 1). + * + * nil.to_r #=> Rational(0, 1) + */ static mrb_value -fix_to_r(mrb_state *mrb, mrb_value self) +nil_to_r(mrb_state *mrb, mrb_value self) { - return rational_new(mrb, mrb_integer(self), 1); + return rational_new_i(mrb, 0, 1); } +#if !defined(MRB_NO_FLOAT) || defined(RAT_BIGINT) static mrb_value -rational_m(mrb_state *mrb, mrb_value self) +rational_new(mrb_state *mrb, mrb_value a, mrb_value b) { #ifdef MRB_NO_FLOAT - mrb_int n, d = 1; - mrb_get_args(mrb, "i|i", &n, &d); - return rational_new_i(mrb, n, d); + a = mrb_as_int(mrb, a); + b = mrb_as_int(mrb, b); + return rational_new_i(mrb, mrb_integer(a), mrb_integer(b)); #else - mrb_value a, b = mrb_fixnum_value(1); - mrb_get_args(mrb, "o|o", &a, &b); if (mrb_integer_p(a) && mrb_integer_p(b)) { return rational_new_i(mrb, mrb_integer(a), mrb_integer(b)); } +#ifdef RAT_BIGINT + else if (mrb_bigint_p(a) || mrb_bigint_p(b)) { + return rational_new_b(mrb, mrb_as_bint(mrb, a), b); + } +#endif else { mrb_float x = mrb_as_float(mrb, a); mrb_float y = mrb_as_float(mrb, b); @@ -374,11 +536,131 @@ rational_m(mrb_state *mrb, mrb_value self) #endif } +/* + * call-seq: + * Rational(numerator, denominator = 1) -> rational + * + * Creates a rational number from numerator and denominator. + * The rational is automatically reduced to lowest terms. + * + * Rational(1, 2) #=> Rational(1, 2) + * Rational(6, 8) #=> Rational(3, 4) + * Rational(5) #=> Rational(5, 1) + * Rational(-2, 4) #=> Rational(-1, 2) + */ +static mrb_value +rational_m(mrb_state *mrb, mrb_value self) +{ + mrb_value a, b = ONE; + mrb_get_args(mrb, "o|o", &a, &b); + return rational_new(mrb, a, b); +} + +#else + +/* + * call-seq: + * Rational(numerator, denominator = 1) -> rational + * + * Creates a rational number from numerator and denominator. + * The rational is automatically reduced to lowest terms. + * + * Rational(1, 2) #=> Rational(1, 2) + * Rational(6, 8) #=> Rational(3, 4) + * Rational(5) #=> Rational(5, 1) + * Rational(-2, 4) #=> Rational(-1, 2) + */ +static mrb_value +rational_m(mrb_state *mrb, mrb_value self) +{ + mrb_int n, d = 1; + mrb_get_args(mrb, "i|i", &n, &d); + return rational_new_i(mrb, n, d); +} +#endif + +static mrb_value +rational_eq_b(mrb_state *mrb, mrb_value x, mrb_value y) +{ + struct mrb_rational *p1 = rat_ptr(mrb, x); + mrb_bool result; + + switch (mrb_type(y)) { + case MRB_TT_INTEGER: + { + /* For bigint-backed rationals, check if denominator is 1 */ + mrb_value den = mrb_obj_value(p1->b.den); + mrb_int den_cmp = mrb_bint_cmp(mrb, den, mrb_int_value(mrb, 1)); + if (den_cmp != 0) return mrb_false_value(); + mrb_value num = mrb_obj_value(p1->b.num); + result = mrb_bint_cmp(mrb, num, y) == 0; + break; + } +#ifdef MRB_USE_BIGINT + case MRB_TT_BIGINT: + { + /* For bigint-backed rationals comparing with bigint */ + mrb_value den = mrb_obj_value(p1->b.den); + mrb_int den_cmp = mrb_bint_cmp(mrb, den, mrb_int_value(mrb, 1)); + if (den_cmp != 0) return mrb_false_value(); + mrb_value num = mrb_obj_value(p1->b.num); + result = mrb_bint_cmp(mrb, num, y) == 0; + break; + } +#endif +#ifndef MRB_NO_FLOAT + case MRB_TT_FLOAT: + { + /* For bigint-backed rationals, convert to float and compare */ + mrb_float num_f = mrb_bint_as_float(mrb, mrb_obj_value(p1->b.num)); + mrb_float den_f = mrb_bint_as_float(mrb, mrb_obj_value(p1->b.den)); + result = (num_f / den_f) == mrb_float(y); + break; + } +#endif + case MRB_TT_RATIONAL: + { + /* Compare by converting to float - less precise but safe */ + mrb_float v1 = mrb_bint_as_float(mrb, mrb_obj_value(p1->b.num)) / + mrb_bint_as_float(mrb, mrb_obj_value(p1->b.den)); + mrb_float v2 = rat_float(mrb, y); + result = v1 == v2; + break; + } + +#ifdef MRB_USE_COMPLEX + case MRB_TT_COMPLEX: + { + result = mrb_complex_eq(mrb, y, mrb_rational_to_f(mrb, x)); + break; + } +#endif + default: + result = mrb_equal(mrb, y, x); + break; + } + return mrb_bool_value(result); +} + +/* + * call-seq: + * rational == other -> true or false + * + * Returns true if rational equals other. Comparison is done by cross-multiplication + * to avoid floating point precision issues. + * + * Rational(1, 2) == Rational(2, 4) #=> true + * Rational(1, 2) == 0.5 #=> true + * Rational(1, 2) == Rational(1, 3) #=> false + */ static mrb_value rational_eq(mrb_state *mrb, mrb_value x) { mrb_value y = mrb_get_arg1(mrb); - struct mrb_rational *p1 = rational_ptr(mrb, x); +#ifdef RAT_BIGINT + if (RAT_BIGINT_P(x)) return rational_eq_b(mrb, x, y); +#endif + struct mrb_rational *p1 = rat_ptr(mrb, x); mrb_bool result; switch (mrb_type(y)) { @@ -386,6 +668,13 @@ rational_eq(mrb_state *mrb, mrb_value x) if (p1->denominator != 1) return mrb_false_value(); result = p1->numerator == mrb_integer(y); break; +#ifdef MRB_USE_BIGINT + case MRB_TT_BIGINT: + /* Non-bigint rational comparing with bigint */ + if (p1->denominator != 1) return mrb_false_value(); + result = mrb_bint_cmp(mrb, y, mrb_int_value(mrb, p1->numerator)) == 0; + break; +#endif #ifndef MRB_NO_FLOAT case MRB_TT_FLOAT: result = ((double)p1->numerator/p1->denominator) == mrb_float(y); @@ -393,7 +682,7 @@ rational_eq(mrb_state *mrb, mrb_value x) #endif case MRB_TT_RATIONAL: { - struct mrb_rational *p2 = rational_ptr(mrb, y); + struct mrb_rational *p2 = rat_ptr(mrb, y); mrb_int a, b; if (p1->numerator == p2->numerator && p1->denominator == p2->denominator) { @@ -426,78 +715,80 @@ rational_eq(mrb_state *mrb, mrb_value x) return mrb_bool_value(result); } +/* + * call-seq: + * -rational -> rational + * + * Returns the negation of the rational number. + * + * -Rational(1, 2) #=> Rational(-1, 2) + * -Rational(-3, 4) #=> Rational(3, 4) + */ static mrb_value -rational_cmp(mrb_state *mrb, mrb_value x) +rational_minus(mrb_state *mrb, mrb_value x) { - struct mrb_rational *p1 = rational_ptr(mrb, x); - mrb_value y = mrb_get_arg1(mrb); - - switch(mrb_type(y)) { - case MRB_TT_RATIONAL: - { - struct mrb_rational *p2 = rational_ptr(mrb, y); - mrb_int a, b; - - if (mrb_int_mul_overflow(p1->numerator, p2->denominator, &a) || - mrb_int_mul_overflow(p1->denominator, p2->numerator, &b)) { - return mrb_nil_value(); - } - if (a > b) - return mrb_fixnum_value(1); - else if (a < b) - return mrb_fixnum_value(-1); - return mrb_fixnum_value(0); - } - case MRB_TT_INTEGER: -#ifndef MRB_NO_FLOAT - case MRB_TT_FLOAT: - { - mrb_float a = rat_float(p1), b = mrb_as_float(mrb, y); - if (a > b) - return mrb_fixnum_value(1); - else if (a < b) - return mrb_fixnum_value(-1); - return mrb_fixnum_value(0); - } + struct mrb_rational *p = rat_ptr(mrb, x); +#ifdef RAT_BIGINT + mrb_value num; + if (RAT_BIGINT_P(x)) { + num = mrb_obj_value(p->b.num); + bint: + return rational_new_b(mrb, mrb_bint_neg(mrb, num), mrb_obj_value(p->b.den)); + } +#endif + mrb_int n = p->numerator; + if (n == MRB_INT_MIN) { +#ifdef RAT_BIGINT + num = mrb_as_bint(mrb, mrb_int_value(mrb, p->numerator)); + goto bint; #else - { - mrb_int a = p1->numerator, b; - if (mrb_int_mul_overflow(p1->denominator, mrb_integer(y), &b)) { - return mrb_nil_value(); - } - if (a > b) - return mrb_fixnum_value(1); - else if (a < b) - return mrb_fixnum_value(-1); - return mrb_fixnum_value(0); - } + rat_overflow(mrb); #endif - default: - x = mrb_funcall_id(mrb, y, MRB_OPSYM(cmp), 1, x); - if (mrb_integer_p(x)) { - mrb_int z = mrb_integer(x); - return mrb_fixnum_value(-z); - } - return mrb_nil_value(); - } + } + return rational_new_i(mrb, -n, p->denominator); } +#ifdef RAT_BIGINT static mrb_value -rational_minus(mrb_state *mrb, mrb_value x) +rat_add_b(mrb_state *mrb, mrb_value x, mrb_value y) { - struct mrb_rational *p = rational_ptr(mrb, x); - mrb_int n = p->numerator; - if (n == MRB_INT_MIN) rat_overflow(mrb); - return rational_new(mrb, -n, p->denominator); + mrb_value num1 = rat_numerator(mrb, x); + mrb_value den1 = rat_denominator(mrb, x); + mrb_value num2, den2; + + switch(mrb_type(y)) { + case MRB_TT_RATIONAL: + num2 = rat_numerator(mrb, y); + den2 = rat_denominator(mrb, y); + break; + case MRB_TT_INTEGER: + case MRB_TT_BIGINT: + num2 = y; + den2 = ONE; + break; + default: + /* should not happen */ + rat_type_error(mrb, y); + } + + mrb_value a = mrb_bint_mul_n(mrb, mrb_as_bint(mrb, num1), den2); + mrb_value b = mrb_bint_mul_n(mrb, mrb_as_bint(mrb, num2), den1); + a = mrb_bint_add_n(mrb, a, b); + b = mrb_bint_mul_n(mrb, mrb_as_bint(mrb, den1), den2); + return rational_new_b(mrb, a, b); } +#endif mrb_value mrb_rational_add(mrb_state *mrb, mrb_value x, mrb_value y) { - struct mrb_rational *p1 = rational_ptr(mrb, x); + struct mrb_rational *p1 = rat_ptr(mrb, x); switch (mrb_type(y)) { case MRB_TT_INTEGER: +#ifdef RAT_BIGINT + if (RAT_BIGINT_P(x)) return rat_add_b(mrb, x, y); +#endif { mrb_int z = mrb_integer(y); if (mrb_int_mul_overflow(z, p1->denominator, &z)) rat_overflow(mrb); @@ -505,8 +796,12 @@ mrb_rational_add(mrb_state *mrb, mrb_value x, mrb_value y) return rational_new_i(mrb, z, p1->denominator); } case MRB_TT_RATIONAL: +#ifdef RAT_BIGINT + if (RAT_BIGINT_P(x) || RAT_BIGINT_P(y)) + return rat_add_b(mrb, x, y); +#endif { - struct mrb_rational *p2 = rational_ptr(mrb, y); + struct mrb_rational *p2 = rat_ptr(mrb, y); mrb_int a, b; if (mrb_int_mul_overflow(p1->numerator, p2->denominator, &a)) rat_overflow(mrb); @@ -524,16 +819,32 @@ mrb_rational_add(mrb_state *mrb, mrb_value x, mrb_value y) } #endif +#ifdef RAT_BIGINT + case MRB_TT_BIGINT: + return rat_add_b(mrb, x, y); +#endif + #if defined(MRB_USE_COMPLEX) case MRB_TT_COMPLEX: - return mrb_complex_add(mrb, mrb_complex_new(mrb, rat_float(p1), 0), y); + return mrb_complex_add(mrb, mrb_complex_new(mrb, rat_float(mrb, x), 0), y); #endif default: - return mrb_funcall_id(mrb, y, MRB_OPSYM(add), 1, x); + return mrb_funcall_argv(mrb, y, MRB_OPSYM(add), 1, &x); } } +/* + * call-seq: + * rational + numeric -> rational or numeric + * + * Returns the sum of rational and numeric. If numeric is a rational, + * returns a rational. If numeric is a float, returns a float. + * + * Rational(1, 2) + Rational(1, 3) #=> Rational(5, 6) + * Rational(1, 2) + 1 #=> Rational(3, 2) + * Rational(1, 2) + 0.5 #=> 1.0 + */ static mrb_value rational_add(mrb_state *mrb, mrb_value x) { @@ -541,13 +852,47 @@ rational_add(mrb_state *mrb, mrb_value x) return mrb_rational_add(mrb, x, y); } +#ifdef RAT_BIGINT +static mrb_value +rat_sub_b(mrb_state *mrb, mrb_value x, mrb_value y) +{ + mrb_value num1 = rat_numerator(mrb, x); + mrb_value den1 = rat_denominator(mrb, x); + mrb_value num2, den2; + + switch(mrb_type(y)) { + case MRB_TT_RATIONAL: + num2 = rat_numerator(mrb, y); + den2 = rat_denominator(mrb, y); + break; + case MRB_TT_INTEGER: + case MRB_TT_BIGINT: + num2 = y; + den2 = ONE; + break; + default: + /* should not happen */ + rat_type_error(mrb, y); + } + + mrb_value a = mrb_bint_mul_n(mrb, mrb_as_bint(mrb, num1), den2); + mrb_value b = mrb_bint_mul_n(mrb, mrb_as_bint(mrb, num2), den1); + a = mrb_bint_sub_n(mrb, a, b); + b = mrb_bint_mul_n(mrb, mrb_as_bint(mrb, den1), den2); + return rational_new_b(mrb, a, b); +} +#endif + mrb_value mrb_rational_sub(mrb_state *mrb, mrb_value x, mrb_value y) { - struct mrb_rational *p1 = rational_ptr(mrb, x); + struct mrb_rational *p1 = rat_ptr(mrb, x); switch (mrb_type(y)) { case MRB_TT_INTEGER: +#ifdef RAT_BIGINT + if (RAT_BIGINT_P(x)) return rat_sub_b(mrb, x, y); +#endif { mrb_int z = mrb_integer(y); if (mrb_int_mul_overflow(z, p1->denominator, &z)) rat_overflow(mrb); @@ -555,8 +900,12 @@ mrb_rational_sub(mrb_state *mrb, mrb_value x, mrb_value y) return rational_new_i(mrb, z, p1->denominator); } case MRB_TT_RATIONAL: +#ifdef RAT_BIGINT + if (RAT_BIGINT_P(x) || RAT_BIGINT_P(y)) + return rat_sub_b(mrb, x, y); +#endif { - struct mrb_rational *p2 = rational_ptr(mrb, y); + struct mrb_rational *p2 = rat_ptr(mrb, y); mrb_int a, b; if (mrb_int_mul_overflow(p1->numerator, p2->denominator, &a)) rat_overflow(mrb); @@ -566,9 +915,14 @@ mrb_rational_sub(mrb_state *mrb, mrb_value x, mrb_value y) return rational_new_i(mrb, a, b); } +#ifdef RAT_BIGINT + case MRB_TT_BIGINT: + return rat_sub_b(mrb, x, y); +#endif + #if defined(MRB_USE_COMPLEX) case MRB_TT_COMPLEX: - return mrb_complex_sub(mrb, mrb_complex_new(mrb, rat_float(p1), 0), y); + return mrb_complex_sub(mrb, mrb_complex_new(mrb, rat_float(mrb, x), 0), y); #endif #ifndef MRB_NO_FLOAT @@ -580,11 +934,22 @@ mrb_rational_sub(mrb_state *mrb, mrb_value x, mrb_value y) } #else default: - mrb_raise(mrb, E_TYPE_ERROR, "non integer subtraction"); + rat_type_error(mrb, y); #endif } } +/* + * call-seq: + * rational - numeric -> rational or numeric + * + * Returns the difference of rational and numeric. If numeric is a rational, + * returns a rational. If numeric is a float, returns a float. + * + * Rational(1, 2) - Rational(1, 3) #=> Rational(1, 6) + * Rational(3, 2) - 1 #=> Rational(1, 2) + * Rational(1, 2) - 0.25 #=> 0.25 + */ static mrb_value rational_sub(mrb_state *mrb, mrb_value x) { @@ -592,21 +957,55 @@ rational_sub(mrb_state *mrb, mrb_value x) return mrb_rational_sub(mrb, x, y); } +#ifdef RAT_BIGINT +static mrb_value +rat_mul_b(mrb_state *mrb, mrb_value x, mrb_value y) +{ + mrb_value num, den; + + switch(mrb_type(y)) { + case MRB_TT_RATIONAL: + num = rat_numerator(mrb, y); + den = rat_denominator(mrb, y); + break; + case MRB_TT_INTEGER: + case MRB_TT_BIGINT: + num = y; + den = ONE; + break; + default: + /* should not happen */ + rat_type_error(mrb, y); + } + + mrb_value a = mrb_bint_mul_n(mrb, mrb_as_bint(mrb, rat_numerator(mrb, x)), num); + mrb_value b = mrb_bint_mul_n(mrb, mrb_as_bint(mrb, rat_denominator(mrb, x)), den); + return rational_new_b(mrb, a, b); +} +#endif + mrb_value mrb_rational_mul(mrb_state *mrb, mrb_value x, mrb_value y) { - struct mrb_rational *p1 = rational_ptr(mrb, x); - switch (mrb_type(y)) { case MRB_TT_INTEGER: +#ifdef RAT_BIGINT + if (RAT_BIGINT_P(x)) return rat_mul_b(mrb, x, y); +#endif { + struct mrb_rational *p1 = rat_ptr(mrb, x); mrb_int z = mrb_integer(y); if (mrb_int_mul_overflow(p1->numerator, z, &z)) rat_overflow(mrb); return rational_new_i(mrb, z, p1->denominator); } case MRB_TT_RATIONAL: +#ifdef RAT_BIGINT + if (RAT_BIGINT_P(x) || RAT_BIGINT_P(y)) + return rat_mul_b(mrb, x, y); +#endif { - struct mrb_rational *p2 = rational_ptr(mrb, y); + struct mrb_rational *p1 = rat_ptr(mrb, x); + struct mrb_rational *p2 = rat_ptr(mrb, y); mrb_int a, b; if (mrb_int_mul_overflow(p1->numerator, p2->numerator, &a)) rat_overflow(mrb); @@ -614,9 +1013,15 @@ mrb_rational_mul(mrb_state *mrb, mrb_value x, mrb_value y) return rational_new_i(mrb, a, b); } +#ifdef RAT_BIGINT + case MRB_TT_BIGINT: + return rat_mul_b(mrb, x, y); +#endif + #ifndef MRB_NO_FLOAT case MRB_TT_FLOAT: { + struct mrb_rational *p1 = rat_ptr(mrb, x); mrb_float z = p1->numerator * mrb_float(y); return mrb_float_value(mrb, mrb_div_float(z, (mrb_float)p1->denominator)); } @@ -624,14 +1029,25 @@ mrb_rational_mul(mrb_state *mrb, mrb_value x, mrb_value y) #if defined(MRB_USE_COMPLEX) case MRB_TT_COMPLEX: - return mrb_complex_mul(mrb, mrb_complex_new(mrb, rat_float(p1), 0), y); + return mrb_complex_mul(mrb, mrb_complex_new(mrb, rat_float(mrb, x), 0), y); #endif default: - return mrb_funcall_id(mrb, y, MRB_OPSYM(mul), 1, x); + return mrb_funcall_argv(mrb, y, MRB_OPSYM(mul), 1, &x); } } +/* + * call-seq: + * rational * numeric -> rational or numeric + * + * Returns the product of rational and numeric. Uses standard rational + * multiplication: (a/b) * (c/d) = (a*c)/(b*d). + * + * Rational(1, 2) * Rational(2, 3) #=> Rational(1, 3) + * Rational(1, 2) * 3 #=> Rational(3, 2) + * Rational(1, 2) * 2.0 #=> 1.0 + */ static mrb_value rational_mul(mrb_state *mrb, mrb_value x) { @@ -639,22 +1055,57 @@ rational_mul(mrb_state *mrb, mrb_value x) return mrb_rational_mul(mrb, x, y); } +#ifdef RAT_BIGINT +static mrb_value +rat_div_b(mrb_state *mrb, mrb_value x, mrb_value y) +{ + mrb_value num, den; + + switch(mrb_type(y)) { + case MRB_TT_RATIONAL: + num = rat_numerator(mrb, y); + den = rat_denominator(mrb, y); + break; + case MRB_TT_INTEGER: +#ifdef MRB_USE_BIGINT + case MRB_TT_BIGINT: +#endif + num = y; + den = ONE; + break; + default: + /* should not happen */ + rat_type_error(mrb, y); + } + + mrb_value a = mrb_bint_mul_n(mrb, mrb_as_bint(mrb, rat_numerator(mrb, x)), den); + mrb_value b = mrb_bint_mul_n(mrb, mrb_as_bint(mrb, rat_denominator(mrb, x)), num); + return rational_new_b(mrb, a, b); +} +#endif + mrb_value mrb_rational_div(mrb_state *mrb, mrb_value x, mrb_value y) { - struct mrb_rational *p1 = rational_ptr(mrb, x); - switch (mrb_type(y)) { case MRB_TT_INTEGER: +#ifdef RAT_BIGINT + if (RAT_BIGINT_P(x)) return rat_div_b(mrb, x, y); +#endif { + struct mrb_rational *p1 = rat_ptr(mrb, x); mrb_int z = mrb_integer(y); if (z == 0) mrb_int_zerodiv(mrb); if (mrb_int_mul_overflow(p1->denominator, z, &z)) rat_overflow(mrb); return rational_new_i(mrb, p1->numerator, z); } case MRB_TT_RATIONAL: +#ifdef RAT_BIGINT + if (RAT_BIGINT_P(x) || RAT_BIGINT_P(y)) return rat_div_b(mrb, x, y); +#endif { - struct mrb_rational *p2 = rational_ptr(mrb, y); + struct mrb_rational *p1 = rat_ptr(mrb, x); + struct mrb_rational *p2 = rat_ptr(mrb, y); mrb_int a, b; if (mrb_int_mul_overflow(p1->numerator, p2->denominator, &a)) rat_overflow(mrb); @@ -662,24 +1113,44 @@ mrb_rational_div(mrb_state *mrb, mrb_value x, mrb_value y) return rational_new_i(mrb, a, b); } -#if defined(MRB_USE_COMPLEX) +#ifdef RAT_BIGINT + case MRB_TT_BIGINT: + return rat_div_b(mrb, x, y); +#endif + +#ifdef MRB_USE_COMPLEX case MRB_TT_COMPLEX: - return mrb_complex_div(mrb, mrb_complex_new(mrb, rat_float(p1), 0), y); + return mrb_complex_div(mrb, mrb_complex_new(mrb, rat_float(mrb, x), 0), y); #endif - default: #ifndef MRB_NO_FLOAT case MRB_TT_FLOAT: { + struct mrb_rational *p1 = rat_ptr(mrb, x); mrb_float z = mrb_div_float((mrb_float)p1->numerator, mrb_as_float(mrb, y)); return mrb_float_value(mrb, mrb_div_float(z, (mrb_float)p1->denominator)); } -#else - mrb_raise(mrb, E_TYPE_ERROR, "non integer division"); #endif + + default: + rat_type_error(mrb, y); + /* not reached */ + return mrb_nil_value(); } } +/* + * call-seq: + * rational / numeric -> rational or numeric + * rational.quo(numeric) -> rational or numeric + * + * Returns the quotient of rational divided by numeric. Uses standard rational + * division: (a/b) / (c/d) = (a/b) * (d/c) = (a*d)/(b*c). + * + * Rational(1, 2) / Rational(1, 3) #=> Rational(3, 2) + * Rational(3, 4) / 2 #=> Rational(3, 8) + * Rational(1, 2) / 0.5 #=> 1.0 + */ static mrb_value rational_div(mrb_state *mrb, mrb_value x) { @@ -687,97 +1158,108 @@ rational_div(mrb_state *mrb, mrb_value x) return mrb_rational_div(mrb, x, y); } +mrb_value mrb_int_pow(mrb_state *mrb, mrb_value x, mrb_value y); + +/* + * call-seq: + * rational ** numeric -> numeric + * + * Returns rational raised to the power of numeric. The result is typically + * a float unless the result can be exactly represented as a rational. + * + * Rational(1, 2) ** 2 #=> Rational(1, 4) + * Rational(4, 1) ** 0.5 #=> 2.0 + * Rational(2, 1) ** 3 #=> Rational(8, 1) + */ static mrb_value rational_pow(mrb_state *mrb, mrb_value x) { - mrb_value y = mrb_get_arg1(mrb); - struct mrb_rational *p1 = rational_ptr(mrb, x); #ifndef MRB_NO_FLOAT - double d1, d2; + mrb_value y = mrb_get_arg1(mrb); + double d1 = rat_float(mrb, x); + double d2 = mrb_as_float(mrb, y); + d1 = pow(d1, d2); switch (mrb_type(y)) { - case MRB_TT_RATIONAL: - { - struct mrb_rational *p2 = rational_ptr(mrb, y); - if (p2->numerator == 0) { - return mrb_rational_new(mrb, 1, 1); - } - if (p2->numerator == p2->denominator) { - return x; - } - if (p2->denominator == 1) { - return rational_new_i(mrb, (mrb_int)pow((mrb_float)p1->numerator, (mrb_float)p2->numerator), - (mrb_int)pow((mrb_float)p1->denominator, (mrb_float)p2->numerator)); - } - d1 = rat_float(p1); - d2 = rat_float(p2); - } - break; case MRB_TT_FLOAT: - { - d1 = rat_float(p1); - d2 = mrb_float(y); - } - break; + return mrb_float_value(mrb, d1); case MRB_TT_INTEGER: - { - mrb_int i = mrb_integer(y); - if (i == 0) { - return mrb_rational_new(mrb, 1, 1); - } - if (i == 1) { - return x; - } - return rational_new_i(mrb, (mrb_int)pow((mrb_float)p1->numerator, (mrb_float)i), - (mrb_int)pow((mrb_float)p1->denominator, (mrb_float)i)); - } - break; + case MRB_TT_RATIONAL: + return rational_new_f(mrb, d1); + case MRB_TT_BIGINT: default: - mrb_raisef(mrb, E_TYPE_ERROR, "%T cannot be converted to Rational", y); + return mrb_float_value(mrb, d1); } - return mrb_float_value(mrb, pow(d1, d2)); #else mrb_raisef(mrb, E_NOTIMP_ERROR, "Rational#** not implemented with MRB_NO_FLOAT"); + /* not reached */ + return mrb_nil_value(); #endif } +/* + * call-seq: + * rational.hash -> integer + * + * Returns a hash value for the rational number. Two rationals with + * the same value will have the same hash value. + * + * Rational(1, 2).hash == Rational(2, 4).hash #=> true + */ static mrb_value rational_hash(mrb_state *mrb, mrb_value rat) { - struct mrb_rational *r = rational_ptr(mrb, rat); - uint32_t hash = mrb_byte_hash((uint8_t*)&r->numerator, sizeof(mrb_int)); + struct mrb_rational *r = rat_ptr(mrb, rat); + uint32_t hash; + +#ifdef RAT_BIGINT + if (RAT_BIGINT_P(rat)) { + mrb_value tmp = mrb_bint_hash(mrb, mrb_obj_value(r->b.num)); + hash = (uint32_t)mrb_integer(tmp); + tmp = mrb_bint_hash(mrb, mrb_obj_value(r->b.den)); + hash ^= (uint32_t)mrb_integer(tmp); + return mrb_int_value(mrb, hash); + } +#endif + hash = mrb_byte_hash((uint8_t*)&r->numerator, sizeof(mrb_int)); hash = mrb_byte_hash_step((uint8_t*)&r->denominator, sizeof(mrb_int), hash); return mrb_int_value(mrb, hash); } +/* ---------------------------*/ +static const mrb_mt_entry rational_rom_entries[] = { + MRB_MT_ENTRY(rational_numerator, MRB_SYM(numerator), MRB_ARGS_NONE()), + MRB_MT_ENTRY(rational_denominator, MRB_SYM(denominator), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_rational_to_i, MRB_SYM(to_i), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_obj_itself, MRB_SYM(to_r), MRB_ARGS_NONE()), /* Returns self - already a rational */ + MRB_MT_ENTRY(rational_negative_p, MRB_SYM_Q(negative), MRB_ARGS_NONE()), + MRB_MT_ENTRY(rational_eq, MRB_OPSYM(eq), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(rational_minus, MRB_OPSYM(minus), MRB_ARGS_NONE()), + MRB_MT_ENTRY(rational_add, MRB_OPSYM(add), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(rational_sub, MRB_OPSYM(sub), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(rational_mul, MRB_OPSYM(mul), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(rational_div, MRB_OPSYM(div), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(rational_div, MRB_SYM(quo), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(rational_pow, MRB_OPSYM(pow), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(rational_hash, MRB_SYM(hash), MRB_ARGS_NONE()), +#ifndef MRB_NO_FLOAT + MRB_MT_ENTRY(mrb_rational_to_f, MRB_SYM(to_f), MRB_ARGS_NONE()), +#endif +}; + void mrb_mruby_rational_gem_init(mrb_state *mrb) { - struct RClass *rat; - - rat = mrb_define_class_id(mrb, MRB_SYM(Rational), mrb_class_get_id(mrb, MRB_SYM(Numeric))); + struct RClass *rat = mrb_define_class_id(mrb, MRB_SYM(Rational), mrb_class_get_id(mrb, MRB_SYM(Numeric))); MRB_SET_INSTANCE_TT(rat, MRB_TT_RATIONAL); - mrb_undef_class_method(mrb, rat, "new"); - mrb_define_class_method(mrb, rat, "_new", rational_s_new, MRB_ARGS_REQ(2)); - mrb_define_method(mrb, rat, "numerator", rational_numerator, MRB_ARGS_NONE()); - mrb_define_method(mrb, rat, "denominator", rational_denominator, MRB_ARGS_NONE()); + MRB_UNDEF_ALLOCATOR(rat); + mrb_undef_class_method_id(mrb, rat, MRB_SYM(new)); + MRB_MT_INIT_ROM(mrb, rat, rational_rom_entries); + mrb_define_method_id(mrb, mrb->integer_class, MRB_SYM(to_r), int_to_r, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, mrb->nil_class, MRB_SYM(to_r), nil_to_r, MRB_ARGS_NONE()); + mrb_define_private_method_id(mrb, mrb->kernel_module, MRB_SYM(Rational), rational_m, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); #ifndef MRB_NO_FLOAT - mrb_define_method(mrb, rat, "to_f", mrb_rational_to_f, MRB_ARGS_NONE()); -#endif - mrb_define_method(mrb, rat, "to_i", mrb_rational_to_i, MRB_ARGS_NONE()); - mrb_define_method(mrb, rat, "to_r", rational_to_r, MRB_ARGS_NONE()); - mrb_define_method(mrb, rat, "negative?", rational_negative_p, MRB_ARGS_NONE()); - mrb_define_method(mrb, rat, "==", rational_eq, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, rat, "<=>", rational_cmp, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, rat, "-@", rational_minus, MRB_ARGS_NONE()); - mrb_define_method(mrb, rat, "+", rational_add, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, rat, "-", rational_sub, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, rat, "*", rational_mul, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, rat, "/", rational_div, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, rat, "quo", rational_div, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, rat, "**", rational_pow, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, rat, "hash", rational_hash, MRB_ARGS_NONE()); - mrb_define_method(mrb, mrb->integer_class, "to_r", fix_to_r, MRB_ARGS_NONE()); - mrb_define_method(mrb, mrb->kernel_module, "Rational", rational_m, MRB_ARGS_ARG(1,1)); + mrb_define_method_id(mrb, mrb->float_class, MRB_SYM(to_r), float_to_r, MRB_ARGS_NONE()); +#endif } void diff --git a/mrbgems/mruby-rational/test/rational.rb b/mrbgems/mruby-rational/test/rational.rb index 0038f141b8..6e4426cb18 100644 --- a/mrbgems/mruby-rational/test/rational.rb +++ b/mrbgems/mruby-rational/test/rational.rb @@ -1,27 +1,3 @@ -class UserDefinedNumeric < Numeric - def initialize(n) - @n = n - end - - def <=>(rhs) - return nil unless rhs.respond_to?(:to_i) - rhs = rhs.to_i - rhs < 0 ? nil : @n <=> rhs - end - - def inspect - "#{self.class}(#{@n})" - end -end - -class ComplexLikeNumeric < UserDefinedNumeric - def ==(rhs) - @n == 0 && rhs == 0 - end - - undef <=> -end - def assert_rational(exp, real) assert "assert_rational" do assert_kind_of Rational, real @@ -159,13 +135,6 @@ def assert_complex(real, imag) assert_equal_rational(false, Rational(2,1), 1r) assert_equal_rational(false, Rational(1), nil) assert_equal_rational(false, Rational(1), '') - assert_equal_rational(true, 0r, UserDefinedNumeric.new(0)) - assert_equal_rational(true, 1r, UserDefinedNumeric.new(1)) - assert_equal_rational(false, 1r, UserDefinedNumeric.new(2)) - assert_equal_rational(false, -1r, UserDefinedNumeric.new(-1)) - assert_equal_rational(true, 0r, ComplexLikeNumeric.new(0)) - assert_equal_rational(false, 1r, ComplexLikeNumeric.new(1)) - assert_equal_rational(false, 1r, ComplexLikeNumeric.new(2)) end assert 'Integer#==(Rational), Integer#!=(Rational)' do @@ -203,12 +172,6 @@ def assert_complex(real, imag) assert_cmp(-1, Rational(1,2), Rational(2,3)) assert_cmp(-1, Rational(1,2), Rational(2,3)) assert_cmp(nil, 3r, "3") - assert_cmp(1, 3r, UserDefinedNumeric.new(2)) - assert_cmp(0, 3r, UserDefinedNumeric.new(3)) - assert_cmp(-1, 3r, UserDefinedNumeric.new(4)) - assert_cmp(nil, Rational(-3), UserDefinedNumeric.new(5)) - assert_raise(NoMethodError) { 0r <=> ComplexLikeNumeric.new(0) } - assert_raise(NoMethodError) { 1r <=> ComplexLikeNumeric.new(2) } end assert 'Integer#<=>(Rational)' do @@ -346,3 +309,8 @@ def assert_complex(real, imag) assert_float(16.0, (4r)**(2.0)) assert_float(3.5**1.5, (7/2r)**(1.5)) end + +assert 'Integer#quo' do + a = 6.quo(5) + assert_equal 6/5r, a +end diff --git a/mrbgems/mruby-regexp/README.md b/mrbgems/mruby-regexp/README.md new file mode 100644 index 0000000000..1a29ed813f --- /dev/null +++ b/mrbgems/mruby-regexp/README.md @@ -0,0 +1,131 @@ +# mruby-regexp + +Built-in regular expression engine for mruby using a Pike VM (NFA +simulation) with backtracking fallback. + +## Features + +### Pattern Syntax + +- `.` any character (except newline by default) +- `*`, `+`, `?` greedy quantifiers +- `*?`, `+?`, `??` non-greedy quantifiers +- `{n}`, `{n,}`, `{n,m}` repetition counts +- `[abc]`, `[a-z]`, `[^abc]` character classes +- `\d`, `\w`, `\s` digit, word, whitespace shortcuts +- `\D`, `\W`, `\S` negated shortcuts +- `(...)` capture group +- `(?:...)` non-capturing group +- `(?...)` named capture group +- `|` alternation +- `\1`-`\9` backreferences +- `(?=...)` positive lookahead +- `(?!...)` negative lookahead +- `(?<=...)` positive lookbehind (fixed-length only) +- `(? MatchData or nil +re.match?("string") # => true/false +re =~ "string" # => index or nil +re === "string" # => true/false (for case/when) +re.source # => "pattern" +re.options # => flags integer +Regexp.escape("a.b") # => "a\\.b" +Regexp.last_match(n) # => nth capture from last match + +# MatchData +md = /(\w+)@(\w+)/.match("user@host") +md[0] # => "user@host" (full match) +md[1] # => "user" +md[2] # => "host" +md[:name] # named capture access +md.captures # => ["user", "host"] +md.to_a # => ["user@host", "user", "host"] +md.begin(0) # => match start position +md.end(0) # => match end position +md.pre_match # => string before match +md.post_match # => string after match +md.named_captures # => {"name" => "value", ...} + +# String methods +str.match(re) # => MatchData or nil +str.match?(re) # => true/false +str =~ re # => index or nil +str.sub(re, replacement) # replace first occurrence +str.sub(re) { |m| ... } # replace with block +str.gsub(re, replacement) # replace all occurrences +str.gsub(re) { |m| ... } # replace all with block +str.scan(re) # => array of matches +str.split(re) # => array of parts + +# Global variables +$~ # last MatchData +``` + +## Engine Architecture + +The gem uses two execution engines: + +**Pike VM (NFA simulation)**: Used for patterns without +backreferences, non-greedy quantifiers, or lookahead. Guarantees +O(pattern x text) time complexity, making it immune to ReDoS +attacks. + +**Backtracking engine**: Used when patterns contain `\1`-`\9` +backreferences, non-greedy quantifiers (`*?`, `+?`, `??`), or +lookahead assertions (`(?=...)`, `(?!...)`). Protected by a +configurable step limit (`MRB_REGEXP_STEP_LIMIT`, default 1M) to +prevent excessive backtracking. + +The engine is selected automatically at compile time based on +pattern analysis. + +## Limitations + +- **Fixed-length lookbehind only**: `(?<=...)` and `(? +#include + +/* Bytecode instructions for the NFA engine */ +enum re_opcode { + RE_CHAR, /* match literal byte: operand = byte value */ + RE_ANY, /* match any character (. without DOTALL) */ + RE_ANY_NL, /* match any character including newline (. with DOTALL) */ + RE_CLASS, /* match character class: operand = class_id */ + RE_NCLASS, /* match negated character class: operand = class_id */ + RE_MATCH, /* successful match */ + RE_JMP, /* unconditional jump: operand = target offset */ + RE_SPLIT, /* fork: operand = target offset (greedy: try next first) */ + RE_SPLITNG, /* fork: operand = target offset (non-greedy: try jump first) */ + RE_SAVE, /* save capture position: operand = slot number */ + RE_BOL, /* assert beginning of line (^) */ + RE_EOL, /* assert end of line ($) */ + RE_BOT, /* assert beginning of text (\A) */ + RE_EOT, /* assert end of text (\z) */ + RE_EOTNL, /* assert end of text or before final \n (\Z) */ + RE_WBOUND, /* assert word boundary (\b) */ + RE_NWBOUND, /* assert non-word boundary (\B) */ + RE_BACKREF, /* backreference: operand = group number */ + RE_LOOKAHEAD, /* positive lookahead: offset = end of sub-pattern */ + RE_NEG_LOOKAHEAD, /* negative lookahead: offset = end of sub-pattern */ + RE_LOOKBEHIND, /* positive lookbehind: a = byte length, offset = end */ + RE_NEG_LOOKBEHIND, /* negative lookbehind: a = byte length, offset = end */ +}; + +/* Bytecode instruction (4 bytes each for alignment) */ +typedef struct { + uint8_t op; + uint8_t a; /* small operand or class id */ + uint16_t offset; /* jump target or extended operand */ +} re_inst; + +/* Character class bitmap (ASCII range) */ +#define RE_CLASS_BITMAP_SIZE 16 /* 128 bits = 16 bytes for ASCII */ +typedef struct { + uint8_t bitmap[RE_CLASS_BITMAP_SIZE]; /* bitmap for 0-127 */ + mrb_bool negated; + mrb_bool utf8_any; /* match any non-ASCII byte if true */ +} re_charclass; + +/* Named capture entry */ +typedef struct { + const char *name; + uint16_t name_len; + uint16_t group; +} re_named_capture; + +/* Compiled regexp pattern */ +typedef struct mrb_regexp_pattern { + re_inst *code; /* bytecode array */ + uint32_t code_len; /* number of instructions */ + re_charclass *classes; /* character class table */ + uint16_t num_classes; + uint16_t num_captures; /* number of capture groups (including group 0) */ + uint32_t flags; + re_named_capture *named_captures; + char *named_arena; /* owned storage for named_captures[i].name; NULL if num_named == 0 */ + uint16_t num_named; + mrb_bool has_backref; /* true if pattern uses \1-\9 */ + mrb_bool needs_backtrack; /* true if pattern needs backtracking engine */ + uint8_t *prefix; /* literal prefix bytes for fast skip (or NULL) */ + uint8_t prefix_len; /* length of prefix (0 = no prefix) */ + uint8_t first_bytes[16]; /* bitmap of possible first bytes (128-bit, ASCII) */ + mrb_bool has_first_bytes; /* true if first_bytes is usable for skipping */ + mrb_bool is_literal; /* true if pattern is pure literal (no metacharacters) */ + /* Cached VM state for pike_vm (avoids malloc per mrb_re_exec call) */ + uint32_t *cached_visited; /* generation-based visited array */ + void *cached_threads[2]; /* curr/next thread lists */ + int cached_list_capa; /* capacity of cached thread lists */ + mrb_bool cache_in_use; /* re-entrancy guard */ +} mrb_regexp_pattern; + +/* Regexp flags */ +#define RE_FLAG_IGNORECASE 1 +#define RE_FLAG_MULTILINE 2 /* ^ and $ match at \n boundaries */ +#define RE_FLAG_DOTALL 4 /* . matches \n (Ruby's /m for dot behavior) */ +#define RE_FLAG_EXTENDED 8 /* ignore whitespace and #comments in pattern */ + +/* Note: Ruby's /m flag means BOTH multiline anchors AND dotall. + Ruby's /i flag is ignorecase. Ruby's /x flag is extended. */ + +/* Step limit for ReDoS protection */ +#ifndef MRB_REGEXP_STEP_LIMIT +#define MRB_REGEXP_STEP_LIMIT 1000000 +#endif + +/* Recursion-depth limit for bt_match: bounds C stack growth on + patterns like `(?=)+` that recurse without consuming input. */ +#ifndef MRB_REGEXP_RECURSION_LIMIT +#define MRB_REGEXP_RECURSION_LIMIT 1000 +#endif + +/* Maximum captures */ +#define RE_MAX_CAPTURES 32 + +/* Thread struct for Pike VM (also used for cache sizing) */ +typedef struct { + uint32_t pc; + int cap_slot; +} re_thread_cache; + +/* Compile a pattern string into bytecode */ +mrb_regexp_pattern* mrb_re_compile(mrb_state *mrb, const char *pattern, mrb_int len, uint32_t flags); + +/* Free a compiled pattern */ +void mrb_re_free(mrb_state *mrb, mrb_regexp_pattern *pat); + +/* Execute a match. + Returns number of captures filled (0 = no match). + captures[2*n] = start, captures[2*n+1] = end for group n. */ +int mrb_re_exec(mrb_state *mrb, const mrb_regexp_pattern *pat, + const char *str, mrb_int len, mrb_int start, + int *captures, int captures_size); + +/* UTF-8 helpers */ +int mrb_re_utf8_charlen(const char *s, const char *end); +uint32_t mrb_re_utf8_decode(const char *s, int *len); +mrb_bool mrb_re_is_word_char(uint32_t c); + +#endif /* MRB_RE_INTERNAL_H */ diff --git a/mrbgems/mruby-regexp/mrbgem.rake b/mrbgems/mruby-regexp/mrbgem.rake new file mode 100644 index 0000000000..c5879908dc --- /dev/null +++ b/mrbgems/mruby-regexp/mrbgem.rake @@ -0,0 +1,7 @@ +MRuby::Gem::Specification.new('mruby-regexp') do |spec| + spec.license = 'MIT' + spec.authors = 'mruby developers' + spec.summary = 'Regexp class (built-in NFA engine)' + + spec.add_dependency 'mruby-string-ext', :core => 'mruby-string-ext' +end diff --git a/mrbgems/mruby-regexp/mrblib/regexp.rb b/mrbgems/mruby-regexp/mrblib/regexp.rb new file mode 100644 index 0000000000..84740d578b --- /dev/null +++ b/mrbgems/mruby-regexp/mrblib/regexp.rb @@ -0,0 +1,24 @@ +class Regexp + def self.compile(pattern, *args) + new(pattern, *args) + end + + # Return named captures hash: {"name" => group_number, ...} + def named_captures + @named_captures || {} + end + + # options is implemented in C (internal flags -> Ruby constants conversion) + + def self.last_match(n = nil) + md = $~ + return md if n.nil? + md ? md[n] : nil + end + + # named capture info is set via C create_matchdata +end + +class MatchData + # named_captures is implemented in C via md->regexp +end diff --git a/mrbgems/mruby-regexp/mrblib/string_regexp.rb b/mrbgems/mruby-regexp/mrblib/string_regexp.rb new file mode 100644 index 0000000000..e15e507a3a --- /dev/null +++ b/mrbgems/mruby-regexp/mrblib/string_regexp.rb @@ -0,0 +1,109 @@ +class String + # Capture the C-defined String#split under `__split` before the override + # below replaces it, so the override can delegate non-regexp patterns + # back to the core implementation. + alias __split split + + def match(re, pos = 0) + re = Regexp.new(re) if re.is_a?(String) + re.match(self, pos) + end + + def match?(re, pos = 0) + re = Regexp.new(re) if re.is_a?(String) + re.match?(self, pos) + end + + def =~(re) + re =~ self + end + + def sub(pattern, replacement = nil, &block) + pattern = Regexp.new(Regexp.escape(pattern)) if pattern.is_a?(String) + unless block + return pattern.__sub_str(self, replacement.to_s) + end + md = pattern.match(self) + return self.dup unless md + md.pre_match + block.call(md[0]).to_s + md.post_match + end + + def gsub(pattern, replacement = nil, &block) + pattern = Regexp.new(Regexp.escape(pattern)) if pattern.is_a?(String) + unless block + return pattern.__gsub_str(self, replacement.to_s) + end + # block case: keep in Ruby to avoid VM callback from C + parts = [] + rest = self + while rest.length > 0 + md = pattern.match(rest) + break unless md + parts << md.pre_match + parts << block.call(md[0]).to_s + matched_len = md[0].length + if matched_len == 0 + parts << rest[0] if rest.length > 0 + rest = rest[1..-1] || "" + else + rest = md.post_match + end + end + parts << rest + parts.join + end + + def scan(pattern) + pattern = Regexp.new(Regexp.escape(pattern)) if pattern.is_a?(String) + result = pattern.__scan(self) + if block_given? + result.each { |m| yield m } + self + else + result + end + end + + # Regexp-aware split. Falls back to the C-defined split (aliased as + # `__split` in mrb_mruby_regexp_gem_init before this override loads) for + # nil or simple-string patterns; converts string-with-backslash to a + # Regexp and handles regexp patterns in Ruby. + def split(pattern = nil, limit = -1) + return __split(pattern, limit) if pattern.nil? + if pattern.is_a?(String) + return __split(pattern, limit) if pattern.length == 1 || !pattern.include?('\\') + pattern = Regexp.new(Regexp.escape(pattern)) + end + result = [] + rest = self + count = 0 + while rest.length > 0 + if limit > 0 && count >= limit - 1 + result << rest + return result + end + md = pattern.match(rest) + break unless md + result << md.pre_match + rest = md.post_match + count += 1 + # skip zero-length match at beginning + if md[0].length == 0 + if rest.length > 0 + result[-1] = result[-1] + rest[0] + rest = rest[1..-1] || "" + else + break + end + end + end + result << rest + # remove trailing empty strings if no limit + if limit < 0 + while result.length > 0 && result[-1] == "" + result.pop + end + end + result + end +end diff --git a/mrbgems/mruby-regexp/src/re_compile.c b/mrbgems/mruby-regexp/src/re_compile.c new file mode 100644 index 0000000000..edbb968908 --- /dev/null +++ b/mrbgems/mruby-regexp/src/re_compile.c @@ -0,0 +1,1000 @@ +/* +** re_compile.c - regexp pattern compiler +** +** Compiles a regular expression pattern string into bytecode +** for the NFA execution engine. +** +** See Copyright Notice in mruby.h +*/ + +#include "re_internal.h" +#include +#include + +/* Compiler state */ +typedef struct { + mrb_state *mrb; + const char *src; /* pattern source */ + const char *src_end; + const char *p; /* current position */ + re_inst *code; /* instruction array */ + uint32_t code_len; + uint32_t code_capa; + re_charclass *classes; + uint16_t num_classes; + uint16_t class_capa; + uint16_t num_captures; + uint32_t flags; + re_named_capture *named_captures; + uint16_t num_named; + mrb_bool has_backref; + mrb_bool needs_backtrack; + char *stripped; /* allocated buffer for x-mode preprocessing */ +} re_compiler; + +static void compile_alt(re_compiler *c); /* forward */ + +static void +compile_error(re_compiler *c, const char *msg) +{ + /* Format the message before freeing c->stripped (which may alias c->src + in extended mode). c->src is not NUL-terminated, so use %l with the + explicit length from c->src_end. */ + mrb_value emsg = mrb_format(c->mrb, "%s: /%l/", + msg, c->src, (size_t)(c->src_end - c->src)); + + /* Free compile buffers before raising, since mrb_exc_raise longjmps out + and the stack-local re_compiler is abandoned without a chance to clean + up. mrb_free doesn't trigger GC, so emsg stays valid across these. */ + mrb_free(c->mrb, c->code); + c->code = NULL; + mrb_free(c->mrb, c->classes); + c->classes = NULL; + mrb_free(c->mrb, c->named_captures); + c->named_captures = NULL; + if (c->stripped) mrb_free(c->mrb, c->stripped); + c->stripped = NULL; + + mrb_exc_raise(c->mrb, + mrb_exc_new_str(c->mrb, mrb_exc_get_id(c->mrb, MRB_SYM(RegexpError)), emsg)); +} + +static uint32_t +emit(re_compiler *c, uint8_t op, uint8_t a, uint16_t offset) +{ + if (c->code_len >= c->code_capa) { + c->code_capa = c->code_capa ? c->code_capa * 2 : 64; + c->code = (re_inst*)mrb_realloc(c->mrb, c->code, sizeof(re_inst) * c->code_capa); + } + uint32_t pos = c->code_len++; + c->code[pos].op = op; + c->code[pos].a = a; + c->code[pos].offset = offset; + return pos; +} + +static void +patch(re_compiler *c, uint32_t pos, uint16_t offset) +{ + c->code[pos].offset = offset; +} + +/* Insert an instruction at position `pos` by shifting code. + Adjusts all jump offsets >= pos by +1. */ +static void +insert_inst(re_compiler *c, uint32_t pos, uint8_t op, uint8_t a, uint16_t offset) +{ + emit(c, RE_JMP, 0, 0); /* grow array */ + uint32_t len = c->code_len - 1 - pos; + memmove(&c->code[pos + 1], &c->code[pos], sizeof(re_inst) * len); + c->code[pos].op = op; + c->code[pos].a = a; + c->code[pos].offset = offset; + + /* Fix jump targets that point past the insertion point. An offset equal + to `pos` already points to the inserted instruction's new location and + must not be bumped -- bumping it would shift the target to whatever + code got displaced by the insertion (e.g. the body of the quantified + atom), corrupting "skip past this atom" jumps emitted earlier. */ + for (uint32_t i = 0; i < c->code_len; i++) { + if (i == pos) continue; + switch (c->code[i].op) { + case RE_JMP: case RE_SPLIT: case RE_SPLITNG: + if (c->code[i].offset > pos && c->code[i].offset < 0xffff) { + c->code[i].offset++; + } + break; + default: + break; + } + } +} + +static int +peek(re_compiler *c) +{ + if (c->p >= c->src_end) return -1; + return (uint8_t)*c->p; +} + +static int +next_char(re_compiler *c) +{ + if (c->p >= c->src_end) return -1; + return (uint8_t)*c->p++; +} + +/* Class IDs are stored in re_inst.a (uint8_t), so at most 256 distinct + character classes can be encoded. Without this cap, class_capa + (uint16_t) overflows on doubling past 32768 (8 -> 16 -> ... -> 32768 + -> 0), mrb_realloc with size 0 returns NULL, and the next memset + crashes; even before that, the (uint8_t)id cast at emit sites would + silently alias different classes. */ +#define RE_MAX_CLASSES 256 + +static uint16_t +add_class(re_compiler *c) +{ + if (c->num_classes >= RE_MAX_CLASSES) { + compile_error(c, "too many character classes"); + } + if (c->num_classes >= c->class_capa) { + c->class_capa = c->class_capa ? c->class_capa * 2 : 8; + c->classes = (re_charclass*)mrb_realloc(c->mrb, c->classes, sizeof(re_charclass) * c->class_capa); + } + uint16_t id = c->num_classes++; + memset(&c->classes[id], 0, sizeof(re_charclass)); + return id; +} + +static void +class_set_bit(re_charclass *cc, uint8_t ch) +{ + if (ch < 128) { + cc->bitmap[ch >> 3] |= (1 << (ch & 7)); + } +} + +static void +class_set_range(re_charclass *cc, uint8_t lo, uint8_t hi) +{ + for (int i = lo; i <= hi; i++) { + class_set_bit(cc, (uint8_t)i); + } +} + +static void +class_add_shorthand(re_charclass *cc, int ch) +{ + switch (ch) { + case 'd': + class_set_range(cc, '0', '9'); + break; + case 'D': + class_set_range(cc, 0, '0'-1); + class_set_range(cc, '9'+1, 127); + cc->utf8_any = TRUE; + break; + case 'w': + class_set_range(cc, 'a', 'z'); + class_set_range(cc, 'A', 'Z'); + class_set_range(cc, '0', '9'); + class_set_bit(cc, '_'); + break; + case 'W': + for (int i = 0; i < 128; i++) { + if (!mrb_re_is_word_char(i)) class_set_bit(cc, (uint8_t)i); + } + cc->utf8_any = TRUE; + break; + case 's': + class_set_bit(cc, ' '); + class_set_bit(cc, '\t'); + class_set_bit(cc, '\n'); + class_set_bit(cc, '\r'); + class_set_bit(cc, '\f'); + class_set_bit(cc, '\v'); + break; + case 'S': + for (int i = 0; i < 128; i++) { + if (i != ' ' && i != '\t' && i != '\n' && i != '\r' && i != '\f' && i != '\v') + class_set_bit(cc, (uint8_t)i); + } + cc->utf8_any = TRUE; + break; + } +} + +static int +parse_escape(re_compiler *c) +{ + int ch = next_char(c); + if (ch < 0) compile_error(c, "trailing backslash"); + switch (ch) { + case 'n': return '\n'; + case 't': return '\t'; + case 'r': return '\r'; + case 'f': return '\f'; + case 'v': return '\v'; + case 'a': return '\a'; + case 'e': return 0x1b; + case 'b': return '\b'; /* backspace; only reachable inside [...] since the + top-level dispatcher emits RE_WBOUND for `\b` */ + default: return ch; /* literal: \., \\, \/, \(, etc. */ + } +} + +/* Parse [...] character class */ +static void +compile_charclass(re_compiler *c) +{ + uint16_t id = add_class(c); + re_charclass *cc = &c->classes[id]; + mrb_bool negated = FALSE; + + if (peek(c) == '^') { + next_char(c); + negated = TRUE; + } + + mrb_bool first = TRUE; + while (peek(c) != ']' || first) { + int ch; + + if (peek(c) < 0) compile_error(c, "unterminated character class"); + first = FALSE; + + if (peek(c) == '\\') { + next_char(c); + int esc = peek(c); + if (esc == 'd' || esc == 'D' || esc == 'w' || esc == 'W' || esc == 's' || esc == 'S') { + next_char(c); + class_add_shorthand(cc, esc); + continue; + } + ch = parse_escape(c); + } + else { + ch = next_char(c); + } + + /* check for range a-z */ + if (peek(c) == '-' && c->p + 1 < c->src_end && c->p[1] != ']') { + next_char(c); /* skip '-' */ + int hi; + if (peek(c) == '\\') { + next_char(c); + hi = parse_escape(c); + } + else { + hi = next_char(c); + } + if (ch < 128 && hi < 128) { + class_set_range(cc, (uint8_t)ch, (uint8_t)hi); + } + } + else { + if (ch < 128) class_set_bit(cc, (uint8_t)ch); + } + } + next_char(c); /* skip ']' */ + + cc->negated = negated; + emit(c, negated ? RE_NCLASS : RE_CLASS, (uint8_t)id, 0); +} + +/* Maximum value for {n}/{n,m} quantifiers. Each unit becomes (min-1) + + (max-min) emitted copies of the inner atom; the cap keeps both the + parse free of integer overflow and the bytecode size sane. */ +#define RE_MAX_REPEAT 32768 + +/* Parse {n}, {n,}, {n,m} quantifier. Returns min,max via pointers. */ +static mrb_bool +parse_quantifier(re_compiler *c, int *min_out, int *max_out) +{ + const char *save = c->p; + int min = 0, max = -1; + + while (peek(c) >= '0' && peek(c) <= '9') { + min = min * 10 + (next_char(c) - '0'); + if (min > RE_MAX_REPEAT) compile_error(c, "quantifier too large"); + } + if (peek(c) == ',') { + next_char(c); + if (peek(c) >= '0' && peek(c) <= '9') { + max = 0; + while (peek(c) >= '0' && peek(c) <= '9') { + max = max * 10 + (next_char(c) - '0'); + if (max > RE_MAX_REPEAT) compile_error(c, "quantifier too large"); + } + } + /* else max = -1 (unlimited) */ + } + else { + max = min; /* {n} means exactly n */ + } + if (peek(c) != '}') { + c->p = save; /* not a quantifier, treat { as literal */ + return FALSE; + } + next_char(c); /* skip '}' */ + *min_out = min; + *max_out = max; + return TRUE; +} + +/* + * Compute the fixed byte length consumed by bytecode in range [start, end). + * Returns -1 if the pattern has variable length (quantifiers, alternation + * with different-length branches, etc.). + * Used for lookbehind: we need to know exactly how far back to look. + */ +static int +compute_fixed_len(re_compiler *c, uint32_t start, uint32_t end) +{ + int len = 0; + uint32_t pc = start; + + while (pc < end) { + re_inst inst = c->code[pc]; + switch (inst.op) { + case RE_CHAR: + case RE_CLASS: + case RE_NCLASS: + len += 1; + pc++; + break; + case RE_ANY: + case RE_ANY_NL: + /* . matches one character which can be 1-4 bytes in UTF-8. + For ASCII-only mode this is 1 byte; for safety, only allow + if we can determine it's ASCII context. Return -1 for now. */ + return -1; + case RE_SAVE: + pc++; + break; /* zero-width */ + case RE_BOL: case RE_EOL: case RE_BOT: case RE_EOT: case RE_EOTNL: + case RE_WBOUND: case RE_NWBOUND: + pc++; + break; /* zero-width assertions */ + case RE_JMP: + pc = inst.offset; + break; + case RE_SPLIT: { + /* alternation: both branches must have the same fixed length */ + /* branch 1: pc+1 to next JMP before branch 2 */ + /* branch 2: inst.offset to ... */ + /* For simplicity, reject alternation in lookbehind */ + return -1; + } + case RE_MATCH: + return len; + default: + return -1; /* unknown/variable-length instruction */ + } + } + return len; +} + +/* Compile a single atom (character, class, group, etc.) */ +static void +compile_atom(re_compiler *c) +{ + int ch = peek(c); + + switch (ch) { + case '(': + { + next_char(c); + mrb_bool capturing = TRUE; + + const char *cap_name = NULL; + uint16_t cap_name_len = 0; + + if (peek(c) == '?' && c->p + 1 < c->src_end) { + if (c->p[1] == ':') { + next_char(c); next_char(c); /* skip ?: */ + capturing = FALSE; + } + else if (c->p[1] == '=' || c->p[1] == '!') { + /* lookahead (?=...) or (?!...) */ + mrb_bool negative = (c->p[1] == '!'); + next_char(c); next_char(c); /* skip ?= or ?! */ + uint32_t la_pos = emit(c, negative ? RE_NEG_LOOKAHEAD : RE_LOOKAHEAD, 0, 0); + compile_alt(c); + emit(c, RE_MATCH, 0, 0); /* end of lookahead sub-pattern */ + c->code[la_pos].offset = (uint16_t)c->code_len; /* patch: skip past sub-pattern */ + if (peek(c) != ')') compile_error(c, "unmatched '('"); + next_char(c); + c->needs_backtrack = TRUE; /* needs backtracking engine */ + break; /* done with this atom */ + } + else if (c->p[1] == '<' && c->p + 2 < c->src_end && (c->p[2] == '=' || c->p[2] == '!')) { + /* lookbehind (?<=...) or (?p[2] == '!'); + next_char(c); next_char(c); next_char(c); /* skip ?<= or ?code_len; + compile_alt(c); + emit(c, RE_MATCH, 0, 0); + c->code[lb_pos].offset = (uint16_t)c->code_len; + + /* compute fixed byte length of lookbehind sub-pattern */ + int fixed_len = compute_fixed_len(c, sub_start, c->code_len); + if (fixed_len < 0) { + compile_error(c, "lookbehind must be fixed length"); + } + if (fixed_len > 255) { + compile_error(c, "lookbehind too long (max 255 bytes)"); + } + c->code[lb_pos].a = (uint8_t)fixed_len; + + if (peek(c) != ')') compile_error(c, "unmatched '('"); + next_char(c); + c->needs_backtrack = TRUE; /* needs backtracking engine */ + break; + } + else if (c->p[1] == '<' && c->p + 2 < c->src_end && c->p[2] != '=' && c->p[2] != '!') { + next_char(c); next_char(c); /* skip ?< */ + cap_name = c->p; + while (peek(c) != '>' && peek(c) >= 0) next_char(c); + if (peek(c) != '>') compile_error(c, "unterminated named capture"); + cap_name_len = (uint16_t)(c->p - cap_name); + next_char(c); /* skip > */ + } + } + + uint16_t group = 0; + if (capturing) { + if (c->num_captures >= RE_MAX_CAPTURES) { + compile_error(c, "too many capture groups"); + } + group = c->num_captures++; + emit(c, RE_SAVE, 0, group * 2); + if (cap_name) { + /* register named capture */ + c->named_captures = (re_named_capture*)mrb_realloc(c->mrb, c->named_captures, + sizeof(re_named_capture) * (c->num_named + 1)); + c->named_captures[c->num_named].name = cap_name; + c->named_captures[c->num_named].name_len = cap_name_len; + c->named_captures[c->num_named].group = group; + c->num_named++; + } + } + + compile_alt(c); + + if (peek(c) != ')') compile_error(c, "unmatched '('"); + next_char(c); + + if (capturing) { + emit(c, RE_SAVE, 0, group * 2 + 1); + } + } + break; + + case '[': + next_char(c); + compile_charclass(c); + break; + + case '.': + next_char(c); + emit(c, (c->flags & RE_FLAG_DOTALL) ? RE_ANY_NL : RE_ANY, 0, 0); + break; + + case '^': + next_char(c); + emit(c, RE_BOL, 0, 0); + break; + + case '$': + next_char(c); + emit(c, RE_EOL, 0, 0); + break; + + case '\\': + next_char(c); + ch = peek(c); + if (ch >= '1' && ch <= '9') { + next_char(c); + emit(c, RE_BACKREF, (uint8_t)(ch - '0'), 0); + c->has_backref = TRUE; + } + else if (ch == 'd' || ch == 'D' || ch == 'w' || ch == 'W' || ch == 's' || ch == 'S') { + next_char(c); + uint16_t id = add_class(c); + class_add_shorthand(&c->classes[id], ch); + emit(c, (ch >= 'A' && ch <= 'Z') ? RE_NCLASS : RE_CLASS, (uint8_t)id, 0); + } + else if (ch == 'A') { + next_char(c); + emit(c, RE_BOT, 0, 0); + } + else if (ch == 'z') { + next_char(c); + emit(c, RE_EOT, 0, 0); + } + else if (ch == 'Z') { + next_char(c); + emit(c, RE_EOTNL, 0, 0); + } + else if (ch == 'b') { + next_char(c); + emit(c, RE_WBOUND, 0, 0); + } + else if (ch == 'B') { + next_char(c); + emit(c, RE_NWBOUND, 0, 0); + } + else { + ch = parse_escape(c); + if (c->flags & RE_FLAG_IGNORECASE) { + if (ch >= 'A' && ch <= 'Z') { + uint16_t id = add_class(c); + class_set_bit(&c->classes[id], (uint8_t)ch); + class_set_bit(&c->classes[id], (uint8_t)(ch + 32)); + emit(c, RE_CLASS, (uint8_t)id, 0); + break; + } + else if (ch >= 'a' && ch <= 'z') { + uint16_t id = add_class(c); + class_set_bit(&c->classes[id], (uint8_t)ch); + class_set_bit(&c->classes[id], (uint8_t)(ch - 32)); + emit(c, RE_CLASS, (uint8_t)id, 0); + break; + } + } + emit(c, RE_CHAR, (uint8_t)ch, 0); + } + break; + + default: + if (ch < 0 || ch == ')' || ch == '|' || ch == '*' || ch == '+' || ch == '?' || ch == '{') { + return; /* not an atom */ + } + next_char(c); + if ((c->flags & RE_FLAG_IGNORECASE) && ch < 128) { + if (ch >= 'A' && ch <= 'Z') { + uint16_t id = add_class(c); + class_set_bit(&c->classes[id], (uint8_t)ch); + class_set_bit(&c->classes[id], (uint8_t)(ch + 32)); + emit(c, RE_CLASS, (uint8_t)id, 0); + break; + } + else if (ch >= 'a' && ch <= 'z') { + uint16_t id = add_class(c); + class_set_bit(&c->classes[id], (uint8_t)ch); + class_set_bit(&c->classes[id], (uint8_t)(ch - 32)); + emit(c, RE_CLASS, (uint8_t)id, 0); + break; + } + } + emit(c, RE_CHAR, (uint8_t)ch, 0); + break; + } +} + +/* Compile atom with quantifiers (*, +, ?, {n,m}) */ +static void +compile_quantified(re_compiler *c) +{ + uint32_t start = c->code_len; + compile_atom(c); + if (c->code_len == start) return; /* no atom emitted */ + + int ch = peek(c); + if (ch == '*' || ch == '+' || ch == '?') { + next_char(c); + mrb_bool nongreedy = (peek(c) == '?'); + if (nongreedy) { + next_char(c); + c->needs_backtrack = TRUE; + } + + + if (ch == '*') { + /* e* → L: SPLIT(body, end); body; JMP L; end: + SPLIT offset = end (after JMP), patched after JMP is emitted */ + insert_inst(c, start, nongreedy ? RE_SPLITNG : RE_SPLIT, 0, 0); + emit(c, RE_JMP, 0, start); + c->code[start].offset = (uint16_t)c->code_len; /* patch: skip to end */ + } + else if (ch == '+') { + /* e+ → body; SPLIT/SPLITNG(start) + SPLIT: first=pc+1(end), second=offset(start) → non-greedy + SPLITNG: first=offset(start), second=pc+1(end) → greedy */ + emit(c, nongreedy ? RE_SPLIT : RE_SPLITNG, 0, start); + } + else { /* ? */ + /* e? → SPLIT(body, end); body; end: */ + insert_inst(c, start, nongreedy ? RE_SPLITNG : RE_SPLIT, 0, 0); + c->code[start].offset = (uint16_t)c->code_len; /* patch: skip to end */ + } + } + else if (ch == '{') { + const char *save = c->p; + next_char(c); + int min, max; + if (!parse_quantifier(c, &min, &max)) { + c->p = save; + return; /* not a quantifier */ + } + mrb_bool nongreedy = (peek(c) == '?'); + if (nongreedy) { + next_char(c); + c->needs_backtrack = TRUE; + } + + /* For {n,m}: repeat atom min times, then optional (max-min) times */ + uint32_t atom_end = c->code_len; + uint32_t atom_size = atom_end - start; + + /* First, we have one copy already. We need min-1 more mandatory copies. */ + for (int i = 1; i < min; i++) { + for (uint32_t j = 0; j < atom_size; j++) { + emit(c, c->code[start + j].op, c->code[start + j].a, c->code[start + j].offset); + } + } + /* Then optional copies */ + if (max < 0) { + /* {n,} = min copies + * */ + uint32_t loop_start = c->code_len; + uint32_t split_pos = emit(c, nongreedy ? RE_SPLITNG : RE_SPLIT, 0, 0); + for (uint32_t j = 0; j < atom_size; j++) { + emit(c, c->code[start + j].op, c->code[start + j].a, c->code[start + j].offset); + } + emit(c, RE_JMP, 0, loop_start); + patch(c, split_pos, c->code_len); + } + else { + for (int i = min; i < max; i++) { + uint32_t split_pos = emit(c, nongreedy ? RE_SPLITNG : RE_SPLIT, 0, 0); + for (uint32_t j = 0; j < atom_size; j++) { + emit(c, c->code[start + j].op, c->code[start + j].a, c->code[start + j].offset); + } + patch(c, split_pos, c->code_len); + } + } + } +} + +/* Compile a sequence of quantified atoms */ +static void +compile_seq(re_compiler *c) +{ + while (peek(c) >= 0 && peek(c) != ')' && peek(c) != '|') { + compile_quantified(c); + } +} + +/* Compile alternation: seq | seq | ... */ +static void +compile_alt(re_compiler *c) +{ + uint32_t alt_start = c->code_len; + compile_seq(c); + + if (peek(c) != '|') return; + + /* a|b → SPLIT L1 L2; L1: a; JMP END; L2: b; END: + We need to insert SPLIT before already-emitted code for first alt. + Strategy: emit JMP after first alt, then for each subsequent alt, + insert a SPLIT before it by shifting code. */ + + /* Collect all alternatives, then emit SPLIT chain at the end. + This avoids insert_inst offset corruption for multi-way alternation. */ + uint32_t alt_starts[64]; /* start positions of each alternative */ + int num_alts = 0; + alt_starts[num_alts++] = alt_start; + + while (peek(c) == '|') { + next_char(c); + emit(c, RE_JMP, 0, 0); /* placeholder: jump to end */ + alt_starts[num_alts++] = c->code_len; + if (num_alts >= 64) compile_error(c, "too many alternatives"); + compile_seq(c); + } + + if (num_alts <= 1) return; /* shouldn't happen, but safety */ + + /* Now insert SPLIT chain before the alternatives. + For n alternatives: n-1 SPLIT instructions, each pointing to + their respective alternative. */ + uint32_t split_count = (uint32_t)(num_alts - 1); + /* Insert split_count instructions at alt_starts[0] */ + for (uint32_t i = 0; i < split_count; i++) { + insert_inst(c, alt_starts[0], RE_JMP, 0, 0); /* placeholder */ + /* adjust all alt_starts by +1 due to insertion */ + for (int j = 0; j < num_alts; j++) { + alt_starts[j]++; + } + } + + /* Now set up SPLIT chain: each SPLIT tries next instruction or jumps to alt */ + for (uint32_t i = 0; i < split_count; i++) { + uint32_t pos = alt_starts[0] - split_count + i; + c->code[pos].op = RE_SPLIT; + c->code[pos].a = 0; + c->code[pos].offset = (uint16_t)alt_starts[i + 1]; + } + + /* Patch JMPs (they are right before each alt_starts[1..n-1]) to point to end */ + uint32_t end = c->code_len; + for (int i = 1; i < num_alts; i++) { + uint32_t jmp_pos = alt_starts[i] - 1; + c->code[jmp_pos].op = RE_JMP; + c->code[jmp_pos].offset = (uint16_t)end; + } +} + +/* + * Strip whitespace and #comments for extended mode (/x flag). + * Whitespace inside [...] character classes is preserved. + * Escaped characters (\ followed by anything) are preserved. + */ +static char* +strip_extended(mrb_state *mrb, const char *src, mrb_int len, mrb_int *out_len) +{ + char *buf = (char*)mrb_malloc(mrb, len); + mrb_int o = 0; + mrb_bool in_class = FALSE; + const char *end = src + len; + + while (src < end) { + char ch = *src; + if (ch == '\\' && src + 1 < end) { + buf[o++] = *src++; + buf[o++] = *src++; + continue; + } + if (in_class) { + if (ch == ']') in_class = FALSE; + buf[o++] = *src++; + continue; + } + if (ch == '[') { + in_class = TRUE; + buf[o++] = *src++; + continue; + } + if (ch == '#') { + /* skip to end of line */ + while (src < end && *src != '\n') src++; + continue; + } + if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f' || ch == '\v') { + src++; + continue; + } + buf[o++] = *src++; + } + *out_len = o; + return buf; +} + +/* + * Compute the set of bytes that could be the first consumed byte of a match. + * Walks bytecode from pc=0, following epsilon transitions (SAVE, JMP, SPLIT). + * Returns TRUE if the set is narrower than "any byte" (i.e., useful for skip). + */ +static mrb_bool +first_set_walk(const re_inst *code, uint32_t code_len, + const re_charclass *classes, uint32_t pc, + uint8_t *bm, uint8_t *seen) +{ + while (pc < code_len) { + if (seen[pc]) return TRUE; /* already visited */ + seen[pc] = 1; + switch (code[pc].op) { + case RE_SAVE: + case RE_BOL: case RE_EOL: case RE_BOT: case RE_EOT: case RE_EOTNL: + case RE_WBOUND: case RE_NWBOUND: + pc++; + continue; /* zero-width, keep walking */ + case RE_JMP: + pc = code[pc].offset; + continue; + case RE_SPLIT: + /* both branches: pc+1 and offset */ + if (!first_set_walk(code, code_len, classes, code[pc].offset, bm, seen)) + return FALSE; + pc++; + continue; + case RE_SPLITNG: + if (!first_set_walk(code, code_len, classes, pc + 1, bm, seen)) + return FALSE; + pc = code[pc].offset; + continue; + case RE_CHAR: + if (code[pc].a >= 128) return FALSE; /* non-ASCII: bm covers ASCII only */ + bm[code[pc].a >> 3] |= (1 << (code[pc].a & 7)); + return TRUE; + case RE_CLASS: { + const re_charclass *cc = &classes[code[pc].a]; + for (int i = 0; i < 16; i++) bm[i] |= cc->bitmap[i]; + if (cc->utf8_any) return FALSE; /* non-ASCII possible */ + return TRUE; + } + case RE_NCLASS: { + /* negated class: complement of bitmap. Too many bits; not useful. */ + return FALSE; + } + case RE_ANY: case RE_ANY_NL: + return FALSE; /* any byte possible */ + case RE_MATCH: + /* Reaching MATCH via epsilon transitions means the regex can match + zero characters at any position. Skipping bytes that aren't in the + first-byte set would skip past valid empty-match positions, so the + optimization isn't safe -- bail out and accept any starting byte. */ + return FALSE; + default: + return FALSE; + } + } + /* Walked off the end without hitting MATCH or a consuming op. Treat as + empty-matchable, same as RE_MATCH. */ + return FALSE; +} + +static mrb_bool +compute_first_set(const re_inst *code, uint32_t code_len, + const re_charclass *classes, uint8_t *bm) +{ + uint8_t seen[4096]; + if (code_len >= sizeof(seen)) return FALSE; /* pattern too large */ + memset(seen, 0, code_len + 1); + if (!first_set_walk(code, code_len, classes, 0, bm, seen)) + return FALSE; + /* Check if bitmap is all-ones (no benefit to skip) */ + int set_bits = 0; + for (int i = 0; i < 16; i++) { + for (int b = 0; b < 8; b++) { + if (bm[i] & (1 << b)) set_bits++; + } + } + return set_bits < 96; /* useful only if fewer than 75% of bytes match */ +} + +mrb_regexp_pattern* +mrb_re_compile(mrb_state *mrb, const char *pattern, mrb_int len, uint32_t flags) +{ + re_compiler c; + memset(&c, 0, sizeof(c)); + + if (flags & RE_FLAG_EXTENDED) { + mrb_int slen; + c.stripped = strip_extended(mrb, pattern, len, &slen); + pattern = c.stripped; + len = slen; + } + c.mrb = mrb; + c.src = pattern; + c.src_end = pattern + len; + c.p = pattern; + c.flags = flags; + c.num_captures = 1; /* group 0 = whole match */ + + /* group 0 start */ + emit(&c, RE_SAVE, 0, 0); + + compile_alt(&c); + + if (c.p < c.src_end) { + compile_error(&c, "unmatched ')'"); + } + + /* group 0 end */ + emit(&c, RE_SAVE, 0, 1); + emit(&c, RE_MATCH, 0, 0); + + mrb_regexp_pattern *pat = (mrb_regexp_pattern*)mrb_malloc(mrb, sizeof(mrb_regexp_pattern)); + pat->code = c.code; + pat->code_len = c.code_len; + pat->classes = c.classes; + pat->num_classes = c.num_classes; + pat->num_captures = c.num_captures; + pat->flags = flags; + pat->named_captures = c.named_captures; + pat->named_arena = NULL; + pat->num_named = c.num_named; + + /* Copy capture names into an owned arena. Until this point the names + point into the pattern source (or into c.stripped, which gets freed + below in /x mode). After this loop the regexp owns its names. */ + if (c.num_named > 0) { + size_t total = 0; + for (uint16_t i = 0; i < c.num_named; i++) total += c.named_captures[i].name_len; + if (total > 0) { + pat->named_arena = (char*)mrb_malloc(mrb, total); + size_t off = 0; + for (uint16_t i = 0; i < c.num_named; i++) { + uint16_t n = c.named_captures[i].name_len; + memcpy(pat->named_arena + off, c.named_captures[i].name, n); + pat->named_captures[i].name = pat->named_arena + off; + off += n; + } + } + } + pat->has_backref = c.has_backref; + pat->needs_backtrack = c.needs_backtrack; + + /* Extract literal prefix for fast search skip. + Walk bytecode from the start, skipping SAVE, collecting RE_CHAR. */ + { + uint8_t pbuf[256]; + int plen = 0; + for (uint32_t i = 0; i < pat->code_len && plen < 255; i++) { + if (pat->code[i].op == RE_SAVE) continue; + if (pat->code[i].op == RE_CHAR) { + pbuf[plen++] = pat->code[i].a; + } + else break; + } + if (plen > 0) { + pat->prefix = (uint8_t*)mrb_malloc(mrb, plen); + memcpy(pat->prefix, pbuf, plen); + pat->prefix_len = (uint8_t)plen; + } + else { + pat->prefix = NULL; + pat->prefix_len = 0; + } + } + + /* Check if pattern is pure literal: SAVE CHAR* SAVE MATCH only. + prefix_len already holds the literal char count if so. */ + pat->is_literal = FALSE; + if (pat->prefix_len > 0 && pat->num_captures == 1 && + !pat->has_backref && !pat->needs_backtrack) { + /* bytecode should be: SAVE(0), CHAR*N, SAVE(1), MATCH + = 2 + prefix_len + 2 = prefix_len + 2 instructions + (SAVE(0) at 0, CHARs at 1..N, SAVE(1) at N+1, MATCH at N+2) */ + if (pat->code_len == (uint32_t)(pat->prefix_len + 3) && + pat->code[0].op == RE_SAVE && + pat->code[pat->code_len - 2].op == RE_SAVE && + pat->code[pat->code_len - 1].op == RE_MATCH) { + pat->is_literal = TRUE; + } + } + + /* Compute first-byte bitmap: set of bytes that could start a match. + Used when prefix is empty (e.g. alternation, character class patterns). */ + { + uint8_t bm[16]; + memset(bm, 0, sizeof(bm)); + pat->has_first_bytes = compute_first_set(pat->code, pat->code_len, pat->classes, bm); + if (pat->has_first_bytes) { + memcpy(pat->first_bytes, bm, 16); + } + } + + /* Pre-allocate VM state cache for pike_vm */ + { + int list_capa = (int)pat->code_len * 2 + 16; + pat->cached_visited = (uint32_t*)mrb_calloc(mrb, pat->code_len + 1, sizeof(uint32_t)); + pat->cached_threads[0] = mrb_malloc(mrb, sizeof(re_thread_cache) * list_capa); + pat->cached_threads[1] = mrb_malloc(mrb, sizeof(re_thread_cache) * list_capa); + pat->cached_list_capa = list_capa; + pat->cache_in_use = FALSE; + } + + if (c.stripped) mrb_free(mrb, c.stripped); + return pat; +} + +void +mrb_re_free(mrb_state *mrb, mrb_regexp_pattern *pat) +{ + if (pat) { + mrb_free(mrb, pat->code); + mrb_free(mrb, pat->classes); + mrb_free(mrb, pat->named_captures); + mrb_free(mrb, pat->named_arena); + mrb_free(mrb, pat->prefix); + mrb_free(mrb, pat->cached_visited); + mrb_free(mrb, pat->cached_threads[0]); + mrb_free(mrb, pat->cached_threads[1]); + mrb_free(mrb, pat); + } +} diff --git a/mrbgems/mruby-regexp/src/re_exec.c b/mrbgems/mruby-regexp/src/re_exec.c new file mode 100644 index 0000000000..8f6e047319 --- /dev/null +++ b/mrbgems/mruby-regexp/src/re_exec.c @@ -0,0 +1,625 @@ +/* +** re_exec.c - NFA execution engine (Pike VM) +** +** Executes compiled regexp bytecode using Thompson/Pike NFA simulation. +** O(pattern * text) time complexity guarantees ReDoS resistance. +** +** See Copyright Notice in mruby.h +*/ + +#include "re_internal.h" +#include + +/* + * Skip to the next position where the pattern's literal prefix could match. + * Uses memchr on the first byte for fast scanning, then verifies the rest. + * Returns the found position, or NULL if no match is possible. + */ +static const char* +skip_to_prefix(const mrb_regexp_pattern *pat, const char *sp, const char *str_end) +{ + if (pat->prefix_len == 0) return sp; + + uint8_t first = pat->prefix[0]; + int plen = pat->prefix_len; + + while (sp + plen <= str_end) { + const char *found = (const char*)memchr(sp, first, str_end - sp); + if (!found || found + plen > str_end) return NULL; + if (plen == 1 || memcmp(found + 1, pat->prefix + 1, plen - 1) == 0) { + return found; + } + sp = found + 1; + } + return NULL; +} + +/* Check if a byte is in the first-byte bitmap */ +#define FIRST_BYTE_OK(pat, ch) \ + ((ch) >= 128 || ((pat)->first_bytes[(ch) >> 3] & (1 << ((ch) & 7)))) + +/* Check if character matches a character class */ +static mrb_bool +class_match(const re_charclass *cc, uint8_t ch) +{ + if (ch >= 128) return cc->utf8_any; + return (cc->bitmap[ch >> 3] >> (ch & 7)) & 1; +} + +/* + * Pike VM with optimized thread storage. + * + * Key optimizations vs naive approach: + * - Captures stored in a flat pool, sized to actual ncap (not RE_MAX_CAPTURES) + * - Generation counter for visited[] eliminates per-step memset + * - Threads reference captures by pool index, avoiding 260-byte struct copies + */ + +typedef re_thread_cache re_thread; + +typedef struct { + re_thread *threads; + int count; + int capa; +} re_threadlist; + +/* All Pike VM state */ +typedef struct { + mrb_state *mrb; + const mrb_regexp_pattern *pat; + int ncap; /* actual capture count (num_captures * 2) */ + int *cap_pool; /* flat: cap_pool[slot * ncap .. (slot+1) * ncap) */ + int pool_next; /* next free slot */ + int pool_capa; /* total slots allocated */ + uint32_t *visited; /* generation-based */ + uint32_t gen; + const char *str; + const char *str_end; + mrb_bool matched; + mrb_bool match_only; /* true: skip capture tracking (match? path) */ + int *result_caps; /* best match (ncap ints) */ +} pike_state; + +static int +pool_alloc(pike_state *s) +{ + if (s->pool_next >= s->pool_capa) { + int new_capa = s->pool_capa * 2; + s->cap_pool = (int*)mrb_realloc(s->mrb, s->cap_pool, + sizeof(int) * new_capa * s->ncap); + s->pool_capa = new_capa; + } + return s->pool_next++; +} + +static int +pool_copy(pike_state *s, int src_slot) +{ + int dst = pool_alloc(s); + memcpy(&s->cap_pool[dst * s->ncap], + &s->cap_pool[src_slot * s->ncap], + sizeof(int) * s->ncap); + return dst; +} + +#define CAP(s, slot) (&(s)->cap_pool[(slot) * (s)->ncap]) + +/* Add thread following epsilon transitions. + visited[pc] == gen means already visited this step. */ +static void +add_thread(pike_state *s, re_threadlist *list, + uint32_t pc, int cap_slot, const char *sp) +{ + for (;;) { + if (pc >= s->pat->code_len) return; + if (s->visited[pc] == s->gen) return; + s->visited[pc] = s->gen; + + re_inst inst = s->pat->code[pc]; + switch (inst.op) { + case RE_JMP: + pc = inst.offset; + continue; + + case RE_SPLIT: + { + int cp = s->match_only ? 0 : pool_copy(s, cap_slot); + add_thread(s, list, inst.offset, cp, sp); + } + pc++; + continue; + + case RE_SPLITNG: + { + int cp = s->match_only ? 0 : pool_copy(s, cap_slot); + add_thread(s, list, pc + 1, cp, sp); + } + pc = inst.offset; + continue; + + case RE_SAVE: + if (!s->match_only) { + CAP(s, cap_slot)[inst.offset] = (int)(sp - s->str); + } + pc++; + continue; + + case RE_BOL: + if (sp == s->str || ((s->pat->flags & RE_FLAG_MULTILINE) && sp > s->str && sp[-1] == '\n')) { + pc++; continue; + } + return; + + case RE_EOL: + if (sp == s->str_end || ((s->pat->flags & RE_FLAG_MULTILINE) && *sp == '\n')) { + pc++; continue; + } + return; + + case RE_BOT: + if (sp == s->str) { pc++; continue; } + return; + + case RE_EOT: + if (sp == s->str_end) { pc++; continue; } + return; + + case RE_EOTNL: + if (sp == s->str_end || (sp + 1 == s->str_end && *sp == '\n')) { pc++; continue; } + return; + + case RE_WBOUND: + { + mrb_bool before = (sp > s->str) && mrb_re_is_word_char((uint8_t)sp[-1]); + mrb_bool after = (sp < s->str_end) && mrb_re_is_word_char((uint8_t)*sp); + if (before != after) { pc++; continue; } + } + return; + + case RE_NWBOUND: + { + mrb_bool before = (sp > s->str) && mrb_re_is_word_char((uint8_t)sp[-1]); + mrb_bool after = (sp < s->str_end) && mrb_re_is_word_char((uint8_t)*sp); + if (before == after) { pc++; continue; } + } + return; + + case RE_MATCH: + s->matched = TRUE; + if (s->result_caps) { + memcpy(s->result_caps, CAP(s, cap_slot), sizeof(int) * s->ncap); + } + return; + + default: + break; + } + break; + } + + if (list->count < list->capa) { + re_thread *t = &list->threads[list->count++]; + t->pc = pc; + t->cap_slot = cap_slot; + } +} + +static int +pike_vm(mrb_state *mrb, const mrb_regexp_pattern *pat, + const char *str, mrb_int len, mrb_int start, + int *captures, int captures_size) +{ + const char *sp = str + start; + const char *str_end = str + len; + int ncap = pat->num_captures * 2; + if (ncap == 0) ncap = 2; + + int list_capa = (int)pat->code_len * 2 + 16; + + mrb_bool match_only = (captures == NULL || captures_size == 0); + + /* Use cached VM state if available (avoids malloc per call) */ + mrb_regexp_pattern *mpat = (mrb_regexp_pattern*)pat; /* for cache_in_use flag */ + mrb_bool use_cache = !mpat->cache_in_use && mpat->cached_visited != NULL; + if (use_cache) mpat->cache_in_use = TRUE; + + pike_state s; + s.mrb = mrb; + s.pat = pat; + s.ncap = ncap; + s.str = str; + s.str_end = str_end; + s.matched = FALSE; + s.match_only = match_only; + s.gen = 1; + if (match_only) { + s.pool_capa = 1; + s.pool_next = 0; + s.cap_pool = (int*)mrb_malloc(mrb, sizeof(int) * ncap); + s.result_caps = NULL; + } + else { + s.pool_capa = list_capa * 2; + s.pool_next = 0; + s.cap_pool = (int*)mrb_malloc(mrb, sizeof(int) * s.pool_capa * ncap); + s.result_caps = (int*)mrb_malloc(mrb, sizeof(int) * ncap); + memset(s.result_caps, -1, sizeof(int) * ncap); + } + + re_threadlist curr, next; + if (use_cache) { + s.visited = mpat->cached_visited; + memset(s.visited, 0, sizeof(uint32_t) * (pat->code_len + 1)); + curr.threads = (re_thread*)mpat->cached_threads[0]; + next.threads = (re_thread*)mpat->cached_threads[1]; + curr.capa = next.capa = mpat->cached_list_capa; + } + else { + s.visited = (uint32_t*)mrb_calloc(mrb, pat->code_len + 1, sizeof(uint32_t)); + curr.threads = (re_thread*)mrb_malloc(mrb, sizeof(re_thread) * list_capa); + next.threads = (re_thread*)mrb_malloc(mrb, sizeof(re_thread) * list_capa); + curr.capa = next.capa = list_capa; + } + curr.count = next.count = 0; + + for (; sp <= str_end; sp++) { + if (!s.matched) { + /* Skip ahead when no active threads */ + if (curr.count == 0) { + if (pat->prefix_len > 0) { + const char *skip = skip_to_prefix(pat, sp, str_end); + if (!skip) break; + sp = skip; + } + else if (pat->has_first_bytes) { + while (sp < str_end && !FIRST_BYTE_OK(pat, (uint8_t)*sp)) sp++; + if (sp > str_end) break; + } + } + int slot = match_only ? 0 : pool_alloc(&s); + if (!match_only) memset(CAP(&s, slot), -1, sizeof(int) * ncap); + s.gen++; + add_thread(&s, &curr, 0, slot, sp); + if (s.matched && curr.count == 0) break; + } + + if (sp >= str_end) break; + + if (!match_only) { + /* Compact: copy live thread captures to the front of the pool. */ + for (int i = 0; i < curr.count; i++) { + if (curr.threads[i].cap_slot != i) { + memcpy(CAP(&s, i), CAP(&s, curr.threads[i].cap_slot), + sizeof(int) * ncap); + curr.threads[i].cap_slot = i; + } + } + s.pool_next = curr.count; + } + + s.gen++; + next.count = 0; + + int ch = (uint8_t)*sp; + int advance = mrb_re_utf8_charlen(sp, str_end); + + for (int i = 0; i < curr.count; i++) { + re_thread *th = &curr.threads[i]; + if (th->pc >= pat->code_len) continue; + + re_inst inst = pat->code[th->pc]; + switch (inst.op) { + case RE_CHAR: + if (ch == inst.a) { + int cp = match_only ? 0 : pool_copy(&s, th->cap_slot); + add_thread(&s, &next, th->pc + 1, cp, sp + 1); + } + break; + + case RE_ANY: + if (ch != '\n') { + int cp = match_only ? 0 : pool_copy(&s, th->cap_slot); + add_thread(&s, &next, th->pc + 1, cp, sp + advance); + } + break; + + case RE_ANY_NL: + { + int cp = match_only ? 0 : pool_copy(&s, th->cap_slot); + add_thread(&s, &next, th->pc + 1, cp, sp + advance); + } + break; + + case RE_CLASS: + if (class_match(&pat->classes[inst.a], (uint8_t)ch)) { + int cp = match_only ? 0 : pool_copy(&s, th->cap_slot); + add_thread(&s, &next, th->pc + 1, cp, sp + advance); + } + break; + + case RE_NCLASS: + if (!class_match(&pat->classes[inst.a], (uint8_t)ch)) { + int cp = match_only ? 0 : pool_copy(&s, th->cap_slot); + add_thread(&s, &next, th->pc + 1, cp, sp + advance); + } + break; + + default: + break; + } + } + + /* swap curr and next */ + { + re_threadlist tmp = curr; + curr = next; + next = tmp; + } + + if (s.matched && curr.count == 0) break; + } + + int ret = 0; + if (s.matched) { + if (captures && s.result_caps) { + int copy = ncap < captures_size ? ncap : captures_size; + memcpy(captures, s.result_caps, sizeof(int) * copy); + } + ret = ncap > 0 ? ncap : 1; + } + + if (use_cache) { + mpat->cache_in_use = FALSE; + } + else { + mrb_free(mrb, curr.threads); + mrb_free(mrb, next.threads); + mrb_free(mrb, s.visited); + } + mrb_free(mrb, s.cap_pool); + if (s.result_caps) mrb_free(mrb, s.result_caps); + + return ret; +} + +/* + * Backtracking engine for patterns with backreferences. + * Step-limited to prevent ReDoS. + */ +static mrb_bool +bt_match(const mrb_regexp_pattern *pat, const char *str, const char *str_end, + const char *sp, uint32_t pc, int *captures, int ncap, int *steps, + int depth) +{ + if (depth > MRB_REGEXP_RECURSION_LIMIT) return FALSE; + while (pc < pat->code_len) { + if (++(*steps) > MRB_REGEXP_STEP_LIMIT) return FALSE; + + re_inst inst = pat->code[pc]; + switch (inst.op) { + case RE_CHAR: + if (sp >= str_end || (uint8_t)*sp != inst.a) return FALSE; + sp++; pc++; + break; + + case RE_ANY: + if (sp >= str_end || *sp == '\n') return FALSE; + sp += mrb_re_utf8_charlen(sp, str_end); pc++; + break; + + case RE_ANY_NL: + if (sp >= str_end) return FALSE; + sp += mrb_re_utf8_charlen(sp, str_end); pc++; + break; + + case RE_CLASS: + if (sp >= str_end || !class_match(&pat->classes[inst.a], (uint8_t)*sp)) return FALSE; + sp += mrb_re_utf8_charlen(sp, str_end); pc++; + break; + + case RE_NCLASS: + if (sp >= str_end || class_match(&pat->classes[inst.a], (uint8_t)*sp)) return FALSE; + sp += mrb_re_utf8_charlen(sp, str_end); pc++; + break; + + case RE_MATCH: + return TRUE; + + case RE_JMP: + pc = inst.offset; + break; + + case RE_SPLIT: + if (bt_match(pat, str, str_end, sp, pc + 1, captures, ncap, steps, depth + 1)) return TRUE; + pc = inst.offset; + break; + + case RE_SPLITNG: + if (bt_match(pat, str, str_end, sp, inst.offset, captures, ncap, steps, depth + 1)) return TRUE; + pc++; + break; + + case RE_SAVE: + { + int slot = inst.offset; + if (slot < ncap) { + int old = captures[slot]; + captures[slot] = (int)(sp - str); + if (bt_match(pat, str, str_end, sp, pc + 1, captures, ncap, steps, depth + 1)) return TRUE; + captures[slot] = old; + } + return FALSE; + } + + case RE_BOL: + if (sp != str && !(pat->flags & RE_FLAG_MULTILINE && sp > str && sp[-1] == '\n')) return FALSE; + pc++; + break; + + case RE_EOL: + if (sp != str_end && !(pat->flags & RE_FLAG_MULTILINE && *sp == '\n')) return FALSE; + pc++; + break; + + case RE_BOT: + if (sp != str) return FALSE; + pc++; + break; + + case RE_EOT: + if (sp != str_end) return FALSE; + pc++; + break; + + case RE_WBOUND: + { + mrb_bool before = (sp > str) && mrb_re_is_word_char((uint8_t)sp[-1]); + mrb_bool after = (sp < str_end) && mrb_re_is_word_char((uint8_t)*sp); + if (before == after) return FALSE; + } + pc++; + break; + + case RE_NWBOUND: + { + mrb_bool before = (sp > str) && mrb_re_is_word_char((uint8_t)sp[-1]); + mrb_bool after = (sp < str_end) && mrb_re_is_word_char((uint8_t)*sp); + if (before != after) return FALSE; + } + pc++; + break; + + case RE_BACKREF: + { + int group = inst.a; + if (group * 2 + 1 >= ncap) return FALSE; + int gs = captures[group * 2]; + int ge = captures[group * 2 + 1]; + if (gs < 0 || ge < 0) return FALSE; + int blen = ge - gs; + if (sp + blen > str_end) return FALSE; + if (memcmp(sp, str + gs, blen) != 0) return FALSE; + sp += blen; + pc++; + } + break; + + case RE_LOOKAHEAD: + if (!bt_match(pat, str, str_end, sp, pc + 1, captures, ncap, steps, depth + 1)) + return FALSE; + pc = inst.offset; + break; + + case RE_NEG_LOOKAHEAD: + if (bt_match(pat, str, str_end, sp, pc + 1, captures, ncap, steps, depth + 1)) + return FALSE; + pc = inst.offset; + break; + + case RE_LOOKBEHIND: + { + int lb_len = inst.a; + if (sp - str < lb_len) return FALSE; /* not enough text before */ + if (!bt_match(pat, str, str_end, sp - lb_len, pc + 1, captures, ncap, steps, depth + 1)) + return FALSE; + pc = inst.offset; + } + break; + + case RE_NEG_LOOKBEHIND: + { + int lb_len = inst.a; + if (sp - str >= lb_len) { + if (bt_match(pat, str, str_end, sp - lb_len, pc + 1, captures, ncap, steps, depth + 1)) + return FALSE; + } + /* if not enough text before, negative lookbehind succeeds */ + pc = inst.offset; + } + break; + + default: + return FALSE; + } + } + return FALSE; +} + +static int +backtrack_exec(mrb_state *mrb, const mrb_regexp_pattern *pat, + const char *str, mrb_int len, mrb_int start, + int *captures, int captures_size) +{ + const char *str_end = str + len; + int ncap = pat->num_captures * 2; + if (ncap == 0) ncap = 2; + + int *caps = (int*)mrb_malloc(mrb, sizeof(int) * ncap); + + for (const char *sp = str + start; sp <= str_end; sp++) { + /* Skip ahead using literal prefix or first-byte bitmap */ + if (pat->prefix_len > 0) { + const char *skip = skip_to_prefix(pat, sp, str_end); + if (!skip) break; + sp = skip; + } + else if (pat->has_first_bytes) { + while (sp < str_end && !FIRST_BYTE_OK(pat, (uint8_t)*sp)) sp++; + if (sp > str_end) break; + } + memset(caps, -1, sizeof(int) * ncap); + int steps = 0; + + if (bt_match(pat, str, str_end, sp, 0, caps, ncap, &steps, 0)) { + if (captures) { + int copy = ncap < captures_size ? ncap : captures_size; + memcpy(captures, caps, sizeof(int) * copy); + } + mrb_free(mrb, caps); + return ncap > 0 ? ncap : 1; + } + } + mrb_free(mrb, caps); + return 0; +} + +/* Fast path for pure literal patterns: use memchr+memcmp, no NFA needed */ +static int +literal_exec(const mrb_regexp_pattern *pat, + const char *str, mrb_int len, mrb_int start, + int *captures, int captures_size) +{ + const char *sp = str + start; + const char *str_end = str + len; + int plen = pat->prefix_len; + + while (sp + plen <= str_end) { + const char *found = (const char*)memchr(sp, pat->prefix[0], str_end - sp); + if (!found || found + plen > str_end) return 0; + if (plen == 1 || memcmp(found + 1, pat->prefix + 1, plen - 1) == 0) { + /* match found */ + if (captures && captures_size >= 2) { + captures[0] = (int)(found - str); + captures[1] = (int)(found - str) + plen; + } + return 2; /* group 0 start/end */ + } + sp = found + 1; + } + return 0; +} + +/* Public entry point */ +int +mrb_re_exec(mrb_state *mrb, const mrb_regexp_pattern *pat, + const char *str, mrb_int len, mrb_int start, + int *captures, int captures_size) +{ + if (pat->is_literal) { + return literal_exec(pat, str, len, start, captures, captures_size); + } + if (pat->has_backref || pat->needs_backtrack) { + return backtrack_exec(mrb, pat, str, len, start, captures, captures_size); + } + return pike_vm(mrb, pat, str, len, start, captures, captures_size); +} diff --git a/mrbgems/mruby-regexp/src/re_utf8.c b/mrbgems/mruby-regexp/src/re_utf8.c new file mode 100644 index 0000000000..148d598586 --- /dev/null +++ b/mrbgems/mruby-regexp/src/re_utf8.c @@ -0,0 +1,76 @@ +/* +** re_utf8.c - UTF-8 utility functions for regexp engine +** +** See Copyright Notice in mruby.h +*/ + +#include "re_internal.h" + +/* Return byte length of UTF-8 character at s. + Returns 1 for invalid sequences (treat as single byte). */ +int +mrb_re_utf8_charlen(const char *s, const char *end) +{ + uint8_t c = (uint8_t)*s; + int len; + + if (c < 0x80) return 1; + else if (c < 0xc0) return 1; /* invalid continuation */ + else if (c < 0xe0) len = 2; + else if (c < 0xf0) len = 3; + else if (c < 0xf8) len = 4; + else return 1; /* invalid */ + + if (s + len > end) return 1; /* truncated */ + return len; +} + +/* Decode a UTF-8 character and return its codepoint. + *len is set to the byte length consumed. */ +uint32_t +mrb_re_utf8_decode(const char *s, int *len) +{ + uint8_t c = (uint8_t)s[0]; + uint32_t cp; + + if (c < 0x80) { + *len = 1; + return c; + } + else if (c < 0xc0) { + *len = 1; + return c; /* invalid, return as-is */ + } + else if (c < 0xe0) { + *len = 2; + cp = (c & 0x1f) << 6; + cp |= ((uint8_t)s[1] & 0x3f); + return cp; + } + else if (c < 0xf0) { + *len = 3; + cp = (c & 0x0f) << 12; + cp |= ((uint8_t)s[1] & 0x3f) << 6; + cp |= ((uint8_t)s[2] & 0x3f); + return cp; + } + else { + *len = 4; + cp = (c & 0x07) << 18; + cp |= ((uint8_t)s[1] & 0x3f) << 12; + cp |= ((uint8_t)s[2] & 0x3f) << 6; + cp |= ((uint8_t)s[3] & 0x3f); + return cp; + } +} + +/* Check if character is a "word" character (\w): [a-zA-Z0-9_] */ +mrb_bool +mrb_re_is_word_char(uint32_t c) +{ + if (c >= 'a' && c <= 'z') return TRUE; + if (c >= 'A' && c <= 'Z') return TRUE; + if (c >= '0' && c <= '9') return TRUE; + if (c == '_') return TRUE; + return FALSE; +} diff --git a/mrbgems/mruby-regexp/src/regexp.c b/mrbgems/mruby-regexp/src/regexp.c new file mode 100644 index 0000000000..fa06557f55 --- /dev/null +++ b/mrbgems/mruby-regexp/src/regexp.c @@ -0,0 +1,977 @@ +/* +** regexp.c - Regexp class and MatchData class +** +** See Copyright Notice in mruby.h +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "re_internal.h" + +#include + +/* Regexp data type */ +static void regexp_free(mrb_state *mrb, void *ptr) { + mrb_re_free(mrb, (mrb_regexp_pattern*)ptr); +} + +static const struct mrb_data_type regexp_type = { "Regexp", regexp_free }; + +/* MatchData */ +typedef struct { + mrb_value source; /* source string */ + mrb_value regexp; /* Regexp object (for named captures) */ + int *captures; /* capture positions [start0,end0,start1,end1,...] */ + int num_captures; /* number of capture groups (including 0) */ +} mrb_match_data; + +static void matchdata_free(mrb_state *mrb, void *ptr) { + mrb_match_data *md = (mrb_match_data*)ptr; + if (md) { + mrb_free(mrb, md->captures); + mrb_free(mrb, md); + } +} + +static const struct mrb_data_type matchdata_type = { "MatchData", matchdata_free }; + +/* Get internal flags from Regexp object */ +static uint32_t +get_iflags(mrb_state *mrb, mrb_value self) +{ + mrb_value v = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@flags")); + return mrb_nil_p(v) ? 0 : (uint32_t)mrb_integer(v); +} + +/* Parse flags from string or integer */ +static uint32_t +parse_flags(mrb_state *mrb, mrb_value flags_val) +{ + uint32_t flags = 0; + if (mrb_integer_p(flags_val)) { + mrb_int f = mrb_integer(flags_val); + if (f & 1) flags |= RE_FLAG_IGNORECASE; + if (f & 2) flags |= RE_FLAG_EXTENDED; + if (f & 4) flags |= RE_FLAG_MULTILINE | RE_FLAG_DOTALL; + return flags; + } + if (mrb_string_p(flags_val)) { + const char *s = RSTRING_PTR(flags_val); + mrb_int len = RSTRING_LEN(flags_val); + for (mrb_int i = 0; i < len; i++) { + switch (s[i]) { + case 'i': flags |= RE_FLAG_IGNORECASE; break; + case 'm': flags |= RE_FLAG_MULTILINE | RE_FLAG_DOTALL; break; + case 'x': flags |= RE_FLAG_EXTENDED; break; + } + } + return flags; + } + if (mrb_test(flags_val)) flags |= RE_FLAG_IGNORECASE; + return flags; +} + +/* + * Regexp.new(pattern, flags=nil) + * Regexp.new(regexp) + * Regexp.compile(pattern, flags=nil) + */ +static mrb_value +regexp_init(mrb_state *mrb, mrb_value self) +{ + mrb_value pattern; + mrb_value flags_val = mrb_nil_value(); + mrb_regexp_pattern *pat; + + mrb_get_args(mrb, "o|o", &pattern, &flags_val); + + uint32_t flags; + + /* If pattern is a Regexp, copy its source and flags */ + if (mrb_obj_is_kind_of(mrb, pattern, mrb_class_get(mrb, "Regexp"))) { + mrb_value iflags = mrb_iv_get(mrb, pattern, mrb_intern_lit(mrb, "@flags")); + flags = mrb_nil_p(iflags) ? 0 : (uint32_t)mrb_integer(iflags); + pattern = mrb_iv_get(mrb, pattern, mrb_intern_lit(mrb, "@source")); + } + else { + if (!mrb_string_p(pattern)) { + mrb_raise(mrb, E_TYPE_ERROR, "wrong argument type (expected String or Regexp)"); + } + flags = parse_flags(mrb, flags_val); + } + + /* Set @source and @flags before mrb_re_compile() so a Regexp that survives + a compile-time exception (e.g. picked up by ObjectSpace.each_object) + still has usable IVs for hash/eql?/inspect. */ + mrb_iv_set(mrb, self, mrb_intern_lit(mrb, "@source"), pattern); + mrb_iv_set(mrb, self, mrb_intern_lit(mrb, "@flags"), mrb_int_value(mrb, (mrb_int)flags)); + + pat = mrb_re_compile(mrb, RSTRING_PTR(pattern), RSTRING_LEN(pattern), flags); + + DATA_TYPE(self) = ®exp_type; + DATA_PTR(self) = pat; + + /* store named captures as hash */ + if (pat->num_named > 0) { + mrb_value nc = mrb_hash_new_capa(mrb, pat->num_named); + for (uint16_t i = 0; i < pat->num_named; i++) { + mrb_value name = mrb_str_new(mrb, pat->named_captures[i].name, pat->named_captures[i].name_len); + mrb_hash_set(mrb, nc, name, mrb_fixnum_value(pat->named_captures[i].group)); + } + mrb_iv_set(mrb, self, mrb_intern_lit(mrb, "@named_captures"), nc); + } + + return self; +} + +/* Pre-interned symbols for $1-$9 (cached on first use) */ +static mrb_sym nth_syms[9]; + +static void +ensure_nth_syms(mrb_state *mrb) +{ + if (nth_syms[0]) return; + nth_syms[0] = mrb_intern_lit(mrb, "$1"); + nth_syms[1] = mrb_intern_lit(mrb, "$2"); + nth_syms[2] = mrb_intern_lit(mrb, "$3"); + nth_syms[3] = mrb_intern_lit(mrb, "$4"); + nth_syms[4] = mrb_intern_lit(mrb, "$5"); + nth_syms[5] = mrb_intern_lit(mrb, "$6"); + nth_syms[6] = mrb_intern_lit(mrb, "$7"); + nth_syms[7] = mrb_intern_lit(mrb, "$8"); + nth_syms[8] = mrb_intern_lit(mrb, "$9"); +} + +static void +clear_match_globals(mrb_state *mrb) +{ + ensure_nth_syms(mrb); + mrb_gv_set(mrb, mrb_intern_lit(mrb, "$~"), mrb_nil_value()); + for (int i = 0; i < 9; i++) { + mrb_gv_set(mrb, nth_syms[i], mrb_nil_value()); + } +} + +/* Create MatchData from captures */ +static mrb_value +create_matchdata(mrb_state *mrb, mrb_value regexp, mrb_value str, int *captures, int ncap) +{ + ensure_nth_syms(mrb); + + struct RClass *md_class = mrb_class_get(mrb, "MatchData"); + mrb_match_data *md = (mrb_match_data*)mrb_malloc(mrb, sizeof(mrb_match_data)); + md->source = str; + md->regexp = regexp; + md->num_captures = ncap / 2; + md->captures = (int*)mrb_malloc(mrb, sizeof(int) * ncap); + memcpy(md->captures, captures, sizeof(int) * ncap); + + mrb_value obj = mrb_obj_value(mrb_data_object_alloc(mrb, md_class, md, &matchdata_type)); + /* Keep `source` and `regexp` GC-reachable via instance variables. + * The mrb_values are also held in mrb_match_data, but C-allocated + * structs are not scanned by the GC. */ + mrb_iv_set(mrb, obj, mrb_intern_lit(mrb, "source"), str); + mrb_iv_set(mrb, obj, mrb_intern_lit(mrb, "regexp"), regexp); + mrb_gv_set(mrb, mrb_intern_lit(mrb, "$~"), obj); + + /* set $1-$9 from captures */ + for (int i = 0; i < 9; i++) { + mrb_value val = mrb_nil_value(); + int g = i + 1; + if (g < md->num_captures && captures[g*2] >= 0) { + val = mrb_str_substr(mrb, str, captures[g*2], captures[g*2+1] - captures[g*2]); + } + mrb_gv_set(mrb, nth_syms[i], val); + } + + return obj; +} + +/* Internal: execute match and create MatchData. + Returns MatchData on match, nil on no match. + Sets $~ and $1-$9 globals. */ +static mrb_value +exec_match(mrb_state *mrb, mrb_value self, mrb_value str, mrb_int pos) +{ + mrb_regexp_pattern *pat = DATA_GET_PTR(mrb, self, ®exp_type, mrb_regexp_pattern); + if (!pat) mrb_raise(mrb, E_ARGUMENT_ERROR, "uninitialized Regexp"); + + int cap_size = pat->num_captures * 2; + int *captures = (int*)mrb_malloc(mrb, sizeof(int) * cap_size); + memset(captures, -1, sizeof(int) * cap_size); + int ncap = mrb_re_exec(mrb, pat, RSTRING_PTR(str), RSTRING_LEN(str), pos, + captures, cap_size); + + if (ncap == 0) { + mrb_free(mrb, captures); + clear_match_globals(mrb); + return mrb_nil_value(); + } + mrb_value md = create_matchdata(mrb, self, str, captures, cap_size); + mrb_free(mrb, captures); + return md; +} + +/* + * Regexp#match(str, pos=0) + */ +static mrb_value +regexp_match(mrb_state *mrb, mrb_value self) +{ + mrb_value str; + mrb_int pos = 0; + mrb_get_args(mrb, "S|i", &str, &pos); + return exec_match(mrb, self, str, pos); +} + +/* + * Regexp#match?(str, pos=0) + */ +static mrb_value +regexp_match_p(mrb_state *mrb, mrb_value self) +{ + mrb_value str; + mrb_int pos = 0; + mrb_get_args(mrb, "S|i", &str, &pos); + + mrb_regexp_pattern *pat = DATA_GET_PTR(mrb, self, ®exp_type, mrb_regexp_pattern); + if (!pat) mrb_raise(mrb, E_ARGUMENT_ERROR, "uninitialized Regexp"); + + int ncap = mrb_re_exec(mrb, pat, RSTRING_PTR(str), RSTRING_LEN(str), pos, NULL, 0); + return mrb_bool_value(ncap > 0); +} + +/* + * Regexp#=~(str) + */ +static mrb_value +regexp_match_op(mrb_state *mrb, mrb_value self) +{ + mrb_value str; + mrb_get_args(mrb, "o", &str); + if (mrb_nil_p(str)) return mrb_nil_value(); + mrb_ensure_string_type(mrb, str); + + mrb_value md = exec_match(mrb, self, str, 0); + if (mrb_nil_p(md)) return mrb_nil_value(); + + mrb_match_data *m = DATA_GET_PTR(mrb, md, &matchdata_type, mrb_match_data); + return mrb_int_value(mrb, m->captures[0]); +} + +/* + * Regexp#===(str) + */ +static mrb_value +regexp_case_match(mrb_state *mrb, mrb_value self) +{ + mrb_value str; + mrb_regexp_pattern *pat; + + mrb_get_args(mrb, "o", &str); + if (!mrb_string_p(str)) return mrb_false_value(); + + pat = DATA_GET_PTR(mrb, self, ®exp_type, mrb_regexp_pattern); + if (!pat) return mrb_false_value(); + + int ncap = mrb_re_exec(mrb, pat, RSTRING_PTR(str), RSTRING_LEN(str), 0, NULL, 0); + return mrb_bool_value(ncap > 0); +} + +/* + * Regexp#source + */ +static mrb_value +regexp_source(mrb_state *mrb, mrb_value self) +{ + return mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@source")); +} + +/* + * Regexp#options - convert internal flags to Ruby constants + * Internal: IGNORECASE=1, MULTILINE=2, DOTALL=4, EXTENDED=8 + * Ruby: IGNORECASE=1, EXTENDED=2, MULTILINE=4 + */ +static mrb_value +regexp_options(mrb_state *mrb, mrb_value self) +{ + uint32_t iflags = get_iflags(mrb, self); + mrb_int opts = 0; + if (iflags & RE_FLAG_IGNORECASE) opts |= 1; /* Regexp::IGNORECASE */ + if (iflags & RE_FLAG_EXTENDED) opts |= 2; /* Regexp::EXTENDED */ + if (iflags & RE_FLAG_MULTILINE) opts |= 4; /* Regexp::MULTILINE */ + return mrb_fixnum_value(opts); +} + +/* + * Regexp#casefold? + */ +static mrb_value +regexp_casefold_p(mrb_state *mrb, mrb_value self) +{ + return mrb_bool_value((get_iflags(mrb, self) & RE_FLAG_IGNORECASE) != 0); +} + +/* + * Regexp#to_s - CRuby-compatible (?flags:source) format + */ +static mrb_value +regexp_to_s(mrb_state *mrb, mrb_value self) +{ + mrb_value src = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@source")); + uint32_t flags = get_iflags(mrb, self); + + mrb_value result = mrb_str_new_lit(mrb, "(?"); + if (flags & RE_FLAG_IGNORECASE) mrb_str_cat_lit(mrb, result, "i"); + if (flags & RE_FLAG_MULTILINE) mrb_str_cat_lit(mrb, result, "m"); + if (flags & RE_FLAG_EXTENDED) mrb_str_cat_lit(mrb, result, "x"); + mrb_str_cat_lit(mrb, result, ":"); + mrb_str_cat_str(mrb, result, src); + mrb_str_cat_lit(mrb, result, ")"); + return result; +} + +static mrb_value +regexp_inspect(mrb_state *mrb, mrb_value self) +{ + mrb_value src = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@source")); + uint32_t flags = get_iflags(mrb, self); + + mrb_value result = mrb_str_new_lit(mrb, "/"); + mrb_str_cat_str(mrb, result, src); + mrb_str_cat_lit(mrb, result, "/"); + if (flags & RE_FLAG_IGNORECASE) mrb_str_cat_lit(mrb, result, "i"); + if (flags & RE_FLAG_MULTILINE) mrb_str_cat_lit(mrb, result, "m"); + if (flags & RE_FLAG_EXTENDED) mrb_str_cat_lit(mrb, result, "x"); + return result; +} + +/* + * Regexp#== (and eql?) + */ +static mrb_value +regexp_eql(mrb_state *mrb, mrb_value self) +{ + mrb_value other; + mrb_get_args(mrb, "o", &other); + if (!mrb_obj_is_kind_of(mrb, other, mrb_class_get(mrb, "Regexp"))) { + return mrb_false_value(); + } + mrb_value src1 = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@source")); + mrb_value src2 = mrb_iv_get(mrb, other, mrb_intern_lit(mrb, "@source")); + if (!mrb_string_p(src1) || !mrb_string_p(src2)) { + return mrb_bool_value(mrb_obj_eq(mrb, self, other)); + } + if (!mrb_str_equal(mrb, src1, src2)) return mrb_false_value(); + return mrb_bool_value(get_iflags(mrb, self) == get_iflags(mrb, other)); +} + +/* + * Regexp#hash + */ +static mrb_value +regexp_hash(mrb_state *mrb, mrb_value self) +{ + mrb_value src = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@source")); + uint32_t h = mrb_string_p(src) ? mrb_str_hash(mrb, src) : 0; + h ^= get_iflags(mrb, self) * 0x9e3779b9; /* mix flags into hash */ + return mrb_int_value(mrb, (mrb_int)h); +} + +/* + * Regexp.escape(str) + */ +static mrb_value +regexp_escape(mrb_state *mrb, mrb_value self) +{ + mrb_value str; + mrb_get_args(mrb, "S", &str); + + const char *s = RSTRING_PTR(str); + mrb_int len = RSTRING_LEN(str); + mrb_value result = mrb_str_new_capa(mrb, len + len / 4); + + for (mrb_int i = 0; i < len; i++) { + char c = s[i]; + switch (c) { + case '\\': case '.': case '*': case '+': case '?': case '|': + case '(': case ')': case '[': case ']': case '{': case '}': + case '^': case '$': + mrb_str_cat_lit(mrb, result, "\\"); + /* fall through */ + default: + mrb_str_cat(mrb, result, &c, 1); + break; + } + } + return result; +} + +/* --- MatchData methods --- */ + +/* + * MatchData#[](n) + */ +static mrb_value +matchdata_aref(mrb_state *mrb, mrb_value self) +{ + mrb_value arg; + mrb_get_args(mrb, "o", &arg); + + mrb_match_data *md = DATA_GET_PTR(mrb, self, &matchdata_type, mrb_match_data); + if (!md) return mrb_nil_value(); + + mrb_int idx; + if (mrb_string_p(arg) || mrb_symbol_p(arg)) { + /* named capture access */ + const char *name; + mrb_int name_len; + if (mrb_symbol_p(arg)) { + name = mrb_sym_name_len(mrb, mrb_symbol(arg), &name_len); + } + else { + name = RSTRING_PTR(arg); + name_len = RSTRING_LEN(arg); + } + /* look up name in regexp's named captures */ + mrb_regexp_pattern *pat = NULL; + if (!mrb_nil_p(md->regexp)) { + pat = DATA_GET_PTR(mrb, md->regexp, ®exp_type, mrb_regexp_pattern); + } + if (pat) { + for (uint16_t i = 0; i < pat->num_named; i++) { + if (pat->named_captures[i].name_len == (uint16_t)name_len && + memcmp(pat->named_captures[i].name, name, name_len) == 0) { + idx = pat->named_captures[i].group; + goto found; + } + } + } + return mrb_nil_value(); + } + else { + idx = mrb_as_int(mrb, arg); + } + +found: + if (idx < 0 || idx >= md->num_captures) return mrb_nil_value(); + int start = md->captures[idx * 2]; + int end = md->captures[idx * 2 + 1]; + if (start < 0) return mrb_nil_value(); + + return mrb_str_substr(mrb, md->source, start, end - start); +} + +/* Build array of capture strings from group `from` to num_captures-1 */ +static mrb_value +matchdata_to_ary(mrb_state *mrb, mrb_value self, int from) +{ + mrb_match_data *md = DATA_GET_PTR(mrb, self, &matchdata_type, mrb_match_data); + if (!md) return mrb_ary_new(mrb); + + mrb_value ary = mrb_ary_new_capa(mrb, md->num_captures - from); + for (int i = from; i < md->num_captures; i++) { + int s = md->captures[i * 2]; + int e = md->captures[i * 2 + 1]; + if (s < 0) { + mrb_ary_push(mrb, ary, mrb_nil_value()); + } + else { + mrb_ary_push(mrb, ary, mrb_str_substr(mrb, md->source, s, e - s)); + } + } + return ary; +} + +static mrb_value +matchdata_captures(mrb_state *mrb, mrb_value self) +{ + return matchdata_to_ary(mrb, self, 1); +} + +static mrb_value +matchdata_to_a(mrb_state *mrb, mrb_value self) +{ + return matchdata_to_ary(mrb, self, 0); +} + +/* + * MatchData#begin(n) / MatchData#end(n) + */ +static mrb_value +matchdata_begin(mrb_state *mrb, mrb_value self) +{ + mrb_int idx; + mrb_get_args(mrb, "i", &idx); + + mrb_match_data *md = DATA_GET_PTR(mrb, self, &matchdata_type, mrb_match_data); + if (!md || idx < 0 || idx >= md->num_captures) return mrb_nil_value(); + int pos = md->captures[idx * 2]; + if (pos < 0) return mrb_nil_value(); + return mrb_int_value(mrb, pos); +} + +static mrb_value +matchdata_end(mrb_state *mrb, mrb_value self) +{ + mrb_int idx; + mrb_get_args(mrb, "i", &idx); + + mrb_match_data *md = DATA_GET_PTR(mrb, self, &matchdata_type, mrb_match_data); + if (!md || idx < 0 || idx >= md->num_captures) return mrb_nil_value(); + int pos = md->captures[idx * 2 + 1]; + if (pos < 0) return mrb_nil_value(); + return mrb_int_value(mrb, pos); +} + +/* + * MatchData#pre_match / #post_match + */ +static mrb_value +matchdata_pre(mrb_state *mrb, mrb_value self) +{ + mrb_match_data *md = DATA_GET_PTR(mrb, self, &matchdata_type, mrb_match_data); + if (!md || md->captures[0] < 0) return mrb_nil_value(); + return mrb_str_substr(mrb, md->source, 0, md->captures[0]); +} + +static mrb_value +matchdata_post(mrb_state *mrb, mrb_value self) +{ + mrb_match_data *md = DATA_GET_PTR(mrb, self, &matchdata_type, mrb_match_data); + if (!md || md->captures[1] < 0) return mrb_nil_value(); + int pos = md->captures[1]; + return mrb_str_substr(mrb, md->source, pos, RSTRING_LEN(md->source) - pos); +} + +/* + * MatchData#length / #size + */ +static mrb_value +matchdata_length(mrb_state *mrb, mrb_value self) +{ + mrb_match_data *md = DATA_GET_PTR(mrb, self, &matchdata_type, mrb_match_data); + if (!md) return mrb_fixnum_value(0); + return mrb_fixnum_value(md->num_captures); +} + +/* + * MatchData#named_captures + */ +static mrb_value +matchdata_named_captures(mrb_state *mrb, mrb_value self) +{ + mrb_match_data *md = DATA_GET_PTR(mrb, self, &matchdata_type, mrb_match_data); + if (!md) return mrb_hash_new(mrb); + + mrb_regexp_pattern *pat = NULL; + if (!mrb_nil_p(md->regexp)) { + pat = DATA_GET_PTR(mrb, md->regexp, ®exp_type, mrb_regexp_pattern); + } + if (!pat || pat->num_named == 0) return mrb_hash_new(mrb); + + mrb_value result = mrb_hash_new_capa(mrb, pat->num_named); + for (uint16_t i = 0; i < pat->num_named; i++) { + mrb_value name = mrb_str_new(mrb, pat->named_captures[i].name, pat->named_captures[i].name_len); + int group = pat->named_captures[i].group; + mrb_value val = mrb_nil_value(); + if (group >= 0 && group < md->num_captures) { + int s = md->captures[group * 2]; + int e = md->captures[group * 2 + 1]; + if (s >= 0) val = mrb_str_substr(mrb, md->source, s, e - s); + } + mrb_hash_set(mrb, result, name, val); + } + return result; +} + +/* + * MatchData#string - the original string (frozen copy) + */ +static mrb_value +matchdata_string(mrb_state *mrb, mrb_value self) +{ + mrb_match_data *md = DATA_GET_PTR(mrb, self, &matchdata_type, mrb_match_data); + if (!md) return mrb_nil_value(); + return md->source; +} + +/* + * MatchData#regexp - the Regexp used + */ +static mrb_value +matchdata_regexp(mrb_state *mrb, mrb_value self) +{ + mrb_match_data *md = DATA_GET_PTR(mrb, self, &matchdata_type, mrb_match_data); + if (!md) return mrb_nil_value(); + return md->regexp; +} + +/* + * MatchData#to_s - full match string + */ +static mrb_value +matchdata_to_s(mrb_state *mrb, mrb_value self) +{ + mrb_match_data *md = DATA_GET_PTR(mrb, self, &matchdata_type, mrb_match_data); + if (!md || md->captures[0] < 0) return mrb_nil_value(); + int s = md->captures[0]; + int e = md->captures[1]; + return mrb_str_substr(mrb, md->source, s, e - s); +} + +/* --- C-level gsub/sub/scan core --- */ + +/* Process replacement string: expand \0-\9, \&, \`, \', \+, \\ */ +static void +apply_replacement(mrb_state *mrb, mrb_value result, + const char *rep, mrb_int rep_len, + const char *str, int *captures, int ncap) +{ + mrb_int i = 0; + while (i < rep_len) { + if (rep[i] == '\\' && i + 1 < rep_len) { + char c = rep[i + 1]; + if (c >= '0' && c <= '9') { + int g = c - '0'; + if (g < ncap && captures[g * 2] >= 0) { + int s = captures[g * 2], e = captures[g * 2 + 1]; + mrb_str_cat(mrb, result, str + s, e - s); + } + } + else if (c == '&') { + if (captures[0] >= 0) { + mrb_str_cat(mrb, result, str + captures[0], captures[1] - captures[0]); + } + } + else if (c == '`') { + if (captures[0] >= 0) { + mrb_str_cat(mrb, result, str, captures[0]); + } + } + else if (c == '\'') { + if (captures[1] >= 0) { + mrb_str_cat(mrb, result, str + captures[1], strlen(str) - captures[1]); + } + } + else if (c == '+') { + /* last successful capture */ + for (int g = ncap - 1; g >= 1; g--) { + if (captures[g * 2] >= 0) { + int s = captures[g * 2], e = captures[g * 2 + 1]; + mrb_str_cat(mrb, result, str + s, e - s); + break; + } + } + } + else if (c == '\\') { + mrb_str_cat_lit(mrb, result, "\\"); + } + else { + mrb_str_cat(mrb, result, rep + i, 2); /* \x as-is */ + } + i += 2; + } + else { + /* find next backslash or end for batch copy */ + mrb_int j = i + 1; + while (j < rep_len && rep[j] != '\\') j++; + mrb_str_cat(mrb, result, rep + i, j - i); + i = j; + } + } +} + +/* Check if replacement contains backslash */ +static mrb_bool +has_backslash(const char *s, mrb_int len) +{ + return memchr(s, '\\', len) != NULL; +} + +/* + * Regexp#__gsub_str(str, replacement) - gsub core without block + */ +static mrb_value +regexp_gsub_str(mrb_state *mrb, mrb_value self) +{ + mrb_value str, replacement; + mrb_get_args(mrb, "SS", &str, &replacement); + + mrb_regexp_pattern *pat = DATA_GET_PTR(mrb, self, ®exp_type, mrb_regexp_pattern); + if (!pat) mrb_raise(mrb, E_ARGUMENT_ERROR, "uninitialized Regexp"); + + const char *s = RSTRING_PTR(str); + mrb_int slen = RSTRING_LEN(str); + const char *rep = RSTRING_PTR(replacement); + mrb_int rep_len = RSTRING_LEN(replacement); + mrb_bool need_expand = has_backslash(rep, rep_len); + + int ncap = pat->num_captures; + int cap_size = ncap * 2; + int *captures = (int*)mrb_malloc(mrb, sizeof(int) * cap_size); + mrb_value result = mrb_str_new_capa(mrb, slen); + int ai = mrb_gc_arena_save(mrb); + + mrb_int pos = 0; + int last_ncap = 0; + int last_captures[RE_MAX_CAPTURES * 2]; + + while (pos <= slen) { + memset(captures, -1, sizeof(int) * cap_size); + int n = mrb_re_exec(mrb, pat, s, slen, pos, captures, cap_size); + if (n == 0) break; + + /* save last match for $~ */ + last_ncap = cap_size; + memcpy(last_captures, captures, sizeof(int) * cap_size); + + /* append pre-match */ + if (captures[0] > pos) { + mrb_str_cat(mrb, result, s + pos, captures[0] - pos); + } + + /* append replacement */ + if (need_expand) { + apply_replacement(mrb, result, rep, rep_len, s, captures, ncap); + } + else { + mrb_str_cat(mrb, result, rep, rep_len); + } + + /* advance position */ + int match_end = captures[1]; + if (match_end == pos) { + /* zero-length match: copy one char and advance */ + if (pos < slen) { + mrb_str_cat(mrb, result, s + pos, 1); + } + pos++; + } + else { + pos = match_end; + } + mrb_gc_arena_restore(mrb, ai); + } + + /* append remainder */ + if (pos <= slen) { + mrb_str_cat(mrb, result, s + pos, slen - pos); + } + + mrb_free(mrb, captures); + + /* set $~ from last match */ + if (last_ncap > 0) { + create_matchdata(mrb, self, str, last_captures, last_ncap); + } + else { + clear_match_globals(mrb); + } + + return result; +} + +/* + * Regexp#__sub_str(str, replacement) - sub core without block + */ +static mrb_value +regexp_sub_str(mrb_state *mrb, mrb_value self) +{ + mrb_value str, replacement; + mrb_get_args(mrb, "SS", &str, &replacement); + + mrb_regexp_pattern *pat = DATA_GET_PTR(mrb, self, ®exp_type, mrb_regexp_pattern); + if (!pat) mrb_raise(mrb, E_ARGUMENT_ERROR, "uninitialized Regexp"); + + const char *s = RSTRING_PTR(str); + mrb_int slen = RSTRING_LEN(str); + const char *rep = RSTRING_PTR(replacement); + mrb_int rep_len = RSTRING_LEN(replacement); + + int cap_size = pat->num_captures * 2; + int *captures = (int*)mrb_malloc(mrb, sizeof(int) * cap_size); + memset(captures, -1, sizeof(int) * cap_size); + + int n = mrb_re_exec(mrb, pat, s, slen, 0, captures, cap_size); + if (n == 0) { + mrb_free(mrb, captures); + clear_match_globals(mrb); + return mrb_str_dup(mrb, str); + } + + mrb_value result = mrb_str_new_capa(mrb, slen); + + /* pre-match */ + if (captures[0] > 0) { + mrb_str_cat(mrb, result, s, captures[0]); + } + + /* replacement */ + if (has_backslash(rep, rep_len)) { + apply_replacement(mrb, result, rep, rep_len, s, captures, pat->num_captures); + } + else { + mrb_str_cat(mrb, result, rep, rep_len); + } + + /* post-match */ + if (captures[1] < slen) { + mrb_str_cat(mrb, result, s + captures[1], slen - captures[1]); + } + + create_matchdata(mrb, self, str, captures, cap_size); + mrb_free(mrb, captures); + return result; +} + +/* + * Regexp#__scan(str) - scan core, returns array + */ +static mrb_value +regexp_scan(mrb_state *mrb, mrb_value self) +{ + mrb_value str; + mrb_get_args(mrb, "S", &str); + + mrb_regexp_pattern *pat = DATA_GET_PTR(mrb, self, ®exp_type, mrb_regexp_pattern); + if (!pat) mrb_raise(mrb, E_ARGUMENT_ERROR, "uninitialized Regexp"); + + const char *s = RSTRING_PTR(str); + mrb_int slen = RSTRING_LEN(str); + int ncap = pat->num_captures; + int cap_size = ncap * 2; + int *captures = (int*)mrb_malloc(mrb, sizeof(int) * cap_size); + + mrb_value ary = mrb_ary_new(mrb); + int ai = mrb_gc_arena_save(mrb); + mrb_int pos = 0; + int last_ncap = 0; + int last_captures[RE_MAX_CAPTURES * 2]; + + while (pos <= slen) { + memset(captures, -1, sizeof(int) * cap_size); + int n = mrb_re_exec(mrb, pat, s, slen, pos, captures, cap_size); + if (n == 0) break; + + last_ncap = cap_size; + memcpy(last_captures, captures, sizeof(int) * cap_size); + + if (ncap <= 1) { + /* no captures or just group 0: push matched string */ + mrb_ary_push(mrb, ary, + mrb_str_substr(mrb, str, captures[0], captures[1] - captures[0])); + } + else if (ncap == 2) { + /* single capture group: push capture string */ + if (captures[2] >= 0) { + mrb_ary_push(mrb, ary, + mrb_str_substr(mrb, str, captures[2], captures[3] - captures[2])); + } + else { + mrb_ary_push(mrb, ary, mrb_nil_value()); + } + } + else { + /* multiple captures: push array of captures */ + mrb_value sub = mrb_ary_new_capa(mrb, ncap - 1); + for (int i = 1; i < ncap; i++) { + if (captures[i * 2] >= 0) { + mrb_ary_push(mrb, sub, + mrb_str_substr(mrb, str, captures[i*2], captures[i*2+1] - captures[i*2])); + } + else { + mrb_ary_push(mrb, sub, mrb_nil_value()); + } + } + mrb_ary_push(mrb, ary, sub); + } + + int match_end = captures[1]; + if (match_end == pos) { + pos++; + } + else { + pos = match_end; + } + mrb_gc_arena_restore(mrb, ai); + } + + mrb_free(mrb, captures); + + if (last_ncap > 0) { + create_matchdata(mrb, self, str, last_captures, last_ncap); + } + else { + clear_match_globals(mrb); + } + + return ary; +} + +/* --- Gem init --- */ + +void +mrb_mruby_regexp_gem_init(mrb_state *mrb) +{ + struct RClass *re = mrb_define_class(mrb, "Regexp", mrb->object_class); + MRB_SET_INSTANCE_TT(re, MRB_TT_CDATA); + + /* Constants */ + mrb_define_const(mrb, re, "IGNORECASE", mrb_fixnum_value(1)); + mrb_define_const(mrb, re, "EXTENDED", mrb_fixnum_value(2)); + mrb_define_const(mrb, re, "MULTILINE", mrb_fixnum_value(4)); + + /* Class methods */ + mrb_define_method(mrb, re, "initialize", regexp_init, MRB_ARGS_ARG(1, 2)); + /* compile is defined in Ruby (mrblib) as alias for new */ + mrb_define_class_method(mrb, re, "escape", regexp_escape, MRB_ARGS_REQ(1)); + mrb_define_class_method(mrb, re, "quote", regexp_escape, MRB_ARGS_REQ(1)); + + /* Instance methods */ + mrb_define_method(mrb, re, "match", regexp_match, MRB_ARGS_ARG(1, 1)); + mrb_define_method(mrb, re, "match?", regexp_match_p, MRB_ARGS_ARG(1, 1)); + mrb_define_method(mrb, re, "=~", regexp_match_op, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, re, "===", regexp_case_match, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, re, "source", regexp_source, MRB_ARGS_NONE()); + mrb_define_method(mrb, re, "inspect", regexp_inspect, MRB_ARGS_NONE()); + mrb_define_method(mrb, re, "to_s", regexp_to_s, MRB_ARGS_NONE()); + mrb_define_method(mrb, re, "==", regexp_eql, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, re, "eql?", regexp_eql, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, re, "hash", regexp_hash, MRB_ARGS_NONE()); + mrb_define_method(mrb, re, "options", regexp_options, MRB_ARGS_NONE()); + mrb_define_method(mrb, re, "casefold?", regexp_casefold_p, MRB_ARGS_NONE()); + mrb_define_method(mrb, re, "__gsub_str", regexp_gsub_str, MRB_ARGS_REQ(2)); + mrb_define_method(mrb, re, "__sub_str", regexp_sub_str, MRB_ARGS_REQ(2)); + mrb_define_method(mrb, re, "__scan", regexp_scan, MRB_ARGS_REQ(1)); + + /* MatchData class */ + struct RClass *md = mrb_define_class(mrb, "MatchData", mrb->object_class); + MRB_SET_INSTANCE_TT(md, MRB_TT_CDATA); + + mrb_define_method(mrb, md, "[]", matchdata_aref, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, md, "captures", matchdata_captures, MRB_ARGS_NONE()); + mrb_define_method(mrb, md, "to_a", matchdata_to_a, MRB_ARGS_NONE()); + mrb_define_method(mrb, md, "length", matchdata_length, MRB_ARGS_NONE()); + mrb_define_method(mrb, md, "size", matchdata_length, MRB_ARGS_NONE()); + mrb_define_method(mrb, md, "begin", matchdata_begin, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, md, "end", matchdata_end, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, md, "pre_match", matchdata_pre, MRB_ARGS_NONE()); + mrb_define_method(mrb, md, "post_match", matchdata_post, MRB_ARGS_NONE()); + mrb_define_method(mrb, md, "named_captures", matchdata_named_captures, MRB_ARGS_NONE()); + mrb_define_method(mrb, md, "string", matchdata_string, MRB_ARGS_NONE()); + mrb_define_method(mrb, md, "regexp", matchdata_regexp, MRB_ARGS_NONE()); + mrb_define_method(mrb, md, "to_s", matchdata_to_s, MRB_ARGS_NONE()); +} + +void +mrb_mruby_regexp_gem_final(mrb_state *mrb) +{ +} diff --git a/mrbgems/mruby-regexp/test/regexp.rb b/mrbgems/mruby-regexp/test/regexp.rb new file mode 100644 index 0000000000..787c6898d7 --- /dev/null +++ b/mrbgems/mruby-regexp/test/regexp.rb @@ -0,0 +1,494 @@ +assert("Regexp.new with string") do + re = Regexp.new("abc") + assert_kind_of Regexp, re +end + +assert("Regexp.new with regexp") do + r1 = Regexp.new("abc", Regexp::IGNORECASE) + r2 = Regexp.new(r1) + assert_equal r1.source, r2.source + assert_equal r1.options, r2.options + assert_true r2.match?("ABC") +end + +assert("Regexp#match - simple") do + re = Regexp.new("abc") + md = re.match("xabcy") + assert_kind_of MatchData, md + assert_equal "abc", md[0] +end + +assert("Regexp#match - no match") do + re = Regexp.new("xyz") + assert_nil re.match("abc") +end + +assert("Regexp#match?") do + re = Regexp.new("abc") + assert_true re.match?("xabcy") + assert_false re.match?("xyz") +end + +assert("Regexp#=~") do + re = Regexp.new("bc") + assert_equal 1, re =~ "abcd" + assert_nil re =~ "xyz" +end + +assert("Regexp#===") do + re = Regexp.new("abc") + assert_true re === "abc" + assert_false re === "xyz" +end + +assert("Regexp - character class") do + re = Regexp.new("[a-z]+") + md = re.match("123abc456") + assert_equal "abc", md[0] +end + +assert("Regexp - \\b inside character class is backspace") do + # Outside [...], \b is the word boundary assertion; inside [...] + # it must mean U+0008 (backspace), matching MRI/Onigmo. + assert_equal "Ruby", "Ruby".gsub(/[\b]/, "X") + assert_equal "aXc", "a\bc".gsub(/[\b]/, "X") + assert_equal ["\b", "\t", "\n"], "ABC\b\t\n".scan(/[\b-\n]/) +end + +assert("Regexp - dot") do + re = Regexp.new("a.c") + assert_true re.match?("abc") + assert_true re.match?("axc") + assert_false re.match?("ac") +end + +assert("Regexp - alternation") do + re = Regexp.new("cat|dog") + assert_equal "cat", re.match("I have a cat")[0] + assert_equal "dog", re.match("I have a dog")[0] +end + +assert("Regexp - quantifiers") do + assert_equal "aaa", Regexp.new("a+").match("aaa")[0] + assert_equal "", Regexp.new("a*").match("bbb")[0] + assert_equal "ab", Regexp.new("ab?").match("ab")[0] + assert_equal "a", Regexp.new("ab?").match("ac")[0] +end + +assert("Regexp - captures") do + re = Regexp.new("(\\w+)@(\\w+)") + md = re.match("user@host") + assert_equal "user@host", md[0] + assert_equal "user", md[1] + assert_equal "host", md[2] +end + +assert("Regexp - \\d \\w \\s") do + assert_true Regexp.new("\\d+").match?("123") + assert_true Regexp.new("\\w+").match?("abc_123") + assert_true Regexp.new("\\s+").match?(" ") + assert_false Regexp.new("\\d+").match?("abc") +end + +assert("Regexp - anchors") do + assert_true Regexp.new("^abc").match?("abc") + assert_false Regexp.new("^abc").match?("xabc") + assert_true Regexp.new("abc$").match?("abc") + assert_false Regexp.new("abc$").match?("abcx") +end + +assert("Regexp - case insensitive") do + re = Regexp.new("abc", Regexp::IGNORECASE) + assert_true re.match?("ABC") + assert_true re.match?("Abc") +end + +assert("Regexp - repetition {n,m}") do + assert_equal "aaa", Regexp.new("a{3}").match("aaaa")[0] + assert_equal "aa", Regexp.new("a{2,3}").match("aa")[0] + assert_equal "aaa", Regexp.new("a{2,3}").match("aaaa")[0] +end + +assert("MatchData#captures") do + re = Regexp.new("(a)(b)(c)") + md = re.match("abc") + assert_equal ["a", "b", "c"], md.captures +end + +assert("MatchData#pre_match / #post_match") do + re = Regexp.new("bc") + md = re.match("abcde") + assert_equal "a", md.pre_match + assert_equal "de", md.post_match +end + +assert("MatchData#string") do + md = Regexp.new("bc").match("abcde") + assert_equal "abcde", md.string +end + +assert("MatchData#regexp") do + re = Regexp.new("bc") + md = re.match("abcde") + assert_equal re, md.regexp +end + +assert("MatchData#to_s") do + md = Regexp.new("bc").match("abcde") + assert_equal "bc", md.to_s +end + +assert("MatchData#begin / #end") do + re = Regexp.new("bc") + md = re.match("abcde") + assert_equal 1, md.begin(0) + assert_equal 3, md.end(0) +end + +assert("Regexp.escape") do + assert_equal "a\\.b\\*c", Regexp.escape("a.b*c") +end + +assert("Regexp#inspect") do + re = Regexp.new("abc", Regexp::IGNORECASE) + assert_equal "/abc/i", re.inspect +end + +assert("Regexp#to_s") do + assert_equal "(?:abc)", Regexp.new("abc").to_s + assert_equal "(?i:abc)", Regexp.new("abc", Regexp::IGNORECASE).to_s + assert_equal "(?m:abc)", Regexp.new("abc", Regexp::MULTILINE).to_s + assert_equal "(?im:abc)", Regexp.new("abc", Regexp::IGNORECASE | Regexp::MULTILINE).to_s +end + +assert("Regexp#== and Regexp#eql?") do + r1 = Regexp.new("abc", Regexp::IGNORECASE) + r2 = Regexp.new("abc", Regexp::IGNORECASE) + r3 = Regexp.new("abc") + r4 = Regexp.new("def", Regexp::IGNORECASE) + assert_true r1 == r2 + assert_true r1.eql?(r2) + assert_false r1 == r3 # different flags + assert_false r1 == r4 # different source + assert_false r1 == "abc" # not a Regexp +end + +assert("Regexp#hash") do + r1 = Regexp.new("abc", Regexp::IGNORECASE) + r2 = Regexp.new("abc", Regexp::IGNORECASE) + r3 = Regexp.new("abc") + assert_equal r1.hash, r2.hash + assert_not_equal r1.hash, r3.hash +end + +assert("Regexp#hash/== on uninitialized regexp") do + # Regexp.allocate yields an object with no @source IV; hash/== must + # not crash (regression: ObjectSpace.each_object could expose a + # half-initialized Regexp after Regexp.new raised a compile error). + r = Regexp.allocate + assert_kind_of Integer, r.hash + assert_true r == r + assert_false r == Regexp.allocate + assert_false r == Regexp.new("abc") +end + +assert("Regexp#options") do + assert_equal 0, Regexp.new("abc").options + assert_equal Regexp::IGNORECASE, Regexp.new("abc", Regexp::IGNORECASE).options + assert_equal Regexp::MULTILINE, Regexp.new("abc", Regexp::MULTILINE).options + assert_equal Regexp::EXTENDED, Regexp.new("abc", Regexp::EXTENDED).options + assert_equal Regexp::IGNORECASE | Regexp::MULTILINE, + Regexp.new("abc", Regexp::IGNORECASE | Regexp::MULTILINE).options + assert_equal Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE, + Regexp.new("abc", Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE).options +end + +assert("Regexp#casefold?") do + assert_true Regexp.new("abc", Regexp::IGNORECASE).casefold? + assert_false Regexp.new("abc").casefold? +end + +assert("Regexp extended mode (x flag)") do + # whitespace is ignored + re = Regexp.new('a b c', Regexp::EXTENDED) + assert_true re.match?("abc") + assert_false re.match?("a b c") + + # comments are ignored + re = Regexp.new("a # match a\nb # match b\nc", Regexp::EXTENDED) + assert_true re.match?("abc") + + # whitespace inside character class is literal + re = Regexp.new('[ ]', Regexp::EXTENDED) + assert_true re.match?(" ") + + # escaped whitespace is preserved + re = Regexp.new('a\\ b', Regexp::EXTENDED) + assert_true re.match?("a b") + + # inspect shows x flag + assert_equal "/abc/x", Regexp.new("abc", Regexp::EXTENDED).inspect + + # to_s shows x flag + assert_equal "(?x:abc)", Regexp.new("abc", Regexp::EXTENDED).to_s +end + +assert("String#match") do + md = "hello world".match(Regexp.new("(\\w+)\\s(\\w+)")) + assert_equal "hello", md[1] + assert_equal "world", md[2] +end + +assert("String#sub") do + assert_equal "hXllo", "hello".sub(Regexp.new("e"), "X") +end + +assert("String#gsub") do + assert_equal "h-ll-", "hello".gsub(Regexp.new("[eo]"), "-") +end + +assert("String#sub with \\& \\` \\' specials") do + # \& = full match + assert_equal "a[bc]d", "abcd".sub(/bc/, '[\\&]') + # \` = pre_match + assert_equal "a[a]d", "abcd".sub(/bc/, '[\\`]') + # \' = post_match + assert_equal "a[d]d", "abcd".sub(/bc/, "[\\']") + # \+ = last capture + assert_equal "a[c]d", "abcd".sub(/(b)(c)/, '[\\+]') + # \\ = literal backslash + assert_equal "a\\d", "abcd".sub(/bc/, "\\\\") + # \1 still works + assert_equal "abbd", "abcd".sub(/(b)c/, '\\1\\1') +end + +assert("String#gsub with \\& special") do + assert_equal "[a][b][c]", "abc".gsub(/./, '[\\&]') +end + +assert("String#scan") do + assert_equal ["1", "2", "3"], "a1b2c3".scan(Regexp.new("\\d")) +end + +assert("Regexp literal /regex/") do + assert_true /abc/.match?("abc") + assert_equal "123", /\d+/.match("abc123")[0] + assert_true /hello/i.match?("HELLO") +end + +assert("$~ global variable") do + /(\w+)@(\w+)/ =~ "user@host" + assert_kind_of MatchData, $~ + assert_equal "user", $~[1] + assert_equal "host", $~[2] +end + +assert("$~ is nil on no match") do + /xyz/ =~ "abc" + assert_nil $~ +end + +assert("Regexp.last_match") do + /(\d+)/ =~ "abc123" + assert_equal "123", Regexp.last_match(1) + assert_equal "123", Regexp.last_match(0) +end + +assert("Regexp - empty pattern") do + assert_true //.match?("") + assert_true //.match?("abc") +end + +assert("Regexp - nested captures") do + md = /((a)(b))c/.match("abc") + assert_equal "abc", md[0] + assert_equal "ab", md[1] + assert_equal "a", md[2] + assert_equal "b", md[3] +end + +assert("Regexp - non-greedy quantifiers") do + + assert_equal "a", /a+?/.match("aaa")[0] + assert_equal "", /a*?/.match("aaa")[0] +end + +assert("Regexp - word boundary") do + assert_equal "cat", /\bcat\b/.match("the cat sat")[0] + assert_nil /\bcat\b/.match("concatenate") +end + +assert("Regexp - non-capturing group") do + md = /(?:a)(b)/.match("ab") + assert_equal "ab", md[0] + assert_equal "b", md[1] + assert_nil md[2] +end + +assert("String#sub with block") do + assert_equal "HELLO world", "hello world".sub(/\w+/) { |m| m.upcase } +end + +assert("String#gsub with block") do + assert_equal "HELLO WORLD", "hello world".gsub(/\w+/) { |m| m.upcase } +end + +assert("String#gsub date reformat") do + result = "2026-03-21".gsub(/(\d+)-(\d+)-(\d+)/) { "#{$~[3]}/#{$~[2]}/#{$~[1]}" } + assert_equal "21/03/2026", result +end + +assert("String#scan with captures") do + assert_equal [["1","a"],["2","b"]], "1a2b".scan(/(\d)(\w)/) +end + +assert("String#split with regexp") do + assert_equal ["a", "b", "c"], "a, b, c".split(/,\s*/) +end + +assert("Regexp - case in when") do + result = case "hello123" + when /\d+/ then "has digits" + else "no digits" + end + assert_equal "has digits", result +end + +assert("Regexp - backreference \\1") do + # match repeated word + md = /(\w+) \1/.match("hello hello world") + assert_equal "hello hello", md[0] + assert_equal "hello", md[1] +end + +assert("Regexp - backreference no match") do + assert_nil /(\w+) \1/.match("hello world") +end + +assert("Regexp - named captures") do + md = /(?\d+)-(?\d+)-(?\d+)/.match("2026-03-21") + assert_equal "2026", md[:year] + assert_equal "03", md[:month] + assert_equal "21", md[:day] + assert_equal "2026", md["year"] +end + +assert("MatchData#named_captures") do + md = /(?\w+)@(?\w+)/.match("user@host") + nc = md.named_captures + assert_equal "user", nc["a"] + assert_equal "host", nc["b"] +end + +assert("Regexp - named captures survive /x preprocessing") do + # Regression: with /x, mrb_re_compile freed the stripped buffer that + # named_captures[i].name pointed into. + re = /(?\d+) # comment + \s* (?\w+) /x + m = re.match("42 px") + assert_equal "42", m[:n] + assert_equal "px", m[:u] +end + +assert("Regexp - named captures survive source string mutation") do + # Regression: name pointer used to alias RSTRING_PTR of the source. + s = String.new("(?\\d+)") + re = Regexp.new(s) + s.replace("X" * 10000) # force buffer reallocation + m = re.match("abc 123 def") + assert_equal "123", m[:key] +end + +assert("Regexp - positive lookahead (?=...)") do + md = /\w+(?=@)/.match("user@host") + assert_equal "user", md[0] +end + +assert("Regexp - negative lookahead (?!...)") do + md = /\d+(?!%)/.match("100%") + assert_equal "10", md[0] +end + +assert("Regexp - lookahead does not consume") do + md = /foo(?=bar)/.match("foobar") + assert_equal "foo", md[0] + assert_nil /foo(?=baz)/.match("foobar") +end + +assert("Regexp - positive lookbehind (?<=...)") do + md = Regexp.new("(?<=@)\\w+").match("user@host") + assert_equal "host", md[0] + assert_nil Regexp.new("(?<=@)\\w+").match("user_host") +end + +assert("Regexp - negative lookbehind (? "mruby/mruby-set" +``` + +## Usage and Examples + +### Creating a Set + +You can create a set from an array or by using the `Set.[]` shorthand: + +```ruby +require 'set' # Not strictly necessary in mruby if compiled in + +set1 = Set.new([1, 2, 3]) +#=> Set[1, 2, 3] + +set2 = Set[3, 4, 5] +#=> Set[3, 4, 5] +``` + +### Adding and Deleting Elements + +```ruby +s = Set.new +s.add(10) #=> Set[10] +s << 20 #=> Set[10, 20] +s.add?(30) #=> Set[10, 20, 30] +s.add?(20) #=> nil (20 is already in the set) + +s.delete(10) #=> Set[20, 30] +s.delete?(5) #=> nil (5 was not in the set) +s.delete?(20) #=> Set[30] +``` + +### Set Operations + +`mruby-set` supports common set operations: + +**Union (`|`, `+`, `union`):** Returns a new set containing all elements from both sets. + +```ruby +set_a = Set[1, 2, 3] +set_b = Set[3, 4, 5] + +set_a | set_b #=> Set[1, 2, 3, 4, 5] +set_a + set_b #=> Set[1, 2, 3, 4, 5] +``` + +**Intersection (`&`, `intersection`):** Returns a new set containing elements common to both sets. + +```ruby +set_a = Set[1, 2, 3] +set_b = Set[3, 4, 5] + +set_a & set_b #=> Set[3] +``` + +**Difference (`-`, `difference`):** Returns a new set containing elements from the first set that are not in the second set. + +```ruby +set_a = Set[1, 2, 3] +set_b = Set[3, 4, 5] + +set_a - set_b #=> Set[1, 2] +``` + +**Exclusive OR (`^`):** Returns a new set containing elements that are in one or the other of the sets, but not in both. + +```ruby +set_a = Set[1, 2, 3] +set_b = Set[3, 4, 5] + +set_a ^ set_b #=> Set[1, 2, 4, 5] +``` + +### Querying the Set + +**Checking for inclusion (`include?`, `member?`, `===`):** + +```ruby +s = Set["apple", "banana", "cherry"] +s.include?("banana") #=> true +s.member?("grape") #=> false +``` + +**Checking size (`size`, `length`):** + +```ruby +s = Set[10, 20, 30] +s.size #=> 3 +``` + +**Checking if empty (`empty?`):** + +```ruby +Set.new.empty? #=> true +Set[1].empty? #=> false +``` + +**Subset and Superset (`subset?`, `superset?`, `<`, `<=`, `>`, `>=`):** + +```ruby +set_main = Set[1, 2, 3, 4] +sub = Set[2, 3] +super_set = Set[1, 2, 3, 4, 5] + +sub.subset?(set_main) #=> true +set_main.superset?(sub) #=> true +set_main < super_set #=> true (proper subset) +super_set > set_main #=> true (proper superset) +Set[1,2].proper_subset?(Set[1,2,3]) #=> true +Set[1,2,3].proper_superset?(Set[1,2]) #=> true +``` + +**Disjoint (`disjoint?`):** Returns `true` if the set has no elements in common with the given set. + +```ruby +Set[1, 2].disjoint?(Set[3, 4]) #=> true +Set[1, 2].disjoint?(Set[2, 3]) #=> false +``` + +**Intersect (`intersect?`):** Returns `true` if the set has any elements in common with the given set. + +```ruby +Set[1, 2].intersect?(Set[2, 3]) #=> true +Set[1, 2].intersect?(Set[3, 4]) #=> false +``` + +### Other Useful Methods + +**Convert to Array (`to_a`):** + +```ruby +s = Set["a", "b", "c"] +s.to_a #=> ["a", "b", "c"] (order may vary) +``` + +**Iterating (`each`):** + +```ruby +s = Set[1, 2, 3] +s.each { |x| puts x * 10 } +# Output: +# 10 +# 20 +# 30 +``` + +**Map/Collect (`map!`, `collect!`):** Modifies the set by applying the block to each element. + +```ruby +s = Set[1, 2, 3] +s.map! { |x| x * x } #=> Set[1, 4, 9] +``` + +**Select/Filter (`select!`, `filter!`):** Keeps elements for which the block returns true. + +```ruby +s = Set[1, 2, 3, 4, 5] +s.select! { |x| x.even? } #=> Set[2, 4] +``` + +**Reject (`reject!`):** Deletes elements for which the block returns true. + +```ruby +s = Set[1, 2, 3, 4, 5] +s.reject! { |x| x.odd? } #=> Set[2, 4] +``` + +**Clear (`clear`):** Removes all elements from the set. + +```ruby +s = Set[1, 2, 3] +s.clear #=> Set[] +``` + +**Replace (`replace`):** Replaces the contents of the set with the contents of the given enumerable. + +```ruby +s = Set[1, 2, 3] +s.replace([4, 5]) #=> Set[4, 5] +``` + +**Flatten (`flatten`, `flatten!`):** Returns a new set that is a copy of the set, flattening any nested sets. `flatten!` modifies the set in place. + +```ruby +s = Set[1, Set[2, 3], 4] +s.flatten #=> Set[1, 2, 3, 4] +``` + +## Method Overview + +Here's a list of commonly used methods available in `mruby-set`: + +- `Set.[](*ary)` +- `initialize(enum = nil, &block)` +- `size`, `length` +- `empty?` +- `clear` +- `replace(enum)` +- `to_a` +- `include?(o)`, `member?(o)`, `===` +- `superset?(set)`, `>=` +- `proper_superset?(set)`, `>` +- `subset?(set)`, `<=` +- `proper_subset?(set)`, `<` +- `intersect?(set)` +- `disjoint?(set)` +- `each(&block)` +- `add(o)`, `<<(o)` +- `add?(o)` +- `delete(o)` +- `delete?(o)` +- `delete_if { |o| ... }` +- `keep_if { |o| ... }` +- `collect! { |o| ... }`, `map! { |o| ... }` +- `reject! { |o| ... }` +- `select! { |o| ... }`, `filter! { |o| ... }` +- `merge(enum)` +- `subtract(enum)` +- `|(enum)`, `+(enum)`, `union(enum)` +- `-(enum)`, `difference(enum)` +- `&(enum)`, `intersection(enum)` +- `^(enum)` +- `==(other)` +- `hash` +- `eql?(o)` +- `classify { |o| ... }` +- `divide(&func)` +- `join(separator = nil)` +- `inspect`, `to_s` +- `flatten`, `flatten!` + +## Limitations + +These methods are not implemented yet: + +- freeze +- to_set +- divide(Set#divide with 2 arity block is not implemented.) + +## License + +Under the MIT License: + +- see [LICENSE](LICENSE) file diff --git a/mrbgems/mruby-set/mrbgem.rake b/mrbgems/mruby-set/mrbgem.rake new file mode 100644 index 0000000000..1495b34ba3 --- /dev/null +++ b/mrbgems/mruby-set/mrbgem.rake @@ -0,0 +1,9 @@ +MRuby::Gem::Specification.new('mruby-set') do |spec| + spec.license = 'MIT' + spec.authors = 'yui-knk' + spec.summary = 'Set class' + spec.build.defines << "MRB_USE_SET" + + spec.add_dependency "mruby-hash-ext", :core => "mruby-hash-ext" + spec.add_dependency "mruby-enumerator", :core => "mruby-enumerator" +end diff --git a/mrbgems/mruby-set/mrblib/set.rb b/mrbgems/mruby-set/mrblib/set.rb new file mode 100644 index 0000000000..64a0f4c2c5 --- /dev/null +++ b/mrbgems/mruby-set/mrblib/set.rb @@ -0,0 +1,325 @@ +class Set + # + # call-seq: + # Set.new(enum = nil) -> set + # Set.new(enum = nil) { |obj| block } -> set + # + # Creates a new set containing the elements of the given enumerable object. + # If a block is given, the elements are preprocessed by the given block. + # + # Set.new([1, 2, 3]) #=> # + # Set.new([1, 2, 2, 3]) #=> # + # Set.new([1, 2, 3]) { |x| x * 2 } #=> # + # + def initialize(enum = nil, &block) + __init + return self if enum.nil? + + if block + __do_with_enum(enum) { add(block.call(_1)) } + else + merge(enum) + end + self + end + + # internal method + def __do_with_enum(enum, &block) + if enum.respond_to?(:each) + enum.each(&block) + else + raise ArgumentError, "value must be enumerable" + end + end + + # + # call-seq: + # set.merge(enum) -> self + # + # Merges the elements of the given enumerable object to the set and returns self. + # + # set = Set.new([1, 2]) + # set.merge([2, 3, 4]) #=> # + # set #=> # + # + def merge(enum) + __merge(enum) || __do_with_enum(enum) {|o| add(o) } + self + end + + # + # call-seq: + # set.replace(enum) -> self + # + # Replaces the contents of the set with the contents of the given enumerable + # object and returns self. + # + # set = Set.new([1, 2, 3]) + # set.replace([4, 5, 6]) #=> # + # set #=> # + # + def replace(enum) + clear + merge(enum) + end + + # + # call-seq: + # set.subtract(enum) -> self + # + # Deletes every element that appears in the given enumerable object and + # returns self. + # + # set = Set.new([1, 2, 3, 4]) + # set.subtract([2, 4]) #=> # + # set #=> # + # + def subtract(enum) + __subtract(enum) || __do_with_enum(enum) {|o| delete(o) } + self + end + + # + # call-seq: + # set.intersection(enum) -> new_set + # set & enum -> new_set + # + # Returns a new set containing elements common to the set and the given + # enumerable object. + # + # Set.new([1, 2, 3]).intersection([2, 3, 4]) #=> # + # Set.new([1, 2, 3]) & [2, 3, 4] #=> # + # + def intersection(enum) + __intersection(enum) || begin + n = Set.new + __do_with_enum(enum) {|o| n.add(o) if include?(o) } + n + end + end + + # Alias for #intersection + alias & intersection + + # + # call-seq: + # set.union(enum) -> new_set + # set | enum -> new_set + # set + enum -> new_set + # + # Returns a new set built by merging the set and the elements of the given + # enumerable object. + # + # Set.new([1, 2]).union([2, 3, 4]) #=> # + # Set.new([1, 2]) | [2, 3, 4] #=> # + # Set.new([1, 2]) + [2, 3, 4] #=> # + # + def union(enum) + __union(enum) || dup.merge(enum) + end + + # Aliases for #union + alias | union + alias + union + + # + # call-seq: + # set.difference(enum) -> new_set + # set - enum -> new_set + # + # Returns a new set built by duplicating the set, removing every element that + # appears in the given enumerable object. + # + # Set.new([1, 2, 3, 4]).difference([2, 4]) #=> # + # Set.new([1, 2, 3, 4]) - [2, 4] #=> # + # + def difference(enum) + __difference(enum) || begin + result = dup + __do_with_enum(enum) {|o| result.delete(o) } + result + end + end + + # Alias for #difference + alias - difference + + # + # call-seq: + # set ^ enum -> new_set + # + # Returns a new set containing elements exclusive between the set and the given + # enumerable object. (set ^ enum) is equivalent to ((set | enum) - (set & enum)). + # + # Set.new([1, 2, 3]) ^ [2, 3, 4] #=> # + # Set.new([1, 2]) ^ [2, 3] #=> # + # + def ^(enum) + __xor(enum) || begin + s2 = Set.new(enum) + (self | s2) - (self & s2) + end + end + + # + # call-seq: + # set.each { |obj| block } -> set + # set.each -> enumerator + # + # Calls the given block once for each element in the set, passing the element + # as parameter. Returns an enumerator if no block is given. + # + # Set.new([1, 2, 3]).each { |x| puts x } + # # prints: 1, 2, 3 + # #=> # + # + def each(&block) + return to_enum(:each) unless block_given? + # Use C implementation's to_a method and iterate + to_a.each(&block) + self + end + + # + # call-seq: + # set.delete_if { |obj| block } -> set + # set.delete_if -> enumerator + # + # Deletes every element of the set for which block evaluates to true, and + # returns self. Returns an enumerator if no block is given. + # + # set = Set.new([1, 2, 3, 4, 5]) + # set.delete_if { |x| x.even? } #=> # + # set #=> # + # + def delete_if + return to_enum(:delete_if) unless block_given? + select { yield _1 }.each { delete(_1) } + self + end + + # + # call-seq: + # set.keep_if { |obj| block } -> set + # set.keep_if -> enumerator + # + # Deletes every element of the set for which block evaluates to false, and + # returns self. Returns an enumerator if no block is given. + # + # set = Set.new([1, 2, 3, 4, 5]) + # set.keep_if { |x| x.even? } #=> # + # set #=> # + # + def keep_if + return to_enum(:keep_if) unless block_given? + reject { yield _1 }.each { delete(_1) } + self + end + + # + # call-seq: + # set.collect! { |obj| block } -> set + # set.map! { |obj| block } -> set + # set.collect! -> enumerator + # set.map! -> enumerator + # + # Replaces the elements with ones returned by collect(). + # Returns an enumerator if no block is given. + # + # set = Set.new([1, 2, 3]) + # set.collect! { |x| x * 2 } #=> # + # set #=> # + # + def collect! + return to_enum(:collect!) unless block_given? + set = self.class.new + each { set << yield(_1) } + replace(set) + end + alias map! collect! + + # + # call-seq: + # set.reject! { |obj| block } -> set or nil + # set.reject! -> enumerator + # + # Equivalent to Set#delete_if, but returns nil if no changes were made. + # Returns an enumerator if no block is given. + # + # set = Set.new([1, 2, 3, 4, 5]) + # set.reject! { |x| x.even? } #=> # + # set.reject! { |x| x > 10 } #=> nil + # + def reject!(&block) + return to_enum(:reject!) unless block_given? + n = size + delete_if(&block) + size == n ? nil : self + end + + # + # call-seq: + # set.select! { |obj| block } -> set or nil + # set.filter! { |obj| block } -> set or nil + # set.select! -> enumerator + # set.filter! -> enumerator + # + # Equivalent to Set#keep_if, but returns nil if no changes were made. + # Returns an enumerator if no block is given. + # + # set = Set.new([1, 2, 3, 4, 5]) + # set.select! { |x| x.even? } #=> # + # set.select! { |x| x.even? } #=> nil + # + def select!(&block) + return to_enum(:select!) unless block_given? + n = size + keep_if(&block) + size == n ? nil : self + end + alias filter! select! + + # + # call-seq: + # set.classify { |obj| block } -> hash + # set.classify -> enumerator + # + # Classifies the set by the return value of the given block and returns a + # hash of {value => set of elements} pairs. Returns an enumerator if no block is given. + # + # set = Set.new([1, 2, 3, 4, 5, 6]) + # set.classify { |x| x % 3 } + # #=> {1=>#, 2=>#, 0=>#} + # + def classify + return to_enum(:classify) unless block_given? + h = {} + each {|i| + x = yield(i) + (h[x] ||= self.class.new).add(i) + } + h + end + + # + # call-seq: + # set.divide { |obj1, obj2| block } -> set + # set.divide -> enumerator + # + # Divides the set into a set of subsets according to the commonality defined + # by the given block. Returns an enumerator if no block is given. + # + # set = Set.new([1, 2, 3, 4, 5, 6]) + # set.divide { |x, y| (x % 3) == (y % 3) } + # #=> #, #, #}> + # + def divide(&func) + return to_enum(:divide) unless block_given? + + if func.arity == 2 + raise NotImplementedError, "Set#divide with 2 arity block is not implemented." + end + + Set.new(classify(&func).values) + end +end diff --git a/mrbgems/mruby-set/mruby-set.gem b/mrbgems/mruby-set/mruby-set.gem new file mode 100644 index 0000000000..84a4afb475 --- /dev/null +++ b/mrbgems/mruby-set/mruby-set.gem @@ -0,0 +1,6 @@ +name: mruby-set +description: Set class +author: yui-knk +website: https://github.com/yui-knk/mruby-set +protocol: git +repository: https://github.com/yui-knk/mruby-set.git diff --git a/mrbgems/mruby-set/src/set.c b/mrbgems/mruby-set/src/set.c new file mode 100644 index 0000000000..82eec5b1ba --- /dev/null +++ b/mrbgems/mruby-set/src/set.c @@ -0,0 +1,1534 @@ +/* +** set.c - Set class +** +** See Copyright Notice in mruby.h +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Use khash.h for set implementation - set mode (no values, only keys) */ +KHASH_DECLARE(set_val, mrb_value, char, FALSE) /* FALSE = set mode */ + +/* Helper for protected hash computation */ +static mrb_value +kset_hash_body(mrb_state *mrb, void *data) +{ + mrb_value *key = (mrb_value*)data; + return mrb_int_value(mrb, mrb_obj_hash_code(mrb, *key)); +} + +/* Helper for protected equality check */ +struct kset_eql_data { + mrb_value a; + mrb_value b; +}; + +static mrb_value +kset_eql_body(mrb_state *mrb, void *data) +{ + struct kset_eql_data *d = (struct kset_eql_data*)data; + return mrb_bool_value(mrb_eql(mrb, d->a, d->b)); +} + +/* Hash and equality functions for mrb_value keys */ +/* These use mrb_protect_error to catch exceptions and prevent leaks in khash rebuild */ +static inline khint_t +kset_hash_value(mrb_state *mrb, mrb_value key) +{ + mrb_bool error; + mrb_value result = mrb_protect_error(mrb, kset_hash_body, &key, &error); + if (error) { + mrb->exc = mrb_obj_ptr(result); /* Store exception to raise later */ + return 0; /* Return default hash value */ + } + return (khint_t)mrb_integer(result); +} + +static inline mrb_bool +kset_equal_value(mrb_state *mrb, mrb_value a, mrb_value b) +{ + struct kset_eql_data data = { a, b }; + mrb_bool error; + mrb_value result = mrb_protect_error(mrb, kset_eql_body, &data, &error); + if (error) { + mrb->exc = mrb_obj_ptr(result); /* Store exception to raise later */ + return FALSE; /* Return not-equal */ + } + return mrb_test(result); +} + +KHASH_DEFINE(set_val, mrb_value, char, FALSE, kset_hash_value, kset_equal_value) + +#define KSET_INITIAL_SIZE 4 +#define GOLDEN_RATIO_PRIME 0x9e3779b97f4a7c15ULL + +/* Compatibility layer and type definitions */ +typedef kh_set_val_t kset_t; +typedef khint_t kset_iter_t; + +/* API Aliases to khash.h */ +#define kset_init(mrb) kh_init(set_val, mrb) +#define kset_init_data(mrb, s, sz) kh_init_data(set_val, mrb, s, sz) +#define kset_destroy_data(mrb, s) kh_destroy_data(set_val, mrb, s) +#define kset_clear(mrb, s) kh_clear(set_val, mrb, s) +#define kset_resize(mrb, s, sz) kh_resize(set_val, mrb, s, sz) +#define kset_put(mrb, s, k) kh_put(set_val, mrb, s, k) +#define kset_put2(mrb, s, k, r) kh_put2(set_val, mrb, s, k, r) +#define kset_get(mrb, s, k) kh_get(set_val, mrb, s, k) +#define kset_del(mrb, s, k) kh_del(set_val, mrb, s, k) +#define kset_exist(s, k) kh_exist(set_val, s, k) +#define kset_key(s, k) kh_key(set_val, s, k) +#define kset_size(s) kh_size(s) +#define kset_end(s) kh_end(s) +#define kset_is_end(s, k) kh_is_end(s, k) + +#define KSET_FOREACH(s, k) KHASH_FOREACH(set_val, s, k) + +/* Helper macros for set state checking */ +#define kset_is_uninitialized(s) ((s)->data == NULL) +#define kset_is_empty(s) (kset_is_uninitialized(s) || kset_size(s) == 0) + +/* Embedded set structure in RSet - exactly 3 pointers */ +struct RSet { + MRB_OBJECT_HEADER; + kset_t set; /* Embedded directly, not a pointer */ +}; + +mrb_static_assert_object_size(struct RSet); + +#define mrb_set_ptr(o) ((struct RSet*)mrb_obj_ptr(o)) + +/* Get pointer to embedded set */ +static kset_t* +set_get_kset(mrb_state *mrb, mrb_value self) +{ + mrb_check_type(mrb, self, MRB_TT_SET); + return &mrb_set_ptr(self)->set; +} + +/* Get RSet pointer from embedded kset_t pointer */ +#define kset_to_rset(kset) ((struct RBasic*)((char*)(kset) - offsetof(struct RSet, set))) + +/* Copy all elements from src to dst (merge operation) */ +static void +kset_copy_merge(mrb_state *mrb, kset_t *dst, kset_t *src) +{ + if (!kset_is_empty(src)) { + struct RBasic *dst_obj = kset_to_rset(dst); + int ai = mrb_gc_arena_save(mrb); + KSET_FOREACH(src, k) { + mrb_value key = kset_key(src, k); + kset_put(mrb, dst, key); + mrb_field_write_barrier_value(mrb, dst_obj, key); + mrb_gc_arena_restore(mrb, ai); + } + } +} + +/* Helper function to ensure set is initialized */ +static void +set_ensure_initialized(mrb_state *mrb, kset_t *set) +{ + if (kset_is_uninitialized(set)) { + mrb_raise(mrb, E_RUNTIME_ERROR, "uninitialized Set"); + } +} + +/* Mark function for Set instances */ +size_t +mrb_gc_mark_set(mrb_state *mrb, struct RBasic *obj) +{ + struct RSet *s = (struct RSet*)obj; + kset_t *set = &s->set; + if (kset_is_empty(set)) return 0; + + KSET_FOREACH(set, k) { + mrb_gc_mark_value(mrb, kset_key(set, k)); + } + return set->size; +} + +void +mrb_gc_free_set(mrb_state *mrb, struct RBasic *obj) +{ + struct RSet *s = (struct RSet*)obj; + kset_destroy_data(mrb, &s->set); +} + +size_t +mrb_set_memsize(mrb_value set) +{ + size_t size = sizeof(struct RSet); + struct RSet *s = mrb_set_ptr(set); + kset_t *kset = &s->set; + if (kset->data) { + /* New khash layout: keys + flags in single allocation */ + size += sizeof(mrb_value) * kset->n_buckets; /* keys */ + size += kset->n_buckets / 4; /* flags */ + } + return size; +} + +/* Helper function to check if a value is a Set and return a boolean result */ +static mrb_bool +set_is_set(mrb_value obj) +{ + return mrb_type(obj) == MRB_TT_SET; +} + +/* Helper function to check if a value is a Set and raise an error if not */ +static void +set_check_type(mrb_state *mrb, mrb_value obj) +{ + if (!set_is_set(obj)) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "value must be a set"); + } +} + +static mrb_value +set_init(mrb_state *mrb, mrb_value self) +{ + kset_t *set = set_get_kset(mrb, self); + if (!kset_is_uninitialized(set)) { + mrb_raise(mrb, E_RUNTIME_ERROR, "already initialized set"); + } + kset_init_data(mrb, set, KSET_INITIAL_SIZE); + return self; +} + +/* + * call-seq: + * set.initialize_copy(orig) + * Copy constructor. + */ +static mrb_value +set_init_copy(mrb_state *mrb, mrb_value self) +{ + mrb_value orig = mrb_get_arg1(mrb); + + if (mrb_type(orig) != MRB_TT_SET) { + mrb_raise(mrb, E_TYPE_ERROR, "initialize_copy should take a Set object"); + } + if (mrb_obj_class(mrb, self) != mrb_obj_class(mrb, orig)) { + mrb_raise(mrb, E_TYPE_ERROR, "initialize_copy should take same class object"); + } + + kset_t *orig_set = set_get_kset(mrb, orig); + set_ensure_initialized(mrb, orig_set); + + kset_t *self_set = set_get_kset(mrb, self); + /* Free existing data if already initialized (for replace semantics) */ + if (!kset_is_uninitialized(self_set)) { + kset_destroy_data(mrb, self_set); + } + kset_init_data(mrb, self_set, kset_size(orig_set)); + kh_replace(set_val, mrb, self_set, orig_set); + + return self; +} + +/* + * call-seq: + * set.size -> integer + * set.length -> integer + * + * Returns the number of elements. + */ +static mrb_value +set_size(mrb_state *mrb, mrb_value self) +{ + kset_t *set = set_get_kset(mrb, self); + if (kset_is_empty(set)) return mrb_fixnum_value(0); + return mrb_fixnum_value(kset_size(set)); +} + +/* + * call-seq: + * set.empty? -> true or false + * + * Returns true if the set contains no elements. + */ +static mrb_value +set_empty_p(mrb_state *mrb, mrb_value self) +{ + kset_t *set = set_get_kset(mrb, self); + return mrb_bool_value(kset_is_empty(set)); +} + +/* + * call-seq: + * set.clear -> self + * + * Removes all elements and returns self. + */ +static mrb_value +set_clear(mrb_state *mrb, mrb_value self) +{ + kset_t *set = set_get_kset(mrb, self); + if (!kset_is_empty(set)) { + kset_clear(mrb, set); + } + return self; +} + +/* + * call-seq: + * set.to_a -> array + * + * Converts the set to an array. + */ +static mrb_value +set_to_a(mrb_state *mrb, mrb_value self) +{ + kset_t *set = set_get_kset(mrb, self); + + if (kset_is_empty(set)) return mrb_ary_new(mrb); + + mrb_value ary = mrb_ary_new_capa(mrb, kset_size(set)); + + int ai = mrb_gc_arena_save(mrb); + KSET_FOREACH(set, k) { + mrb_ary_push(mrb, ary, kset_key(set, k)); + mrb_gc_arena_restore(mrb, ai); + } + + return ary; +} + +/* + * call-seq: + * set.include?(object) -> true or false + * set.member?(object) -> true or false + * set === object -> true or false + * + * Returns true if the set contains the given object. + */ +static mrb_value +set_include_p(mrb_state *mrb, mrb_value self) +{ + mrb_value obj = mrb_get_arg1(mrb); + kset_t *set = set_get_kset(mrb, self); + if (kset_is_empty(set)) return mrb_false_value(); + + return mrb_bool_value(!kset_is_end(set, kset_get(mrb, set, obj))); +} + +/* + * call-seq: + * set.add(object) -> self + * set << object -> self + * + * Adds the given object to the set and returns self. + */ +static mrb_value +set_add(mrb_state *mrb, mrb_value self) +{ + mrb_value obj = mrb_get_arg1(mrb); + kset_t *set = set_get_kset(mrb, self); + set_ensure_initialized(mrb, set); + + kset_put(mrb, set, obj); + mrb_field_write_barrier_value(mrb, kset_to_rset(set), obj); + return self; +} + +/* + * call-seq: + * set.add?(object) -> self or nil + * + * Adds the given object to the set and returns self. If the object is already + * in the set, returns nil. + */ +static mrb_value +set_add_p(mrb_state *mrb, mrb_value self) +{ + mrb_value obj = mrb_get_arg1(mrb); + kset_t *set = set_get_kset(mrb, self); + set_ensure_initialized(mrb, set); + + int ret; + kset_put2(mrb, set, obj, &ret); + mrb_field_write_barrier_value(mrb, kset_to_rset(set), obj); + return (ret == 0) ? mrb_nil_value() : self; +} + +/* + * call-seq: + * set.delete(object) -> self + * + * Deletes the given object from the set and returns self. + */ +static mrb_value +set_delete(mrb_state *mrb, mrb_value self) +{ + mrb_value obj = mrb_get_arg1(mrb); + kset_t *set = set_get_kset(mrb, self); + if (kset_is_empty(set)) return self; + + kset_iter_t k = kset_get(mrb, set, obj); + if (!kset_is_end(set, k)) { + kset_del(mrb, set, k); + } + return self; +} + +/* + * call-seq: + * set.delete?(object) -> self or nil + * + * Deletes the given object from the set and returns self. If the object is not + * in the set, returns nil. + */ +static mrb_value +set_delete_p(mrb_state *mrb, mrb_value self) +{ + mrb_value obj = mrb_get_arg1(mrb); + kset_t *set = set_get_kset(mrb, self); + if (kset_is_empty(set)) return mrb_nil_value(); + + kset_iter_t k = kset_get(mrb, set, obj); + if (!kset_is_end(set, k)) { + kset_del(mrb, set, k); + return self; + } + else { + return mrb_nil_value(); + } +} + +/* + * Core implementation of Set-to-Set merge (mutating version) + * This is an internal method that will be called from Ruby + */ +static mrb_value +set_core_merge(mrb_state *mrb, mrb_value self) +{ + mrb_value other = mrb_get_arg1(mrb); + + if (!set_is_set(other)) { + return mrb_false_value(); + } + + kset_t *self_set = set_get_kset(mrb, self); + kset_t *other_set = set_get_kset(mrb, other); + + set_ensure_initialized(mrb, self_set); + if (!kset_is_empty(other_set)) { + kset_copy_merge(mrb, self_set, other_set); + } + + return mrb_true_value(); +} + +/* + * Core implementation of Set-to-Set subtraction (mutating version) + * This is an internal method that will be called from Ruby + */ +static mrb_value +set_core_subtract(mrb_state *mrb, mrb_value self) +{ + mrb_value other = mrb_get_arg1(mrb); + + if (!set_is_set(other)) { + return mrb_false_value(); + } + + kset_t *self_set = set_get_kset(mrb, self); + if (kset_is_empty(self_set)) return mrb_true_value(); + + kset_t *other_set = set_get_kset(mrb, other); + if (kset_is_empty(other_set)) return mrb_true_value(); + + /* Remove all elements that are in other set */ + KSET_FOREACH(other_set, k) { + mrb_value key = kset_key(other_set, k); + kset_iter_t self_k = kset_get(mrb, self_set, key); + if (!kset_is_end(self_set, self_k)) { + kset_del(mrb, self_set, self_k); + } + } + + return mrb_true_value(); +} + +/* + * Core implementation of Set-to-Set union + * This is an internal method that will be called from Ruby + */ +static mrb_value +set_core_union(mrb_state *mrb, mrb_value self) +{ + mrb_value other = mrb_get_arg1(mrb); + + if (!set_is_set(other)) { + return mrb_nil_value(); + } + + /* Create a new set by duplicating self */ + mrb_value result = mrb_obj_dup(mrb, self); + kset_t *result_set = set_get_kset(mrb, result); + if (kset_is_uninitialized(result_set)) { + /* If self is empty, initialize the set */ + kset_init_data(mrb, result_set, KSET_INITIAL_SIZE); + } + + /* Add all elements from other set */ + kset_t *other_set = set_get_kset(mrb, other); + if (!kset_is_uninitialized(other_set)) { + kset_copy_merge(mrb, result_set, other_set); + } + + return result; +} + +/* + * Core implementation of Set-to-Set difference + * This is an internal method that will be called from Ruby + */ +static mrb_value +set_core_difference(mrb_state *mrb, mrb_value self) +{ + mrb_value other = mrb_get_arg1(mrb); + + if (!set_is_set(other)) { + return mrb_nil_value(); + } + + /* Create a new set by duplicating self */ + mrb_value result = mrb_obj_dup(mrb, self); + kset_t *result_set = set_get_kset(mrb, result); + if (kset_is_uninitialized(result_set)) { + /* If self is empty, return an empty set */ + return result; + } + + /* Remove all elements that are in other set */ + kset_t *other_set = set_get_kset(mrb, other); + if (!kset_is_uninitialized(other_set)) { + KSET_FOREACH(other_set, k) { + mrb_value key = kset_key(other_set, k); + kset_iter_t result_k = kset_get(mrb, result_set, key); + if (!kset_is_end(result_set, result_k)) { + kset_del(mrb, result_set, result_k); + } + } + } + + return result; +} + + +/* + * Core implementation of Set-to-Set intersection + * This is an internal method that will be called from Ruby + */ +static mrb_value +set_core_intersection(mrb_state *mrb, mrb_value self) +{ + mrb_value other = mrb_get_arg1(mrb); + + if (!set_is_set(other)) { + return mrb_nil_value(); + } + + /* Create a new empty set of the same class as self */ + mrb_value result = mrb_obj_new(mrb, mrb_obj_class(mrb, self), 0, NULL); + kset_t *result_set = set_get_kset(mrb, result); + + kset_t *self_set = set_get_kset(mrb, self); + if (kset_is_uninitialized(self_set)) return result; + + kset_t *other_set = set_get_kset(mrb, other); + if (kset_is_uninitialized(other_set)) return result; + + KSET_FOREACH(other_set, k) { + mrb_value key = kset_key(other_set, k); + kset_iter_t self_k = kset_get(mrb, self_set, key); + + /* If key exists in self, add it to result */ + if (!kset_is_end(self_set, self_k)) { + kset_put(mrb, result_set, key); + mrb_field_write_barrier_value(mrb, kset_to_rset(result_set), key); + } + } + + return result; +} + + +/* + * Core implementation of Set-to-Set XOR (symmetric difference) + * This is an internal method that will be called from Ruby + */ +static mrb_value +set_core_xor(mrb_state *mrb, mrb_value self) +{ + mrb_value other = mrb_get_arg1(mrb); + + if (!set_is_set(other)) { + return mrb_nil_value(); + } + + mrb_value result = mrb_obj_new(mrb, mrb_obj_class(mrb, self), 0, NULL); + kset_t *result_set = set_get_kset(mrb, result); + kset_t *self_set, *other_set; + + self_set = set_get_kset(mrb, self); + other_set = set_get_kset(mrb, other); + + /* Handle empty sets */ + if (kset_is_empty(self_set)) { + if (!kset_is_empty(other_set)) { + kset_copy_merge(mrb, result_set, other_set); + } + return result; + } + if (kset_is_empty(other_set)) { + kh_replace(set_val, mrb, result_set, self_set); + return result; + } + + /* Add elements from self that are not in other */ + int ai = mrb_gc_arena_save(mrb); + KSET_FOREACH(self_set, k) { + mrb_value key = kset_key(self_set, k); + kset_iter_t other_k = kset_get(mrb, other_set, key); + + /* Add to result if not in other */ + if (kset_is_end(other_set, other_k)) { + kset_put(mrb, result_set, key); + mrb_field_write_barrier_value(mrb, kset_to_rset(result_set), key); + } + mrb_gc_arena_restore(mrb, ai); + } + + /* Add elements from other that are not in self */ + KSET_FOREACH(other_set, k) { + mrb_value key = kset_key(other_set, k); + kset_iter_t self_k = kset_get(mrb, self_set, key); + + /* Add to result if not in self */ + if (kset_is_end(self_set, self_k)) { + kset_put(mrb, result_set, key); + mrb_field_write_barrier_value(mrb, kset_to_rset(result_set), key); + } + mrb_gc_arena_restore(mrb, ai); + } + + return result; +} + +/* + * call-seq: + * set == other -> true or false + * + * Returns true if two sets are equal. + */ +/* + * call-seq: + * set.eql?(other) -> true or false + * + * Returns true if two sets are equal. + */ +static mrb_value +set_equal(mrb_state *mrb, mrb_value self) +{ + mrb_value other = mrb_get_arg1(mrb); + + /* Fast path: same object */ + if (mrb_obj_equal(mrb, self, other)) { + return mrb_true_value(); + } + + /* Only compare with other Set objects */ + if (!set_is_set(other)) { + return mrb_false_value(); + } + + kset_t *self_set = set_get_kset(mrb, self); + kset_t *other_set = set_get_kset(mrb, other); + + /* Fast path: both empty */ + if (kset_is_empty(self_set) && kset_is_empty(other_set)) { + return mrb_true_value(); + } + + /* Fast path: different sizes */ + if (kset_size(self_set) != kset_size(other_set)) { + return mrb_false_value(); + } + + /* Compare elements: iterate through the smaller hash for efficiency */ + int ai = mrb_gc_arena_save(mrb); + KSET_FOREACH(self_set, k) { + kset_iter_t k2 = kset_get(mrb, other_set, kset_key(self_set, k)); + if (kset_is_end(other_set, k2)) { + return mrb_false_value(); /* Element in self not found in other */ + } + mrb_gc_arena_restore(mrb, ai); + } + + return mrb_true_value(); +} + +/* + * call-seq: + * set.hash -> integer + * + * Compute a hash-code for this set. + * Uses an improved hash algorithm for better distribution. + */ +static mrb_value +set_hash_m(mrb_state *mrb, mrb_value self) +{ + kset_t *set = set_get_kset(mrb, self); + + /* Use order-independent hash algorithm for sets */ + uint64_t hash = 0; /* Start with zero for XOR accumulation */ + + /* Include the size of the set in the hash */ + size_t size = kset_size(set); + hash ^= size * GOLDEN_RATIO_PRIME; + + if (!kset_is_uninitialized(set) && size > 0) { + /* Process each element - order independent using XOR */ + int ai = mrb_gc_arena_save(mrb); + KSET_FOREACH(set, k) { + /* Get element's hash code */ + khint_t elem_hash = (khint_t)mrb_obj_hash_code(mrb, kset_key(set, k)); + + /* XOR is commutative, so order doesn't matter */ + hash ^= elem_hash * GOLDEN_RATIO_PRIME; + + mrb_gc_arena_restore(mrb, ai); + } + } + + /* Final mixing to improve distribution */ + hash ^= hash >> 32; + + return mrb_fixnum_value((mrb_int)hash); +} + +/* + * call-seq: + * set.superset?(other) -> true or false + * set >= other -> true or false + * + * Returns true if the set is a superset of the given set. + */ +static mrb_value +set_superset_p(mrb_state *mrb, mrb_value self) +{ + mrb_value other = mrb_get_arg1(mrb); + + /* Check if other is a Set */ + set_check_type(mrb, other); + + kset_t *self_set = set_get_kset(mrb, self); + kset_t *other_set = set_get_kset(mrb, other); + + /* Handle empty sets */ + if (kset_is_empty(other_set)) { + return mrb_true_value(); /* Empty set is a subset of any set */ + } + + if (kset_is_uninitialized(self_set)) { + return mrb_false_value(); /* Empty set is not a superset of a non-empty set */ + } + + /* Check size first - a superset must be at least as large as the subset */ + if (kset_size(self_set) < kset_size(other_set)) { + return mrb_false_value(); + } + + /* Check if all elements in other are in self */ + int ai = mrb_gc_arena_save(mrb); + KSET_FOREACH(other_set, k) { + kset_iter_t self_k = kset_get(mrb, self_set, kset_key(other_set, k)); + if (kset_is_end(self_set, self_k)) { + return mrb_false_value(); /* Element in other not found in self */ + } + mrb_gc_arena_restore(mrb, ai); + } + + return mrb_true_value(); +} + +/* + * call-seq: + * set.proper_superset?(other) -> true or false + * set > other -> true or false + * + * Returns true if the set is a proper superset of the given set. + */ +static mrb_value +set_proper_superset_p(mrb_state *mrb, mrb_value self) +{ + mrb_value other = mrb_get_arg1(mrb); + + /* Check if other is a Set */ + set_check_type(mrb, other); + + kset_t *self_set = set_get_kset(mrb, self); + kset_t *other_set = set_get_kset(mrb, other); + + /* Handle empty sets */ + if (kset_is_empty(other_set)) { + /* Empty set is a proper subset of any non-empty set */ + return !kset_is_empty(self_set) ? mrb_true_value() : mrb_false_value(); + } + + if (kset_is_uninitialized(self_set)) { + return mrb_false_value(); /* Empty set is not a proper superset of any set */ + } + + /* For a proper superset, self must be strictly larger than other */ + if (kset_size(self_set) <= kset_size(other_set)) { + return mrb_false_value(); + } + + /* Check if all elements in other are in self */ + int ai = mrb_gc_arena_save(mrb); + KSET_FOREACH(other_set, k) { + kset_iter_t self_k = kset_get(mrb, self_set, kset_key(other_set, k)); + if (kset_is_end(self_set, self_k)) { + return mrb_false_value(); /* Element in other not found in self */ + } + mrb_gc_arena_restore(mrb, ai); + } + + return mrb_true_value(); +} + +/* + * call-seq: + * set.subset?(other) -> true or false + * set <= other -> true or false + * + * Returns true if the set is a subset of the given set. + */ +static mrb_value +set_subset_p(mrb_state *mrb, mrb_value self) +{ + mrb_value other = mrb_get_arg1(mrb); + + /* Check if other is a Set */ + set_check_type(mrb, other); + + kset_t *self_set = set_get_kset(mrb, self); + kset_t *other_set = set_get_kset(mrb, other); + + /* Handle empty sets */ + if (kset_is_empty(self_set)) { + return mrb_true_value(); /* Empty set is a subset of any set */ + } + + if (kset_is_uninitialized(other_set)) { + return mrb_false_value(); /* Non-empty set is not a subset of an empty set */ + } + + /* Check size first - a subset cannot be larger than its superset */ + if (kset_size(other_set) < kset_size(self_set)) { + return mrb_false_value(); + } + + /* Check if all elements in self are in other */ + int ai = mrb_gc_arena_save(mrb); + KSET_FOREACH(self_set, k) { + kset_iter_t other_k = kset_get(mrb, other_set, kset_key(self_set, k)); + if (kset_is_end(other_set, other_k)) { + return mrb_false_value(); /* Element in self not found in other */ + } + mrb_gc_arena_restore(mrb, ai); + } + + return mrb_true_value(); +} + +/* + * call-seq: + * set.proper_subset?(other) -> true or false + * set < other -> true or false + * + * Returns true if the set is a proper subset of the given set. + */ +static mrb_value +set_proper_subset_p(mrb_state *mrb, mrb_value self) +{ + mrb_value other = mrb_get_arg1(mrb); + + /* Check if other is a Set */ + set_check_type(mrb, other); + + kset_t *self_set = set_get_kset(mrb, self); + kset_t *other_set = set_get_kset(mrb, other); + + /* Handle empty sets */ + if (kset_is_empty(self_set)) { + /* Empty set is a proper subset of any non-empty set */ + return !kset_is_empty(other_set) ? mrb_true_value() : mrb_false_value(); + } + + if (kset_is_uninitialized(other_set)) { + return mrb_false_value(); /* Non-empty set is not a proper subset of an empty set */ + } + + /* For a proper subset, self must be strictly smaller than other */ + if (kset_size(other_set) <= kset_size(self_set)) { + return mrb_false_value(); + } + + /* Check if all elements in self are in other */ + int ai = mrb_gc_arena_save(mrb); + KSET_FOREACH(self_set, k) { + kset_iter_t other_k = kset_get(mrb, other_set, kset_key(self_set, k)); + if (kset_is_end(other_set, other_k)) { + return mrb_false_value(); /* Element in self not found in other */ + } + mrb_gc_arena_restore(mrb, ai); + } + + return mrb_true_value(); +} + +/* + * call-seq: + * set.intersect?(other) -> true or false + * + * Returns true if the set and the given set have at least one element in common. + */ +static mrb_value +set_intersect_p(mrb_state *mrb, mrb_value self) +{ + mrb_value other = mrb_get_arg1(mrb); + + /* Check if other is a Set */ + set_check_type(mrb, other); + + kset_t *self_set = set_get_kset(mrb, self); + kset_t *other_set = set_get_kset(mrb, other); + + /* Handle empty sets */ + if (kset_is_empty(self_set) || kset_is_empty(other_set)) { + return mrb_false_value(); /* Empty sets have no elements in common */ + } + + /* Iterate through the smaller set for efficiency */ + int ai = mrb_gc_arena_save(mrb); + if (kset_size(self_set) < kset_size(other_set)) { + KSET_FOREACH(self_set, k) { + kset_iter_t other_k = kset_get(mrb, other_set, kset_key(self_set, k)); + if (!kset_is_end(other_set, other_k)) { + return mrb_true_value(); /* Found a common element */ + } + mrb_gc_arena_restore(mrb, ai); + } + } + else { + KSET_FOREACH(other_set, k) { + kset_iter_t self_k = kset_get(mrb, self_set, kset_key(other_set, k)); + if (!kset_is_end(self_set, self_k)) { + return mrb_true_value(); /* Found a common element */ + } + mrb_gc_arena_restore(mrb, ai); + } + } + + return mrb_false_value(); /* No common elements found */ +} + +/* + * call-seq: + * set.disjoint?(other) -> true or false + * + * Returns true if the set and the given set have no elements in common. + */ +static mrb_value +set_disjoint_p(mrb_state *mrb, mrb_value self) +{ + mrb_value result = set_intersect_p(mrb, self); + return mrb_bool_value(!mrb_test(result)); +} + +/* + * call-seq: + * set <=> other -> -1, 0, +1, or nil + * + * Compares this set with another set. + * Returns -1 if this set is a proper subset of the other set, + * +1 if this set is a proper superset of the other set, + * 0 if the sets are equal, + * or nil if the sets cannot be compared (they are neither subsets nor supersets). + */ +static mrb_value +set_cmp(mrb_state *mrb, mrb_value self) +{ + mrb_value other = mrb_get_arg1(mrb); + + if (!set_is_set(other)) { + return mrb_nil_value(); + } + + kset_t *self_set = set_get_kset(mrb, self); + kset_t *other_set = set_get_kset(mrb, other); + + /* Handle empty sets */ + if (kset_is_empty(self_set)) { + if (kset_is_empty(other_set)) { + return mrb_fixnum_value(0); /* Both empty, they're equal */ + } + return mrb_fixnum_value(-1); /* Empty set is a proper subset of any non-empty set */ + } + + if (kset_is_empty(other_set)) { + return mrb_fixnum_value(1); /* Any non-empty set is a proper superset of an empty set */ + } + + /* Compare sizes */ + mrb_int size_diff = kset_size(self_set) - kset_size(other_set); + + if (size_diff < 0) { + /* self might be a proper subset of other */ + int ai = mrb_gc_arena_save(mrb); + KSET_FOREACH(self_set, k) { + kset_iter_t other_k = kset_get(mrb, other_set, kset_key(self_set, k)); + if (kset_is_end(other_set, other_k)) { + /* Not a subset */ + return mrb_nil_value(); /* Not comparable */ + } + mrb_gc_arena_restore(mrb, ai); + } + + /* All elements of self are in other, and self is smaller than other */ + return mrb_fixnum_value(-1); /* self is a proper subset of other */ + } + else if (size_diff > 0) { + /* self might be a proper superset of other */ + int ai = mrb_gc_arena_save(mrb); + KSET_FOREACH(other_set, k) { + kset_iter_t self_k = kset_get(mrb, self_set, kset_key(other_set, k)); + if (kset_is_end(self_set, self_k)) { + /* Not a superset */ + return mrb_nil_value(); /* Not comparable */ + } + mrb_gc_arena_restore(mrb, ai); + } + + /* All elements of other are in self, and self is larger than other */ + return mrb_fixnum_value(1); /* self is a proper superset of other */ + } + else { /* size_diff == 0 */ + /* Same size, check if they're equal */ + mrb_bool is_equal = TRUE; + + int ai3 = mrb_gc_arena_save(mrb); + KSET_FOREACH(self_set, k) { + kset_iter_t other_k = kset_get(mrb, other_set, kset_key(self_set, k)); + if (kset_is_end(other_set, other_k)) { + is_equal = FALSE; + break; + } + mrb_gc_arena_restore(mrb, ai3); + } + + if (is_equal) { + return mrb_fixnum_value(0); /* Sets are equal */ + } + } + + /* Sets are not comparable */ + return mrb_nil_value(); +} + +/* + * call-seq: + * set.join(separator = nil) -> string + * + * Returns a string created by converting each element of the set to a string, + * separated by the given separator. + */ +static mrb_value +set_join(mrb_state *mrb, mrb_value self) +{ + mrb_value separator = mrb_nil_value(); + mrb_get_args(mrb, "|S", &separator); + + kset_t *set = set_get_kset(mrb, self); + if (kset_is_empty(set)) { + return mrb_str_new_lit(mrb, ""); + } + + /* Create result string */ + mrb_value result = mrb_str_new_capa(mrb, 64); /* Initial capacity */ + mrb_bool first = TRUE; + + /* Iterate through all elements */ + int ai = mrb_gc_arena_save(mrb); + KSET_FOREACH(set, k) { + if (!first) { + if (!mrb_nil_p(separator)) { + mrb_str_cat(mrb, result, RSTRING_PTR(separator), RSTRING_LEN(separator)); + } + } + else { + first = FALSE; + } + + mrb_value elem = kset_key(set, k); + mrb_value str = mrb_obj_as_string(mrb, elem); + mrb_str_cat_str(mrb, result, str); + + mrb_gc_arena_restore(mrb, ai); + } + + return result; +} + +/* + * call-seq: + * set.inspect -> string + * set.to_s -> string + * + * Returns a string representation of the set. + * Format: Set[elem1, elem2, ...] + */ +static mrb_value +set_inspect(mrb_state *mrb, mrb_value self) +{ + struct RClass* c = mrb_obj_class(mrb, self); + const char* classname = mrb_class_name(mrb, c); + kset_t *set = set_get_kset(mrb, self); + + /* Handle empty set */ + if (kset_is_empty(set)) { + return mrb_format(mrb, "%s[]", classname); + } + + /* Handle recursive inspection */ + if (MRB_RECURSIVE_UNARY_P(mrb, MRB_SYM(inspect), self)) { + return mrb_format(mrb, "%s[...]", classname); + } + + /* Estimate buffer size based on set size */ + size_t size = kset_size(set); + size_t buffer_size = 16 + strlen(classname) + (size * 8); /* Rough estimate */ + + /* Create the beginning of the string with pre-allocated capacity */ + mrb_value result_str = mrb_str_new_capa(mrb, buffer_size); + mrb_str_cat_cstr(mrb, result_str, classname); + mrb_str_cat_lit(mrb, result_str, "["); + + /* Iterate through all elements */ + mrb_bool first = TRUE; + int ai = mrb_gc_arena_save(mrb); + KSET_FOREACH(set, k) { + if (!first) { + mrb_str_cat_lit(mrb, result_str, ", "); + } + else { + first = FALSE; + } + + mrb_value elem = kset_key(set, k); + mrb_value entry_str = mrb_inspect(mrb, elem); + mrb_str_cat_str(mrb, result_str, entry_str); + + mrb_gc_arena_restore(mrb, ai); + } + + /* Add the closing part */ + mrb_str_cat_lit(mrb, result_str, "]"); + + return result_str; +} + +/* + * call-seq: + * set.reset -> self + * + * Resets the internal state after modification to existing elements. + * This is necessary when the hash value of objects in the set has changed. + * It rebuilds the hash table to ensure all elements can be found. + */ +static mrb_value +set_reset(mrb_state *mrb, mrb_value self) +{ + mrb_check_frozen_value(mrb, self); + + kset_t *set = set_get_kset(mrb, self); + if (!kset_is_empty(set)) { + kset_resize(mrb, set, kset_size(set)); + } + + return self; +} + +/* + * call-seq: + * set.add_all(*objects) -> self + * + * Adds multiple objects to the set and returns self. + */ +static mrb_value +set_add_all(mrb_state *mrb, mrb_value self) +{ + const mrb_value *argv; + mrb_int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + kset_t *set = set_get_kset(mrb, self); + set_ensure_initialized(mrb, set); + + int ai = mrb_gc_arena_save(mrb); + for (mrb_int i = 0; i < argc; i++) { + kset_put(mrb, set, argv[i]); + mrb_field_write_barrier_value(mrb, kset_to_rset(set), argv[i]); + mrb_gc_arena_restore(mrb, ai); + } + + return self; +} + +/* + * Optimized implementation for flattening sets + * Uses a more efficient algorithm with minimal memory usage + */ + +/* Small array for tracking seen object IDs to detect cycles */ +#define MAX_NESTED_DEPTH 16 + +/* + * Recursively flattens a set by merging nested sets into the target set. + * This is an internal helper function that does not call back to the VM. + * + * @param mrb The mruby state + * @param target The target set table to add elements to + * @param source The source set table to flatten + * @param seen_count Pointer to the current count of seen sets (recursion depth) + * @return 0 on success, -1 if recursion depth exceeds maximum + */ +static int +set_flatten_recursive(mrb_state *mrb, kset_t *target, kset_t *source, int *seen_count) +{ + if (!source || !target) return 0; + if (*seen_count >= MAX_NESTED_DEPTH) return -1; + + struct RBasic *target_obj = kset_to_rset(target); + int ai = mrb_gc_arena_save(mrb); + /* Process each element in the source set */ + KSET_FOREACH(source, k) { + mrb_value elem = kset_key(source, k); + + /* Check if element is a Set */ + if (set_is_set(elem)) { + /* Increment recursion depth */ + (*seen_count)++; + + /* Recursively flatten the nested set */ + kset_t *nested_set = set_get_kset(mrb, elem); + if (nested_set) { + int nested_result = set_flatten_recursive(mrb, target, nested_set, seen_count); + if (nested_result < 0) { + return nested_result; /* Propagate error code */ + } + } + + /* Decrement recursion depth */ + (*seen_count)--; + } + else { + /* Add non-Set element directly */ + kset_put(mrb, target, elem); + mrb_field_write_barrier_value(mrb, target_obj, elem); + } + mrb_gc_arena_restore(mrb, ai); + } + return 0; +} + +/* + * Helper function: Check if a set has any nested sets + * Returns TRUE if nested sets found, FALSE otherwise + */ +static mrb_bool +set_has_nested_sets(mrb_state *mrb, kset_t *set) +{ + if (kset_is_empty(set)) return FALSE; + + int ai = mrb_gc_arena_save(mrb); + KSET_FOREACH(set, k) { + if (set_is_set(kset_key(set, k))) { + return TRUE; + } + mrb_gc_arena_restore(mrb, ai); + } + return FALSE; +} + +/* + * Helper function: Perform the actual flattening operation + * Returns the flattened set (creates a new kset_t*) + */ +static void +set_do_flatten(mrb_state *mrb, kset_t *result_set, kset_t *source_set) +{ + int seen_count = 0; + + if (set_flatten_recursive(mrb, result_set, source_set, &seen_count) < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "flatten recursion depth too deep"); + } +} + +/* + * call-seq: + * set.flatten -> new_set + * + * Returns a new set that is a flattened version of this set. + * Recursively flattens nested sets. + */ +static mrb_value +set_flatten(mrb_state *mrb, mrb_value self) +{ + kset_t *self_set = set_get_kset(mrb, self); + + /* Fast path for empty sets */ + if (kset_is_empty(self_set)) { + return mrb_obj_new(mrb, mrb_obj_class(mrb, self), 0, NULL); + } + + /* Fast path: check if there are any nested sets */ + if (!set_has_nested_sets(mrb, self_set)) { + return mrb_obj_dup(mrb, self); + } + + /* Create a new set and flatten into it */ + mrb_value result = mrb_obj_new(mrb, mrb_obj_class(mrb, self), 0, NULL); + kset_t *result_set = set_get_kset(mrb, result); + + set_do_flatten(mrb, result_set, self_set); + + return result; +} + +/* + * call-seq: + * set.flatten! -> self or nil + * + * Replaces the contents of this set with a flattened version of itself. + * Returns self if flattened, nil if no changes were made. + */ +static mrb_value +set_flatten_bang(mrb_state *mrb, mrb_value self) +{ + mrb_check_frozen_value(mrb, self); + + kset_t *self_set = set_get_kset(mrb, self); + if (kset_is_empty(self_set)) { + return mrb_nil_value(); /* No changes needed for empty set */ + } + + /* Check if there are any nested sets */ + if (!set_has_nested_sets(mrb, self_set)) { + return mrb_nil_value(); /* No nested sets, no changes needed */ + } + + /* Create a temporary set to flatten into (GC-protected) */ + mrb_value temp = mrb_obj_new(mrb, mrb_obj_class(mrb, self), 0, NULL); + kset_t *temp_set = set_get_kset(mrb, temp); + + set_do_flatten(mrb, temp_set, self_set); + + /* Swap the data between self and temp */ + kset_t temp_data = *self_set; + *self_set = *temp_set; + *temp_set = temp_data; + + return self; +} + +/* + * call-seq: + * set.delete_all(*objects) -> self + * + * Deletes multiple objects from the set and returns self. + */ +static mrb_value +set_delete_all(mrb_state *mrb, mrb_value self) +{ + const mrb_value *argv; + mrb_int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + kset_t *ks = set_get_kset(mrb, self); + if (kset_is_uninitialized(ks)) return self; + + int ai = mrb_gc_arena_save(mrb); + for (mrb_int i = 0; i < argc; i++) { + kset_iter_t k = kset_get(mrb, ks, argv[i]); + if (!kset_is_end(ks, k)) { + kset_del(mrb, ks, k); + } + mrb_gc_arena_restore(mrb, ai); + } + + return self; +} + +/* + * call-seq: + * set.include_all?(*objects) -> true or false + * + * Returns true if the set contains all of the given objects. + */ +static mrb_value +set_include_all_p(mrb_state *mrb, mrb_value self) +{ + const mrb_value *argv; + mrb_int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + kset_t *ks = set_get_kset(mrb, self); + if (kset_is_uninitialized(ks)) return mrb_false_value(); + + for (mrb_int i = 0; i < argc; i++) { + kset_iter_t k = kset_get(mrb, ks, argv[i]); + if (kset_is_end(ks, k)) { + return mrb_false_value(); + } + } + + return mrb_true_value(); +} + +/* + * call-seq: + * set.include_any?(*objects) -> true or false + * + * Returns true if the set contains any of the given objects. + */ +static mrb_value +set_include_any_p(mrb_state *mrb, mrb_value self) +{ + const mrb_value *argv; + mrb_int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + kset_t *ks = set_get_kset(mrb, self); + if (kset_is_empty(ks)) return mrb_false_value(); + + for (mrb_int i = 0; i < argc; i++) { + kset_iter_t k = kset_get(mrb, ks, argv[i]); + if (!kset_is_end(ks, k)) { + return mrb_true_value(); + } + } + + return mrb_false_value(); +} + +/* + * call-seq: + * Set[*ary] -> new_set + * + * Creates a new set containing the given objects. + */ +static mrb_value +set_s_create(mrb_state *mrb, mrb_value klass) +{ + const mrb_value *argv; + mrb_int argc; + + mrb_get_args(mrb, "*", &argv, &argc); + + /* Optimized direct creation */ + mrb_value set = mrb_obj_new(mrb, mrb_class_ptr(klass), 0, NULL); + kset_t *ks = set_get_kset(mrb, set); + + for (mrb_int i = 0; i < argc; i++) { + kset_put(mrb, ks, argv[i]); + mrb_field_write_barrier_value(mrb, kset_to_rset(ks), argv[i]); + } + + return set; +} + +static const mrb_mt_entry set_rom_entries[] = { + MRB_MT_ENTRY(set_size, MRB_SYM(size), MRB_ARGS_NONE()), + MRB_MT_ENTRY(set_size, MRB_SYM(length), MRB_ARGS_NONE()), + MRB_MT_ENTRY(set_empty_p, MRB_SYM_Q(empty), MRB_ARGS_NONE()), + MRB_MT_ENTRY(set_clear, MRB_SYM(clear), MRB_ARGS_NONE()), + MRB_MT_ENTRY(set_to_a, MRB_SYM(to_a), MRB_ARGS_NONE()), + MRB_MT_ENTRY(set_include_p, MRB_SYM_Q(include), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_include_p, MRB_SYM_Q(member), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_include_p, MRB_OPSYM(eqq), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_add, MRB_SYM(add), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_add, MRB_OPSYM(lshift), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_add_p, MRB_SYM_Q(add), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_delete, MRB_SYM(delete), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_delete_p, MRB_SYM_Q(delete), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_init, MRB_SYM(__init), MRB_ARGS_NONE()), + MRB_MT_ENTRY(set_core_merge, MRB_SYM(__merge), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_core_subtract, MRB_SYM(__subtract), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_core_union, MRB_SYM(__union), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_core_difference, MRB_SYM(__difference), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_core_intersection, MRB_SYM(__intersection), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_core_xor, MRB_SYM(__xor), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_equal, MRB_OPSYM(eq), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_hash_m, MRB_SYM(hash), MRB_ARGS_NONE()), + MRB_MT_ENTRY(set_join, MRB_SYM(join), MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(set_inspect, MRB_SYM(inspect), MRB_ARGS_NONE()), + MRB_MT_ENTRY(set_inspect, MRB_SYM(to_s), MRB_ARGS_NONE()), + MRB_MT_ENTRY(set_reset, MRB_SYM(reset), MRB_ARGS_NONE()), + MRB_MT_ENTRY(set_add_all, MRB_SYM(add_all), MRB_ARGS_ANY()), + MRB_MT_ENTRY(set_delete_all, MRB_SYM(delete_all), MRB_ARGS_ANY()), + MRB_MT_ENTRY(set_include_all_p, MRB_SYM_Q(include_all), MRB_ARGS_ANY()), + MRB_MT_ENTRY(set_include_any_p, MRB_SYM_Q(include_any), MRB_ARGS_ANY()), + MRB_MT_ENTRY(set_superset_p, MRB_SYM_Q(superset), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_superset_p, MRB_OPSYM(ge), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_proper_superset_p, MRB_SYM_Q(proper_superset), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_proper_superset_p, MRB_OPSYM(gt), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_subset_p, MRB_SYM_Q(subset), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_subset_p, MRB_OPSYM(le), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_proper_subset_p, MRB_SYM_Q(proper_subset), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_proper_subset_p, MRB_OPSYM(lt), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_intersect_p, MRB_SYM_Q(intersect), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_disjoint_p, MRB_SYM_Q(disjoint), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_cmp, MRB_OPSYM(cmp), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(set_flatten, MRB_SYM(flatten), MRB_ARGS_NONE()), + MRB_MT_ENTRY(set_flatten_bang, MRB_SYM_B(flatten), MRB_ARGS_NONE()), +}; + +void +mrb_mruby_set_gem_init(mrb_state *mrb) +{ + struct RClass *set; + + set = mrb_define_class(mrb, "Set", mrb->object_class); + MRB_SET_INSTANCE_TT(set, MRB_TT_SET); + + mrb_include_module(mrb, set, mrb_module_get(mrb, "Enumerable")); + + mrb_define_class_method(mrb, set, "[]", set_s_create, MRB_ARGS_ANY()); + + mrb_define_private_method(mrb, set, "initialize_copy", set_init_copy, MRB_ARGS_REQ(1)); + + MRB_MT_INIT_ROM(mrb, set, set_rom_entries); + + mrb_define_alias(mrb, set, "eql?", "=="); +} + +void +mrb_mruby_set_gem_final(mrb_state *mrb) +{ +} diff --git a/mrbgems/mruby-set/test/set.rb b/mrbgems/mruby-set/test/set.rb new file mode 100644 index 0000000000..ca0add4c4f --- /dev/null +++ b/mrbgems/mruby-set/test/set.rb @@ -0,0 +1,764 @@ +## +## Set Test +## + +assert("Set.new") do + assert_nothing_raised { + Set.new() + Set.new(nil) + Set.new([]) + Set.new([1,2]) + Set.new(1..3) + } + assert_raise(ArgumentError) { Set.new(false) } + assert_raise(ArgumentError) { Set.new(1) } + assert_raise(ArgumentError) { Set.new(1,2) } + + ary = [2,4,6,4] + set = Set.new(ary) + ary.clear + assert_false set.empty? + assert_equal(3, set.size) + + ary = [1,2,3] + + s = Set.new(ary) { |o| o * 2 } + assert_equal([2,4,6], s.sort) +end + +assert("Set.[]") do + assert_nothing_raised { + Set[] + Set[nil] + Set[[]] + Set[[1,2]] + Set['a'..'c'] + Set[false] + Set[1] + Set[1,2] + } + + ary = [2,4,6,4] + set = Set[ary] + ary.clear + assert_false set.empty? + assert_equal([[]], set.to_a) +end + +assert("Set#clone") do + set1 = Set.new + set2 = set1.clone + + assert_false set1.equal?(set2) # assert_not_same + + assert_equal(set1, set2) + + set1 << 'abc' + + assert_equal(Set.new, set2) +end + +assert("Set#dup") do + set1 = Set[1,2] + set2 = set1.dup + + assert_false set1.equal?(set2) # assert_not_same + + assert_equal(set1, set2) + + set1 << 'abc' + + assert_equal(Set[1,2], set2) +end + +assert("Set#size") do + assert_equal(0, Set[].size) + assert_equal(1, Set[nil].size) + assert_equal(1, Set[[]].size) + assert_equal(1, Set[[nil]].size) +end + +assert("Set#empty?") do + assert_true Set[].empty? + assert_false Set[1,2].empty? +end + +assert("Set#clear") do + set = Set[1,2] + ret = set.clear + + assert_true set.equal?(ret) # assert_same + assert_true set.empty? +end + +assert("Set#replace") do + set = Set[1,2] + ret = set.replace(['a','b','c']) + + assert_true set.equal?(ret) # assert_same + assert_equal(Set['a','b','c'], set) + + + set = Set[1,2] + ret = set.replace(Set['a','b','c']) + + assert_true set.equal?(ret) # assert_same + assert_equal(Set['a','b','c'], set) +end + +assert("Set#to_a") do + set = Set[1,2,3,2] + ary = set.to_a + + assert_equal([1,2,3], ary.sort) +end + +assert("Set#flatten") do + # test1 + set1 = Set[ + 1, + Set[ + 5, + Set[7, + Set[0] + ], + Set[6,2], + 1 + ], + 3, + Set[3,4] + ] + + set2 = set1.flatten + set3 = Set.new(0..7) + + assert_false set1.equal?(set2) # assert_not_same + assert_equal(set3, set2) + + + # test2; multiple occurrences of a set in an set + set1 = Set[1, 2] + set2 = Set[set1, Set[set1, 4], 3] + + assert_nothing_raised { + set3 = set2.flatten + } + + assert_equal(Set.new(1..4), set3) + + + # test3; recursion + set2 = Set[] + set1 = Set[1, set2] + set2.add(set1) + + assert_raise(ArgumentError) { + set1.flatten + } + + # test4; miscellaneous + empty = Set[] + set = Set[Set[empty, "a"], Set[empty, "b"]] + + assert_nothing_raised { + set.flatten + } +end + +assert("Set#flatten!") do + # test1 + set1 = Set[ + 1, + Set[ + 5, + Set[7, + Set[0] + ], + Set[6,2], + 1 + ], + 3, + Set[3,4] + ] + + set3 = Set.new(0..7) + orig_set1 = set1 + set1.flatten! + + assert_true orig_set1.equal?(set1) # assert_same + assert_equal(set3, set1) + + + # test2; multiple occurrences of a set in an set + set1 = Set[1, 2] + set2 = Set[set1, Set[set1, 4], 3] + + assert_nothing_raised { + set2.flatten! + } + + assert_equal(Set.new(1..4), set2) + + + # test3; recursion + set2 = Set[] + set1 = Set[1, set2] + set2.add(set1) + + assert_raise(ArgumentError) { + set1.flatten! + } + + # test4; miscellaneous + assert_nil(Set.new(0..31).flatten!) + + x = Set[Set[],Set[1,2]].flatten! + y = Set[1,2] + + assert_equal(x, y) +end + +assert("Set#include?") do + set = Set[1,2,3] + + assert_true set.include?(1) + assert_true set.include?(2) + assert_true set.include?(3) + assert_false set.include?(0) + assert_false set.include?(nil) + + set = Set["1",nil,"2",nil,"0","1",false] + assert_true set.include?(nil) + assert_true set.include?(false) + assert_true set.include?("1") + assert_false set.include?(0) + assert_false set.include?(true) + assert_false set.include?(2) +end + +assert("Set#superset?") do + set = Set[1,2,3] + + assert_raise(ArgumentError) { set.superset?(nil) } + assert_raise(ArgumentError) { set.superset?(2) } + assert_raise(ArgumentError) { set.superset?([2]) } + + assert_true set.superset?(Set[]) + assert_true set.superset?(Set[1,2]) + assert_true set.superset?(Set[1,2,3]) + assert_false set.superset?(Set[1,2,3,4]) + assert_false set.superset?(Set[1,4]) + + assert_true set >= Set[1,2] + assert_true set >= Set[1,2,3] + + assert_true Set[].superset?(Set[]) +end + +assert("Set#proper_superset?") do + set = Set[1,2,3] + + assert_raise(ArgumentError) { set.proper_superset?(nil) } + assert_raise(ArgumentError) { set.proper_superset?(2) } + assert_raise(ArgumentError) { set.proper_superset?([2]) } + + assert_true set.proper_superset?(Set[]) + assert_true set.proper_superset?(Set[1,2]) + assert_false set.proper_superset?(Set[1,2,3]) + assert_false set.proper_superset?(Set[1,2,3,4]) + assert_false set.proper_superset?(Set[1,4]) + + assert_true set > Set[1,2] + assert_false set > Set[1,2,3] + + assert_false Set[].proper_superset?(Set[]) +end + +assert("Set#subset?") do + set = Set[1,2,3] + + assert_raise(ArgumentError) { set.subset?(nil) } + assert_raise(ArgumentError) { set.subset?(2) } + assert_raise(ArgumentError) { set.subset?([2]) } + + assert_true set.subset?(Set[1,2,3,4]) + assert_true set.subset?(Set[1,2,3]) + assert_false set.subset?(Set[1,2]) + assert_false set.subset?(Set[]) + + assert_true set <= Set[1,2,3] + assert_false set <= Set[1,2] + + assert_true Set[].subset?(Set[1]) + assert_true Set[].subset?(Set[]) +end + +assert("Set#proper_subset?") do + set = Set[1,2,3] + + assert_raise(ArgumentError) { set.proper_subset?(nil) } + assert_raise(ArgumentError) { set.proper_subset?(2) } + assert_raise(ArgumentError) { set.proper_subset?([2]) } + + assert_true set.proper_subset?(Set[1,2,3,4]) + assert_false set.proper_subset?(Set[1,2,3]) + assert_false set.proper_subset?(Set[1,2]) + assert_false set.proper_subset?(Set[]) + + assert_true set < Set[1,2,3,4] + assert_false set < Set[1,2,3] + + assert_true Set[].proper_subset?(Set[1]) + assert_false Set[].proper_subset?(Set[]) +end + +assert("Set#intersect?") do + set = Set[3,4,5] + + assert_raise(ArgumentError) { set.intersect?(3) } + assert_raise(ArgumentError) { set.intersect?([2,4,6]) } + + assert_true set.intersect?(set) + assert_true set.intersect?(Set[2,4]) + assert_true set.intersect?(Set[5,6,7]) + assert_true set.intersect?(Set[1,2,6,8,4]) + + assert_false(set.intersect?(Set[])) + assert_false(set.intersect?(Set[0,2])) + assert_false(set.intersect?(Set[0,2,6])) + assert_false(set.intersect?(Set[0,2,6,8,10])) + + # Make sure set hasn't changed + assert_equal(Set[3,4,5], set) +end + +assert("Set#disjoint?") do + set = Set[3,4,5] + + assert_raise(ArgumentError) { set.disjoint?(3) } + assert_raise(ArgumentError) { set.disjoint?([2,4,6]) } + + assert_true(set.disjoint?(Set[])) + assert_true(set.disjoint?(Set[0,2])) + assert_true(set.disjoint?(Set[0,2,6])) + assert_true(set.disjoint?(Set[0,2,6,8,10])) + + assert_false set.disjoint?(set) + assert_false set.disjoint?(Set[2,4]) + assert_false set.disjoint?(Set[5,6,7]) + assert_false set.disjoint?(Set[1,2,6,8,4]) + + # Make sure set hasn't changed + assert_equal(Set[3,4,5], set) +end + +assert("Set#each") do + ary = [1,3,5,7,10,20] + set = Set.new(ary) + + ret = set.each { |o| } + assert_true set.equal?(ret) # assert_same + + e = set.each + assert_true e.instance_of?(Enumerator) + + assert_nothing_raised { + set.each { |o| + ary.delete(o) or raise "unexpected element: #{o}" + } + ary.empty? or raise "forgotten elements: #{ary.join(', ')}" + } +end + +assert("Set#add") do + set = Set[1,2,3] + + ret = set.add(2) + assert_true set.equal?(ret) # assert_same + assert_equal(Set[1,2,3], set) + + ret = set.add(4) + assert_true set.equal?(ret) # assert_same + assert_equal(Set[1,2,3,4], set) +end + +assert("Set#add?") do + set = Set[1,2,3] + + ret = set.add?(2) + assert_nil ret + assert_equal(Set[1,2,3], set) + + ret = set.add?(4) + assert_true set.equal?(ret) # assert_same + assert_equal(Set[1,2,3,4], set) +end + +assert("Set#delete") do + set = Set[1,2,3] + + ret = set.delete(4) + assert_true set.equal?(ret) # assert_same + assert_equal(Set[1,2,3], set) + + ret = set.delete(2) + assert_true set.equal?(ret) # assert_same + assert_equal(Set[1,3], set) +end + +assert("Set#delete?") do + set = Set[1,2,3] + + ret = set.delete?(4) + assert_nil ret + assert_equal(Set[1,2,3], set) + + ret = set.delete?(1) + assert_true set.equal?(ret) # assert_same + assert_equal(Set[2,3], set) +end + +assert("Set#delete_if") do + set = Set.new(1..10) + ret = set.delete_if { |i| i > 10 } + assert_true set.equal?(ret) # assert_same + assert_equal(Set.new(1..10), set) + + set = Set.new(1..10) + ret = set.delete_if { |i| i % 3 == 0 } + assert_true set.equal?(ret) # assert_same + assert_equal(Set[1,2,4,5,7,8,10], set) +end + +assert("Set#keep_if") do + set = Set.new(1..10) + ret = set.keep_if { |i| i <= 10 } + assert_true set.equal?(ret) # assert_same + assert_equal(Set.new(1..10), set) + + set = Set.new(1..10) + ret = set.keep_if { |i| i % 3 != 0 } + assert_true set.equal?(ret) # assert_same + assert_equal(Set[1,2,4,5,7,8,10], set) +end + +assert("Set#collect!") do + set = Set[1,2,3,'a','b','c',-1..1,2..4] + + ret = set.collect! { |i| + case i + when Numeric + i * 2 + when String + i.upcase + else + nil + end + } + + assert_true set.equal?(ret) # assert_same + assert_equal(Set[2,4,6,"A","B","C",nil], set) +end + +assert("Set#reject!") do + set = Set.new(1..10) + + ret = set.reject! { |i| i > 10 } + assert_nil(ret) + assert_equal(Set.new(1..10), set) + + ret = set.reject! { |i| i % 3 == 0 } + assert_true set.equal?(ret) # assert_same + assert_equal(Set[1,2,4,5,7,8,10], set) +end + +# this test is not in CRuby +assert("Set#select!") do + set = Set.new(1..10) + + ret = set.select! { |i| i <= 10 } + assert_nil(ret) + assert_equal(Set.new(1..10), set) + + ret = set.select! { |i| i % 3 != 0 } + assert_true set.equal?(ret) # assert_same + assert_equal(Set[1,2,4,5,7,8,10], set) +end + +assert("Set#merge") do + set = Set[1,2,3] + + ret = set.merge([2,4,6]) + assert_true set.equal?(ret) # assert_same + assert_equal(Set[1,2,3,4,6], set) +end + +assert("Set#subtract") do + set = Set[1,2,3] + + ret = set.subtract([2,4,6]) + assert_true set.equal?(ret) # assert_same + assert_equal(Set[1,3], set) +end + +assert("Set#+") do + set = Set[1,2,3] + + ret = set + [2,4,6] + assert_false set.equal?(ret) # assert_not_same + assert_equal(Set[1,2,3,4,6], ret) + assert_equal(Set[1,2,3], set, "original set should not be modified") + + # Set + Set + set2 = Set[3, 4, 5] + assert_equal(Set[1, 2, 3, 4, 5], set + set2) + + # Set + empty Set + assert_equal(set, set + Set[]) + + # empty Set + Set + assert_equal(set, Set[] + set) + + # Set + Array with no common elements + assert_equal(Set[1, 2, 3, 4, 5], set + [4, 5]) + + # Set + self + assert_equal(set, set + set) + + # with various object types + s1 = Set["a", "b", "c"] + s2 = Set["c", "d", "e"] + assert_equal(Set["a", "b", "c", "d", "e"], s1 + s2) +end + +assert("Set#-") do + set = Set[1,2,3] + + ret = set - [2,4,6] + assert_false set.equal?(ret) # assert_not_same + assert_equal(Set[1,3], ret) + assert_equal(Set[1,2,3], set, "original set should not be modified") + + # Set - Set + set2 = Set[3, 4, 5] + assert_equal(Set[1, 2], set - set2) + + # Set - empty Set + assert_equal(set, set - Set[]) + + # empty Set - Set + assert_equal(Set[], Set[] - set) + + # Set - Array with no common elements + assert_equal(set, set - [4, 5]) + + # Set - self + assert_equal(Set[], set - set) + + # with various object types + s1 = Set["a", "b", "c"] + s2 = Set["b", "c", "d"] + assert_equal(Set["a"], s1 - s2) +end + +assert("Set#&") do + set = Set[1,2,3,4] + + ret = set & [2,4,6] + assert_false set.equal?(ret) # assert_not_same + assert_equal(Set[2,4], ret) + assert_equal(Set[1, 2, 3, 4], set, "original set should not be modified") + + # Set & Set + set2 = Set[3, 4, 5] + assert_equal(Set[3, 4], set & set2) + + # Set & empty Set + assert_equal(Set[], set & Set[]) + + # empty Set & Set + assert_equal(Set[], Set[] & set) + + # Set & Array with no common elements + assert_equal(Set[], set & [5, 6, 7]) + + # Set & self + assert_equal(set, set & set) + + # with various object types + s1 = Set["a", "b", "c"] + s2 = Set["b", "c", "d"] + assert_equal(Set["b", "c"], s1 & s2) +end + +assert("Set#^") do + set = Set[1,2,3,4] + + ret = set ^ [2,4,5,5] + assert_false set.equal?(ret) # assert_not_same + assert_equal(Set[1,3,5], ret) +end + +assert("Set#==") do + set1 = Set[2,3,1] + set2 = Set[1,2,3] + + assert_equal(set1, set1) + assert_equal(set1, set2) + assert_not_equal(Set[1], [1]) + + set1 = Class.new(Set)["a", "b"] + set2 = Set["a", "b", set1] + set1 = set1.add(set1.clone) + + assert_equal(set2, set2.clone) + assert_equal(set1.clone, set1) +end + +assert("Set#classify") do + set = Set.new(1..10) + ret = set.classify { |i| i % 3 } + + assert_equal(3, ret.size) + assert_equal(Hash, ret.class) + ret.each_value { |v| assert_equal(Set, v.class) } + assert_equal(Set[3,6,9], ret[0]) + assert_equal(Set[1,4,7,10], ret[1]) + assert_equal(Set[2,5,8], ret[2]) +end + +assert("Set#divide") do + # arity is 1 + set = Set.new(1..10) + ret = set.divide { |i| i % 3 } + + assert_equal(3, ret.size) + n = 0 + ret.each { |s| n += s.size } + assert_equal(set.size, n) + assert_equal(set, ret.flatten) + + assert_equal(Set, ret.class) + assert_true(ret.include?(Set[3,6,9])) + assert_true(ret.include?(Set[1,4,7,10])) + assert_true(ret.include?(Set[2,5,8])) + + + # arity is 2 + set = Set[7,10,5,11,1,3,4,9,0] + assert_raise(NotImplementedError) { + ret = set.divide { |a, b| (a - b).abs == 1 } + } + + # assert_equal(4, ret.size) + # n = 0 + # ret.each { |s| n += s.size } + # assert_equal(set.size, n) + # assert_equal(set, ret.flatten) + + # assert_equal(Set, ret.class) +end + +# freeze is not implemented yet +#assert("freeze") do +# orig = set = Set[1,2,3] +# assert_equal false, set.frozen? +# set << 4 +# assert_same orig, set.freeze +# assert_equal true, set.frozen? +# assert_raise(RuntimeError) { +# set << 5 +# } +# assert_equal 4, set.size +#end +# assert("freeze_dup") do +# set1 = Set[1,2,3] +# set1.freeze +# set2 = set1.dup +# +# assert_not_predicate set2, :frozen? +# assert_nothing_raised { +# set2.add 4 +# } +# end +# assert("reeze_clone") do +# set1 = Set[1,2,3] +# set1.freeze +# set2 = set1.clone +# +# assert_predicate set2, :frozen? +# assert_raise(RuntimeError) { +# set2.add 5 +# } +# end +# +assert("Set#inspect") do + set = Set[1,1,1] + assert_equal("Set[1]", set.inspect) +end + +assert("Set operations with custom objects") do + class MySettable + attr_reader :val + def initialize(val) + @val = val + end + + def hash + @val.hash + end + + def eql?(other) + other.is_a?(self.class) && self.val.eql?(other.val) + end + + def ==(other) + eql?(other) + end + + def to_s + "MySettable(#{@val})" + end + alias inspect to_s + end + + obj1 = MySettable.new(1) + obj2 = MySettable.new(2) + obj3 = MySettable.new(3) + obj4 = MySettable.new(4) + + set1 = Set[obj1, obj2] + set2 = Set[obj2, obj3] + + # Test for Set#+ + set_union = set1 + set2 + assert_equal(3, set_union.size) + assert_true(set_union.include?(obj1)) + assert_true(set_union.include?(obj2)) + assert_true(set_union.include?(obj3)) + + # Test for Set#- + set_diff = set1 - set2 + assert_equal(1, set_diff.size) + assert_equal(obj1, set_diff.to_a[0]) + + # Test for Set#& + set_intersect = set1 & set2 + assert_equal(1, set_intersect.size) + assert_equal(obj2, set_intersect.to_a[0]) + + # Test with an array of objects + arr = [obj2, obj4] + set_intersect_arr = set1 & arr + assert_equal(1, set_intersect_arr.size) + assert_equal(obj2, set_intersect_arr.to_a[0]) +end + +assert("Set#hash") do + set = Set[1, 2, 3] + assert_kind_of(Integer, set.hash) + hash = set.hash + assert_equal(hash, Set[3, 1, 2].hash) + assert_not_equal(hash, Set[1, 2, 4].hash) + assert_not_equal(hash, Set[].hash) +end diff --git a/mrbgems/mruby-sleep/README.md b/mrbgems/mruby-sleep/README.md index ed9c2730ab..d1492aeadd 100644 --- a/mrbgems/mruby-sleep/README.md +++ b/mrbgems/mruby-sleep/README.md @@ -4,7 +4,7 @@ mruby sleep module ## Install by mrbgems -* add `conf.gem` line to your build configuration. +- add `conf.gem` line to your build configuration. ```ruby MRuby::Build.new do |conf| @@ -26,4 +26,4 @@ usleep(10000) under the MIT License: -* +- diff --git a/mrbgems/mruby-sleep/src/sleep.c b/mrbgems/mruby-sleep/src/sleep.c index d05bc1944d..292564de7f 100644 --- a/mrbgems/mruby-sleep/src/sleep.c +++ b/mrbgems/mruby-sleep/src/sleep.c @@ -36,11 +36,28 @@ #include #endif -#include "mruby.h" +#include -/* not implemented forever sleep (called without an argument)*/ +/* + * call-seq: + * sleep(duration) -> integer + * + * Suspends the current thread for duration seconds (which may be any number, + * including a Float with fractional seconds if floating point is enabled). + * Returns the actual number of seconds slept (rounded), which may be less than + * that asked for if another thread calls Thread#run. Zero arguments causes + * sleep to sleep forever. + * + * Time.new #=> 2008-03-08 19:56:19 +0900 + * sleep 1.2 #=> 1 + * Time.new #=> 2008-03-08 19:56:20 +0900 + * sleep 1.9 #=> 2 + * Time.new #=> 2008-03-08 19:56:22 +0900 + * + * Note: Forever sleep (called without an argument) is not implemented. + */ static mrb_value -mrb_f_sleep(mrb_state *mrb, mrb_value self) +f_sleep(mrb_state *mrb, mrb_value self) { time_t beg = time(0); time_t end; @@ -60,7 +77,8 @@ mrb_f_sleep(mrb_state *mrb, mrb_value self) mrb_get_args(mrb, "i", &sec); if (sec >= 0) { sleep(sec); - } else { + } + else { mrb_raise(mrb, E_ARGUMENT_ERROR, "time interval must not be negative"); } #endif @@ -70,10 +88,26 @@ mrb_f_sleep(mrb_state *mrb, mrb_value self) } /* mruby special; needed for mruby without float numbers */ +/* + * call-seq: + * usleep(microseconds) -> 0 + * + * Suspends the current thread for microseconds microseconds (which should be + * an integer). This provides microsecond-level precision for short delays. + * Returns 0 on successful completion. + * + * usleep(500000) # Sleep for 0.5 seconds (500,000 microseconds) + * usleep(1000) # Sleep for 1 millisecond (1,000 microseconds) + * usleep(100) # Sleep for 100 microseconds + * + * Note: This function is useful for precise timing in embedded systems + * where sub-second delays are required. + */ static mrb_value -mrb_f_usleep(mrb_state *mrb, mrb_value self) +f_usleep(mrb_state *mrb, mrb_value self) { mrb_int usec; + time_t slp_tm; #ifdef _WIN32 FILETIME st_ft,ed_ft; unsigned __int64 st_time = 0; @@ -81,7 +115,6 @@ mrb_f_usleep(mrb_state *mrb, mrb_value self) #else struct timeval st_tm,ed_tm; #endif - time_t slp_tm; #ifdef _WIN32 GetSystemTimeAsFileTime(&st_ft); @@ -94,7 +127,8 @@ mrb_f_usleep(mrb_state *mrb, mrb_value self) if (usec >= 0) { usleep(usec); - } else { + } + else { mrb_raise(mrb, E_ARGUMENT_ERROR, "time interval must not be negative"); } @@ -108,7 +142,7 @@ mrb_f_usleep(mrb_state *mrb, mrb_value self) ed_time <<=32; ed_time |= ed_ft.dwLowDateTime; - slp_tm = (ed_time - st_time) / 10; + slp_tm = (time_t)((ed_time - st_time) / 10); #else gettimeofday(&ed_tm, NULL); @@ -123,13 +157,29 @@ mrb_f_usleep(mrb_state *mrb, mrb_value self) return mrb_fixnum_value((mrb_int)slp_tm); } +/* + * Initializes the mruby-sleep gem by defining sleep and usleep methods + * as private methods in the Kernel module, making them available globally. + * + * - sleep: requires 1 argument (duration in seconds), supports floating point + * when MRB_NO_FLOAT is not defined, otherwise uses integer seconds + * - usleep: requires 1 argument (duration in microseconds), integer only + * + * Both methods provide thread suspension capabilities for timing control + * in embedded Ruby environments with cross-platform support (Windows/Unix). + */ void mrb_mruby_sleep_gem_init(mrb_state *mrb) { - mrb_define_method(mrb, mrb->kernel_module, "sleep", mrb_f_sleep, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, mrb->kernel_module, "usleep", mrb_f_usleep, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, mrb->kernel_module, MRB_SYM(sleep), f_sleep, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, mrb->kernel_module, MRB_SYM(usleep), f_usleep, MRB_ARGS_REQ(1)); } +/* + * Finalizes the mruby-sleep gem. Currently no cleanup is required + * as the sleep/usleep implementation uses system calls without + * persistent state or allocated resources. + */ void mrb_mruby_sleep_gem_final(mrb_state *mrb) { diff --git a/mrbgems/mruby-socket/README.md b/mrbgems/mruby-socket/README.md index 8084cdb70e..f7ff8ca320 100644 --- a/mrbgems/mruby-socket/README.md +++ b/mrbgems/mruby-socket/README.md @@ -20,17 +20,17 @@ Date: Tue, 21 May 2013 04:31:30 GMT ## Requirement -* [mruby-io](https://github.com/mruby/mruby/tree/master/mrbgems/mruby-io) mrbgem -* [iij/mruby-mtest](https://github.com/iij/mruby-mtest) mrgbem to run tests -* system must have RFC3493 basic socket interface -* and some POSIX API... +- [mruby-io](../mruby-io) mrbgem +- [iij/mruby-mtest](https://github.com/iij/mruby-mtest) mrbgem to run tests +- system must have RFC3493 basic socket interface +- and some POSIX API... ## TODO -* add missing methods -* write more tests -* fix possible descriptor leakage (see XXX comments) -* `UNIXSocket#recv_io` `UNIXSocket#send_io` +- add missing methods +- write more tests +- fix possible descriptor leakage (see XXX comments) +- `UNIXSocket#recv_io` `UNIXSocket#send_io` ## License diff --git a/mrbgems/mruby-socket/include/socket_hal.h b/mrbgems/mruby-socket/include/socket_hal.h new file mode 100644 index 0000000000..4ad75e88e7 --- /dev/null +++ b/mrbgems/mruby-socket/include/socket_hal.h @@ -0,0 +1,102 @@ +/* +** socket_hal.h - Socket HAL (Hardware Abstraction Layer) interface +** +** See Copyright Notice in mruby.h +** +** This header defines the platform-independent socket HAL interface. +** Platform-specific implementations are provided by HAL gems: +** - hal-posix-socket: POSIX socket implementation (Linux, macOS, BSD, Unix) +** - hal-win-socket: Windows socket implementation (Windows, MinGW) +*/ + +#ifndef MRUBY_SOCKET_HAL_H +#define MRUBY_SOCKET_HAL_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Socket HAL Initialization/Finalization + */ + +/* Initialize socket subsystem (e.g., WSAStartup on Windows) */ +void mrb_hal_socket_init(mrb_state *mrb); + +/* Finalize socket subsystem (e.g., WSACleanup on Windows) */ +void mrb_hal_socket_final(mrb_state *mrb); + +/* + * Error Handling + */ + +/* Translate the most recent socket-API error into a POSIX errno value and + * store it in errno. On Windows, this reads WSAGetLastError() and maps it; + * on POSIX, this is a no-op (errno is already set by the failed call). + * Call this immediately after a socket-API failure, before mrb_sys_fail. */ +void mrb_hal_socket_set_errno_from_last_error(void); + +/* + * Socket Control Operations + */ + +/* Set non-blocking mode on socket + * Returns 0 on success, -1 on error (sets errno) */ +int mrb_hal_socket_set_nonblock(mrb_state *mrb, int fd, int nonblock); + +/* + * Address Conversion Functions + */ + +/* Convert network address to presentation format (string) + * af: address family (AF_INET, AF_INET6) + * src: network address in binary form + * dst: buffer for string result + * size: size of dst buffer + * Returns: dst on success, NULL on error */ +const char* mrb_hal_socket_inet_ntop(int af, const void *src, char *dst, size_t size); + +/* Convert presentation format (string) to network address + * af: address family (AF_INET, AF_INET6) + * src: string representation of address + * dst: buffer for network address result + * Returns: 1 on success, 0 if src is not valid, -1 on error */ +int mrb_hal_socket_inet_pton(int af, const char *src, void *dst); + +/* + * Platform-Specific Socket Features + */ + +/* Create Unix domain socket address structure + * path: Unix socket path + * Returns: packed sockaddr string, or raises exception if not supported */ +mrb_value mrb_hal_socket_sockaddr_un(mrb_state *mrb, const char *path, size_t pathlen); + +/* Create a pair of connected sockets + * domain: address family (e.g., AF_UNIX) + * type: socket type (e.g., SOCK_STREAM) + * protocol: protocol (usually 0) + * sv: array to receive the two socket descriptors + * Returns: 0 on success, -1 on error (sets errno) */ +int mrb_hal_socket_socketpair(mrb_state *mrb, int domain, int type, int protocol, int sv[2]); + +/* Get Unix socket path from sockaddr + * Returns: Unix socket path string, or raises exception if not supported */ +mrb_value mrb_hal_socket_unix_path(mrb_state *mrb, const char *sockaddr, size_t socklen); + +/* Enumerate local IP addresses for all network interfaces. + * Returns an Array of String values, each a binary sockaddr_in (AF_INET) or + * sockaddr_in6 (AF_INET6) ready to be passed to Addrinfo.new. Loopback, + * link-local, and other interface-local addresses are included; the caller + * is responsible for filtering if needed. + * Raises a SystemCallError on failure of the underlying platform call + * (getifaddrs / GetAdaptersAddresses). */ +mrb_value mrb_hal_socket_ip_address_list(mrb_state *mrb); + +#ifdef __cplusplus +} +#endif + +#endif /* MRUBY_SOCKET_HAL_H */ diff --git a/mrbgems/mruby-socket/mrbgem.rake b/mrbgems/mruby-socket/mrbgem.rake index dba00c646a..3775ec57f8 100644 --- a/mrbgems/mruby-socket/mrbgem.rake +++ b/mrbgems/mruby-socket/mrbgem.rake @@ -1,17 +1,17 @@ MRuby::Gem::Specification.new('mruby-socket') do |spec| spec.license = 'MIT' - spec.authors = ['Internet Initiative Japan', 'mruby developers'] + spec.authors = ['Internet Initiative Japan Inc.', 'mruby developers'] spec.summary = 'standard socket class' #spec.cc.defines << "HAVE_SA_LEN=0" - # If Windows, use winsock + spec.add_dependency('mruby-io', :core => 'mruby-io') + spec.add_dependency('mruby-error', :core => 'mruby-error') + # spec.add_dependency('mruby-mtest') + if spec.for_windows? spec.linker.libraries << "wsock32" spec.linker.libraries << "ws2_32" + spec.linker.libraries << "iphlpapi" # for GetAdaptersAddresses (Socket.ip_address_list) end - - spec.add_dependency('mruby-io', :core => 'mruby-io') - spec.add_dependency('mruby-pack', :core => 'mruby-pack') - # spec.add_dependency('mruby-mtest') end diff --git a/mrbgems/mruby-socket/mrblib/socket.rb b/mrbgems/mruby-socket/mrblib/socket.rb index 0e7bbdcc1c..ce9b2a31b4 100644 --- a/mrbgems/mruby-socket/mrblib/socket.rb +++ b/mrbgems/mruby-socket/mrblib/socket.rb @@ -1,4 +1,15 @@ class Addrinfo + # + # call-seq: + # Addrinfo.new(sockaddr, family=Socket::PF_UNSPEC, socktype=0, protocol=0) -> addrinfo + # + # Creates a new Addrinfo object from socket address information. + # sockaddr can be a packed sockaddr string or an array representation. + # + # Addrinfo.new(Socket.sockaddr_in(80, "127.0.0.1")) + # Addrinfo.new(["AF_INET", 80, "localhost", "127.0.0.1"]) + # Addrinfo.new(["AF_UNIX", "/tmp/socket"]) + # def initialize(sockaddr, family=Socket::PF_UNSPEC, socktype=0, protocol=0) @hostname = nil if sockaddr.is_a? Array @@ -19,39 +30,101 @@ def initialize(sockaddr, family=Socket::PF_UNSPEC, socktype=0, protocol=0) end @socktype = socktype @protocol = protocol - @canonname = nil end + # + # call-seq: + # Addrinfo.foreach(nodename, service, family=nil, socktype=nil, protocol=nil, flags=0) { |addrinfo| block } -> array + # + # Iterates over all address information for the given nodename and service. + # Returns an array of Addrinfo objects. + # + # Addrinfo.foreach("www.example.com", "http") { |ai| puts ai.ip_address } + # Addrinfo.foreach("localhost", 80) { |ai| puts ai.inspect } + # def self.foreach(nodename, service, family=nil, socktype=nil, protocol=nil, flags=0, &block) a = self.getaddrinfo(nodename, service, family, socktype, protocol, flags) - a.each { |ai| block.call(ai) } + a.each {|ai| block.call(ai) } a end + # + # call-seq: + # Addrinfo.ip(host) -> addrinfo + # + # Creates an Addrinfo object for the given host with port 0. + # Useful for creating address info without specifying a port. + # + # Addrinfo.ip("127.0.0.1") #=> # + # Addrinfo.ip("::1") #=> # + # def self.ip(host) Addrinfo.new(Socket.sockaddr_in(0, host)) end + # + # call-seq: + # Addrinfo.tcp(host, port) -> addrinfo + # + # Creates an Addrinfo object for TCP connection to the given host and port. + # + # Addrinfo.tcp("localhost", 80) #=> # + # Addrinfo.tcp("www.example.com", 443) #=> # + # def self.tcp(host, port) Addrinfo.getaddrinfo(host, port, nil, Socket::SOCK_STREAM, Socket::IPPROTO_TCP)[0] end + # + # call-seq: + # Addrinfo.udp(host, port) -> addrinfo + # + # Creates an Addrinfo object for UDP connection to the given host and port. + # + # Addrinfo.udp("localhost", 53) #=> # + # Addrinfo.udp("8.8.8.8", 53) #=> # + # def self.udp(host, port) Addrinfo.getaddrinfo(host, port, nil, Socket::SOCK_DGRAM, Socket::IPPROTO_UDP)[0] end + # + # call-seq: + # Addrinfo.unix(path, socktype=Socket::SOCK_STREAM) -> addrinfo + # + # Creates an Addrinfo object for Unix domain socket at the given path. + # + # Addrinfo.unix("/tmp/socket") #=> # + # Addrinfo.unix("/var/run/daemon.sock", Socket::SOCK_DGRAM) #=> # + # def self.unix(path, socktype=Socket::SOCK_STREAM) Addrinfo.new(Socket.sockaddr_un(path), Socket::AF_UNIX, socktype) end + # + # call-seq: + # addrinfo.afamily -> integer + # + # Returns the address family of the socket address. + # + # Addrinfo.tcp("localhost", 80).afamily #=> 2 (AF_INET) + # Addrinfo.unix("/tmp/sock").afamily #=> 1 (AF_UNIX) + # + # + # call-seq: + # addrinfo.afamily -> integer + # + # Returns the address family of the socket address. + # + # Addrinfo.tcp("localhost", 80).afamily #=> 2 (AF_INET) + # Addrinfo.unix("/tmp/sock").afamily #=> 1 (AF_UNIX) + # def afamily @family end #def bind - - attr_reader :canonname - + #def canonname #def connect #def connect_from #def connect_to @@ -61,6 +134,15 @@ def afamily # Socket.getnameinfo #end + # + # call-seq: + # addrinfo.inspect -> string + # + # Returns a string representation of the Addrinfo object. + # + # Addrinfo.tcp("localhost", 80).inspect #=> "#" + # Addrinfo.unix("/tmp/sock").inspect #=> "#" + # def inspect if ipv4? or ipv6? if @protocol == Socket::IPPROTO_TCP or (@socktype == Socket::SOCK_STREAM and @protocol == 0) @@ -70,12 +152,21 @@ def inspect else proto = '???' end - "#" else - "#" + proto = "SOCK_STREAM" end - end - + "#" + end + + # + # call-seq: + # addrinfo.inspect_sockaddr -> string + # + # Returns a string representation of the socket address portion. + # + # Addrinfo.tcp("localhost", 80).inspect_sockaddr #=> "127.0.0.1:80" + # Addrinfo.unix("/tmp/sock").inspect_sockaddr #=> "/tmp/sock" + # def inspect_sockaddr if ipv4? a, p = ip_unpack @@ -90,23 +181,68 @@ def inspect_sockaddr end end + # + # call-seq: + # addrinfo.ip? -> true or false + # + # Returns true if the address is an IP address (IPv4 or IPv6). + # + # Addrinfo.tcp("localhost", 80).ip? #=> true + # Addrinfo.unix("/tmp/sock").ip? #=> false + # def ip? ipv4? or ipv6? end + # + # call-seq: + # addrinfo.ip_address -> string + # + # Returns the IP address as a string. Raises an exception if not an IP address. + # + # Addrinfo.tcp("localhost", 80).ip_address #=> "127.0.0.1" + # Addrinfo.udp("::1", 53).ip_address #=> "::1" + # def ip_address ip_unpack[0] end + # + # call-seq: + # addrinfo.ip_port -> integer + # + # Returns the port number. Raises an exception if not an IP address. + # + # Addrinfo.tcp("localhost", 80).ip_port #=> 80 + # Addrinfo.udp("127.0.0.1", 53).ip_port #=> 53 + # def ip_port ip_unpack[1] end + # + # call-seq: + # addrinfo.ip_unpack -> [ip_address, port] + # + # Returns an array containing the IP address and port number. + # + # Addrinfo.tcp("localhost", 80).ip_unpack #=> ["127.0.0.1", 80] + # Addrinfo.udp("::1", 53).ip_unpack #=> ["::1", 53] + # def ip_unpack h, p = getnameinfo(Socket::NI_NUMERICHOST|Socket::NI_NUMERICSERV) [ h, p.to_i ] end + # + # call-seq: + # addrinfo.ipv4? -> true or false + # + # Returns true if the address is an IPv4 address. + # + # Addrinfo.tcp("127.0.0.1", 80).ipv4? #=> true + # Addrinfo.tcp("::1", 80).ipv4? #=> false + # def ipv4? @family == Socket::AF_INET end @@ -115,6 +251,15 @@ def ipv4? #def ipv4_multicast? #def ipv4_private? + # + # call-seq: + # addrinfo.ipv6? -> true or false + # + # Returns true if the address is an IPv6 address. + # + # Addrinfo.tcp("::1", 80).ipv6? #=> true + # Addrinfo.tcp("127.0.0.1", 80).ipv6? #=> false + # def ipv6? @family == Socket::AF_INET6 end @@ -132,13 +277,48 @@ def ipv6? #def ipv6_v4mapped? #def listen(backlog=5) + # + # call-seq: + # addrinfo.pfamily -> integer + # + # Returns the protocol family (same as afamily). + # + # Addrinfo.tcp("localhost", 80).pfamily #=> 2 (PF_INET) + # Addrinfo.unix("/tmp/sock").pfamily #=> 1 (PF_UNIX) + # def pfamily @family end + # + # call-seq: + # addrinfo.protocol -> integer + # + # Returns the protocol number. + # + # Addrinfo.tcp("localhost", 80).protocol #=> 6 (IPPROTO_TCP) + # Addrinfo.udp("localhost", 53).protocol #=> 17 (IPPROTO_UDP) + # attr_reader :protocol + + # + # call-seq: + # addrinfo.socktype -> integer + # + # Returns the socket type. + # + # Addrinfo.tcp("localhost", 80).socktype #=> 1 (SOCK_STREAM) + # Addrinfo.udp("localhost", 53).socktype #=> 2 (SOCK_DGRAM) + # attr_reader :socktype + # + # call-seq: + # addrinfo._to_array -> array + # + # Internal method that returns the address information as an array. + # Used internally by socket operations. + # def _to_array case @family when Socket::AF_INET @@ -154,12 +334,30 @@ def _to_array [ s, port.to_i, addr, addr ] end + # + # call-seq: + # addrinfo.to_sockaddr -> string + # + # Returns the socket address as a packed string. + # + # ai = Addrinfo.tcp("localhost", 80) + # ai.to_sockaddr #=> packed sockaddr string + # def to_sockaddr @sockaddr end alias to_s to_sockaddr + # + # call-seq: + # addrinfo.unix? -> true or false + # + # Returns true if the address is a Unix domain socket address. + # + # Addrinfo.unix("/tmp/sock").unix? #=> true + # Addrinfo.tcp("localhost", 80).unix? #=> false + # def unix? @family == Socket::AF_UNIX end @@ -168,30 +366,77 @@ def unix? class BasicSocket < IO @@do_not_reverse_lookup = true + # + # call-seq: + # BasicSocket.do_not_reverse_lookup -> true or false + # + # Returns the current setting for reverse DNS lookups. + # + # BasicSocket.do_not_reverse_lookup #=> false + # def self.do_not_reverse_lookup @@do_not_reverse_lookup end + # + # call-seq: + # BasicSocket.do_not_reverse_lookup = boolean -> boolean + # + # Sets whether to perform reverse DNS lookups. + # + # BasicSocket.do_not_reverse_lookup = true + # def self.do_not_reverse_lookup=(val) @@do_not_reverse_lookup = val ? true : false end + # + # call-seq: + # BasicSocket.new(*args) -> basicsocket + # + # Creates a new BasicSocket object. This is typically called by subclasses. + # def initialize(*args) super(*args) self._is_socket = true @do_not_reverse_lookup = @@do_not_reverse_lookup end + # + # call-seq: + # BasicSocket.for_fd(fd) -> basicsocket + # + # Creates a BasicSocket object from an existing file descriptor. + # + # sock = BasicSocket.for_fd(3) + # def self.for_fd(fd) super(fd, "r+") end #def connect_address + # + # call-seq: + # basicsocket.local_address -> addrinfo + # + # Returns an Addrinfo object for the local address of the socket. + # + # sock.local_address #=> # + # def local_address Addrinfo.new self.getsockname end + # + # call-seq: + # basicsocket.recv_nonblock(maxlen, flags=0) -> string + # + # Receives data from the socket without blocking. May raise an exception + # if no data is available. + # + # data = sock.recv_nonblock(1024) + # def recv_nonblock(maxlen, flags=0) begin _setnonblock(true) @@ -201,6 +446,14 @@ def recv_nonblock(maxlen, flags=0) end end + # + # call-seq: + # basicsocket.remote_address -> addrinfo + # + # Returns an Addrinfo object for the remote address of the socket. + # + # sock.remote_address #=> # + # def remote_address Addrinfo.new self.getpeername end @@ -209,18 +462,52 @@ def remote_address end class IPSocket < BasicSocket + # + # call-seq: + # IPSocket.getaddress(host) -> string + # + # Returns the IP address of the given hostname as a string. + # + # IPSocket.getaddress("localhost") #=> "127.0.0.1" + # IPSocket.getaddress("www.ruby-lang.org") #=> "150.95.145.38" + # def self.getaddress(host) Addrinfo.ip(host).ip_address end + # + # call-seq: + # ipsocket.addr -> [family, port, hostname, ip_address] + # + # Returns the local address information as an array. + # + # sock.addr #=> ["AF_INET", 12345, "localhost", "127.0.0.1"] + # def addr Addrinfo.new(self.getsockname)._to_array end + # + # call-seq: + # ipsocket.peeraddr -> [family, port, hostname, ip_address] + # + # Returns the remote address information as an array. + # + # sock.peeraddr #=> ["AF_INET", 80, "example.com", "93.184.216.34"] + # def peeraddr Addrinfo.new(self.getpeername)._to_array end + # + # call-seq: + # ipsocket.recvfrom(maxlen, flags=0) -> [data, addrinfo] + # + # Receives data and sender information from the IP socket. + # + # data, addr = sock.recvfrom(1024) + # data, addr = sock.recvfrom(512, 0) + # def recvfrom(maxlen, flags=0) msg, sa = _recvfrom(maxlen, flags) [ msg, Addrinfo.new(sa)._to_array ] @@ -228,13 +515,24 @@ def recvfrom(maxlen, flags=0) end class TCPSocket < IPSocket + # + # call-seq: + # TCPSocket.new(host, service, local_host=nil, local_service=nil) -> tcpsocket + # + # Creates a new TCP socket connected to the given host and service. + # Optionally binds to local_host and local_service first. + # + # sock = TCPSocket.new("localhost", 80) + # sock = TCPSocket.new("www.example.com", "http") + # sock = TCPSocket.new("remote", 80, "127.0.0.1", 12345) + # def initialize(host, service, local_host=nil, local_service=nil) if @init_with_fd super(host, service) else s = nil e = SocketError - Addrinfo.foreach(host, service) { |ai| + Addrinfo.foreach(host, service) {|ai| begin s = Socket._socket(ai.afamily, Socket::SOCK_STREAM, 0) if local_host or local_service @@ -253,18 +551,21 @@ def initialize(host, service, local_host=nil, local_service=nil) raise e end end - - def self.new_with_prelude pre, *args - o = self._allocate - o.instance_eval(&pre) - o.initialize(*args) - o - end - #def self.gethostbyname(host) end class TCPServer < TCPSocket + # + # call-seq: + # TCPServer.new(host=nil, service) -> tcpserver + # + # Creates a new TCP server socket bound to the given host and service. + # If host is nil, binds to all available interfaces. + # + # server = TCPServer.new("localhost", 8080) + # server = TCPServer.new(nil, 3000) # binds to all interfaces + # server = TCPServer.new("0.0.0.0", "http") + # def initialize(host=nil, service) ai = Addrinfo.getaddrinfo(host, service, nil, nil, nil, Socket::AI_PASSIVE)[0] @init_with_fd = true @@ -277,16 +578,39 @@ def initialize(host=nil, service) self end + # + # call-seq: + # tcpserver.accept -> tcpsocket + # + # Accepts an incoming connection and returns a new TCPSocket. + # + # server = TCPServer.new(8080) + # client = server.accept + # def accept fd = self.sysaccept begin - TCPSocket.new_with_prelude(proc { @init_with_fd = true }, fd, "r+") - rescue + s = TCPSocket._allocate + s.instance_eval{ + @init_with_fd = true + } + s.__send__(:initialize, fd, "r+") + s + rescue => e IO._sysclose(fd) rescue nil - raise + raise e end end + # + # call-seq: + # tcpserver.accept_nonblock -> unixsocket + # + # Accepts an incoming connection without blocking. May raise an exception + # if no connection is available. + # + # client = server.accept_nonblock + # def accept_nonblock begin self._setnonblock(true) @@ -296,44 +620,104 @@ def accept_nonblock end end + # + # call-seq: + # unixserver.listen(backlog) -> 0 + # + # Sets the socket to listen for incoming connections with the given backlog. + # + # server.listen(5) + # server.listen(128) + # def listen(backlog) Socket._listen(self.fileno, backlog) 0 end + # + # call-seq: + # tcpserver.sysaccept -> integer + # + # Accepts an incoming connection and returns the file descriptor. + # + # fd = server.sysaccept + # def sysaccept Socket._accept(self.fileno) end end class UDPSocket < IPSocket + # + # call-seq: + # UDPSocket.new(af=Socket::AF_INET) -> udpsocket + # + # Creates a new UDP socket for the given address family. + # + # sock = UDPSocket.new + # sock = UDPSocket.new(Socket::AF_INET6) + # def initialize(af=Socket::AF_INET) super(Socket._socket(af, Socket::SOCK_DGRAM, 0), "r+") @af = af self end + # + # call-seq: + # ipsocket.bind(host, port) -> 0 + # + # Binds the socket to the given host and port. + # + # sock.bind("127.0.0.1", 8080) + # sock.bind("0.0.0.0", 3000) + # def bind(host, port) Socket._bind(self.fileno, _sockaddr_in(port, host)) 0 end + # + # call-seq: + # ipsocket.connect(host, port) -> 0 + # + # Connects the socket to the given host and port. + # + # sock.connect("127.0.0.1", 80) + # sock.connect("www.example.com", 443) + # def connect(host, port) Socket._connect(self.fileno, _sockaddr_in(port, host)) 0 end + # + # call-seq: + # udpsocket.recvfrom_nonblock(maxlen, flags=0) -> [data, addrinfo] + # + # Receives data and sender information without blocking. + # May raise an exception if no data is available. + # + # data, addr = sock.recvfrom_nonblock(1024) + # def recvfrom_nonblock(*args) - s = self begin self._setnonblock(true) self.recvfrom(*args) ensure - # XXX: self is a SystemcallException here! (should be bug) - s._setnonblock(false) + self._setnonblock(false) end end + # + # call-seq: + # ipsocket.send(mesg, flags, host=nil, port=nil) -> integer + # + # Sends data through the socket. Returns the number of bytes sent. + # + # sock.send("Hello", 0) + # sock.send("Data", 0, "127.0.0.1", 8080) + # def send(mesg, flags, host=nil, port=nil) if port super(mesg, flags, _sockaddr_in(port, host)) @@ -344,6 +728,13 @@ def send(mesg, flags, host=nil, port=nil) end end + # + # call-seq: + # udpsocket._sockaddr_in(port, host) -> string + # + # Internal method to create a sockaddr_in structure for the given port and host. + # Uses the socket's address family. + # def _sockaddr_in(port, host) ai = Addrinfo.getaddrinfo(host, port, @af, Socket::SOCK_DGRAM)[0] ai.to_sockaddr @@ -351,14 +742,32 @@ def _sockaddr_in(port, host) end class Socket < BasicSocket + # + # call-seq: + # Socket.new(domain, type, protocol=0) -> socket + # + # Creates a new socket with the given domain, type, and protocol. + # + # sock = Socket.new(Socket::AF_INET, Socket::SOCK_STREAM, 0) + # sock = Socket.new(Socket::AF_UNIX, Socket::SOCK_DGRAM) + # def initialize(domain, type, protocol=0) super(Socket._socket(domain, type, protocol), "r+") end #def self.accept_loop + # + # call-seq: + # Socket.getaddrinfo(nodename, servname, family=nil, socktype=nil, protocol=nil, flags=0) -> array + # + # Returns an array of Addrinfo objects for the given nodename and servname. + # + # Addrinfo.getaddrinfo("localhost", "http") + # Addrinfo.getaddrinfo("www.example.com", 80, Socket::AF_INET) + # def self.getaddrinfo(nodename, servname, family=nil, socktype=nil, protocol=nil, flags=0) - Addrinfo.getaddrinfo(nodename, servname, family, socktype, protocol, flags).map { |ai| + Addrinfo.getaddrinfo(nodename, servname, family, socktype, protocol, flags).map {|ai| ary = ai._to_array ary[2] = nodename ary[4] = ai.afamily @@ -369,12 +778,28 @@ def self.getaddrinfo(nodename, servname, family=nil, socktype=nil, protocol=nil, end #def self.getnameinfo - #def self.ip_address_list + # + # call-seq: + # Socket.open(domain, type, protocol=0) -> socket + # + # Creates a new socket. Alias for Socket.new. + # + # sock = Socket.open(Socket::AF_INET, Socket::SOCK_STREAM) + # def self.open(*args) new(args) end + # + # call-seq: + # Socket.sockaddr_in(port, host) -> string + # + # Returns a packed sockaddr_in structure for the given port and host. + # + # Socket.sockaddr_in(80, "127.0.0.1") + # Socket.sockaddr_in(443, "localhost") + # def self.sockaddr_in(port, host) ai = Addrinfo.getaddrinfo(host, port, nil, Socket::SOCK_DGRAM)[0] ai.to_sockaddr @@ -391,10 +816,26 @@ def self.sockaddr_in(port, host) #def self.unix_server_loop #def self.unix_server_socket + # + # call-seq: + # Socket.unpack_sockaddr_in(sockaddr) -> [port, ip_address] + # + # Unpacks a packed sockaddr_in structure and returns port and IP address. + # + # port, addr = Socket.unpack_sockaddr_in(sockaddr) + # def self.unpack_sockaddr_in(sa) Addrinfo.new(sa).ip_unpack.reverse end + # + # call-seq: + # Socket.unpack_sockaddr_un(sockaddr) -> path + # + # Unpacks a packed sockaddr_un structure and returns the Unix socket path. + # + # path = Socket.unpack_sockaddr_un(sockaddr) + # def self.unpack_sockaddr_un(sa) Addrinfo.new(sa).unix_path end @@ -419,18 +860,45 @@ def accept_nonblock end end + # + # call-seq: + # socket.bind(sockaddr) -> 0 + # + # Binds the socket to the given socket address. + # + # sock.bind(Socket.sockaddr_in(8080, "127.0.0.1")) + # sock.bind(addrinfo) + # def bind(sockaddr) sockaddr = sockaddr.to_sockaddr if sockaddr.is_a? Addrinfo Socket._bind(self.fileno, sockaddr) 0 end + # + # call-seq: + # socket.connect(sockaddr) -> 0 + # + # Connects the socket to the given socket address. + # + # sock.connect(Socket.sockaddr_in(80, "127.0.0.1")) + # sock.connect(addrinfo) + # def connect(sockaddr) sockaddr = sockaddr.to_sockaddr if sockaddr.is_a? Addrinfo Socket._connect(self.fileno, sockaddr) 0 end + # + # call-seq: + # socket.connect_nonblock(sockaddr) -> 0 + # + # Connects the socket to the given address without blocking. + # May raise an exception if the connection cannot be completed immediately. + # + # sock.connect_nonblock(sockaddr) + # def connect_nonblock(sockaddr) begin self._setnonblock(true) @@ -468,6 +936,17 @@ def sysaccept end class UNIXSocket < BasicSocket + # + # call-seq: + # UNIXSocket.new(path) -> unixsocket + # UNIXSocket.new(path) { |sock| block } -> obj + # + # Creates a new Unix domain socket connected to the given path. + # If a block is given, yields the socket and closes it when done. + # + # sock = UNIXSocket.new("/tmp/socket") + # UNIXSocket.new("/tmp/socket") { |s| s.write("data") } + # def initialize(path, &block) if self.is_a? UNIXServer super(path, "r") @@ -488,23 +967,56 @@ def initialize(path, &block) end end - def self.socketpair(type=Socket::SOCK_STREAM, protocol=0) - a = Socket.socketpair(Socket::AF_UNIX, type, protocol) - [ UNIXSocket.for_fd(a[0]), UNIXSocket.for_fd(a[1]) ] - end - class << self + # + # call-seq: + # UNIXSocket.socketpair(type=Socket::SOCK_STREAM, protocol=0) -> [socket1, socket2] + # + # Creates a pair of connected Unix domain sockets. + # + # sock1, sock2 = UNIXSocket.socketpair + # sock1, sock2 = UNIXSocket.socketpair(Socket::SOCK_DGRAM) + # + def socketpair(type=Socket::SOCK_STREAM, protocol=0) + a = Socket.socketpair(Socket::AF_UNIX, type, protocol) + [ UNIXSocket.for_fd(a[0]), UNIXSocket.for_fd(a[1]) ] + end + alias pair socketpair end + # + # call-seq: + # unixsocket.addr -> [family, path] + # + # Returns the local address information as an array. + # + # sock.addr #=> ["AF_UNIX", "/tmp/socket"] + # def addr [ "AF_UNIX", path ] end + # + # call-seq: + # unixsocket.path -> string + # + # Returns the path of the Unix domain socket. + # + # sock.path #=> "/tmp/socket" + # def path Addrinfo.new(self.getsockname).unix_path end + # + # call-seq: + # unixsocket.peeraddr -> [family, path] + # + # Returns the remote address information as an array. + # + # sock.peeraddr #=> ["AF_UNIX", "/tmp/peer_socket"] + # def peeraddr [ "AF_UNIX", Addrinfo.new(self.getpeername).unix_path ] end @@ -521,6 +1033,14 @@ def recvfrom(maxlen, flags=0) end class UNIXServer < UNIXSocket + # + # call-seq: + # UNIXServer.new(path) -> unixserver + # + # Creates a new Unix domain server socket bound to the given path. + # + # server = UNIXServer.new("/tmp/server_socket") + # def initialize(path) fd = Socket._socket(Socket::AF_UNIX, Socket::SOCK_STREAM, 0) begin @@ -541,6 +1061,15 @@ def initialize(path) end end + # + # call-seq: + # unixserver.accept -> unixsocket + # + # Accepts an incoming connection and returns a new UNIXSocket. + # + # server = UNIXServer.new("/tmp/server") + # client = server.accept + # def accept fd = self.sysaccept begin @@ -551,6 +1080,15 @@ def accept sock end + # + # call-seq: + # unixserver.accept_nonblock -> unixsocket + # + # Accepts an incoming connection without blocking. May raise an exception + # if no connection is available. + # + # client = server.accept_nonblock + # def accept_nonblock begin self._setnonblock(true) @@ -560,62 +1098,31 @@ def accept_nonblock end end + # + # call-seq: + # unixserver.listen(backlog) -> 0 + # + # Sets the socket to listen for incoming connections with the given backlog. + # + # server.listen(5) + # server.listen(128) + # def listen(backlog) Socket._listen(self.fileno, backlog) 0 end + # + # call-seq: + # unixserver.sysaccept -> integer + # + # Accepts an incoming connection and returns the file descriptor. + # + # fd = server.sysaccept + # def sysaccept Socket._accept(self.fileno) end end -class Socket - include Constants -end - -class Socket - class Option - def initialize(family, level, optname, data) - @family = family - @level = level - @optname = optname - @data = data - end - - def self.bool(family, level, optname, bool) - self.new(family, level, optname, [(bool ? 1 : 0)].pack('i')) - end - - def self.int(family, level, optname, integer) - self.new(family, level, optname, [integer].pack('i')) - end - - #def self.linger(family, level, optname, integer) - #end - - attr_reader :data, :family, :level, :optname - - def bool - @data.unpack('i')[0] != 0 - end - - def inspect - "#" - end - - def int - @data.unpack('i')[0] - end - - def linger - raise NotImplementedError.new - end - - def unpack(template) - raise NotImplementedError.new - end - end -end - class SocketError < StandardError; end diff --git a/mrbgems/mruby-socket/ports/posix/socket_hal.c b/mrbgems/mruby-socket/ports/posix/socket_hal.c new file mode 100644 index 0000000000..a2f6ad6695 --- /dev/null +++ b/mrbgems/mruby-socket/ports/posix/socket_hal.c @@ -0,0 +1,175 @@ +/* +** socket_hal.c - POSIX HAL implementation for mruby-socket +** +** See Copyright Notice in mruby.h +** +** POSIX implementation for socket operations using standard POSIX APIs. +** Supported platforms: Linux, macOS, BSD, Unix +*/ + +#include +#include +#include +#include +#include +#include "socket_hal.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Socket HAL Initialization/Finalization + */ + +void +mrb_hal_socket_init(mrb_state *mrb) +{ + (void)mrb; + /* No initialization needed for POSIX sockets */ +} + +void +mrb_hal_socket_final(mrb_state *mrb) +{ + (void)mrb; + /* No cleanup needed for POSIX sockets */ +} + +/* + * Error Handling + */ + +void +mrb_hal_socket_set_errno_from_last_error(void) +{ + /* POSIX socket calls already set errno on failure; nothing to do. */ +} + +/* + * Socket Control Operations + */ + +int +mrb_hal_socket_set_nonblock(mrb_state *mrb, int fd, int nonblock) +{ + (void)mrb; + + int flags = fcntl(fd, F_GETFL, 0); + if (flags == -1) { + return -1; + } + + if (nonblock) { + flags |= O_NONBLOCK; + } + else { + flags &= ~O_NONBLOCK; + } + + if (fcntl(fd, F_SETFL, flags) == -1) { + return -1; + } + + return 0; +} + +/* + * Address Conversion Functions + */ + +const char* +mrb_hal_socket_inet_ntop(int af, const void *src, char *dst, size_t size) +{ + return inet_ntop(af, src, dst, (socklen_t)size); +} + +int +mrb_hal_socket_inet_pton(int af, const char *src, void *dst) +{ + return inet_pton(af, src, dst); +} + +/* + * Platform-Specific Socket Features + */ + +mrb_value +mrb_hal_socket_sockaddr_un(mrb_state *mrb, const char *path, size_t pathlen) +{ + struct sockaddr_un *sunp; + + if (pathlen > sizeof(sunp->sun_path) - 1) { + mrb_raisef(mrb, mrb_class_get_id(mrb, MRB_SYM(ArgumentError)), + "too long unix socket path (max: %d bytes)", + (int)sizeof(sunp->sun_path) - 1); + } + + mrb_value s = mrb_str_new_capa(mrb, sizeof(struct sockaddr_un)); + sunp = (struct sockaddr_un*)RSTRING_PTR(s); + +#if defined(HAVE_SA_LEN) && HAVE_SA_LEN + sunp->sun_len = sizeof(struct sockaddr_un); +#endif + + sunp->sun_family = AF_UNIX; + memcpy(sunp->sun_path, path, pathlen); + sunp->sun_path[pathlen] = '\0'; + mrb_str_resize(mrb, s, sizeof(struct sockaddr_un)); + + return s; +} + +int +mrb_hal_socket_socketpair(mrb_state *mrb, int domain, int type, int protocol, int sv[2]) +{ + (void)mrb; + return socketpair(domain, type, protocol, sv); +} + +mrb_value +mrb_hal_socket_unix_path(mrb_state *mrb, const char *sockaddr, size_t socklen) +{ + const struct sockaddr *sa = (const struct sockaddr*)sockaddr; + + if (sa->sa_family != AF_UNIX) { + mrb_raise(mrb, mrb_class_get_id(mrb, MRB_SYM(SocketError)), "need AF_UNIX address"); + } + + if (socklen < offsetof(struct sockaddr_un, sun_path) + 1) { + return mrb_str_new(mrb, "", 0); + } + + return mrb_str_new_cstr(mrb, ((const struct sockaddr_un*)sockaddr)->sun_path); +} + +mrb_value +mrb_hal_socket_ip_address_list(mrb_state *mrb) +{ + struct ifaddrs *ifap = NULL; + if (getifaddrs(&ifap) != 0) { + mrb_sys_fail(mrb, "getifaddrs"); + } + mrb_value ary = mrb_ary_new(mrb); + int arena_idx = mrb_gc_arena_save(mrb); + for (struct ifaddrs *ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) { + if (ifa->ifa_addr == NULL) continue; + socklen_t salen; + switch (ifa->ifa_addr->sa_family) { + case AF_INET: salen = sizeof(struct sockaddr_in); break; + case AF_INET6: salen = sizeof(struct sockaddr_in6); break; + default: continue; + } + mrb_ary_push(mrb, ary, mrb_str_new(mrb, (const char*)ifa->ifa_addr, salen)); + mrb_gc_arena_restore(mrb, arena_idx); + } + freeifaddrs(ifap); + return ary; +} diff --git a/mrbgems/mruby-socket/ports/win/socket_hal.c b/mrbgems/mruby-socket/ports/win/socket_hal.c new file mode 100644 index 0000000000..de01226668 --- /dev/null +++ b/mrbgems/mruby-socket/ports/win/socket_hal.c @@ -0,0 +1,320 @@ +/* +** socket_hal.c - Windows HAL implementation for mruby-socket +** +** See Copyright Notice in mruby.h +** +** Windows implementation for socket operations using Winsock APIs. +** Supported platforms: Windows, MinGW +*/ + +#ifndef _WIN32_WINNT +#define _WIN32_WINNT 0x0501 // need Windows XP or later +#endif + +#include +#include +#include +#include +#include +#include "socket_hal.h" +#include +#include +#include +#include +#include +#include + +/* + * Socket HAL Initialization/Finalization + */ + +void +mrb_hal_socket_init(mrb_state *mrb) +{ + WSADATA wsaData; + int result = WSAStartup(MAKEWORD(2, 2), &wsaData); + if (result != NO_ERROR) { + mrb_raise(mrb, mrb_class_get_id(mrb, MRB_SYM(RuntimeError)), "WSAStartup failed"); + } +} + +void +mrb_hal_socket_final(mrb_state *mrb) +{ + (void)mrb; + WSACleanup(); +} + +/* + * Error Handling + */ + +/* Map a Winsock error code to a POSIX errno value. Each case is guarded + * with #ifdef so older MSVC CRTs that lack a particular Exxx still build; + * unknown codes fall back to EIO so mrb_sys_fail still produces a non-zero + * SystemCallError rather than reporting "success." */ +static int +wsa_to_errno(int wsa_err) +{ + switch (wsa_err) { + case 0: return 0; +#ifdef EINTR + case WSAEINTR: return EINTR; +#endif +#ifdef EBADF + case WSAEBADF: return EBADF; +#endif +#ifdef EACCES + case WSAEACCES: return EACCES; +#endif +#ifdef EFAULT + case WSAEFAULT: return EFAULT; +#endif +#ifdef EINVAL + case WSAEINVAL: return EINVAL; +#endif +#ifdef EMFILE + case WSAEMFILE: return EMFILE; +#endif +#ifdef EWOULDBLOCK + case WSAEWOULDBLOCK: return EWOULDBLOCK; +#endif +#ifdef EINPROGRESS + case WSAEINPROGRESS: return EINPROGRESS; +#endif +#ifdef EALREADY + case WSAEALREADY: return EALREADY; +#endif +#ifdef ENOTSOCK + case WSAENOTSOCK: return ENOTSOCK; +#endif +#ifdef EDESTADDRREQ + case WSAEDESTADDRREQ: return EDESTADDRREQ; +#endif +#ifdef EMSGSIZE + case WSAEMSGSIZE: return EMSGSIZE; +#endif +#ifdef EPROTOTYPE + case WSAEPROTOTYPE: return EPROTOTYPE; +#endif +#ifdef ENOPROTOOPT + case WSAENOPROTOOPT: return ENOPROTOOPT; +#endif +#ifdef EPROTONOSUPPORT + case WSAEPROTONOSUPPORT: return EPROTONOSUPPORT; +#endif +#ifdef EOPNOTSUPP + case WSAEOPNOTSUPP: return EOPNOTSUPP; +#endif +#ifdef EAFNOSUPPORT + case WSAEAFNOSUPPORT: return EAFNOSUPPORT; + case WSAEPFNOSUPPORT: return EAFNOSUPPORT; + case WSAESOCKTNOSUPPORT: return EAFNOSUPPORT; +#endif +#ifdef EADDRINUSE + case WSAEADDRINUSE: return EADDRINUSE; +#endif +#ifdef EADDRNOTAVAIL + case WSAEADDRNOTAVAIL: return EADDRNOTAVAIL; +#endif +#ifdef ENETDOWN + case WSAENETDOWN: return ENETDOWN; +#endif +#ifdef ENETUNREACH + case WSAENETUNREACH: return ENETUNREACH; +#endif +#ifdef ENETRESET + case WSAENETRESET: return ENETRESET; +#endif +#ifdef ECONNABORTED + case WSAECONNABORTED: return ECONNABORTED; +#endif +#ifdef ECONNRESET + case WSAECONNRESET: return ECONNRESET; +#endif +#ifdef ENOBUFS + case WSAENOBUFS: return ENOBUFS; +#endif +#ifdef EISCONN + case WSAEISCONN: return EISCONN; +#endif +#ifdef ENOTCONN + case WSAENOTCONN: return ENOTCONN; +#endif +#ifdef ETIMEDOUT + case WSAETIMEDOUT: return ETIMEDOUT; +#endif +#ifdef ECONNREFUSED + case WSAECONNREFUSED: return ECONNREFUSED; +#endif +#ifdef EHOSTUNREACH + case WSAEHOSTUNREACH: return EHOSTUNREACH; +#endif +#ifdef ENAMETOOLONG + case WSAENAMETOOLONG: return ENAMETOOLONG; +#endif + default: return EIO; + } +} + +void +mrb_hal_socket_set_errno_from_last_error(void) +{ + errno = wsa_to_errno(WSAGetLastError()); +} + +/* + * Socket Control Operations + */ + +int +mrb_hal_socket_set_nonblock(mrb_state *mrb, int fd, int nonblock) +{ + (void)mrb; + u_long mode = nonblock ? 1 : 0; + int result = ioctlsocket(fd, FIONBIO, &mode); + if (result != NO_ERROR) { + mrb_hal_socket_set_errno_from_last_error(); + return -1; + } + return 0; +} + +/* + * Address Conversion Functions + */ + +const char* +mrb_hal_socket_inet_ntop(int af, const void *src, char *dst, size_t size) +{ + if (af == AF_INET) { + struct sockaddr_in in = {0}; + in.sin_family = AF_INET; + memcpy(&in.sin_addr, src, sizeof(struct in_addr)); + if (getnameinfo((struct sockaddr*)&in, sizeof(struct sockaddr_in), + dst, (DWORD)size, NULL, 0, NI_NUMERICHOST) == 0) { + return dst; + } + return NULL; + } + else if (af == AF_INET6) { + struct sockaddr_in6 in = {0}; + in.sin6_family = AF_INET6; + memcpy(&in.sin6_addr, src, sizeof(struct in6_addr)); + if (getnameinfo((struct sockaddr*)&in, sizeof(struct sockaddr_in6), + dst, (DWORD)size, NULL, 0, NI_NUMERICHOST) == 0) { + return dst; + } + return NULL; + } + return NULL; +} + +int +mrb_hal_socket_inet_pton(int af, const char *src, void *dst) +{ + struct addrinfo hints = {0}; + hints.ai_family = af; + hints.ai_flags = AI_NUMERICHOST; + + struct addrinfo *res; + if (getaddrinfo(src, NULL, &hints, &res) != 0) { + return 0; /* Invalid address */ + } + + if (res == NULL) { + return 0; + } + + if (af == AF_INET && res->ai_family == AF_INET) { + memcpy(dst, &((struct sockaddr_in*)res->ai_addr)->sin_addr, sizeof(struct in_addr)); + freeaddrinfo(res); + return 1; + } + else if (af == AF_INET6 && res->ai_family == AF_INET6) { + memcpy(dst, &((struct sockaddr_in6*)res->ai_addr)->sin6_addr, sizeof(struct in6_addr)); + freeaddrinfo(res); + return 1; + } + + freeaddrinfo(res); + return 0; +} + +/* + * Platform-Specific Socket Features + */ + +mrb_value +mrb_hal_socket_sockaddr_un(mrb_state *mrb, const char *path, size_t pathlen) +{ + (void)path; + (void)pathlen; + mrb_raise(mrb, mrb_class_get_id(mrb, MRB_SYM(NotImplementedError)), + "sockaddr_un unsupported on Windows"); + return mrb_nil_value(); +} + +int +mrb_hal_socket_socketpair(mrb_state *mrb, int domain, int type, int protocol, int sv[2]) +{ + (void)mrb; + (void)domain; + (void)type; + (void)protocol; + (void)sv; + /* socketpair is not supported on Windows */ + errno = ENOSYS; + return -1; +} + +mrb_value +mrb_hal_socket_unix_path(mrb_state *mrb, const char *sockaddr, size_t socklen) +{ + (void)sockaddr; + (void)socklen; + mrb_raise(mrb, mrb_class_get_id(mrb, MRB_SYM(NotImplementedError)), + "unix_path unsupported on Windows"); + return mrb_nil_value(); +} + +mrb_value +mrb_hal_socket_ip_address_list(mrb_state *mrb) +{ + /* MSDN recommends 15 KiB as the initial buffer size to handle most + adapter configurations in a single call. */ + ULONG buflen = 15000; + IP_ADAPTER_ADDRESSES *adapters = (IP_ADAPTER_ADDRESSES*)mrb_malloc(mrb, buflen); + ULONG ret = ERROR_BUFFER_OVERFLOW; + for (int retries = 0; retries < 3 && ret == ERROR_BUFFER_OVERFLOW; retries++) { + ret = GetAdaptersAddresses(AF_UNSPEC, + GAA_FLAG_SKIP_ANYCAST | GAA_FLAG_SKIP_MULTICAST | GAA_FLAG_SKIP_DNS_SERVER, + NULL, adapters, &buflen); + if (ret == ERROR_BUFFER_OVERFLOW) { + adapters = (IP_ADAPTER_ADDRESSES*)mrb_realloc(mrb, adapters, buflen); + } + } + if (ret != ERROR_SUCCESS) { + mrb_free(mrb, adapters); + mrb_raisef(mrb, mrb_class_get_id(mrb, MRB_SYM(SocketError)), + "GetAdaptersAddresses failed (Win32 error %u)", (unsigned int)ret); + } + + mrb_value ary = mrb_ary_new(mrb); + int arena_idx = mrb_gc_arena_save(mrb); + for (IP_ADAPTER_ADDRESSES *ad = adapters; ad != NULL; ad = ad->Next) { + for (IP_ADAPTER_UNICAST_ADDRESS *ua = ad->FirstUnicastAddress; ua != NULL; ua = ua->Next) { + SOCKADDR *sa = ua->Address.lpSockaddr; + int salen; + switch (sa->sa_family) { + case AF_INET: salen = sizeof(SOCKADDR_IN); break; + case AF_INET6: salen = sizeof(SOCKADDR_IN6); break; + default: continue; + } + mrb_ary_push(mrb, ary, mrb_str_new(mrb, (const char*)sa, salen)); + mrb_gc_arena_restore(mrb, arena_idx); + } + } + mrb_free(mrb, adapters); + return ary; +} diff --git a/mrbgems/mruby-socket/src/const.cstub b/mrbgems/mruby-socket/src/const.cstub index f176513b98..592fe517be 100644 --- a/mrbgems/mruby-socket/src/const.cstub +++ b/mrbgems/mruby-socket/src/const.cstub @@ -385,6 +385,21 @@ #if defined(SO_TYPE) define_const(SO_TYPE); #endif +#if defined(SO_INCOMING_CPU) + define_const(SO_INCOMING_CPU); +#endif +#if defined(SO_INCOMING_NAPI_ID) + define_const(SO_INCOMING_NAPI_ID); +#endif +#if defined(SO_RTABLE) + define_const(SO_RTABLE); +#endif +#if defined(SO_SETFIB) + define_const(SO_SETFIB); +#endif +#if defined(SO_USER_COOKIE) + define_const(SO_USER_COOKIE); +#endif #if defined(SOCK_DGRAM) define_const(SOCK_DGRAM); #endif @@ -457,3 +472,9 @@ #if defined(TCP_WINDOW_CLAMP) define_const(TCP_WINDOW_CLAMP); #endif +#if defined(TCP_KEEPALIVE) + define_const(TCP_KEEPALIVE); +#endif +#if defined(TCP_CONNECTION_INFO) + define_const(TCP_CONNECTION_INFO); +#endif diff --git a/mrbgems/mruby-socket/src/const.def b/mrbgems/mruby-socket/src/const.def index b54bb84195..4275ed998b 100644 --- a/mrbgems/mruby-socket/src/const.def +++ b/mrbgems/mruby-socket/src/const.def @@ -136,6 +136,11 @@ SO_SNDTIMEO SO_SPLICE SO_TIMESTAMP SO_TYPE +SO_INCOMING_CPU +SO_INCOMING_NAPI_ID +SO_RTABLE +SO_SETFIB +SO_USER_COOKIE SOCK_DGRAM SOCK_RAW @@ -163,3 +168,5 @@ TCP_QUICKACK TCP_SACK_ENABLE TCP_SYNCNT TCP_WINDOW_CLAMP +TCP_KEEPALIVE +TCP_CONNECTION_INFO diff --git a/mrbgems/mruby-socket/src/socket.c b/mrbgems/mruby-socket/src/socket.c index a9ce33d2e9..ffb4c63141 100644 --- a/mrbgems/mruby-socket/src/socket.c +++ b/mrbgems/mruby-socket/src/socket.c @@ -6,12 +6,9 @@ #ifdef _WIN32 #define _WIN32_WINNT 0x0501 - #include #include #include - #include - #define SHUT_RDWR SD_BOTH typedef int fsize_t; #else @@ -30,18 +27,102 @@ #include -#include "mruby.h" -#include "mruby/array.h" -#include "mruby/class.h" -#include "mruby/data.h" -#include "mruby/numeric.h" -#include "mruby/string.h" -#include "mruby/variable.h" -#include "mruby/error.h" -#include "mruby/internal.h" -#include "mruby/presym.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "socket_hal.h" + +/* Address family information for compact lookup table */ +typedef struct { + int family; /* AF_INET, AF_INET6, etc. */ + const char *name; /* "AF_INET", "AF_INET6", etc. */ + int port_offset; /* Offset to port field in sockaddr structure */ + mrb_bool has_port; /* TRUE if this family has a port field */ +} af_info_t; + +/* Protocol family lookup table for socket option inspection */ +typedef struct { + int family; /* PF_INET, PF_INET6, etc. */ + const char *name; /* "INET", "INET6", etc. */ +} pf_info_t; + +/* Compact address family lookup table (memory-efficient) */ +static const af_info_t af_table[] = { + /* Internet Protocol families with port numbers */ + {AF_INET, "AF_INET", offsetof(struct sockaddr_in, sin_port), TRUE}, + {AF_INET6, "AF_INET6", offsetof(struct sockaddr_in6, sin6_port), TRUE}, + + /* Local/Unix domain sockets without port numbers */ +#ifdef AF_UNIX + {AF_UNIX, "AF_UNIX", -1, FALSE}, +#endif +#ifdef AF_LOCAL + {AF_LOCAL, "AF_LOCAL", -1, FALSE}, +#endif + + /* Additional protocol families (platform-dependent) */ +#ifdef AF_LINK + {AF_LINK, "AF_LINK", -1, FALSE}, +#endif +#ifdef AF_ROUTE + {AF_ROUTE, "AF_ROUTE", -1, FALSE}, +#endif +#ifdef AF_UNSPEC + {AF_UNSPEC, "AF_UNSPEC", -1, FALSE}, +#endif +}; + +#define AF_TABLE_SIZE (sizeof(af_table) / sizeof(af_table[0])) + +/* Get address family info for given family constant (compact linear search) */ +static inline const af_info_t *get_af_info(int family) { + for (size_t i = 0; i < AF_TABLE_SIZE; i++) { + if (af_table[i].family == family) { + return &af_table[i]; + } + } + return NULL; +} + +/* Compact protocol family lookup table (memory-efficient) */ +static const pf_info_t pf_table[] = { + {PF_INET, "INET"}, +#ifdef PF_INET6 + {PF_INET6, "INET6"}, +#endif +#ifdef PF_IPX + {PF_IPX, "IPX"}, +#endif +#ifdef PF_AX25 + {PF_AX25, "AX25"}, +#endif +#ifdef PF_APPLETALK + {PF_APPLETALK, "APPLETALK"}, +#endif +#ifdef PF_UNIX + {PF_UNIX, "UNIX"}, +#endif +}; -#include "mruby/ext/io.h" +#define PF_TABLE_SIZE (sizeof(pf_table) / sizeof(pf_table[0])) + +/* Get protocol family name for given family constant (compact linear search) */ +static inline const char *get_pf_name(int family) { + for (size_t i = 0; i < PF_TABLE_SIZE; i++) { + if (pf_table[i].family == family) { + return pf_table[i].name; + } + } + return NULL; +} #if !defined(HAVE_SA_LEN) #if (defined(BSD) && (BSD >= 199006)) @@ -53,90 +134,78 @@ #define E_SOCKET_ERROR mrb_class_get_id(mrb, MRB_SYM(SocketError)) -#ifdef _WIN32 -static const char *inet_ntop(int af, const void *src, char *dst, socklen_t cnt) +/* Raise a SystemCallError for the most recent socket-API failure. + * On Windows the HAL translates WSAGetLastError() into errno first; + * on POSIX errno is already set, so this is just mrb_sys_fail. */ +static mrb_noreturn void +sock_sys_fail(mrb_state *mrb, const char *mesg) { - if (af == AF_INET) - { - struct sockaddr_in in = {0}; + mrb_hal_socket_set_errno_from_last_error(); + mrb_sys_fail(mrb, mesg); +} - in.sin_family = AF_INET; - memcpy(&in.sin_addr, src, sizeof(struct in_addr)); - getnameinfo((struct sockaddr *)&in, sizeof(struct - sockaddr_in), dst, cnt, NULL, 0, NI_NUMERICHOST); - return dst; - } - else if (af == AF_INET6) - { - struct sockaddr_in6 in = {0}; +struct gen_addrinfo_args { + struct RClass *klass; + struct addrinfo *addrinfo; +}; - in.sin6_family = AF_INET6; - memcpy(&in.sin6_addr, src, sizeof(struct in_addr6)); - getnameinfo((struct sockaddr *)&in, sizeof(struct - sockaddr_in6), dst, cnt, NULL, 0, NI_NUMERICHOST); - return dst; +/* Helper to generate array of Addrinfo objects from addrinfo linked list */ +static mrb_value +gen_addrinfo(mrb_state *mrb, mrb_value args) +{ + mrb_value ary = mrb_ary_new(mrb); + int arena_idx = mrb_gc_arena_save(mrb); /* ary must be on arena! */ + struct gen_addrinfo_args *a = (struct gen_addrinfo_args*)mrb_cptr(args); + + for (struct addrinfo *res = a->addrinfo; res != NULL; res = res->ai_next) { + mrb_value sa = mrb_str_new(mrb, (char*)res->ai_addr, res->ai_addrlen); + mrb_value args[4] = {sa, mrb_fixnum_value(res->ai_family), mrb_fixnum_value(res->ai_socktype), mrb_fixnum_value(res->ai_protocol)}; + mrb_value ai = mrb_obj_new(mrb, a->klass, 4, args); + mrb_ary_push(mrb, ary, ai); + mrb_gc_arena_restore(mrb, arena_idx); } - return NULL; + return ary; } -static int inet_pton(int af, const char *src, void *dst) +/* Helper to free addrinfo structure - used with mrb_ensure */ +static mrb_value +free_addrinfo(mrb_state *mrb, mrb_value addrinfo) { - struct addrinfo hints = {0}; - struct addrinfo *res, *ressave; - - hints.ai_family = af; - - if (getaddrinfo(src, NULL, &hints, &res) != 0) - { - printf("Couldn't resolve host %s\n", src); - return -1; - } - - ressave = res; - - while (res) - { - memcpy(dst, res->ai_addr, res->ai_addrlen); - res = res->ai_next; - } - - freeaddrinfo(ressave); - return 0; + freeaddrinfo((struct addrinfo*)mrb_cptr(addrinfo)); + return mrb_nil_value(); } -#endif - +/* + * call-seq: + * Addrinfo.getaddrinfo(nodename, servname, family=nil, socktype=nil, protocol=nil, flags=0) -> array + * + * Returns an array of Addrinfo objects for the given nodename and servname. + * + * Addrinfo.getaddrinfo("localhost", "http") + * Addrinfo.getaddrinfo("www.example.com", 80, Socket::AF_INET) + */ static mrb_value mrb_addrinfo_getaddrinfo(mrb_state *mrb, mrb_value klass) { - struct addrinfo hints = {0}, *res0, *res; - mrb_value ai, ary, family, lastai, nodename, protocol, sa, service, socktype; - mrb_int flags; - int arena_idx, error; - const char *hostname = NULL, *servname = NULL; - - ary = mrb_ary_new(mrb); - arena_idx = mrb_gc_arena_save(mrb); /* ary must be on arena! */ + struct addrinfo hints = {0}, *addr; + mrb_value family, protocol, service, socktype; + mrb_int flags = 0; + const char *hostname; family = socktype = protocol = mrb_nil_value(); - flags = 0; - mrb_get_args(mrb, "oo|oooi", &nodename, &service, &family, &socktype, &protocol, &flags); - - if (mrb_string_p(nodename)) { - hostname = RSTRING_CSTR(mrb, nodename); - } else if (mrb_nil_p(nodename)) { - hostname = NULL; - } else { - mrb_raise(mrb, E_TYPE_ERROR, "nodename must be String or nil"); - } + mrb_get_args(mrb, "z!o|oooi", &hostname, &service, &family, &socktype, &protocol, &flags); + const char *servname = NULL; if (mrb_string_p(service)) { servname = RSTRING_CSTR(mrb, service); - } else if (mrb_integer_p(service)) { + } + else if (mrb_integer_p(service)) { servname = RSTRING_PTR(mrb_integer_to_str(mrb, service, 10)); - } else if (mrb_nil_p(service)) { + } + else if (mrb_nil_p(service)) { servname = NULL; - } else { + } + else { mrb_raise(mrb, E_TYPE_ERROR, "service must be String, Integer, or nil"); } @@ -154,52 +223,42 @@ mrb_addrinfo_getaddrinfo(mrb_state *mrb, mrb_value klass) hints.ai_protocol = (int)mrb_integer(protocol); } - lastai = mrb_cv_get(mrb, klass, MRB_SYM(_lastai)); - if (mrb_cptr_p(lastai)) { - freeaddrinfo((struct addrinfo*)mrb_cptr(lastai)); - mrb_cv_set(mrb, klass, MRB_SYM(_lastai), mrb_nil_value()); - } - - error = getaddrinfo(hostname, servname, &hints, &res0); + int error = getaddrinfo(hostname, servname, &hints, &addr); if (error) { mrb_raisef(mrb, E_SOCKET_ERROR, "getaddrinfo: %s", gai_strerror(error)); } - mrb_cv_set(mrb, klass, MRB_SYM(_lastai), mrb_cptr_value(mrb, res0)); - - for (res = res0; res != NULL; res = res->ai_next) { - sa = mrb_str_new(mrb, (char*)res->ai_addr, res->ai_addrlen); - ai = mrb_funcall_id(mrb, klass, MRB_SYM(new), 4, sa, mrb_fixnum_value(res->ai_family), mrb_fixnum_value(res->ai_socktype), mrb_fixnum_value(res->ai_protocol)); - mrb_ary_push(mrb, ary, ai); - mrb_gc_arena_restore(mrb, arena_idx); - } - - freeaddrinfo(res0); - mrb_cv_set(mrb, klass, MRB_SYM(_lastai), mrb_nil_value()); - return ary; + struct gen_addrinfo_args args = {mrb_class_ptr(klass), addr}; + return mrb_ensure(mrb, gen_addrinfo, mrb_cptr_value(mrb, &args), free_addrinfo, mrb_cptr_value(mrb, addr)); } +/* + * call-seq: + * addrinfo.getnameinfo(flags=0) -> [hostname, service] + * + * Returns the hostname and service name for the address. + * + * addr.getnameinfo #=> ["localhost", "http"] + * addr.getnameinfo(Socket::NI_NUMERICHOST) #=> ["127.0.0.1", "80"] + */ static mrb_value mrb_addrinfo_getnameinfo(mrb_state *mrb, mrb_value self) { - mrb_int flags; - mrb_value ary, host, sastr, serv; - int error; + mrb_int flags = 0; - flags = 0; mrb_get_args(mrb, "|i", &flags); - host = mrb_str_new_capa(mrb, NI_MAXHOST); - serv = mrb_str_new_capa(mrb, NI_MAXSERV); - sastr = mrb_iv_get(mrb, self, MRB_IVSYM(sockaddr)); + mrb_value host = mrb_str_new_capa(mrb, NI_MAXHOST); + mrb_value serv = mrb_str_new_capa(mrb, NI_MAXSERV); + mrb_value sastr = mrb_iv_get(mrb, self, MRB_IVSYM(sockaddr)); if (!mrb_string_p(sastr)) { mrb_raise(mrb, E_SOCKET_ERROR, "invalid sockaddr"); } - error = getnameinfo((struct sockaddr *)RSTRING_PTR(sastr), (socklen_t)RSTRING_LEN(sastr), RSTRING_PTR(host), NI_MAXHOST, RSTRING_PTR(serv), NI_MAXSERV, (int)flags); + int error = getnameinfo((struct sockaddr*)RSTRING_PTR(sastr), (socklen_t)RSTRING_LEN(sastr), RSTRING_PTR(host), NI_MAXHOST, RSTRING_PTR(serv), NI_MAXSERV, (int)flags); if (error) { mrb_raisef(mrb, E_SOCKET_ERROR, "getnameinfo: %s", gai_strerror(error)); } - ary = mrb_ary_new_capa(mrb, 2); + mrb_value ary = mrb_ary_new_capa(mrb, 2); mrb_str_resize(mrb, host, strlen(RSTRING_PTR(host))); mrb_ary_push(mrb, ary, host); mrb_str_resize(mrb, serv, strlen(RSTRING_PTR(serv))); @@ -207,50 +266,50 @@ mrb_addrinfo_getnameinfo(mrb_state *mrb, mrb_value self) return ary; } -#ifndef _WIN32 +/* + * call-seq: + * addrinfo.unix_path -> string + * + * Returns the Unix domain socket path. + * + * addr.unix_path #=> "/tmp/socket" + */ static mrb_value mrb_addrinfo_unix_path(mrb_state *mrb, mrb_value self) { - mrb_value sastr; + mrb_value sastr = mrb_iv_get(mrb, self, MRB_IVSYM(sockaddr)); - sastr = mrb_iv_get(mrb, self, MRB_IVSYM(sockaddr)); - if (((struct sockaddr *)RSTRING_PTR(sastr))->sa_family != AF_UNIX) - mrb_raise(mrb, E_SOCKET_ERROR, "need AF_UNIX address"); - if (RSTRING_LEN(sastr) < (mrb_int)offsetof(struct sockaddr_un, sun_path) + 1) { - return mrb_str_new(mrb, "", 0); - } else { - return mrb_str_new_cstr(mrb, ((struct sockaddr_un *)RSTRING_PTR(sastr))->sun_path); + if (!mrb_string_p(sastr)) { + mrb_raise(mrb, E_SOCKET_ERROR, "invalid sockaddr"); } + + return mrb_hal_socket_unix_path(mrb, RSTRING_PTR(sastr), (size_t)RSTRING_LEN(sastr)); } -#endif +/* Helper to convert sockaddr to address list array [family, port, host, host] */ static mrb_value sa2addrlist(mrb_state *mrb, const struct sockaddr *sa, socklen_t salen) { - mrb_value ary, host; - unsigned short port; - const char *afstr; - - switch (sa->sa_family) { - case AF_INET: - afstr = "AF_INET"; - port = ((struct sockaddr_in *)sa)->sin_port; - break; - case AF_INET6: - afstr = "AF_INET6"; - port = ((struct sockaddr_in6 *)sa)->sin6_port; - break; - default: + /* Use lookup table for O(1) address family dispatch */ + const af_info_t *af_info = get_af_info(sa->sa_family); + if (!af_info) { mrb_raise(mrb, E_ARGUMENT_ERROR, "bad af"); return mrb_nil_value(); } + + /* Extract port using table-driven offset calculation */ + unsigned short port = 0; + if (af_info->has_port) { + port = *(unsigned short*)((char*)sa + af_info->port_offset); + } port = ntohs(port); - host = mrb_str_new_capa(mrb, NI_MAXHOST); + mrb_value host = mrb_str_new_capa(mrb, NI_MAXHOST); if (getnameinfo(sa, salen, RSTRING_PTR(host), NI_MAXHOST, NULL, 0, NI_NUMERICHOST) == -1) - mrb_sys_fail(mrb, "getnameinfo"); + sock_sys_fail(mrb, "getnameinfo"); mrb_str_resize(mrb, host, strlen(RSTRING_PTR(host))); - ary = mrb_ary_new_capa(mrb, 4); - mrb_ary_push(mrb, ary, mrb_str_new_cstr(mrb, afstr)); + + mrb_value ary = mrb_ary_new_capa(mrb, 4); + mrb_ary_push(mrb, ary, mrb_str_new_cstr(mrb, af_info->name)); mrb_ary_push(mrb, ary, mrb_fixnum_value(port)); mrb_ary_push(mrb, ary, host); mrb_ary_push(mrb, ary, host); @@ -259,38 +318,45 @@ sa2addrlist(mrb_state *mrb, const struct sockaddr *sa, socklen_t salen) int mrb_io_fileno(mrb_state *mrb, mrb_value io); +/* Helper to extract file descriptor from socket object */ static int socket_fd(mrb_state *mrb, mrb_value sock) { return mrb_io_fileno(mrb, sock); } +/* Helper to get address family of socket by file descriptor */ static int socket_family(int s) { struct sockaddr_storage ss; - socklen_t salen; + socklen_t salen = sizeof(ss); - salen = sizeof(ss); - if (getsockname(s, (struct sockaddr *)&ss, &salen) == -1) + if (getsockname(s, (struct sockaddr*)&ss, &salen) == -1) return AF_UNSPEC; return ss.ss_family; } +/* + * call-seq: + * basicsocket.getpeereid -> [euid, egid] + * + * Returns the effective user ID and group ID of the peer process. + * Only available on systems that support getpeereid(). + * + * euid, egid = sock.getpeereid + */ static mrb_value mrb_basicsocket_getpeereid(mrb_state *mrb, mrb_value self) { #ifdef HAVE_GETPEEREID - mrb_value ary; gid_t egid; uid_t euid; - int s; - - s = socket_fd(mrb, self); + int s = socket_fd(mrb, self); if (getpeereid(s, &euid, &egid) != 0) - mrb_sys_fail(mrb, "getpeereid"); + sock_sys_fail(mrb, "getpeereid"); - ary = mrb_ary_new_capa(mrb, 2); + mrb_value ary = mrb_ary_new_capa(mrb, 2); mrb_ary_push(mrb, ary, mrb_fixnum_value((mrb_int)euid)); mrb_ary_push(mrb, ary, mrb_fixnum_value((mrb_int)egid)); return ary; @@ -300,185 +366,465 @@ mrb_basicsocket_getpeereid(mrb_state *mrb, mrb_value self) #endif } +/* + * call-seq: + * basicsocket.getpeername -> string + * + * Returns the remote socket address as a packed sockaddr string. + * + * sockaddr = sock.getpeername + */ static mrb_value mrb_basicsocket_getpeername(mrb_state *mrb, mrb_value self) { struct sockaddr_storage ss; - socklen_t salen; + socklen_t salen = sizeof(ss); - salen = sizeof(ss); - if (getpeername(socket_fd(mrb, self), (struct sockaddr *)&ss, &salen) != 0) - mrb_sys_fail(mrb, "getpeername"); + if (getpeername(socket_fd(mrb, self), (struct sockaddr*)&ss, &salen) != 0) + sock_sys_fail(mrb, "getpeername"); return mrb_str_new(mrb, (char*)&ss, salen); } +/* + * call-seq: + * basicsocket.getsockname -> string + * + * Returns the local socket address as a packed sockaddr string. + * + * sockaddr = sock.getsockname + */ static mrb_value mrb_basicsocket_getsockname(mrb_state *mrb, mrb_value self) { struct sockaddr_storage ss; - socklen_t salen; + socklen_t salen = sizeof(ss); - salen = sizeof(ss); - if (getsockname(socket_fd(mrb, self), (struct sockaddr *)&ss, &salen) != 0) - mrb_sys_fail(mrb, "getsockname"); + if (getsockname(socket_fd(mrb, self), (struct sockaddr*)&ss, &salen) != 0) + sock_sys_fail(mrb, "getsockname"); return mrb_str_new(mrb, (char*)&ss, salen); } +/* Helper to get Socket::Option class reference */ +static struct RClass * +socket_option_class(mrb_state *mrb) +{ + return mrb_class_get_under_id(mrb, mrb_class_get_id(mrb, MRB_SYM(Socket)), MRB_SYM(Option)); +} + +/* + * call-seq: + * Socket::Option.new(family, level, optname, data) -> socket_option + * + * Creates a new Socket::Option object with the given parameters. + * + * opt = Socket::Option.new(Socket::AF_INET, Socket::SOL_SOCKET, Socket::SO_REUSEADDR, [1].pack("i")) + */ static mrb_value -mrb_basicsocket_getsockopt(mrb_state *mrb, mrb_value self) +socket_option_init(mrb_state *mrb, mrb_value self) { - char opt[8]; - int s; mrb_int family, level, optname; - mrb_value c, data; - socklen_t optlen; + mrb_value data; + + mrb_get_args(mrb, "iiio", &family, &level, &optname, &data); + mrb_iv_set(mrb, self, MRB_SYM(family), mrb_int_value(mrb, family)); + mrb_iv_set(mrb, self, MRB_SYM(level), mrb_int_value(mrb, level)); + mrb_iv_set(mrb, self, MRB_SYM(optname), mrb_int_value(mrb, optname)); + mrb_iv_set(mrb, self, MRB_SYM(data), data); + + return self; +} + +/* + * call-seq: + * Socket::Option.bool(family, level, optname, bool) -> socket_option + * + * Creates a new Socket::Option object from a boolean value. + * + * opt = Socket::Option.bool(Socket::AF_INET, Socket::SOL_SOCKET, Socket::SO_REUSEADDR, true) + */ +static mrb_value +socket_option_s_bool(mrb_state *mrb, mrb_value klass) +{ + mrb_value args[4]; + mrb_bool data; + + mrb_get_args(mrb, "ooob", &args[0], &args[1], &args[2], &data); + + int tmp = (int)data; + args[3] = mrb_str_new(mrb, (char*)&tmp, sizeof(int)); + return mrb_obj_new(mrb, mrb_class_ptr(klass), 4, args); +} + +/* + * call-seq: + * Socket::Option.int(family, level, optname, integer) -> socket_option + * + * Creates a new Socket::Option object from an integer value. + * + * opt = Socket::Option.int(Socket::AF_INET, Socket::SOL_SOCKET, Socket::SO_KEEPALIVE, 1) + */ +static mrb_value +socket_option_s_int(mrb_state *mrb, mrb_value klass) +{ + mrb_value args[4]; + mrb_int data; + + mrb_get_args(mrb, "oooi", &args[0], &args[1], &args[2], &data); + + int tmp = (int)data; + args[3] = mrb_str_new(mrb, (char*)&tmp, sizeof(int)); + return mrb_obj_new(mrb, mrb_class_ptr(klass), 4, args); +} + +/* + * call-seq: + * socket_option.family -> integer + * + * Returns the address family of the socket option. + * + * opt.family #=> Socket::AF_INET + */ +static mrb_value +socket_option_family(mrb_state *mrb, mrb_value self) +{ + return mrb_iv_get(mrb, self, MRB_SYM(family)); +} + +/* + * call-seq: + * socket_option.level -> integer + * + * Returns the protocol level of the socket option. + * + * opt.level #=> Socket::SOL_SOCKET + */ +static mrb_value +socket_option_level(mrb_state *mrb, mrb_value self) +{ + return mrb_iv_get(mrb, self, MRB_SYM(level)); +} + +/* + * call-seq: + * socket_option.optname -> integer + * + * Returns the option name of the socket option. + * + * opt.optname #=> Socket::SO_REUSEADDR + */ +static mrb_value +socket_option_optname(mrb_state *mrb, mrb_value self) +{ + return mrb_iv_get(mrb, self, MRB_SYM(optname)); +} + +/* + * call-seq: + * socket_option.data -> string + * + * Returns the raw data of the socket option as a string. + * + * opt.data #=> "\x01\x00\x00\x00" + */ +static mrb_value +socket_option_data(mrb_state *mrb, mrb_value self) +{ + return mrb_iv_get(mrb, self, MRB_SYM(data)); +} + +/* Helper to extract integer value from Socket::Option data */ +static int +option_int(mrb_state *mrb, mrb_value self) +{ + mrb_value data = mrb_obj_as_string(mrb, mrb_iv_get(mrb, self, MRB_SYM(data))); + + if (RSTRING_LEN(data) != sizeof(int)) { + mrb_raisef(mrb, E_TYPE_ERROR, "size differ; expected as sizeof(int)=%i but %i", (mrb_int)sizeof(int), RSTRING_LEN(data)); + } + + int tmp; + memcpy((char*)&tmp, RSTRING_PTR(data), sizeof(int)); + return tmp; +} + +/* + * call-seq: + * socket_option.int -> integer + * + * Returns the socket option data as an integer value. + * + * opt.int #=> 1 + */ +static mrb_value +socket_option_int(mrb_state *mrb, mrb_value self) +{ + int i = option_int(mrb, self); + return mrb_int_value(mrb, (mrb_int)i); +} + +/* + * call-seq: + * socket_option.bool -> true or false + * + * Returns the socket option data as a boolean value. + * + * opt.bool #=> true + */ +static mrb_value +socket_option_bool(mrb_state *mrb, mrb_value self) +{ + int i = option_int(mrb, self); + return mrb_bool_value((mrb_bool)i); +} + +/* Helper to raise not implemented error for unimplemented Socket::Option methods */ +static mrb_value +socket_option_notimp(mrb_state *mrb, mrb_value self) +{ + mrb_notimplement(mrb); + return mrb_nil_value(); +} + +/* + * call-seq: + * socket_option.inspect -> string + * + * Returns a string representation of the socket option for debugging. + * + * opt.inspect #=> "#" + */ +static mrb_value +socket_option_inspect(mrb_state *mrb, mrb_value self) +{ + mrb_value str = mrb_str_new_cstr(mrb, "#"); + + return str; +} + +/* + * call-seq: + * basicsocket.getsockopt(level, optname) -> string + * + * Gets a socket option. Returns the option value as a string. + * + * val = sock.getsockopt(Socket::SOL_SOCKET, Socket::SO_REUSEADDR) + */ +static mrb_value +mrb_basicsocket_getsockopt(mrb_state *mrb, mrb_value self) +{ + mrb_int level, optname; mrb_get_args(mrb, "ii", &level, &optname); - s = socket_fd(mrb, self); - optlen = sizeof(opt); + + int s = socket_fd(mrb, self); + char opt[8]; + socklen_t optlen = sizeof(opt); + if (getsockopt(s, (int)level, (int)optname, opt, &optlen) == -1) - mrb_sys_fail(mrb, "getsockopt"); - c = mrb_const_get(mrb, mrb_obj_value(mrb_class_get_id(mrb, MRB_SYM(Socket))), MRB_SYM(Option)); - family = socket_family(s); - data = mrb_str_new(mrb, opt, optlen); - return mrb_funcall_id(mrb, c, MRB_SYM(new), 4, mrb_fixnum_value(family), mrb_fixnum_value(level), mrb_fixnum_value(optname), data); + sock_sys_fail(mrb, "getsockopt"); + mrb_int family = socket_family(s); + mrb_value data = mrb_str_new(mrb, opt, optlen); + mrb_value args[4] = {mrb_fixnum_value(family), mrb_fixnum_value(level), mrb_fixnum_value(optname), data}; + return mrb_obj_new(mrb, socket_option_class(mrb), 4, args); } +/* + * call-seq: + * basicsocket.recv(maxlen, flags=0) -> string + * + * Receives data from the socket. + * + * data = sock.recv(1024) + * data = sock.recv(512, 0) + */ static mrb_value mrb_basicsocket_recv(mrb_state *mrb, mrb_value self) { - ssize_t n; mrb_int maxlen, flags = 0; - mrb_value buf; mrb_get_args(mrb, "i|i", &maxlen, &flags); - buf = mrb_str_new_capa(mrb, maxlen); - n = recv(socket_fd(mrb, self), RSTRING_PTR(buf), (fsize_t)maxlen, (int)flags); + + mrb_value buf = mrb_str_new_capa(mrb, maxlen); + ssize_t n = recv(socket_fd(mrb, self), RSTRING_PTR(buf), (fsize_t)maxlen, (int)flags); if (n == -1) - mrb_sys_fail(mrb, "recv"); + sock_sys_fail(mrb, "recv"); mrb_str_resize(mrb, buf, (mrb_int)n); return buf; } +/* + * call-seq: + * basicsocket._recvfrom(maxlen, flags=0) -> [data, sockaddr] + * + * Internal method to receive data and sender address from socket. + * Returns data and packed sockaddr. + */ static mrb_value mrb_basicsocket_recvfrom(mrb_state *mrb, mrb_value self) { - ssize_t n; mrb_int maxlen, flags = 0; - mrb_value ary, buf, sa; - socklen_t socklen; mrb_get_args(mrb, "i|i", &maxlen, &flags); - buf = mrb_str_new_capa(mrb, maxlen); - socklen = sizeof(struct sockaddr_storage); - sa = mrb_str_new_capa(mrb, socklen); - n = recvfrom(socket_fd(mrb, self), RSTRING_PTR(buf), (fsize_t)maxlen, (int)flags, (struct sockaddr *)RSTRING_PTR(sa), &socklen); + + mrb_value buf = mrb_str_new_capa(mrb, maxlen); + socklen_t socklen = sizeof(struct sockaddr_storage); + mrb_value sa = mrb_str_new_capa(mrb, socklen); + ssize_t n = recvfrom(socket_fd(mrb, self), RSTRING_PTR(buf), (fsize_t)maxlen, (int)flags, (struct sockaddr*)RSTRING_PTR(sa), &socklen); if (n == -1) - mrb_sys_fail(mrb, "recvfrom"); + sock_sys_fail(mrb, "recvfrom"); mrb_str_resize(mrb, buf, (mrb_int)n); mrb_str_resize(mrb, sa, (mrb_int)socklen); - ary = mrb_ary_new_capa(mrb, 2); + + mrb_value ary = mrb_ary_new_capa(mrb, 2); mrb_ary_push(mrb, ary, buf); mrb_ary_push(mrb, ary, sa); return ary; } +/* + * call-seq: + * basicsocket.send(mesg, flags) -> integer + * + * Sends data through the socket. Returns the number of bytes sent. + * + * bytes_sent = sock.send("Hello", 0) + */ static mrb_value mrb_basicsocket_send(mrb_state *mrb, mrb_value self) { - ssize_t n; mrb_int flags; - mrb_value dest, mesg; + mrb_value mesg; + mrb_value dest = mrb_nil_value(); - dest = mrb_nil_value(); mrb_get_args(mrb, "Si|S", &mesg, &flags, &dest); + + ssize_t n; if (mrb_nil_p(dest)) { n = send(socket_fd(mrb, self), RSTRING_PTR(mesg), (fsize_t)RSTRING_LEN(mesg), (int)flags); - } else { + } + else { n = sendto(socket_fd(mrb, self), RSTRING_PTR(mesg), (fsize_t)RSTRING_LEN(mesg), (int)flags, (const struct sockaddr*)RSTRING_PTR(dest), (fsize_t)RSTRING_LEN(dest)); } if (n == -1) - mrb_sys_fail(mrb, "send"); + sock_sys_fail(mrb, "send"); return mrb_fixnum_value((mrb_int)n); } +/* + * call-seq: + * basicsocket._setnonblock(flag) -> nil + * + * Internal method to set or unset non-blocking mode on the socket. + * + * sock._setnonblock(true) # enable non-blocking + * sock._setnonblock(false) # disable non-blocking + */ static mrb_value mrb_basicsocket_setnonblock(mrb_state *mrb, mrb_value self) { - int fd, flags; mrb_bool nonblocking; -#ifdef _WIN32 - u_long mode = 1; -#endif mrb_get_args(mrb, "b", &nonblocking); - fd = socket_fd(mrb, self); -#ifdef _WIN32 - flags = ioctlsocket(fd, FIONBIO, &mode); - if (flags != NO_ERROR) - mrb_sys_fail(mrb, "ioctlsocket"); -#else - flags = fcntl(fd, F_GETFL, 0); - if (flags == 1) - mrb_sys_fail(mrb, "fcntl"); - if (nonblocking) - flags |= O_NONBLOCK; - else - flags &= ~O_NONBLOCK; - if (fcntl(fd, F_SETFL, flags) == -1) - mrb_sys_fail(mrb, "fcntl"); -#endif + int fd = socket_fd(mrb, self); + + if (mrb_hal_socket_set_nonblock(mrb, fd, nonblocking) == -1) + sock_sys_fail(mrb, "set_nonblock"); + return mrb_nil_value(); } +/* + * call-seq: + * basicsocket.setsockopt(level, optname, optval) -> 0 + * + * Sets a socket option. Level and optname are constants, optval is the value. + * + * sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_REUSEADDR, 1) + */ static mrb_value mrb_basicsocket_setsockopt(mrb_state *mrb, mrb_value self) { - int s; - mrb_int argc, level = 0, optname; - mrb_value optval, so; + mrb_int level = 0, optname; + mrb_value so, optval; + mrb_int argc = mrb_get_args(mrb, "o|io", &so, &optname, &optval); - argc = mrb_get_args(mrb, "o|io", &so, &optname, &optval); if (argc == 3) { - if (!mrb_integer_p(so)) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "level is not an integer"); - } + mrb_ensure_int_type(mrb, so); level = mrb_integer(so); if (mrb_string_p(optval)) { /* that's good */ - } else if (mrb_true_p(optval) || mrb_false_p(optval)) { + } + else if (mrb_true_p(optval) || mrb_false_p(optval)) { mrb_int i = mrb_test(optval) ? 1 : 0; optval = mrb_str_new(mrb, (char*)&i, sizeof(i)); - } else if (mrb_integer_p(optval)) { + } + else if (mrb_integer_p(optval)) { if (optname == IP_MULTICAST_TTL || optname == IP_MULTICAST_LOOP) { char uc = (char)mrb_integer(optval); optval = mrb_str_new(mrb, &uc, sizeof(uc)); - } else { + } + else { mrb_int i = mrb_integer(optval); optval = mrb_str_new(mrb, (char*)&i, sizeof(i)); } - } else { + } + else { mrb_raise(mrb, E_ARGUMENT_ERROR, "optval should be true, false, an integer, or a string"); } - } else if (argc == 1) { - if (strcmp(mrb_obj_classname(mrb, so), "Socket::Option") != 0) + } + else if (argc == 1) { + if (!mrb_obj_is_instance_of(mrb, so, socket_option_class(mrb))) mrb_raise(mrb, E_ARGUMENT_ERROR, "not an instance of Socket::Option"); - level = mrb_as_int(mrb, mrb_funcall_id(mrb, so, MRB_SYM(level), 0)); - optname = mrb_as_int(mrb, mrb_funcall_id(mrb, so, MRB_SYM(optname), 0)); - optval = mrb_funcall_id(mrb, so, MRB_SYM(data), 0); + level = mrb_as_int(mrb, mrb_iv_get(mrb, so, MRB_SYM(level))); + optname = mrb_as_int(mrb, mrb_iv_get(mrb, so, MRB_SYM(optname))); + optval = mrb_iv_get(mrb, so, MRB_SYM(data)); mrb_ensure_string_type(mrb, optval); - } else { + } + else { mrb_argnum_error(mrb, argc, 3, 3); } - s = socket_fd(mrb, self); + int s = socket_fd(mrb, self); if (setsockopt(s, (int)level, (int)optname, RSTRING_PTR(optval), (socklen_t)RSTRING_LEN(optval)) == -1) - mrb_sys_fail(mrb, "setsockopt"); + sock_sys_fail(mrb, "setsockopt"); return mrb_fixnum_value(0); } +/* + * call-seq: + * basicsocket.shutdown(how=Socket::SHUT_RDWR) -> 0 + * + * Shuts down part of the socket connection. + * + * sock.shutdown(Socket::SHUT_RD) # shutdown reading + * sock.shutdown(Socket::SHUT_WR) # shutdown writing + * sock.shutdown(Socket::SHUT_RDWR) # shutdown both (default) + */ static mrb_value mrb_basicsocket_shutdown(mrb_state *mrb, mrb_value self) { @@ -486,18 +832,18 @@ mrb_basicsocket_shutdown(mrb_state *mrb, mrb_value self) mrb_get_args(mrb, "|i", &how); if (shutdown(socket_fd(mrb, self), (int)how) != 0) - mrb_sys_fail(mrb, "shutdown"); + sock_sys_fail(mrb, "shutdown"); return mrb_fixnum_value(0); } +/* Helper to set socket flag on IO object */ static mrb_value mrb_basicsocket_set_is_socket(mrb_state *mrb, mrb_value self) { mrb_bool b; - struct mrb_io *io_p; mrb_get_args(mrb, "b", &b); - io_p = (struct mrb_io*)DATA_PTR(self); + struct mrb_io *io_p = (struct mrb_io*)DATA_PTR(self); if (io_p) { io_p->is_socket = b; } @@ -505,6 +851,14 @@ mrb_basicsocket_set_is_socket(mrb_state *mrb, mrb_value self) return mrb_bool_value(b); } +/* + * call-seq: + * IPSocket.ntop(af, addr) -> string + * + * Converts a network address to a string representation. + * + * IPSocket.ntop(Socket::AF_INET, "\x7f\x00\x00\x01") #=> "127.0.0.1" + */ static mrb_value mrb_ipsocket_ntop(mrb_state *mrb, mrb_value klass) { @@ -514,11 +868,25 @@ mrb_ipsocket_ntop(mrb_state *mrb, mrb_value klass) mrb_get_args(mrb, "is", &af, &addr, &n); if ((af == AF_INET && n != 4) || (af == AF_INET6 && n != 16) || - inet_ntop((int)af, addr, buf, sizeof(buf)) == NULL) + mrb_hal_socket_inet_ntop((int)af, addr, buf, sizeof(buf)) == NULL) mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid address"); return mrb_str_new_cstr(mrb, buf); } +/* + * call-seq: + * IPSocket.pton(af, hostname) -> string + * + * Converts a string representation of an address to network format. + * + * IPSocket.pton(Socket::AF_INET, "127.0.0.1") #=> "\x7f\x00\x00\x01" + */ +static mrb_noreturn void +invalid_address_error(mrb_state *mrb) +{ + mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid address"); +} + static mrb_value mrb_ipsocket_pton(mrb_state *mrb, mrb_value klass) { @@ -527,84 +895,137 @@ mrb_ipsocket_pton(mrb_state *mrb, mrb_value klass) char buf[50]; mrb_get_args(mrb, "is", &af, &bp, &n); - if ((size_t)n > sizeof(buf) - 1) goto invalid; + if ((size_t)n > sizeof(buf) - 1) { + invalid_address_error(mrb); + } memcpy(buf, bp, n); buf[n] = '\0'; if (af == AF_INET) { struct in_addr in; - if (inet_pton(AF_INET, buf, (void *)&in.s_addr) != 1) - goto invalid; + if (mrb_hal_socket_inet_pton(AF_INET, buf, (void*)&in.s_addr) != 1) { + invalid_address_error(mrb); + } return mrb_str_new(mrb, (char*)&in.s_addr, 4); - } else if (af == AF_INET6) { + } + else if (af == AF_INET6) { struct in6_addr in6; - if (inet_pton(AF_INET6, buf, (void *)&in6.s6_addr) != 1) - goto invalid; + if (mrb_hal_socket_inet_pton(AF_INET6, buf, (void*)&in6.s6_addr) != 1) { + invalid_address_error(mrb); + } return mrb_str_new(mrb, (char*)&in6.s6_addr, 16); - } else + } + else { mrb_raise(mrb, E_ARGUMENT_ERROR, "unsupported address family"); - -invalid: - mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid address"); - return mrb_nil_value(); /* dummy */ + } + return mrb_nil_value(); /* not reached */ } +/* + * call-seq: + * ipsocket.recvfrom(maxlen, flags=0) -> [data, [family, port, hostname, ip]] + * + * Receives data from the socket and returns sender address information. + * + * data, addr = sock.recvfrom(1024) + * # addr => ["AF_INET", 12345, "hostname", "192.168.1.1"] + */ static mrb_value mrb_ipsocket_recvfrom(mrb_state *mrb, mrb_value self) { - struct sockaddr_storage ss; - socklen_t socklen; - mrb_value a, buf, pair; - mrb_int flags, maxlen; - ssize_t n; - int fd; + mrb_int maxlen; + mrb_int flags = 0; - fd = socket_fd(mrb, self); - flags = 0; mrb_get_args(mrb, "i|i", &maxlen, &flags); - buf = mrb_str_new_capa(mrb, maxlen); - socklen = sizeof(ss); - n = recvfrom(fd, RSTRING_PTR(buf), (fsize_t)maxlen, (int)flags, - (struct sockaddr *)&ss, &socklen); + + mrb_value buf = mrb_str_new_capa(mrb, maxlen); + struct sockaddr_storage ss; + socklen_t socklen = sizeof(ss); + int fd = socket_fd(mrb, self); + ssize_t n = recvfrom(fd, RSTRING_PTR(buf), (fsize_t)maxlen, (int)flags, + (struct sockaddr*)&ss, &socklen); if (n == -1) { - mrb_sys_fail(mrb, "recvfrom"); + sock_sys_fail(mrb, "recvfrom"); } mrb_str_resize(mrb, buf, (mrb_int)n); - a = sa2addrlist(mrb, (struct sockaddr *)&ss, socklen); - pair = mrb_ary_new_capa(mrb, 2); + + mrb_value a = sa2addrlist(mrb, (struct sockaddr*)&ss, socklen); + mrb_value pair = mrb_ary_new_capa(mrb, 2); mrb_ary_push(mrb, pair, buf); mrb_ary_push(mrb, pair, a); return pair; } +/* + * call-seq: + * Socket.gethostname -> string + * + * Returns the hostname of the current machine. + * + * Socket.gethostname #=> "localhost" + */ static mrb_value mrb_socket_gethostname(mrb_state *mrb, mrb_value cls) { - mrb_value buf; - size_t bufsize; - #ifdef HOST_NAME_MAX - bufsize = HOST_NAME_MAX + 1; + size_t bufsize = HOST_NAME_MAX + 1; #else - bufsize = 256; + size_t bufsize = 256; #endif - buf = mrb_str_new_capa(mrb, (mrb_int)bufsize); + mrb_value buf = mrb_str_new_capa(mrb, (mrb_int)bufsize); + if (gethostname(RSTRING_PTR(buf), (fsize_t)bufsize) != 0) - mrb_sys_fail(mrb, "gethostname"); + sock_sys_fail(mrb, "gethostname"); mrb_str_resize(mrb, buf, (mrb_int)strlen(RSTRING_PTR(buf))); return buf; } +/* + * call-seq: + * Socket.ip_address_list -> array + * + * Returns an array of `Addrinfo` objects representing all local IP addresses + * on every network interface (both IPv4 and IPv6). Loopback and link-local + * addresses are included; the caller is responsible for filtering. + * + * Socket.ip_address_list + * #=> [#, #, #, ...] + * + * Backed by `getifaddrs(3)` on POSIX and `GetAdaptersAddresses` on Windows. + */ +static mrb_value +mrb_socket_ip_address_list(mrb_state *mrb, mrb_value klass) +{ + (void)klass; + mrb_value sas = mrb_hal_socket_ip_address_list(mrb); + struct RClass *ainfo = mrb_class_get_id(mrb, MRB_SYM(Addrinfo)); + mrb_value result = mrb_ary_new_capa(mrb, RARRAY_LEN(sas)); + int arena_idx = mrb_gc_arena_save(mrb); + for (mrb_int i = 0; i < RARRAY_LEN(sas); i++) { + mrb_value sa = RARRAY_PTR(sas)[i]; + mrb_value addr = mrb_obj_new(mrb, ainfo, 1, &sa); + mrb_ary_push(mrb, result, addr); + mrb_gc_arena_restore(mrb, arena_idx); + } + return result; +} + +/* + * call-seq: + * Socket._accept(fd) -> [new_fd, sockaddr] + * + * Internal method to accept a connection on a socket file descriptor. + * Returns the new file descriptor and remote address. + */ static mrb_value mrb_socket_accept(mrb_state *mrb, mrb_value klass) { - int s1; mrb_int s0; mrb_get_args(mrb, "i", &s0); - s1 = (int)accept(s0, NULL, NULL); + int s1 = (int)accept(s0, NULL, NULL); if (s1 == -1) { - mrb_sys_fail(mrb, "accept"); + sock_sys_fail(mrb, "accept"); } return mrb_fixnum_value(s1); } @@ -612,26 +1033,31 @@ mrb_socket_accept(mrb_state *mrb, mrb_value klass) static mrb_value mrb_socket_accept2(mrb_state *mrb, mrb_value klass) { - mrb_value ary, sastr; - int s1; mrb_int s0; - socklen_t socklen; mrb_get_args(mrb, "i", &s0); - socklen = sizeof(struct sockaddr_storage); - sastr = mrb_str_new_capa(mrb, (mrb_int)socklen); - s1 = (int)accept(s0, (struct sockaddr *)RSTRING_PTR(sastr), &socklen); + + socklen_t socklen = sizeof(struct sockaddr_storage); + mrb_value sastr = mrb_str_new_capa(mrb, (mrb_int)socklen); + mrb_value ary = mrb_ary_new_capa(mrb, 2); + + int s1 = (int)accept(s0, (struct sockaddr*)RSTRING_PTR(sastr), &socklen); if (s1 == -1) { - mrb_sys_fail(mrb, "accept"); + sock_sys_fail(mrb, "accept"); } - // XXX: possible descriptor leakage here! + mrb_str_resize(mrb, sastr, socklen); - ary = mrb_ary_new_capa(mrb, 2); mrb_ary_push(mrb, ary, mrb_fixnum_value(s1)); mrb_ary_push(mrb, ary, sastr); return ary; } +/* + * call-seq: + * Socket._bind(fd, sockaddr) -> 0 + * + * Internal method to bind a socket file descriptor to the given address. + */ static mrb_value mrb_socket_bind(mrb_state *mrb, mrb_value klass) { @@ -639,12 +1065,18 @@ mrb_socket_bind(mrb_state *mrb, mrb_value klass) mrb_int s; mrb_get_args(mrb, "iS", &s, &sastr); - if (bind((int)s, (struct sockaddr *)RSTRING_PTR(sastr), (socklen_t)RSTRING_LEN(sastr)) == -1) { - mrb_sys_fail(mrb, "bind"); + if (bind((int)s, (struct sockaddr*)RSTRING_PTR(sastr), (socklen_t)RSTRING_LEN(sastr)) == -1) { + sock_sys_fail(mrb, "bind"); } return mrb_nil_value(); } +/* + * call-seq: + * Socket._connect(fd, sockaddr) -> 0 + * + * Internal method to connect a socket file descriptor to the given address. + */ static mrb_value mrb_socket_connect(mrb_state *mrb, mrb_value klass) { @@ -652,20 +1084,26 @@ mrb_socket_connect(mrb_state *mrb, mrb_value klass) mrb_int s; mrb_get_args(mrb, "iS", &s, &sastr); - if (connect((int)s, (struct sockaddr *)RSTRING_PTR(sastr), (socklen_t)RSTRING_LEN(sastr)) == -1) { - mrb_sys_fail(mrb, "connect"); + if (connect((int)s, (struct sockaddr*)RSTRING_PTR(sastr), (socklen_t)RSTRING_LEN(sastr)) == -1) { + sock_sys_fail(mrb, "connect"); } return mrb_nil_value(); } +/* + * call-seq: + * Socket._listen(fd, backlog) -> 0 + * + * Internal method to set a socket file descriptor to listen for connections. + */ static mrb_value mrb_socket_listen(mrb_state *mrb, mrb_value klass) { - mrb_int backlog, s; + mrb_int s, backlog; mrb_get_args(mrb, "ii", &s, &backlog); if (listen((int)s, (int)backlog) == -1) { - mrb_sys_fail(mrb, "listen"); + sock_sys_fail(mrb, "listen"); } return mrb_nil_value(); } @@ -673,80 +1111,85 @@ mrb_socket_listen(mrb_state *mrb, mrb_value klass) static mrb_value mrb_socket_sockaddr_family(mrb_state *mrb, mrb_value klass) { - const struct sockaddr *sa; mrb_value str; + const struct sockaddr *sa; mrb_get_args(mrb, "S", &str); if ((size_t)RSTRING_LEN(str) < offsetof(struct sockaddr, sa_family) + sizeof(sa->sa_family)) { mrb_raise(mrb, E_SOCKET_ERROR, "invalid sockaddr (too short)"); } - sa = (const struct sockaddr *)RSTRING_PTR(str); + sa = (const struct sockaddr*)RSTRING_PTR(str); return mrb_fixnum_value(sa->sa_family); } +/* + * call-seq: + * Socket.sockaddr_un(path) -> string + * + * Returns a packed sockaddr_un structure for the given Unix socket path. + * + * Socket.sockaddr_un("/tmp/socket") + * Socket.sockaddr_un("/var/run/daemon.sock") + */ static mrb_value mrb_socket_sockaddr_un(mrb_state *mrb, mrb_value klass) { -#ifdef _WIN32 - mrb_raise(mrb, E_NOTIMP_ERROR, "sockaddr_un unsupported on Windows"); - return mrb_nil_value(); -#else - struct sockaddr_un *sunp; - mrb_value path, s; + mrb_value path; mrb_get_args(mrb, "S", &path); - if ((size_t)RSTRING_LEN(path) > sizeof(sunp->sun_path) - 1) { - mrb_raisef(mrb, E_ARGUMENT_ERROR, "too long unix socket path (max: %d bytes)", (int)sizeof(sunp->sun_path) - 1); - } - s = mrb_str_new_capa(mrb, sizeof(struct sockaddr_un)); - sunp = (struct sockaddr_un *)RSTRING_PTR(s); -#if HAVE_SA_LEN - sunp->sun_len = sizeof(struct sockaddr_un); -#endif - sunp->sun_family = AF_UNIX; - memcpy(sunp->sun_path, RSTRING_PTR(path), RSTRING_LEN(path)); - sunp->sun_path[RSTRING_LEN(path)] = '\0'; - mrb_str_resize(mrb, s, sizeof(struct sockaddr_un)); - return s; -#endif + return mrb_hal_socket_sockaddr_un(mrb, RSTRING_PTR(path), (size_t)RSTRING_LEN(path)); } +/* + * call-seq: + * Socket.socketpair(domain, type, protocol=0) -> [socket1, socket2] + * Socket.pair(domain, type, protocol=0) -> [socket1, socket2] + * + * Creates a pair of connected sockets. + * + * sock1, sock2 = Socket.socketpair(Socket::AF_UNIX, Socket::SOCK_STREAM) + * sock1, sock2 = Socket.pair(Socket::AF_UNIX, Socket::SOCK_DGRAM) + */ static mrb_value mrb_socket_socketpair(mrb_state *mrb, mrb_value klass) { -#ifdef _WIN32 - mrb_raise(mrb, E_NOTIMP_ERROR, "socketpair unsupported on Windows"); - return mrb_nil_value(); -#else - mrb_value ary; mrb_int domain, type, protocol; int sv[2]; mrb_get_args(mrb, "iii", &domain, &type, &protocol); - if (socketpair(domain, type, protocol, sv) == -1) { - mrb_sys_fail(mrb, "socketpair"); + + if (mrb_hal_socket_socketpair(mrb, (int)domain, (int)type, (int)protocol, sv) == -1) { + sock_sys_fail(mrb, "socketpair"); } - // XXX: possible descriptor leakage here! - ary = mrb_ary_new_capa(mrb, 2); + + mrb_value ary = mrb_ary_new_capa(mrb, 2); mrb_ary_push(mrb, ary, mrb_fixnum_value(sv[0])); mrb_ary_push(mrb, ary, mrb_fixnum_value(sv[1])); return ary; -#endif } +/* + * call-seq: + * Socket._socket(domain, type, protocol) -> fd + * + * Internal method to create a new socket and return its file descriptor. + * + * fd = Socket._socket(Socket::AF_INET, Socket::SOCK_STREAM, 0) + */ static mrb_value mrb_socket_socket(mrb_state *mrb, mrb_value klass) { mrb_int domain, type, protocol; - int s; mrb_get_args(mrb, "iii", &domain, &type, &protocol); - s = (int)socket((int)domain, (int)type, (int)protocol); + + int s = (int)socket((int)domain, (int)type, (int)protocol); if (s == -1) - mrb_sys_fail(mrb, "socket"); + sock_sys_fail(mrb, "socket"); return mrb_fixnum_value(s); } +/* Helper to allocate TCPSocket object */ static mrb_value mrb_tcpsocket_allocate(mrb_state *mrb, mrb_value klass) { @@ -764,6 +1207,13 @@ mrb_tcpsocket_allocate(mrb_state *mrb, mrb_value klass) * will break on socket descriptors. */ #ifdef _WIN32 +/* + * call-seq: + * basicsocket.close -> nil + * + * Windows-specific implementation to close socket using closesocket(). + * Overrides IO#close for socket objects on Windows. + */ static mrb_value mrb_win32_basicsocket_close(mrb_state *mrb, mrb_value self) { @@ -772,10 +1222,16 @@ mrb_win32_basicsocket_close(mrb_state *mrb, mrb_value self) return mrb_nil_value(); } +/* + * call-seq: + * basicsocket.sysread(maxlen, outbuf=nil) -> string + * + * Windows-specific implementation to read from socket using recv(). + * Overrides IO#sysread for socket objects on Windows. + */ static mrb_value mrb_win32_basicsocket_sysread(mrb_state *mrb, mrb_value self) { - int sd, ret; mrb_value buf = mrb_nil_value(); mrb_int maxlen; @@ -791,19 +1247,20 @@ mrb_win32_basicsocket_sysread(mrb_state *mrb, mrb_value self) buf = mrb_str_resize(mrb, buf, maxlen); } - sd = socket_fd(mrb, self); - ret = recv(sd, RSTRING_PTR(buf), (int)maxlen, 0); + int sd = socket_fd(mrb, self); + int ret = recv(sd, RSTRING_PTR(buf), (int)maxlen, 0); switch (ret) { case 0: /* EOF */ if (maxlen == 0) { buf = mrb_str_new_cstr(mrb, ""); - } else { + } + else { mrb_raise(mrb, E_EOF_ERROR, "sysread failed: End of File"); } break; case SOCKET_ERROR: /* Error */ - mrb_sys_fail(mrb, "recv"); + sock_sys_fail(mrb, "recv"); break; default: if (RSTRING_LEN(buf) != ret) { @@ -815,6 +1272,13 @@ mrb_win32_basicsocket_sysread(mrb_state *mrb, mrb_value self) return buf; } +/* + * call-seq: + * basicsocket.sysseek(offset, whence) -> integer + * + * Windows-specific implementation that raises NotImplementedError. + * Sockets don't support seeking operations. + */ static mrb_value mrb_win32_basicsocket_sysseek(mrb_state *mrb, mrb_value self) { @@ -822,111 +1286,116 @@ mrb_win32_basicsocket_sysseek(mrb_state *mrb, mrb_value self) return mrb_nil_value(); } +/* + * call-seq: + * basicsocket.syswrite(string) -> integer + * + * Windows-specific implementation to write to socket using send(). + * Overrides IO#syswrite for socket objects on Windows. + */ static mrb_value mrb_win32_basicsocket_syswrite(mrb_state *mrb, mrb_value self) { - int n; - SOCKET sd; mrb_value str; + SOCKET sd = socket_fd(mrb, self); - sd = socket_fd(mrb, self); mrb_get_args(mrb, "S", &str); - n = send(sd, RSTRING_PTR(str), (int)RSTRING_LEN(str), 0); + + int n = send(sd, RSTRING_PTR(str), (int)RSTRING_LEN(str), 0); if (n == SOCKET_ERROR) - mrb_sys_fail(mrb, "send"); + sock_sys_fail(mrb, "send"); return mrb_int_value(mrb, n); } #endif +/* ---------------------------*/ +static const mrb_mt_entry addrinfo_rom_entries[] = { + MRB_MT_ENTRY(mrb_addrinfo_getnameinfo, MRB_SYM(getnameinfo), MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(mrb_addrinfo_unix_path, MRB_SYM(unix_path), MRB_ARGS_NONE()), +}; + +static const mrb_mt_entry basicsocket_rom_entries[] = { + MRB_MT_ENTRY(mrb_basicsocket_recvfrom, MRB_SYM(_recvfrom), MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(mrb_basicsocket_setnonblock, MRB_SYM(_setnonblock), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(mrb_basicsocket_getpeereid, MRB_SYM(getpeereid), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_basicsocket_getpeername, MRB_SYM(getpeername), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_basicsocket_getsockname, MRB_SYM(getsockname), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_basicsocket_getsockopt, MRB_SYM(getsockopt), MRB_ARGS_REQ(2)), + MRB_MT_ENTRY(mrb_basicsocket_recv, MRB_SYM(recv), MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(mrb_basicsocket_send, MRB_SYM(send), MRB_ARGS_REQ(2)|MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(mrb_basicsocket_setsockopt, MRB_SYM(setsockopt), MRB_ARGS_REQ(1)|MRB_ARGS_OPT(2)), + MRB_MT_ENTRY(mrb_basicsocket_shutdown, MRB_SYM(shutdown), MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(mrb_basicsocket_set_is_socket, MRB_SYM_E(_is_socket), MRB_ARGS_REQ(1)), +#ifdef _WIN32 + MRB_MT_ENTRY(mrb_win32_basicsocket_close, MRB_SYM(close), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_win32_basicsocket_sysread, MRB_SYM(sysread), MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(mrb_win32_basicsocket_sysseek, MRB_SYM(sysseek), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(mrb_win32_basicsocket_syswrite, MRB_SYM(syswrite), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(mrb_win32_basicsocket_sysread, MRB_SYM(read), MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(mrb_win32_basicsocket_syswrite, MRB_SYM(write), MRB_ARGS_REQ(1)), +#endif +}; + +static const mrb_mt_entry ipsocket_rom_entries[] = { + MRB_MT_ENTRY(mrb_ipsocket_recvfrom, MRB_SYM(recvfrom), MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)), +}; + +static const mrb_mt_entry socket_option_rom_entries[] = { + MRB_MT_ENTRY(socket_option_init, MRB_SYM(initialize), MRB_ARGS_REQ(4)), + MRB_MT_ENTRY(socket_option_inspect, MRB_SYM(inspect), MRB_ARGS_REQ(0)), + MRB_MT_ENTRY(socket_option_family, MRB_SYM(family), MRB_ARGS_REQ(0)), + MRB_MT_ENTRY(socket_option_level, MRB_SYM(level), MRB_ARGS_REQ(0)), + MRB_MT_ENTRY(socket_option_optname, MRB_SYM(optname), MRB_ARGS_REQ(0)), + MRB_MT_ENTRY(socket_option_data, MRB_SYM(data), MRB_ARGS_REQ(0)), + MRB_MT_ENTRY(socket_option_bool, MRB_SYM(bool), MRB_ARGS_REQ(0)), + MRB_MT_ENTRY(socket_option_int, MRB_SYM(int), MRB_ARGS_REQ(0)), + MRB_MT_ENTRY(socket_option_notimp, MRB_SYM(linger), MRB_ARGS_REQ(0)), + MRB_MT_ENTRY(socket_option_notimp, MRB_SYM(unpack), MRB_ARGS_REQ(1)), +}; + void mrb_mruby_socket_gem_init(mrb_state* mrb) { - struct RClass *io, *ai, *sock, *bsock, *ipsock, *tcpsock; - struct RClass *constants; + mrb_hal_socket_init(mrb); -#ifdef _WIN32 - WSADATA wsaData; - int result; - result = WSAStartup(MAKEWORD(2,2), &wsaData); - if (result != NO_ERROR) - mrb_raise(mrb, E_RUNTIME_ERROR, "WSAStartup failed"); -#endif + struct RClass *ainfo = mrb_define_class_id(mrb, MRB_SYM(Addrinfo), mrb->object_class); + mrb_define_class_method_id(mrb, ainfo, MRB_SYM(getaddrinfo), mrb_addrinfo_getaddrinfo, MRB_ARGS_REQ(2)|MRB_ARGS_OPT(4)); - ai = mrb_define_class(mrb, "Addrinfo", mrb->object_class); - mrb_mod_cv_set(mrb, ai, MRB_SYM(_lastai), mrb_nil_value()); - mrb_define_class_method(mrb, ai, "getaddrinfo", mrb_addrinfo_getaddrinfo, MRB_ARGS_REQ(2)|MRB_ARGS_OPT(4)); - mrb_define_method(mrb, ai, "getnameinfo", mrb_addrinfo_getnameinfo, MRB_ARGS_OPT(1)); -#ifndef _WIN32 - mrb_define_method(mrb, ai, "unix_path", mrb_addrinfo_unix_path, MRB_ARGS_NONE()); -#endif + struct RClass *io = mrb_class_get_id(mrb, MRB_SYM(IO)); - io = mrb_class_get_id(mrb, MRB_SYM(IO)); - - bsock = mrb_define_class(mrb, "BasicSocket", io); - mrb_define_method(mrb, bsock, "_recvfrom", mrb_basicsocket_recvfrom, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); - mrb_define_method(mrb, bsock, "_setnonblock", mrb_basicsocket_setnonblock, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, bsock, "getpeereid", mrb_basicsocket_getpeereid, MRB_ARGS_NONE()); - mrb_define_method(mrb, bsock, "getpeername", mrb_basicsocket_getpeername, MRB_ARGS_NONE()); - mrb_define_method(mrb, bsock, "getsockname", mrb_basicsocket_getsockname, MRB_ARGS_NONE()); - mrb_define_method(mrb, bsock, "getsockopt", mrb_basicsocket_getsockopt, MRB_ARGS_REQ(2)); - mrb_define_method(mrb, bsock, "recv", mrb_basicsocket_recv, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); - // #recvmsg(maxlen, flags=0) - mrb_define_method(mrb, bsock, "send", mrb_basicsocket_send, MRB_ARGS_REQ(2)|MRB_ARGS_OPT(1)); - // #sendmsg - // #sendmsg_nonblock - mrb_define_method(mrb, bsock, "setsockopt", mrb_basicsocket_setsockopt, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(2)); - mrb_define_method(mrb, bsock, "shutdown", mrb_basicsocket_shutdown, MRB_ARGS_OPT(1)); - mrb_define_method(mrb, bsock, "_is_socket=", mrb_basicsocket_set_is_socket, MRB_ARGS_REQ(1)); - - ipsock = mrb_define_class(mrb, "IPSocket", bsock); - mrb_define_class_method(mrb, ipsock, "ntop", mrb_ipsocket_ntop, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, ipsock, "pton", mrb_ipsocket_pton, MRB_ARGS_REQ(2)); - mrb_define_method(mrb, ipsock, "recvfrom", mrb_ipsocket_recvfrom, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); - - tcpsock = mrb_define_class(mrb, "TCPSocket", ipsock); - mrb_define_class_method(mrb, tcpsock, "_allocate", mrb_tcpsocket_allocate, MRB_ARGS_NONE()); - mrb_define_class(mrb, "TCPServer", tcpsock); - - mrb_define_class(mrb, "UDPSocket", ipsock); - //#recvfrom_nonblock - - sock = mrb_define_class(mrb, "Socket", bsock); - mrb_define_class_method(mrb, sock, "_accept", mrb_socket_accept, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, sock, "_accept2", mrb_socket_accept2, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, sock, "_bind", mrb_socket_bind, MRB_ARGS_REQ(3)); - mrb_define_class_method(mrb, sock, "_connect", mrb_socket_connect, MRB_ARGS_REQ(3)); - mrb_define_class_method(mrb, sock, "_listen", mrb_socket_listen, MRB_ARGS_REQ(2)); - mrb_define_class_method(mrb, sock, "_sockaddr_family", mrb_socket_sockaddr_family, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, sock, "_socket", mrb_socket_socket, MRB_ARGS_REQ(3)); - //mrb_define_class_method(mrb, sock, "gethostbyaddr", mrb_socket_gethostbyaddr, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); - //mrb_define_class_method(mrb, sock, "gethostbyname", mrb_socket_gethostbyname, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); - mrb_define_class_method(mrb, sock, "gethostname", mrb_socket_gethostname, MRB_ARGS_NONE()); - //mrb_define_class_method(mrb, sock, "getservbyname", mrb_socket_getservbyname, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); - //mrb_define_class_method(mrb, sock, "getservbyport", mrb_socket_getservbyport, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); - mrb_define_class_method(mrb, sock, "sockaddr_un", mrb_socket_sockaddr_un, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, sock, "socketpair", mrb_socket_socketpair, MRB_ARGS_REQ(3)); - //mrb_define_method(mrb, sock, "sysaccept", mrb_socket_accept, MRB_ARGS_NONE()); - -#ifndef _WIN32 - mrb_define_class(mrb, "UNIXSocket", bsock); - //mrb_define_class_method(mrb, usock, "pair", mrb_unixsocket_open, MRB_ARGS_OPT(2)); - //mrb_define_class_method(mrb, usock, "socketpair", mrb_unixsocket_open, MRB_ARGS_OPT(2)); - - //mrb_define_method(mrb, usock, "recv_io", mrb_unixsocket_peeraddr, MRB_ARGS_NONE()); - //mrb_define_method(mrb, usock, "recvfrom", mrb_unixsocket_peeraddr, MRB_ARGS_NONE()); - //mrb_define_method(mrb, usock, "send_io", mrb_unixsocket_peeraddr, MRB_ARGS_NONE()); -#endif + struct RClass *bsock = mrb_define_class_id(mrb, MRB_SYM(BasicSocket), io); - /* Windows IO Method Overrides on BasicSocket */ -#ifdef _WIN32 - mrb_define_method(mrb, bsock, "close", mrb_win32_basicsocket_close, MRB_ARGS_NONE()); - mrb_define_method(mrb, bsock, "sysread", mrb_win32_basicsocket_sysread, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); - mrb_define_method(mrb, bsock, "sysseek", mrb_win32_basicsocket_sysseek, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, bsock, "syswrite", mrb_win32_basicsocket_syswrite, MRB_ARGS_REQ(1)); -#endif + struct RClass *ipsock = mrb_define_class_id(mrb, MRB_SYM(IPSocket), bsock); + mrb_define_class_method_id(mrb, ipsock, MRB_SYM(ntop), mrb_ipsocket_ntop, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, ipsock, MRB_SYM(pton), mrb_ipsocket_pton, MRB_ARGS_REQ(2)); + + struct RClass *tcpsock = mrb_define_class_id(mrb, MRB_SYM(TCPSocket), ipsock); + mrb_define_class_method_id(mrb, tcpsock, MRB_SYM(_allocate), mrb_tcpsocket_allocate, MRB_ARGS_NONE()); + + struct RClass *sock = mrb_define_class_id(mrb, MRB_SYM(Socket), bsock); + mrb_define_class_method_id(mrb, sock, MRB_SYM(_accept), mrb_socket_accept, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, sock, MRB_SYM(_accept2), mrb_socket_accept2, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, sock, MRB_SYM(_bind), mrb_socket_bind, MRB_ARGS_REQ(2)); + mrb_define_class_method_id(mrb, sock, MRB_SYM(_connect), mrb_socket_connect, MRB_ARGS_REQ(2)); + mrb_define_class_method_id(mrb, sock, MRB_SYM(_listen), mrb_socket_listen, MRB_ARGS_REQ(2)); + mrb_define_class_method_id(mrb, sock, MRB_SYM(_sockaddr_family), mrb_socket_sockaddr_family, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, sock, MRB_SYM(_socket), mrb_socket_socket, MRB_ARGS_REQ(3)); + mrb_define_class_method_id(mrb, sock, MRB_SYM(gethostname), mrb_socket_gethostname, MRB_ARGS_NONE()); + mrb_define_class_method_id(mrb, sock, MRB_SYM(ip_address_list), mrb_socket_ip_address_list, MRB_ARGS_NONE()); + mrb_define_class_method_id(mrb, sock, MRB_SYM(sockaddr_un), mrb_socket_sockaddr_un, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, sock, MRB_SYM(socketpair), mrb_socket_socketpair, MRB_ARGS_REQ(3)); + + struct RClass *option = mrb_define_class_under_id(mrb, sock, MRB_SYM(Option), mrb->object_class); + mrb_define_class_method_id(mrb, option, MRB_SYM(bool), socket_option_s_bool, MRB_ARGS_REQ(4)); + mrb_define_class_method_id(mrb, option, MRB_SYM(int), socket_option_s_int, MRB_ARGS_REQ(4)); + + MRB_MT_INIT_ROM(mrb, ainfo, addrinfo_rom_entries); + MRB_MT_INIT_ROM(mrb, bsock, basicsocket_rom_entries); + MRB_MT_INIT_ROM(mrb, ipsock, ipsocket_rom_entries); + MRB_MT_INIT_ROM(mrb, option, socket_option_rom_entries); - constants = mrb_define_module_under(mrb, sock, "Constants"); + struct RClass *constants = mrb_define_module_under_id(mrb, sock, MRB_SYM(Constants)); #define define_const(SYM) \ do { \ @@ -934,17 +1403,12 @@ mrb_mruby_socket_gem_init(mrb_state* mrb) } while (0) #include "const.cstub" + + mrb_include_module(mrb, sock, constants); } void mrb_mruby_socket_gem_final(mrb_state* mrb) { - mrb_value ai; - ai = mrb_mod_cv_get(mrb, mrb_class_get_id(mrb, MRB_SYM(Addrinfo)), MRB_SYM(_lastai)); - if (mrb_cptr_p(ai)) { - freeaddrinfo((struct addrinfo*)mrb_cptr(ai)); - } -#ifdef _WIN32 - WSACleanup(); -#endif + mrb_hal_socket_final(mrb); } diff --git a/mrbgems/mruby-socket/test/addrinfo.rb b/mrbgems/mruby-socket/test/addrinfo.rb index 4916561793..eaab2a39ee 100644 --- a/mrbgems/mruby-socket/test/addrinfo.rb +++ b/mrbgems/mruby-socket/test/addrinfo.rb @@ -7,6 +7,7 @@ end assert('Addrinfo.getaddrinfo') do + skip "localhost resolution unreliable in Windows getaddrinfo" if SocketTest.win? ary = Addrinfo.getaddrinfo("localhost", 53, Socket::AF_INET, Socket::SOCK_STREAM) assert_true(ary.size >= 1) ai = ary[0] @@ -18,6 +19,7 @@ end assert('Addrinfo.foreach') do + skip "localhost resolution unreliable in Windows getaddrinfo" if SocketTest.win? # assume Addrinfo.getaddrinfo works well a = Addrinfo.getaddrinfo("localhost", 80) b = [] diff --git a/mrbgems/mruby-socket/test/socket.rb b/mrbgems/mruby-socket/test/socket.rb index b64a67919e..3a57710b06 100644 --- a/mrbgems/mruby-socket/test/socket.rb +++ b/mrbgems/mruby-socket/test/socket.rb @@ -36,3 +36,17 @@ end end # win? + +# Socket.ip_address_list works on both POSIX (getifaddrs) and Windows +# (GetAdaptersAddresses), so this test runs everywhere. +assert('Socket.ip_address_list') do + list = Socket.ip_address_list + assert_kind_of Array, list + # Every host should have at least one address (loopback at minimum). + assert_true list.length >= 1 + list.each do |ai| + assert_kind_of Addrinfo, ai + # Only AF_INET and AF_INET6 are returned. + assert_true [Socket::AF_INET, Socket::AF_INET6].include?(ai.afamily) + end +end diff --git a/mrbgems/mruby-socket/test/sockettest.c b/mrbgems/mruby-socket/test/sockettest.c index 02eb1e1a8b..96ccfe3c68 100644 --- a/mrbgems/mruby-socket/test/sockettest.c +++ b/mrbgems/mruby-socket/test/sockettest.c @@ -1,9 +1,9 @@ #include -#include "mruby.h" -#include "mruby/error.h" +#include +#include -#if defined(_WIN32) || defined(_WIN64) +#if defined(_WIN32) #include diff --git a/mrbgems/mruby-sprintf/README.md b/mrbgems/mruby-sprintf/README.md new file mode 100644 index 0000000000..ee29bdc2bd --- /dev/null +++ b/mrbgems/mruby-sprintf/README.md @@ -0,0 +1,235 @@ +# mruby-sprintf + +This mrbgem provides `sprintf` functionality for mruby. It allows you to format strings according to a specified format string, similar to the `sprintf` function in C or other languages. + +## Functionality + +- Provides the `sprintf` kernel function. +- Adds the `%` operator to the `String` class as an alias for `sprintf`. + +## Usage + +You can use `sprintf` or the `String#%` operator for string formatting: + +```ruby +# Using sprintf +str = sprintf("Hello, %s! You are %d years old.", "World", 30) +puts str # Output: Hello, World! You are 30 years old. + +# Using String#% +str2 = "The value is %.2f" % 12.3456 +puts str2 # Output: The value is 12.35 + +# Using String#% with an array of arguments +str3 = "%d + %d = %d" % [1, 2, 1+2] +puts str3 # Output: 1 + 2 = 3 +``` + +## Format Specifiers + +This mrbgem supports a wide range of format specifiers, flags, width, and precision options. For a comprehensive list and detailed explanation of all supported format specifiers and their behavior, please refer to the extensive comments within the C source file: [`src/sprintf.c`](src/sprintf.c). + +## Format Specifiers Details + +The `sprintf` function in this mrbgem supports a variety of field type characters that control how arguments are interpreted and formatted. + +### Integer Formats + +| Field | Description | +| :---- | :---------------------------------------------------------------------------------------------------------------------- | +| `b` | Convert argument as a binary number. Negative numbers will be displayed as a two's complement prefixed with `..1`. | +| `B` | Equivalent to `b`, but uses an uppercase `0B` for prefix in the alternative format (when `#` flag is used). | +| `d` | Convert argument as a decimal number. | +| `i` | Identical to `d`. | +| `o` | Convert argument as an octal number. Negative numbers will be displayed as a two's complement prefixed with `..7`. | +| `u` | Identical to `d`. (In many `sprintf` implementations, `u` is for unsigned decimal, but here it behaves like `d`). | +| `x` | Convert argument as a hexadecimal number. Negative numbers will be displayed as a two's complement prefixed with `..f`. | +| `X` | Equivalent to `x`, but uses uppercase letters for hexadecimal digits (e.g., `0XFF` vs `0xff`). | + +### Float Formats + +| Field | Description | +| :---- | :------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `e` | Convert floating-point argument into exponential notation (e.g., `[-]d.dddddde[+-]dd`). Precision specifies digits after the decimal point (default 6). | +| `E` | Equivalent to `e`, but uses an uppercase `E` for the exponent. | +| `f` | Convert floating-point argument as `[-]ddd.dddddd`. Precision specifies digits after the decimal point. | +| `g` | Converts a floating-point number using `e` or `f` form based on exponent and precision. Precision specifies significant digits. | +| `G` | Equivalent to `g`, but uses an uppercase `E` in exponent form if applicable. | + +### Other Formats + +| Field | Description | +| :---- | :-------------------------------------------------------------------------------------------------------------- | +| `c` | Argument is treated as the numeric code for a single character or a single character string itself. | +| `p` | Argument's `inspect` method is called, and the result is substituted. | +| `s` | Argument is a string to be substituted. If precision is specified, at most that many characters will be copied. | +| `%` | A literal percent sign (`%`) itself will be displayed. No argument is taken. | + +### Flags + +Flags modify the behavior of the format specifiers: + +| Flag | Applies to | Meaning | +| :--------- | :---------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `space` | Numeric Formats (bBdiouxX, aAeEfgG) | Leave a space at the start of non-negative numbers. For `o`, `x`, `X`, `b`, `B`, uses a minus sign with absolute value for negative values. | +| `(digit)$` | All | Specifies the absolute argument number for this field (e.g., `%2$d` uses the second argument). Cannot be mixed with relative argument numbers. | +| `#` | bBoxX, aAeEfgG | Use an alternative format. For `o`, increases precision to make the first digit `0` if not formatted as complement. For `x`, `X`, `b`, `B` (non-zero), prefixes with `0x`, `0X`, `0b`, `0B`. For `e`, `E`, `f`, `g`, `G`, forces a decimal point. For `g`, `G`, do not remove trailing zeros. | +| `+` | Numeric Formats | Add a leading plus sign (`+`) to non-negative numbers. For `o`, `x`, `X`, `b`, `B`, uses a minus sign with absolute value for negative values. | +| `-` | All | Left-justify the result of this conversion. | +| `0` (zero) | Numeric Formats | Pad with zeros, not spaces. For `o`, `x`, `X`, `b`, `B`, radix-1 is used for negative numbers formatted as complements. | +| `*` | All | Use the next argument as the field width. If negative, left-justifies. If `*` is followed by a number and `$`, use that argument as width (e.g., `%*1$d`). | + +## Further Examples (from C source comments) + +Here are more examples illustrating the use of flags, width, and precision, adapted from the comments in `src/sprintf.c`. + +### Examples of Flags + +```ruby +# '+' and space flag specifies the sign of non-negative numbers. +puts sprintf("%d", 123) #=> "123" +puts sprintf("%+d", 123) #=> "+123" +puts sprintf("% d", 123) #=> " 123" + +# '#' flag for 'o' increases number of digits to show '0'. +# '+' and space flag changes format of negative numbers. +puts sprintf("%o", 123) #=> "173" +puts sprintf("%#o", 123) #=> "0173" +puts sprintf("%+o", -123) #=> "-173" +puts sprintf("%o", -123) #=> "..7605" +puts sprintf("%#o", -123) #=> "..7605" + +# '#' flag for 'x' add a prefix '0x' for non-zero numbers. +# '+' and space flag disables complements for negative numbers. +puts sprintf("%x", 123) #=> "7b" +puts sprintf("%#x", 123) #=> "0x7b" +puts sprintf("%+x", -123) #=> "-7b" +puts sprintf("%x", -123) #=> "..f85" +puts sprintf("%#x", -123) #=> "0x..f85" # Note: mruby's output might differ slightly for complements with # +puts sprintf("%#x", 0) #=> "0" + +# '#' for 'X' uses the prefix '0X'. +puts sprintf("%X", 123) #=> "7B" +puts sprintf("%#X", 123) #=> "0X7B" + +# '#' flag for 'b' add a prefix '0b' for non-zero numbers. +# '+' and space flag disables complements for negative numbers. +puts sprintf("%b", 123) #=> "1111011" +puts sprintf("%#b", 123) #=> "0b1111011" +puts sprintf("%+b", -123) #=> "-1111011" +puts sprintf("%b", -123) #=> "..10000101" +puts sprintf("%#b", -123) #=> "0b..10000101" # Note: mruby's output might differ slightly for complements with # +puts sprintf("%#b", 0) #=> "0" + +# '#' for 'B' uses the prefix '0B'. +puts sprintf("%B", 123) #=> "1111011" +puts sprintf("%#B", 123) #=> "0B1111011" + +# '#' for 'e' forces to show the decimal point. +puts sprintf("%.0e", 1) #=> "1e+00" +puts sprintf("%#.0e", 1) #=> "1.e+00" + +# '#' for 'f' forces to show the decimal point. +puts sprintf("%.0f", 1234) #=> "1234" +puts sprintf("%#.0f", 1234) #=> "1234." + +# '#' for 'g' forces to show the decimal point. +# It also disables stripping lowest zeros. +puts sprintf("%g", 123.4) #=> "123.4" +puts sprintf("%#g", 123.4) #=> "123.400" +puts sprintf("%g", 123456) #=> "123456" +puts sprintf("%#g", 123456) #=> "123456." +``` + +### Examples of Width + +The width specifies the minimum number of characters that will be written. +Padding is typically done with spaces, or with zeros if the `0` flag is used. + +```ruby +# padding is done by spaces, width=20 +# 0 or radix-1. <------------------> +puts sprintf("%20d", 123) #=> " 123" +puts sprintf("%+20d", 123) #=> " +123" +puts sprintf("%020d", 123) #=> "00000000000000000123" +puts sprintf("%+020d", 123) #=> "+0000000000000000123" +puts sprintf("% 020d", 123) #=> " 0000000000000000123" +puts sprintf("%-20d", 123) #=> "123 " +puts sprintf("%-+20d", 123) #=> "+123 " +puts sprintf("%- 20d", 123) #=> " 123 " +puts sprintf("%020x", -123) #=> "..ffffffffffffffff85" # Output for negative hex with 0-padding can vary +``` + +### Examples of Precision + +For numeric fields, precision controls the number of decimal places or minimum number of digits. +For strings, it determines the maximum number of characters. + +```ruby +# precision for 'd', 'o', 'x' and 'b' is minimum number of digits +# <------> +puts sprintf("%20.8d", 123) #=> " 00000123" +puts sprintf("%20.8o", 123) #=> " 00000173" +puts sprintf("%20.8x", 123) #=> " 0000007b" +puts sprintf("%20.8b", 123) #=> " 01111011" +puts sprintf("%20.8d", -123) #=> " -00000123" +puts sprintf("%20.8o", -123) #=> " ..777605" # Complemented output +puts sprintf("%20.8x", -123) #=> " ..ffff85" # Complemented output +puts sprintf("%20.8b", -11) #=> " ..110101" # Complemented output + +# "0x" and "0b" for '#x' and '#b' is not counted for precision, +# but "0" for '#o' is counted. <------> +puts sprintf("%#20.8d", 123) #=> " 00000123" +puts sprintf("%#20.8o", 123) #=> " 00000173" # '#' with 'o' and precision +puts sprintf("%#20.8x", 123) #=> " 0x0000007b" +puts sprintf("%#20.8b", 123) #=> " 0b01111011" +puts sprintf("%#20.8d", -123) #=> " -00000123" +puts sprintf("%#20.8o", -123) #=> " ..777605" # Complemented output +puts sprintf("%#20.8x", -123) #=> " 0x..ffff85" # Complemented output with # +puts sprintf("%#20.8b", -11) #=> " 0b..110101" # Complemented output with # + +# precision for 'e' is number of digits after the decimal point +# <------> +puts sprintf("%20.8e", 1234.56789) #=> " 1.23456789e+03" + +# precision for 'f' is number of digits after the decimal point +# <------> +puts sprintf("%20.8f", 1234.56789) #=> " 1234.56789000" + +# precision for 'g' is number of significant digits +# <-------> +puts sprintf("%20.8g", 1234.56789) #=> " 1234.5679" +# <-------> +puts sprintf("%20.8g", 123456789) #=> " 1.2345679e+08" + +# precision for 's' is maximum number of characters +# <------> +puts sprintf("%20.8s", "string test") #=> " string t" +``` + +### General Examples + +```ruby +puts sprintf("%d %04x", 123, 123) #=> "123 007b" +puts sprintf("%08b '%4s'", 123, 123) #=> "01111011 ' 123'" # Note: mruby output for %s with number might differ +puts sprintf("%1$*2$s %2$d %1$s", "hello", 8) #=> " hello 8 hello" +puts sprintf("%1$*2$s %2$d", "hello", -8) #=> "hello -8" +puts sprintf("%+g:% g:%-g", 1.23, 1.23, 1.23) #=> "+1.23: 1.23:1.23" +puts sprintf("%u", -123) #=> "-123" +``` + +### Named References + +For more complex formatting, Ruby supports a reference by name. +`%s` style uses format style, but `%{name}` style doesn't (in standard Ruby, mruby might vary). + +```ruby +puts sprintf("%d : %f", { :foo => 1, :bar => 2.0 }) # Using a float for %f +#=> "1 : 2.000000" + +# The %{name} style might behave differently or not be supported in the same way as CRuby. +# Standard CRuby example: +# puts sprintf("%{foo}f", { :foo => 1 }) # => "1f" (This specific syntax might not apply or work in mruby) +# For mruby, stick to %type for named arguments: +puts sprintf("%s", { :foo => "test" }) #=> "test" +``` diff --git a/mrbgems/mruby-sprintf/mrblib/string.rb b/mrbgems/mruby-sprintf/mrblib/string.rb index d7e55536ac..dbd6263508 100644 --- a/mrbgems/mruby-sprintf/mrblib/string.rb +++ b/mrbgems/mruby-sprintf/mrblib/string.rb @@ -1,4 +1,19 @@ class String + # + # call-seq: + # str % arg -> new_str + # str % args -> new_str + # + # Format - Uses str as a format specification, and returns the result + # of applying it to arg. If the format specification contains more than + # one substitution, then arg must be an Array or Hash containing the values + # to be substituted. See sprintf for details of the format string. + # + # "%05d" % 123 #=> "00123" + # "%-5s: %016x" % [ "ID", self.object_id ] #=> "ID : 00002b054ec93168" + # "foo = %{foo}" % { :foo => 'bar' } #=> "foo = bar" + # "%{foo}f" % { :foo => 1 } #=> "1f" + # def %(args) if args.is_a? Array sprintf(self, *args) diff --git a/mrbgems/mruby-sprintf/src/sprintf.c b/mrbgems/mruby-sprintf/src/sprintf.c index 41c84eff1f..5f4404cf9e 100644 --- a/mrbgems/mruby-sprintf/src/sprintf.c +++ b/mrbgems/mruby-sprintf/src/sprintf.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include @@ -16,7 +16,7 @@ #define BITSPERDIG MRB_INT_BIT #define EXTENDSIGN(n, l) (((~0U << (n)) >> (((n)*(l)) % BITSPERDIG)) & ~(~0U << (n))) -mrb_value mrb_str_format(mrb_state *, mrb_int, const mrb_value *, mrb_value); +mrb_value mrb_bint_2comp(mrb_state *mrb, mrb_value x); static char* remove_sign_bits(char *str, int base) @@ -44,43 +44,22 @@ remove_sign_bits(char *str, int base) return t; } -static char -sign_bits(int base, const char *p) -{ - char c; - - switch (base) { - case 16: - if (*p == 'X') c = 'F'; - else c = 'f'; - break; - case 8: - c = '7'; break; - case 2: - c = '1'; break; - default: - c = '.'; break; - } - return c; -} - static char * mrb_uint_to_cstr(char *buf, size_t len, mrb_int num, int base) { char *b = buf + len - 1; const int mask = base-1; int shift; - mrb_uint val = (uint64_t)num; - char d; + mrb_uint val = (mrb_uint)num; if (num == 0) { buf[0] = '0'; buf[1] = '\0'; return buf; } switch (base) { - case 16: d = 'f'; shift = 4; break; - case 8: d = '7'; shift = 3; break; - case 2: d = '1'; shift = 1; break; + case 16: shift = 4; break; + case 8: shift = 3; break; + case 2: shift = 1; break; default: return NULL; } *--b = '\0'; @@ -90,9 +69,6 @@ mrb_uint_to_cstr(char *buf, size_t len, mrb_int num, int base) if (num < 0) { b = remove_sign_bits(b, base); - if (d && *b != d) { - *--b = d; - } } return b; @@ -108,9 +84,97 @@ mrb_uint_to_cstr(char *buf, size_t len, mrb_int num, int base) #define FPREC 64 #define FPREC0 128 +/* Format specifier types for lookup table */ +#define FMT_INVALID 0 +#define FMT_FLAG 1 /* space, #, +, -, 0 */ +#define FMT_DIGIT 2 /* 1-9 for width */ +#define FMT_NAMED 3 /* < { for named args */ +#define FMT_WIDTH 4 /* * for width from arg */ +#define FMT_PREC 5 /* . for precision */ +#define FMT_LITERAL 6 /* % \n \0 */ +#define FMT_CHAR 7 /* c */ +#define FMT_STRING 8 /* s p */ +#define FMT_INTEGER 9 /* d i o x X b B u */ +#define FMT_FLOAT 10 /* f g G e E */ + +/* Format specifier info structure */ +typedef struct { + int type; /* FMT_* type */ + int base; /* number base for integers */ + int subtype; /* format-specific subtype */ +} fmt_spec_t; + + +/* Get format specifier info for character c */ +static inline fmt_spec_t get_fmt_spec(unsigned char c) { + static const fmt_spec_t invalid = {FMT_INVALID, 0, 0}; + + switch (c) { + /* Control characters and whitespace */ + case '\0': case '\n': + return (fmt_spec_t){FMT_LITERAL, 0, 0}; + + /* Flags */ + case ' ': return (fmt_spec_t){FMT_FLAG, 0, FSPACE}; + case '#': return (fmt_spec_t){FMT_FLAG, 0, FSHARP}; + case '+': return (fmt_spec_t){FMT_FLAG, 0, FPLUS}; + case '-': return (fmt_spec_t){FMT_FLAG, 0, FMINUS}; + case '0': return (fmt_spec_t){FMT_FLAG, 0, FZERO}; + + /* Width digits */ + case '1': return (fmt_spec_t){FMT_DIGIT, 0, 1}; + case '2': return (fmt_spec_t){FMT_DIGIT, 0, 2}; + case '3': return (fmt_spec_t){FMT_DIGIT, 0, 3}; + case '4': return (fmt_spec_t){FMT_DIGIT, 0, 4}; + case '5': return (fmt_spec_t){FMT_DIGIT, 0, 5}; + case '6': return (fmt_spec_t){FMT_DIGIT, 0, 6}; + case '7': return (fmt_spec_t){FMT_DIGIT, 0, 7}; + case '8': return (fmt_spec_t){FMT_DIGIT, 0, 8}; + case '9': return (fmt_spec_t){FMT_DIGIT, 0, 9}; + + /* Width and precision */ + case '*': return (fmt_spec_t){FMT_WIDTH, 0, 0}; + case '.': return (fmt_spec_t){FMT_PREC, 0, 0}; + + /* Named arguments */ + case '<': return (fmt_spec_t){FMT_NAMED, 0, '<'}; + case '{': return (fmt_spec_t){FMT_NAMED, 0, '{'}; + + /* Literal percent */ + case '%': return (fmt_spec_t){FMT_LITERAL, 0, '%'}; + + /* Character formatting */ + case 'c': return (fmt_spec_t){FMT_CHAR, 0, 0}; + + /* String formatting */ + case 's': return (fmt_spec_t){FMT_STRING, 0, 0}; + case 'p': return (fmt_spec_t){FMT_STRING, 0, 1}; /* inspect format */ + + /* Integer formatting */ + case 'd': return (fmt_spec_t){FMT_INTEGER, 10, 1}; /* signed decimal */ + case 'i': return (fmt_spec_t){FMT_INTEGER, 10, 1}; /* signed decimal */ + case 'u': return (fmt_spec_t){FMT_INTEGER, 10, 1}; /* unsigned (same as signed in mruby) */ + case 'o': return (fmt_spec_t){FMT_INTEGER, 8, 0}; /* octal */ + case 'x': return (fmt_spec_t){FMT_INTEGER, 16, 0}; /* hex lowercase */ + case 'X': return (fmt_spec_t){FMT_INTEGER, 16, 1}; /* hex uppercase */ + case 'b': return (fmt_spec_t){FMT_INTEGER, 2, 0}; /* binary lowercase */ + case 'B': return (fmt_spec_t){FMT_INTEGER, 2, 1}; /* binary uppercase */ + + /* Float formatting */ + case 'f': return (fmt_spec_t){FMT_FLOAT, 0, 'f'}; + case 'e': return (fmt_spec_t){FMT_FLOAT, 0, 'e'}; + case 'E': return (fmt_spec_t){FMT_FLOAT, 0, 'E'}; + case 'g': return (fmt_spec_t){FMT_FLOAT, 0, 'g'}; + case 'G': return (fmt_spec_t){FMT_FLOAT, 0, 'G'}; + + default: + return invalid; + } +} + #ifndef MRB_NO_FLOAT static int -fmt_float(char *buf, size_t buf_size, char fmt, int flags, mrb_int width, int prec, mrb_float f) +fmt_float(char *buf, size_t buf_size, char fmt, int flags, int width, int prec, mrb_float f) { char sign = '\0'; int left_align = 0; @@ -146,7 +210,8 @@ fmt_float(char *buf, size_t buf_size, char fmt, int flags, mrb_int width, int pr memmove(&buf[width - len], buf, len); if (zero_pad) { memset(buf, '0', width - len); - } else { + } + else { memset(buf, ' ', width - len); } return width; @@ -237,7 +302,7 @@ check_name_arg(mrb_state *mrb, int posarg, const char *name, size_t len) #define GETNUM(n, val) do { \ if (!(p = get_num(mrb, p, end, &(n)))) \ - mrb_raise(mrb, E_ARGUMENT_ERROR, #val " too big 1"); \ + mrb_raise(mrb, E_ARGUMENT_ERROR, #val " too big"); \ } while(0) #define GETASTER(num) do { \ @@ -251,15 +316,17 @@ check_name_arg(mrb_state *mrb, int posarg, const char *name, size_t len) tmp_v = GETNEXTARG(); \ p = t; \ } \ - num = mrb_as_int(mrb, tmp_v); \ + num = (int)mrb_as_int(mrb, tmp_v); \ } while (0) -static const char * +static const char* get_num(mrb_state *mrb, const char *p, const char *end, int *valp) { char *e; - mrb_int n = mrb_int_read(p, end, &e); - if (e == NULL || n > INT_MAX) return NULL; + mrb_int n; + if (!mrb_read_int(p, end, &e, &n) || INT_MAX < n) { + return NULL; + } *valp = (int)n; return e; } @@ -267,294 +334,18 @@ get_num(mrb_state *mrb, const char *p, const char *end, int *valp) static void get_hash(mrb_state *mrb, mrb_value *hash, mrb_int argc, const mrb_value *argv) { - mrb_value tmp; - if (!mrb_undef_p(*hash)) return; if (argc != 2) { mrb_raise(mrb, E_ARGUMENT_ERROR, "one hash required"); } - tmp = mrb_check_hash_type(mrb, argv[1]); + mrb_value tmp = mrb_check_hash_type(mrb, argv[1]); if (mrb_nil_p(tmp)) { mrb_raise(mrb, E_ARGUMENT_ERROR, "one hash required"); } *hash = tmp; } -/* - * call-seq: - * format(format_string [, arguments...] ) -> string - * sprintf(format_string [, arguments...] ) -> string - * - * Returns the string resulting from applying format_string to - * any additional arguments. Within the format string, any characters - * other than format sequences are copied to the result. - * - * The syntax of a format sequence is follows. - * - * %[flags][width][.precision]type - * - * A format - * sequence consists of a percent sign, followed by optional flags, - * width, and precision indicators, then terminated with a field type - * character. The field type controls how the corresponding - * sprintf argument is to be interpreted, while the flags - * modify that interpretation. - * - * The field type characters are: - * - * Field | Integer Format - * ------+-------------------------------------------------------------- - * b | Convert argument as a binary number. - * | Negative numbers will be displayed as a two's complement - * | prefixed with '..1'. - * B | Equivalent to 'b', but uses an uppercase 0B for prefix - * | in the alternative format by #. - * d | Convert argument as a decimal number. - * i | Identical to 'd'. - * o | Convert argument as an octal number. - * | Negative numbers will be displayed as a two's complement - * | prefixed with '..7'. - * u | Identical to 'd'. - * x | Convert argument as a hexadecimal number. - * | Negative numbers will be displayed as a two's complement - * | prefixed with '..f' (representing an infinite string of - * | leading 'ff's). - * X | Equivalent to 'x', but uses uppercase letters. - * - * Field | Float Format - * ------+-------------------------------------------------------------- - * e | Convert floating-point argument into exponential notation - * | with one digit before the decimal point as [-]d.dddddde[+-]dd. - * | The precision specifies the number of digits after the decimal - * | point (defaulting to six). - * E | Equivalent to 'e', but uses an uppercase E to indicate - * | the exponent. - * f | Convert floating-point argument as [-]ddd.dddddd, - * | where the precision specifies the number of digits after - * | the decimal point. - * g | Convert a floating-point number using exponential form - * | if the exponent is less than -4 or greater than or - * | equal to the precision, or in dd.dddd form otherwise. - * | The precision specifies the number of significant digits. - * G | Equivalent to 'g', but use an uppercase 'E' in exponent form. - * - * Field | Other Format - * ------+-------------------------------------------------------------- - * c | Argument is the numeric code for a single character or - * | a single character string itself. - * p | The valuing of argument.inspect. - * s | Argument is a string to be substituted. If the format - * | sequence contains a precision, at most that many characters - * | will be copied. - * % | A percent sign itself will be displayed. No argument taken. - * - * The flags modifies the behavior of the formats. - * The flag characters are: - * - * Flag | Applies to | Meaning - * ---------+---------------+----------------------------------------- - * space | bBdiouxX | Leave a space at the start of - * | aAeEfgG | non-negative numbers. - * | (numeric fmt) | For 'o', 'x', 'X', 'b' and 'B', use - * | | a minus sign with absolute value for - * | | negative values. - * ---------+---------------+----------------------------------------- - * (digit)$ | all | Specifies the absolute argument number - * | | for this field. Absolute and relative - * | | argument numbers cannot be mixed in a - * | | sprintf string. - * ---------+---------------+----------------------------------------- - * # | bBoxX | Use an alternative format. - * | aAeEfgG | For the conversions 'o', increase the precision - * | | until the first digit will be '0' if - * | | it is not formatted as complements. - * | | For the conversions 'x', 'X', 'b' and 'B' - * | | on non-zero, prefix the result with "0x", - * | | "0X", "0b" and "0B", respectively. - * | | For 'e', 'E', 'f', 'g', and 'G', - * | | force a decimal point to be added, - * | | even if no digits follow. - * | | For 'g' and 'G', do not remove trailing zeros. - * ---------+---------------+----------------------------------------- - * + | bBdiouxX | Add a leading plus sign to non-negative - * | aAeEfgG | numbers. - * | (numeric fmt) | For 'o', 'x', 'X', 'b' and 'B', use - * | | a minus sign with absolute value for - * | | negative values. - * ---------+---------------+----------------------------------------- - * - | all | Left-justify the result of this conversion. - * ---------+---------------+----------------------------------------- - * 0 (zero) | bBdiouxX | Pad with zeros, not spaces. - * | aAeEfgG | For 'o', 'x', 'X', 'b' and 'B', radix-1 - * | (numeric fmt) | is used for negative numbers formatted as - * | | complements. - * ---------+---------------+----------------------------------------- - * * | all | Use the next argument as the field width. - * | | If negative, left-justify the result. If the - * | | asterisk is followed by a number and a dollar - * | | sign, use the indicated argument as the width. - * - * Examples of flags: - * - * # '+' and space flag specifies the sign of non-negative numbers. - * sprintf("%d", 123) #=> "123" - * sprintf("%+d", 123) #=> "+123" - * sprintf("% d", 123) #=> " 123" - * - * # '#' flag for 'o' increases number of digits to show '0'. - * # '+' and space flag changes format of negative numbers. - * sprintf("%o", 123) #=> "173" - * sprintf("%#o", 123) #=> "0173" - * sprintf("%+o", -123) #=> "-173" - * sprintf("%o", -123) #=> "..7605" - * sprintf("%#o", -123) #=> "..7605" - * - * # '#' flag for 'x' add a prefix '0x' for non-zero numbers. - * # '+' and space flag disables complements for negative numbers. - * sprintf("%x", 123) #=> "7b" - * sprintf("%#x", 123) #=> "0x7b" - * sprintf("%+x", -123) #=> "-7b" - * sprintf("%x", -123) #=> "..f85" - * sprintf("%#x", -123) #=> "0x..f85" - * sprintf("%#x", 0) #=> "0" - * - * # '#' for 'X' uses the prefix '0X'. - * sprintf("%X", 123) #=> "7B" - * sprintf("%#X", 123) #=> "0X7B" - * - * # '#' flag for 'b' add a prefix '0b' for non-zero numbers. - * # '+' and space flag disables complements for negative numbers. - * sprintf("%b", 123) #=> "1111011" - * sprintf("%#b", 123) #=> "0b1111011" - * sprintf("%+b", -123) #=> "-1111011" - * sprintf("%b", -123) #=> "..10000101" - * sprintf("%#b", -123) #=> "0b..10000101" - * sprintf("%#b", 0) #=> "0" - * - * # '#' for 'B' uses the prefix '0B'. - * sprintf("%B", 123) #=> "1111011" - * sprintf("%#B", 123) #=> "0B1111011" - * - * # '#' for 'e' forces to show the decimal point. - * sprintf("%.0e", 1) #=> "1e+00" - * sprintf("%#.0e", 1) #=> "1.e+00" - * - * # '#' for 'f' forces to show the decimal point. - * sprintf("%.0f", 1234) #=> "1234" - * sprintf("%#.0f", 1234) #=> "1234." - * - * # '#' for 'g' forces to show the decimal point. - * # It also disables stripping lowest zeros. - * sprintf("%g", 123.4) #=> "123.4" - * sprintf("%#g", 123.4) #=> "123.400" - * sprintf("%g", 123456) #=> "123456" - * sprintf("%#g", 123456) #=> "123456." - * - * The field width is an optional integer, followed optionally by a - * period and a precision. The width specifies the minimum number of - * characters that will be written to the result for this field. - * - * Examples of width: - * - * # padding is done by spaces, width=20 - * # 0 or radix-1. <------------------> - * sprintf("%20d", 123) #=> " 123" - * sprintf("%+20d", 123) #=> " +123" - * sprintf("%020d", 123) #=> "00000000000000000123" - * sprintf("%+020d", 123) #=> "+0000000000000000123" - * sprintf("% 020d", 123) #=> " 0000000000000000123" - * sprintf("%-20d", 123) #=> "123 " - * sprintf("%-+20d", 123) #=> "+123 " - * sprintf("%- 20d", 123) #=> " 123 " - * sprintf("%020x", -123) #=> "..ffffffffffffffff85" - * - * For - * numeric fields, the precision controls the number of decimal places - * displayed. For string fields, the precision determines the maximum - * number of characters to be copied from the string. (Thus, the format - * sequence %10.10s will always contribute exactly ten - * characters to the result.) - * - * Examples of precisions: - * - * # precision for 'd', 'o', 'x' and 'b' is - * # minimum number of digits <------> - * sprintf("%20.8d", 123) #=> " 00000123" - * sprintf("%20.8o", 123) #=> " 00000173" - * sprintf("%20.8x", 123) #=> " 0000007b" - * sprintf("%20.8b", 123) #=> " 01111011" - * sprintf("%20.8d", -123) #=> " -00000123" - * sprintf("%20.8o", -123) #=> " ..777605" - * sprintf("%20.8x", -123) #=> " ..ffff85" - * sprintf("%20.8b", -11) #=> " ..110101" - * - * # "0x" and "0b" for '#x' and '#b' is not counted for - * # precision but "0" for '#o' is counted. <------> - * sprintf("%#20.8d", 123) #=> " 00000123" - * sprintf("%#20.8o", 123) #=> " 00000173" - * sprintf("%#20.8x", 123) #=> " 0x0000007b" - * sprintf("%#20.8b", 123) #=> " 0b01111011" - * sprintf("%#20.8d", -123) #=> " -00000123" - * sprintf("%#20.8o", -123) #=> " ..777605" - * sprintf("%#20.8x", -123) #=> " 0x..ffff85" - * sprintf("%#20.8b", -11) #=> " 0b..110101" - * - * # precision for 'e' is number of - * # digits after the decimal point <------> - * sprintf("%20.8e", 1234.56789) #=> " 1.23456789e+03" - * - * # precision for 'f' is number of - * # digits after the decimal point <------> - * sprintf("%20.8f", 1234.56789) #=> " 1234.56789000" - * - * # precision for 'g' is number of - * # significant digits <-------> - * sprintf("%20.8g", 1234.56789) #=> " 1234.5679" - * - * # <-------> - * sprintf("%20.8g", 123456789) #=> " 1.2345679e+08" - * - * # precision for 's' is - * # maximum number of characters <------> - * sprintf("%20.8s", "string test") #=> " string t" - * - * Examples: - * - * sprintf("%d %04x", 123, 123) #=> "123 007b" - * sprintf("%08b '%4s'", 123, 123) #=> "01111011 ' 123'" - * sprintf("%1$*2$s %2$d %1$s", "hello", 8) #=> " hello 8 hello" - * sprintf("%1$*2$s %2$d", "hello", -8) #=> "hello -8" - * sprintf("%+g:% g:%-g", 1.23, 1.23, 1.23) #=> "+1.23: 1.23:1.23" - * sprintf("%u", -123) #=> "-123" - * - * For more complex formatting, Ruby supports a reference by name. - * %s style uses format style, but %{name} style doesn't. - * - * Examples: - * sprintf("%d : %f", { :foo => 1, :bar => 2 }) - * #=> 1 : 2.000000 - * sprintf("%{foo}f", { :foo => 1 }) - * # => "1f" - */ - static mrb_value -mrb_f_sprintf(mrb_state *mrb, mrb_value obj) -{ - mrb_int argc; - const mrb_value *argv; - - mrb_get_args(mrb, "*", &argv, &argc); - - if (argc <= 0) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "too few arguments"); - return mrb_nil_value(); - } - else { - return mrb_str_format(mrb, argc - 1, argv + 1, argv[0]); - } -} - -mrb_value mrb_str_format(mrb_state *mrb, mrb_int argc, const mrb_value *argv, mrb_value fmt) { const char *p, *end; @@ -586,24 +377,46 @@ mrb_str_format(mrb_state *mrb, mrb_int argc, const mrb_value *argv, mrb_value fm mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after precision"); \ } - ++argc; - --argv; + argc++; + argv--; mrb_ensure_string_type(mrb, fmt); + /* Duplicate the format string so that to_s/inspect callbacks invoked + during the loop cannot invalidate p/end by mutating the original + via String#replace or similar. mrb_str_dup shares the underlying + buffer, so this is O(1); String#replace on the original goes + through str_replace which decrements the shared refcount, leaving + our copy's buffer intact. */ + fmt = mrb_str_dup_frozen(mrb, fmt); p = RSTRING_PTR(fmt); end = p + RSTRING_LEN(fmt); blen = 0; - bsiz = 120; + /* Estimate initial buffer size to reduce reallocations: + * - format string length (for literal text) + * - base headroom (120 bytes) + * - per-specifier headroom (24 bytes each) + * - capped at 4096 to prevent over-allocation + */ + bsiz = (end - p) + 120; + for (const char *scan = p; scan < end; scan++) { + if (*scan == '%') bsiz += 24; + } + if (bsiz > 4096) bsiz = 4096; result = mrb_str_new_capa(mrb, bsiz); buf = RSTRING_PTR(result); memset(buf, 0, bsiz); + int ai = mrb_gc_arena_save(mrb); for (; p < end; p++) { const char *t; mrb_sym id = 0; int flags = FNONE; - for (t = p; t < end && *t != '%'; t++) ; - if (t + 1 == end) ++t; + for (t = p; t < end && *t != '%'; t++) + ; + if (t + 1 == end) { + /* % at the bottom */ + mrb_raise(mrb, E_ARGUMENT_ERROR, "incomplete format specifier; use %% (double %) instead"); + } PUSH(p, t - p); if (t >= end) goto sprint_exit; /* end of fmt string */ @@ -614,255 +427,255 @@ mrb_str_format(mrb_state *mrb, mrb_int argc, const mrb_value *argv, mrb_value fm nextvalue = mrb_undef_value(); retry: - switch (*p) { - default: - mrb_raisef(mrb, E_ARGUMENT_ERROR, "malformed format string - %%%c", *p); - break; + if (p >= end) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "malformed format string - unexpected end"); + } + { + fmt_spec_t spec = get_fmt_spec(*p); - case ' ': - CHECK_FOR_FLAGS(flags); - flags |= FSPACE; - p++; - goto retry; - - case '#': - CHECK_FOR_FLAGS(flags); - flags |= FSHARP; - p++; - goto retry; - - case '+': - CHECK_FOR_FLAGS(flags); - flags |= FPLUS; - p++; - goto retry; - - case '-': - CHECK_FOR_FLAGS(flags); - flags |= FMINUS; - p++; - goto retry; - - case '0': - CHECK_FOR_FLAGS(flags); - flags |= FZERO; - p++; - goto retry; - - case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - GETNUM(n, width); - if (*p == '$') { - if (!mrb_undef_p(nextvalue)) { - mrb_raisef(mrb, E_ARGUMENT_ERROR, "value given twice - %i$", n); - } - nextvalue = GETPOSARG(n); + switch (spec.type) { + case FMT_INVALID: + mrb_raisef(mrb, E_ARGUMENT_ERROR, "malformed format string - %%%c", *p); + break; + + case FMT_FLAG: + CHECK_FOR_FLAGS(flags); + flags |= spec.subtype; p++; goto retry; - } - CHECK_FOR_WIDTH(flags); - width = n; - flags |= FWIDTH; - goto retry; - - case '<': - case '{': { - const char *start = p; - char term = (*p == '<') ? '>' : '}'; - for (; p < end && *p != term; ) - p++; - if (id) { - mrb_raisef(mrb, E_ARGUMENT_ERROR, "name%l after <%n>", - start, p - start + 1, id); - } - CHECKNAMEARG(start, p - start + 1); - get_hash(mrb, &hash, argc, argv); - id = mrb_intern_check(mrb, start + 1, p - start - 1); - if (id) { - nextvalue = mrb_hash_fetch(mrb, hash, mrb_symbol_value(id), mrb_undef_value()); - } - if (!id || mrb_undef_p(nextvalue)) { - mrb_raisef(mrb, E_KEY_ERROR, "key%l not found", start, p - start + 1); - } - if (term == '}') goto format_s; - p++; - goto retry; - } + case FMT_DIGIT: + GETNUM(n, width); + if (*p == '$') { + if (!mrb_undef_p(nextvalue)) { + mrb_raisef(mrb, E_ARGUMENT_ERROR, "value given twice - %i$", n); + } + nextvalue = GETPOSARG(n); + p++; + goto retry; + } + CHECK_FOR_WIDTH(flags); + width = n; + flags |= FWIDTH; + goto retry; - case '*': - CHECK_FOR_WIDTH(flags); - flags |= FWIDTH; - GETASTER(width); - if (width > INT16_MAX || INT16_MIN > width) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "width too big"); - } - if (width < 0) { - flags |= FMINUS; - width = -width; - } - p++; - goto retry; + case FMT_NAMED: { + const char *start = p; + char term = (spec.subtype == '<') ? '>' : '}'; - case '.': - if (flags & FPREC0) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "precision given twice"); + for (; p < end && *p != term; ) + p++; + if (id) { + mrb_raisef(mrb, E_ARGUMENT_ERROR, "name%l after <%n>", + start, p - start + 1, id); + } + CHECKNAMEARG(start, p - start + 1); + get_hash(mrb, &hash, argc, argv); + id = mrb_intern_check(mrb, start + 1, p - start - 1); + if (id) { + nextvalue = mrb_hash_fetch(mrb, hash, mrb_symbol_value(id), mrb_undef_value()); + } + if (!id || mrb_undef_p(nextvalue)) { + mrb_raisef(mrb, E_KEY_ERROR, "key%l not found", start, p - start + 1); + } + if (term == '}') goto format_s; + p++; + goto retry; } - flags |= FPREC|FPREC0; - p++; - if (*p == '*') { - GETASTER(prec); - if (prec < 0) { /* ignore negative precision */ - flags &= ~FPREC; + case FMT_WIDTH: + CHECK_FOR_WIDTH(flags); + flags |= FWIDTH; + GETASTER(width); + if (width > INT16_MAX || INT16_MIN > width) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "width too big"); + } + if (width < 0) { + flags |= FMINUS; + width = -width; } p++; goto retry; - } - GETNUM(prec, precision); - goto retry; - - case '\n': - case '\0': - p--; - /* fallthrough */ - case '%': - if (flags != FNONE) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid format character - %"); - } - PUSH("%", 1); - break; - case 'c': { - mrb_value val = GETARG(); - mrb_value tmp; - char *c; + case FMT_PREC: + if (flags & FPREC0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "precision given twice"); + } + flags |= FPREC|FPREC0; - tmp = mrb_check_string_type(mrb, val); - if (!mrb_nil_p(tmp)) { - if (RSTRING_LEN(tmp) != 1) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "%c requires a character"); + p++; + if (*p == '*') { + GETASTER(prec); + if (prec < 0) { /* ignore negative precision */ + flags &= ~FPREC; + } + p++; + goto retry; } - } - else if (mrb_integer_p(val)) { - mrb_int n = mrb_integer(val); -#ifndef MRB_UTF8_STRING - char buf[1]; + GETNUM(prec, precision); + goto retry; + + case FMT_LITERAL: + if (spec.subtype == 0) { /* \n or \0 */ + p--; + } + if (flags != FNONE) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid format character - %"); + } + PUSH("%", 1); + break; - buf[0] = (char)n&0xff; - tmp = mrb_str_new(mrb, buf, 1); + case FMT_CHAR: { + /* CHARACTER FORMATTING (%c) */ + mrb_value val = GETARG(); + const char *c; + char cbuf[4]; /* stack buffer for character bytes */ + int clen; + + if (mrb_integer_p(val)) { + /* Integer: encode directly to stack buffer (no allocation) */ + mrb_int code = mrb_integer(val); +#ifdef MRB_UTF8_STRING + clen = (int)mrb_utf8_to_buf(cbuf, (uint32_t)code); + if (clen == 0) clen = 1; /* invalid codepoint: write single byte */ #else - if (n < 0x80) { - char buf[1]; + cbuf[0] = (char)(code & 0xff); + clen = 1; +#endif + c = cbuf; + } + else { + /* String: validate and use directly */ + mrb_value tmp = mrb_check_string_type(mrb, val); + if (mrb_nil_p(tmp)) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid character"); + } + if (RSTRING_LEN(tmp) != 1) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "%c requires a character"); + } + c = RSTRING_PTR(tmp); + clen = (int)RSTRING_LEN(tmp); + } - buf[0] = (char)n; - tmp = mrb_str_new(mrb, buf, 1); + /* Format and output the character with width/alignment */ + n = clen; + if (!(flags & FWIDTH)) { + PUSH(c, n); + } + else if ((flags & FMINUS)) { + PUSH(c, n); + if (width>0) FILL(' ', width-1); } else { - tmp = mrb_funcall_id(mrb, val, MRB_SYM(chr), 0); - mrb_check_type(mrb, tmp, MRB_TT_STRING); + if (width>0) FILL(' ', width-1); + PUSH(c, n); } -#endif - } - else { - mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid character"); } - c = RSTRING_PTR(tmp); - n = RSTRING_LEN(tmp); - if (!(flags & FWIDTH)) { - PUSH(c, n); - } - else if ((flags & FMINUS)) { - PUSH(c, n); - if (width>0) FILL(' ', width-1); - } - else { - if (width>0) FILL(' ', width-1); - PUSH(c, n); - } - } - break; + break; - case 's': - case 'p': + case FMT_STRING: format_s: - { - mrb_value arg = GETARG(); - mrb_int len; - mrb_int slen; - - if (*p == 'p') arg = mrb_inspect(mrb, arg); - str = mrb_obj_as_string(mrb, arg); - len = RSTRING_LEN(str); - if (RSTRING(result)->flags & MRB_STR_EMBED) { - mrb_int tmp_n = len; - RSTRING(result)->flags &= ~MRB_STR_EMBED_LEN_MASK; - RSTRING(result)->flags |= tmp_n << MRB_STR_EMBED_LEN_SHIFT; - } - else { - RSTRING(result)->as.heap.len = blen; - } - if (flags&(FPREC|FWIDTH)) { - slen = RSTRING_LEN(str); - if (slen < 0) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid mbstring sequence"); + { + /* STRING FORMATTING (%s, %p) */ + mrb_value arg = GETARG(); + mrb_int len; + mrb_int slen; + + /* Convert to string (with inspect for %p) */ + if (spec.subtype == 1) arg = mrb_inspect(mrb, arg); /* 'p' format */ + str = mrb_obj_as_string(mrb, arg); + len = RSTRING_LEN(str); + + /* Update result string length for embedded strings */ + if (RSTRING(result)->flags & MRB_STR_EMBED) { + mrb_int tmp_n = len; + RSTRING(result)->flags &= ~MRB_STR_EMBED_LEN_MASK; + RSTRING(result)->flags |= tmp_n << MRB_STR_EMBED_LEN_SHIFT; } - if ((flags&FPREC) && (prec < slen)) { - char *p = RSTRING_PTR(str) + prec; - slen = prec; - len = (mrb_int)(p - RSTRING_PTR(str)); + else { + RSTRING(result)->as.heap.len = blen; } - /* need to adjust multi-byte string pos */ - if ((flags&FWIDTH) && (width > slen)) { - width -= (int)slen; - if (!(flags&FMINUS)) { - FILL(' ', width); + + /* Handle precision and width formatting */ + if (flags&(FPREC|FWIDTH)) { + slen = RSTRING_LEN(str); + if (slen < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid mbstring sequence"); } - PUSH(RSTRING_PTR(str), len); - if (flags&FMINUS) { - FILL(' ', width); + if ((flags&FPREC) && (prec < slen)) { + char *p = RSTRING_PTR(str) + prec; + slen = prec; + len = (mrb_int)(p - RSTRING_PTR(str)); + } + /* Apply width formatting with padding */ + if ((flags&FWIDTH) && (width > slen)) { + width -= (int)slen; + if (!(flags&FMINUS)) { + FILL(' ', width); + } + PUSH(RSTRING_PTR(str), len); + if (flags&FMINUS) { + FILL(' ', width); + } + break; } - break; } + PUSH(RSTRING_PTR(str), len); + mrb_gc_arena_restore(mrb, ai); } - PUSH(RSTRING_PTR(str), len); - } - break; - - case 'd': - case 'i': - case 'o': - case 'x': - case 'X': - case 'b': - case 'B': - case 'u': { - mrb_value val = GETARG(); - char nbuf[69], *s; - const char *prefix = NULL; - int sign = 0, dots = 0; - char sc = 0; - mrb_int v = 0; - int base; - mrb_int len; - - if (flags & FSHARP) { - switch (*p) { - case 'o': prefix = "0"; break; - case 'x': prefix = "0x"; break; - case 'X': prefix = "0X"; break; - case 'b': prefix = "0b"; break; - case 'B': prefix = "0B"; break; - default: break; + break; + + case FMT_INTEGER: { + /* INTEGER FORMATTING (%d, %i, %o, %x, %X, %b, %B, %u) */ + mrb_value val = GETARG(); + char nbuf[69], *s; + const char *prefix = NULL; + int sign = 0, dots = 0; + char sc = 0; + char fc = 0; + mrb_int v = 0; + int base; + int len; + + /* Determine base and signedness from lookup table */ + base = spec.base; + if (spec.subtype == 1) { /* signed formats: d, i, u */ + sign = 1; } - } + /* Set prefix for alternative format (#) */ + if (flags & FSHARP) { + switch (base) { + case 8: prefix = "0"; break; + case 16: prefix = (spec.subtype == 1) ? "0X" : "0x"; break; + case 2: prefix = (spec.subtype == 1) ? "0B" : "0b"; break; + default: break; + } + } + + /* Convert value to integer and format as string */ bin_retry: - switch (mrb_type(val)) { + switch (mrb_type(val)) { #ifndef MRB_NO_FLOAT case MRB_TT_FLOAT: val = mrb_float_to_integer(mrb, val); goto bin_retry; +#endif +#ifdef MRB_USE_BIGINT + case MRB_TT_BIGINT: + { + mrb_int n = (mrb_bint_cmp(mrb, val, mrb_fixnum_value(0))); + mrb_bool need_dots = ((flags & FPLUS) == 0) && (base == 16 || base == 8 || base == 2) && n < 0; + if (need_dots) { + val = mrb_bint_2comp(mrb, val); + dots = 1; + v = -1; + } + mrb_value str = mrb_bint_to_s(mrb, val, base); + s = RSTRING_PTR(str); + len = (int)RSTRING_LEN(str); + } + goto str_skip; #endif case MRB_TT_STRING: val = mrb_str_to_integer(mrb, val, 0, TRUE); @@ -875,24 +688,6 @@ mrb_str_format(mrb_state *mrb, mrb_int argc, const mrb_value *argv, mrb_value fm break; } - switch (*p) { - case 'o': - base = 8; break; - case 'x': - case 'X': - base = 16; break; - case 'b': - case 'B': - base = 2; break; - case 'u': - case 'd': - case 'i': - sign = 1; - /* fall through */ - default: - base = 10; break; - } - if (sign) { if (v >= 0) { if (flags & FPLUS) { @@ -918,21 +713,45 @@ mrb_str_format(mrb_state *mrb, mrb_int argc, const mrb_value *argv, mrb_value fm dots = 1; } } + { - size_t size; - size = strlen(s); + size_t size = strlen(s); /* PARANOID: assert(size <= MRB_INT_MAX) */ - len = (mrb_int)size; + len = (int)size; + } + +#ifdef MRB_USE_BIGINT + str_skip: +#endif + switch (base) { + case 16: + fc = 'f'; break; + case 8: + fc = '7'; break; + case 2: + fc = '1'; break; } - if (*p == 'X') { - char *pp = s; - int c; - while ((c = (int)(unsigned char)*pp) != 0) { - *pp = toupper(c); - pp++; + if (dots) { + if (base == 8 && (*s == '1' || *s == '3')) { + s++; len--; + } + while (*s == fc) { + s++; len--; } } + /* Convert to uppercase for X, B formats */ + if (spec.subtype == 1) { /* uppercase formats: X, B */ + char *pp = s; + int c; + while ((c = (int)(unsigned char)*pp) != 0) { + *pp = toupper(c); + pp++; + } + if (base == 16) { + fc = 'F'; + } + } if (prefix && !prefix[1]) { /* octal */ if (dots) { @@ -951,11 +770,10 @@ mrb_str_format(mrb_state *mrb, mrb_int argc, const mrb_value *argv, mrb_value fm } if (prefix) { - size_t size; - size = strlen(prefix); + size_t size = strlen(prefix); /* PARANOID: assert(size <= MRB_INT_MAX). * this check is absolutely paranoid. */ - width -= (mrb_int)size; + width -= (int)size; } if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) { @@ -985,6 +803,10 @@ mrb_str_format(mrb_state *mrb, mrb_int argc, const mrb_value *argv, mrb_value fm prec -= 2; width -= 2; PUSH("..", 2); + if (*s != fc) { + FILL(fc, 1); + prec--; width--; + } } if (prec > len) { @@ -992,34 +814,31 @@ mrb_str_format(mrb_state *mrb, mrb_int argc, const mrb_value *argv, mrb_value fm if ((flags & (FMINUS|FPREC)) != FMINUS) { char c = '0'; FILL(c, prec - len); - } else if (v < 0) { - char c = sign_bits(base, p); - FILL(c, prec - len); + } + else if (v < 0) { + FILL(fc, prec - len); } } - PUSH(s, len); - if (width > 0) { - FILL(' ', width); + PUSH(s, len); + if (width > 0) { + FILL(' ', width); + } } - } - break; + break; - case 'f': - case 'g': - case 'G': - case 'e': - case 'E': { + case FMT_FLOAT: { + /* FLOAT FORMATTING (%f, %g, %G, %e, %E) */ #ifdef MRB_NO_FLOAT - mrb_raisef(mrb, E_ARGUMENT_ERROR, "%%%c not supported with MRB_NO_FLOAT defined", *p); + mrb_raisef(mrb, E_ARGUMENT_ERROR, "%%%c not supported with MRB_NO_FLOAT defined", spec.subtype); #else - mrb_value val = GETARG(); + mrb_value val = GETARG(); double fval; mrb_int need = 6; fval = mrb_as_float(mrb, val); if (!isfinite(fval)) { const char *expr; - const mrb_int elen = 3; + const int elen = 3; char sign = '\0'; if (isnan(fval)) { @@ -1034,7 +853,7 @@ mrb_str_format(mrb_state *mrb, mrb_int argc, const mrb_value *argv, mrb_value fm else if (flags & (FPLUS|FSPACE)) sign = (flags & FPLUS) ? '+' : ' '; if (sign) - ++need; + need++; if ((flags & FWIDTH) && need < width) need = width; @@ -1050,7 +869,7 @@ mrb_str_format(mrb_state *mrb, mrb_int argc, const mrb_value *argv, mrb_value fm else { if (sign) buf[blen - elen - 1] = sign; - memcpy(&buf[blen - elen], expr, elen); + memcpy(&buf[blen - elen], expr, (size_t)elen); } break; } @@ -1070,46 +889,310 @@ mrb_str_format(mrb_state *mrb, mrb_int argc, const mrb_value *argv, mrb_value fm need += (flags&FPREC) ? prec : 6; if ((flags&FWIDTH) && need < width) need = width; - if (need > MRB_INT_MAX - 20) { + if ((mrb_int)need > MRB_INT_MAX - 20) { goto too_big_width_prec; } need += 20; CHECK(need); - n = fmt_float(&buf[blen], need, *p, flags, width, prec, fval); + n = fmt_float(&buf[blen], need, spec.subtype, flags, width, prec, fval); if (n < 0 || n >= need) { mrb_raise(mrb, E_RUNTIME_ERROR, "formatting error"); } - blen += n; + blen += n; #endif + } + break; } - break; } - flags = FNONE; } sprint_exit: -#if 0 - /* XXX - We cannot validate the number of arguments if (digit)$ style used. - */ - if (posarg >= 0 && nextarg < argc) { - const char *mesg = "too many arguments for format string"; - if (mrb_test(ruby_debug)) mrb_raise(mrb, E_ARGUMENT_ERROR, mesg); - if (mrb_test(ruby_verbose)) mrb_warn(mrb, "%s", mesg); - } -#endif mrb_str_resize(mrb, result, blen); return result; } +/* + * call-seq: + * format(format_string [, arguments...] ) -> string + * sprintf(format_string [, arguments...] ) -> string + * + * Returns the string resulting from applying *format_string* to + * any additional arguments. Within the format string, any characters + * other than format sequences are copied to the result. + * + * The syntax of a format sequence is follows. + * + * %[flags][width][.precision]type + * + * A format + * sequence consists of a percent sign, followed by optional flags, + * width, and precision indicators, then terminated with a field type + * character. The field type controls how the corresponding + * `sprintf` argument is to be interpreted, while the flags + * modify that interpretation. + * + * The field type characters are: + * + * Field | Integer Format + * ------+-------------------------------------------------------------- + * b | Convert argument as a binary number. + * | Negative numbers will be displayed as a two's complement + * | prefixed with '..1'. + * B | Equivalent to 'b', but uses an uppercase 0B for prefix + * | in the alternative format by #. + * d | Convert argument as a decimal number. + * i | Identical to 'd'. + * o | Convert argument as an octal number. + * | Negative numbers will be displayed as a two's complement + * | prefixed with '..7'. + * u | Identical to 'd'. + * x | Convert argument as a hexadecimal number. + * | Negative numbers will be displayed as a two's complement + * | prefixed with '..f' (representing an infinite string of + * | leading 'ff's). + * X | Equivalent to 'x', but uses uppercase letters. + * + * Field | Float Format + * ------+-------------------------------------------------------------- + * e | Convert floating-point argument into exponential notation + * | with one digit before the decimal point as [-]d.dddddde[+-]dd. + * | The precision specifies the number of digits after the decimal + * | point (defaulting to six). + * E | Equivalent to 'e', but uses an uppercase E to indicate + * | the exponent. + * f | Convert floating-point argument as [-]ddd.dddddd, + * | where the precision specifies the number of digits after + * | the decimal point. + * g | Convert a floating-point number using exponential form + * | if the exponent is less than -4 or greater than or + * | equal to the precision, or in dd.dddd form otherwise. + * | The precision specifies the number of significant digits. + * G | Equivalent to 'g', but use an uppercase 'E' in exponent form. + * + * Field | Other Format + * ------+-------------------------------------------------------------- + * c | Argument is the numeric code for a single character or + * | a single character string itself. + * p | The valuing of argument.inspect. + * s | Argument is a string to be substituted. If the format + * | sequence contains a precision, at most that many characters + * | will be copied. + * % | A percent sign itself will be displayed. No argument taken. + * + * The flags modifies the behavior of the formats. + * The flag characters are: + * + * Flag | Applies to | Meaning + * ---------+---------------+----------------------------------------- + * space | bBdiouxX | Leave a space at the start of + * | aAeEfgG | non-negative numbers. + * | (numeric fmt) | For 'o', 'x', 'X', 'b' and 'B', use + * | | a minus sign with absolute value for + * | | negative values. + * ---------+---------------+----------------------------------------- + * (digit)$ | all | Specifies the absolute argument number + * | | for this field. Absolute and relative + * | | argument numbers cannot be mixed in a + * | | sprintf string. + * ---------+---------------+----------------------------------------- + * # | bBoxX | Use an alternative format. + * | aAeEfgG | For the conversions 'o', increase the precision + * | | until the first digit will be '0' if + * | | it is not formatted as complements. + * | | For the conversions 'x', 'X', 'b' and 'B' + * | | on non-zero, prefix the result with "0x", + * | | "0X", "0b" and "0B", respectively. + * | | For 'e', 'E', 'f', 'g', and 'G', + * | | force a decimal point to be added, + * | | even if no digits follow. + * | | For 'g' and 'G', do not remove trailing zeros. + * ---------+---------------+----------------------------------------- + * + | bBdiouxX | Add a leading plus sign to non-negative + * | aAeEfgG | numbers. + * | (numeric fmt) | For 'o', 'x', 'X', 'b' and 'B', use + * | | a minus sign with absolute value for + * | | negative values. + * ---------+---------------+----------------------------------------- + * - | all | Left-justify the result of this conversion. + * ---------+---------------+----------------------------------------- + * 0 (zero) | bBdiouxX | Pad with zeros, not spaces. + * | aAeEfgG | For 'o', 'x', 'X', 'b' and 'B', radix-1 + * | (numeric fmt) | is used for negative numbers formatted as + * | | complements. + * ---------+---------------+----------------------------------------- + * * | all | Use the next argument as the field width. + * | | If negative, left-justify the result. If the + * | | asterisk is followed by a number and a dollar + * | | sign, use the indicated argument as the width. + * + * Examples of flags: + * + * # '+' and space flag specifies the sign of non-negative numbers. + * sprintf("%d", 123) #=> "123" + * sprintf("%+d", 123) #=> "+123" + * sprintf("% d", 123) #=> " 123" + * + * # '#' flag for 'o' increases number of digits to show '0'. + * # '+' and space flag changes format of negative numbers. + * sprintf("%o", 123) #=> "173" + * sprintf("%#o", 123) #=> "0173" + * sprintf("%+o", -123) #=> "-173" + * sprintf("%o", -123) #=> "..7605" + * sprintf("%#o", -123) #=> "..7605" + * + * # '#' flag for 'x' add a prefix '0x' for non-zero numbers. + * # '+' and space flag disables complements for negative numbers. + * sprintf("%x", 123) #=> "7b" + * sprintf("%#x", 123) #=> "0x7b" + * sprintf("%+x", -123) #=> "-7b" + * sprintf("%x", -123) #=> "..f85" + * sprintf("%#x", -123) #=> "0x..f85" + * sprintf("%#x", 0) #=> "0" + * + * # '#' for 'X' uses the prefix '0X'. + * sprintf("%X", 123) #=> "7B" + * sprintf("%#X", 123) #=> "0X7B" + * + * # '#' flag for 'b' add a prefix '0b' for non-zero numbers. + * # '+' and space flag disables complements for negative numbers. + * sprintf("%b", 123) #=> "1111011" + * sprintf("%#b", 123) #=> "0b1111011" + * sprintf("%+b", -123) #=> "-1111011" + * sprintf("%b", -123) #=> "..10000101" + * sprintf("%#b", -123) #=> "0b..10000101" + * sprintf("%#b", 0) #=> "0" + * + * # '#' for 'B' uses the prefix '0B'. + * sprintf("%B", 123) #=> "1111011" + * sprintf("%#B", 123) #=> "0B1111011" + * + * # '#' for 'e' forces to show the decimal point. + * sprintf("%.0e", 1) #=> "1e+00" + * sprintf("%#.0e", 1) #=> "1.e+00" + * + * # '#' for 'f' forces to show the decimal point. + * sprintf("%.0f", 1234) #=> "1234" + * sprintf("%#.0f", 1234) #=> "1234." + * + * # '#' for 'g' forces to show the decimal point. + * # It also disables stripping lowest zeros. + * sprintf("%g", 123.4) #=> "123.4" + * sprintf("%#g", 123.4) #=> "123.400" + * sprintf("%g", 123456) #=> "123456" + * sprintf("%#g", 123456) #=> "123456." + * + * The field width is an optional integer, followed optionally by a + * period and a precision. The width specifies the minimum number of + * characters that will be written to the result for this field. + * + * Examples of width: + * + * # padding is done by spaces, width=20 + * # 0 or radix-1. <------------------> + * sprintf("%20d", 123) #=> " 123" + * sprintf("%+20d", 123) #=> " +123" + * sprintf("%020d", 123) #=> "00000000000000000123" + * sprintf("%+020d", 123) #=> "+0000000000000000123" + * sprintf("% 020d", 123) #=> " 0000000000000000123" + * sprintf("%-20d", 123) #=> "123 " + * sprintf("%-+20d", 123) #=> "+123 " + * sprintf("%- 20d", 123) #=> " 123 " + * sprintf("%020x", -123) #=> "..ffffffffffffffff85" + * + * For + * numeric fields, the precision controls the number of decimal places + * displayed. For string fields, the precision determines the maximum + * number of characters to be copied from the string. (Thus, the format + * sequence `%10.10s` will always contribute exactly ten + * characters to the result.) + * + * Examples of precisions: + * + * # precision for 'd', 'o', 'x' and 'b' is + * # minimum number of digits <------> + * sprintf("%20.8d", 123) #=> " 00000123" + * sprintf("%20.8o", 123) #=> " 00000173" + * sprintf("%20.8x", 123) #=> " 0000007b" + * sprintf("%20.8b", 123) #=> " 01111011" + * sprintf("%20.8d", -123) #=> " -00000123" + * sprintf("%20.8o", -123) #=> " ..777605" + * sprintf("%20.8x", -123) #=> " ..ffff85" + * sprintf("%20.8b", -11) #=> " ..110101" + * + * # "0x" and "0b" for '#x' and '#b' is not counted for + * # precision but "0" for '#o' is counted. <------> + * sprintf("%#20.8d", 123) #=> " 00000123" + * sprintf("%#20.8o", 123) #=> " 00000173" + * sprintf("%#20.8x", 123) #=> " 0x0000007b" + * sprintf("%#20.8b", 123) #=> " 0b01111011" + * sprintf("%#20.8d", -123) #=> " -00000123" + * sprintf("%#20.8o", -123) #=> " ..777605" + * sprintf("%#20.8x", -123) #=> " 0x..ffff85" + * sprintf("%#20.8b", -11) #=> " 0b..110101" + * + * # precision for 'e' is number of + * # digits after the decimal point <------> + * sprintf("%20.8e", 1234.56789) #=> " 1.23456789e+03" + * + * # precision for 'f' is number of + * # digits after the decimal point <------> + * sprintf("%20.8f", 1234.56789) #=> " 1234.56789000" + * + * # precision for 'g' is number of + * # significant digits <-------> + * sprintf("%20.8g", 1234.56789) #=> " 1234.5679" + * + * # <-------> + * sprintf("%20.8g", 123456789) #=> " 1.2345679e+08" + * + * # precision for 's' is + * # maximum number of characters <------> + * sprintf("%20.8s", "string test") #=> " string t" + * + * Examples: + * + * sprintf("%d %04x", 123, 123) #=> "123 007b" + * sprintf("%08b '%4s'", 123, 123) #=> "01111011 ' 123'" + * sprintf("%1$*2$s %2$d %1$s", "hello", 8) #=> " hello 8 hello" + * sprintf("%1$*2$s %2$d", "hello", -8) #=> "hello -8" + * sprintf("%+g:% g:%-g", 1.23, 1.23, 1.23) #=> "+1.23: 1.23:1.23" + * sprintf("%u", -123) #=> "-123" + * + * For more complex formatting, Ruby supports a reference by name. + * %s style uses format style, but %{name} style doesn't. + * + * Examples: + * sprintf("%d : %f", { :foo => 1, :bar => 2 }) + * #=> 1 : 2.000000 + * sprintf("%{foo}f", { :foo => 1 }) + * # => "1f" + */ + +static mrb_value +mrb_f_sprintf(mrb_state *mrb, mrb_value obj) +{ + mrb_int argc; + const mrb_value *argv; + + mrb_get_args(mrb, "*", &argv, &argc); + + if (argc <= 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "too few arguments"); + return mrb_nil_value(); + } + else { + return mrb_str_format(mrb, argc - 1, argv + 1, argv[0]); + } +} void mrb_mruby_sprintf_gem_init(mrb_state *mrb) { struct RClass *krn = mrb->kernel_module; - mrb_define_module_function(mrb, krn, "sprintf", mrb_f_sprintf, MRB_ARGS_ANY()); - mrb_define_module_function(mrb, krn, "format", mrb_f_sprintf, MRB_ARGS_ANY()); + mrb_define_module_function_id(mrb, krn, MRB_SYM(sprintf), mrb_f_sprintf, MRB_ARGS_ANY()); + mrb_define_module_function_id(mrb, krn, MRB_SYM(format), mrb_f_sprintf, MRB_ARGS_ANY()); } void diff --git a/mrbgems/mruby-sprintf/test/sprintf.rb b/mrbgems/mruby-sprintf/test/sprintf.rb index aedc2787aa..c513188a14 100644 --- a/mrbgems/mruby-sprintf/test/sprintf.rb +++ b/mrbgems/mruby-sprintf/test/sprintf.rb @@ -91,7 +91,28 @@ end end -assert("String#% invalid format shared substring") do - fmt = ("x"*30+"%!")[0...-1] - assert_equal fmt, sprintf(fmt, "") +assert("sprintf %g with high precision") do + # Regression test: precision values larger than double's significand + # used to cause out-of-bounds reads in fp_uscale's fixed_width(). + assert_equal "7", "%.*g" % [51, 7] + assert_equal "7.5", "%.*g" % [51, 7.5] + assert_equal "7", "%.51g" % 7.0 + assert_equal "7." + "0" * 50, "%#.51g" % 7.0 + assert_equal "7", "%.*g" % [1000, 7.0] +end + +assert("sprintf with to_s mutating format string") do + # The to_s callback must not be able to invalidate sprintf's internal + # iteration pointers by mutating the format string. + fmt = "%s" + "B" * 200 + mutator = Object.new + $sprintf_test_fmt = fmt + def mutator.to_s + $sprintf_test_fmt.replace("Z") + "ok" + end + result = sprintf(fmt, mutator) + assert_equal 202, result.length + assert_equal "ok", result[0, 2] + assert_equal "B" * 200, result[2..] end diff --git a/mrbgems/mruby-strftime/README.md b/mrbgems/mruby-strftime/README.md new file mode 100644 index 0000000000..78e6c55303 --- /dev/null +++ b/mrbgems/mruby-strftime/README.md @@ -0,0 +1,140 @@ +# mruby-strftime + +`Time#strftime` implementation for mruby. + +## Overview + +This gem provides the `strftime` method for `Time` objects in mruby, enabling +formatted time string output using standard format directives. + +## Usage + +```ruby +require 'mruby-strftime' + +t = Time.new(2023, 12, 25, 10, 30, 45) + +t.strftime("%Y-%m-%d") #=> "2023-12-25" +t.strftime("%H:%M:%S") #=> "10:30:45" +t.strftime("%Y-%m-%d %H:%M:%S") #=> "2023-12-25 10:30:45" +t.strftime("%A, %B %d, %Y") #=> "Monday, December 25, 2023" +``` + +## Format Specifiers + +The `strftime` method supports standard format directives. The exact set of +available directives depends on your system's `strftime(3)` implementation. +Common directives include: + +### Date Components + +- `%Y` - Year with century (e.g., 2023) +- `%y` - Year without century (00-99) +- `%m` - Month of the year (01-12) +- `%B` - Full month name (e.g., "December") +- `%b` - Abbreviated month name (e.g., "Dec") +- `%d` - Day of the month (01-31) +- `%j` - Day of the year (001-366) + +### Time Components + +- `%H` - Hour of the day, 24-hour clock (00-23) +- `%I` - Hour of the day, 12-hour clock (01-12) +- `%M` - Minute of the hour (00-59) +- `%S` - Second of the minute (00-60) +- `%p` - AM/PM indicator +- `%Z` - Timezone name or abbreviation + +### Weekday + +- `%A` - Full weekday name (e.g., "Monday") +- `%a` - Abbreviated weekday name (e.g., "Mon") +- `%w` - Day of the week (0-6, Sunday is 0) +- `%u` - Day of the week (1-7, Monday is 1) + +### Combined Formats + +- `%c` - Preferred date and time representation +- `%x` - Preferred date representation +- `%X` - Preferred time representation +- `%F` - ISO 8601 date format (equivalent to `%Y-%m-%d`) +- `%T` - ISO 8601 time format (equivalent to `%H:%M:%S`) + +### Special Characters + +- `%%` - Literal `%` character +- `%n` - Newline character +- `%t` - Tab character + +## Features + +### Timezone Support + +The method respects the timezone of the `Time` object: + +```ruby +t_utc = Time.utc(2023, 12, 25, 12, 0, 0) +t_local = Time.local(2023, 12, 25, 12, 0, 0) + +t_utc.strftime("%Y-%m-%d %H:%M:%S %Z") +#=> "2023-12-25 12:00:00 UTC" + +t_local.strftime("%Y-%m-%d %H:%M:%S %z") +#=> "2023-12-25 12:00:00 +0900" (example for JST) +``` + +### NUL Byte Handling + +Unlike some implementations, mruby-strftime correctly handles NUL bytes (`\0`) +embedded in format strings, preserving them in the output: + +```ruby +t = Time.gm(2023, 12, 25) +result = t.strftime("year\0%Y") +result.length #=> 9 (includes the NUL byte) +``` + +This behavior maintains Ruby's string semantics where strings are length-based +rather than null-terminated. + +## Implementation Details + +### Buffer Management + +The implementation uses dynamic buffer allocation: + +- Initial buffer size: 64 bytes +- Maximum buffer size: 4096 bytes +- Automatically grows when needed for longer formatted strings + +### Platform Compatibility + +This gem uses the system's `strftime(3)` function for formatting. The exact +behavior of format directives may vary slightly between platforms, particularly +for: + +- Locale-dependent formats (day/month names, date/time preferences) +- Timezone representations +- Platform-specific extensions + +## Dependencies + +- `mruby-time` - Required for `Time` class support + +## Installation + +Add to your `build_config.rb`: + +```ruby +conf.gem :core => 'mruby-strftime' +``` + +Or include via gembox: + +```ruby +conf.gembox 'stdlib-ext' # Includes mruby-strftime +``` + +## License + +MIT License - See mruby's main license file for details. diff --git a/mrbgems/mruby-strftime/mrbgem.rake b/mrbgems/mruby-strftime/mrbgem.rake new file mode 100644 index 0000000000..89d37304bb --- /dev/null +++ b/mrbgems/mruby-strftime/mrbgem.rake @@ -0,0 +1,7 @@ +MRuby::Gem::Specification.new('mruby-strftime') do |spec| + spec.license = 'MIT' + spec.author = 'mruby developers' + spec.summary = 'Time#strftime implementation' + + spec.add_dependency 'mruby-time' +end diff --git a/mrbgems/mruby-strftime/src/strftime.c b/mrbgems/mruby-strftime/src/strftime.c new file mode 100644 index 0000000000..94d659a1fa --- /dev/null +++ b/mrbgems/mruby-strftime/src/strftime.c @@ -0,0 +1,118 @@ +/* +** strftime.c - Time#strftime +** +** See Copyright Notice in mruby.h +*/ + +#include +#include +#include +#include +#include +#include + +#define MAX_BUFFER_SIZE 4096 + +/* + * call-seq: + * time.strftime(format) -> string + * + * Formats time according to the directives in the given format string. + * + * The format string may contain NUL bytes, which will be preserved in + * the output string. + * + * Common format directives: + * %Y - Year with century (e.g., 2023) + * %m - Month of the year (01-12) + * %d - Day of the month (01-31) + * %H - Hour of the day, 24-hour clock (00-23) + * %M - Minute of the hour (00-59) + * %S - Second of the minute (00-60) + * %% - Literal % character + * + * See your system's strftime(3) documentation for a complete list. + * + * t = Time.new(2023, 12, 25, 10, 30, 45) + * t.strftime("%Y-%m-%d %H:%M:%S") #=> "2023-12-25 10:30:45" + * t.strftime("%A, %B %d, %Y") #=> "Monday, December 25, 2023" + */ +static mrb_value +mrb_time_strftime(mrb_state *mrb, mrb_value self) +{ + const char *format; + mrb_int format_len; + + mrb_get_args(mrb, "s", &format, &format_len); + struct tm *tm = mrb_time_get_tm(mrb, self); + + mrb_value result = mrb_str_new(mrb, NULL, 0); + const char *fmt_ptr = format; + mrb_int remaining = format_len; + + /* Process format string in segments, handling NUL bytes */ + while (remaining > 0) { + const char *nul_pos = (const char *)memchr(fmt_ptr, '\0', (size_t)remaining); + mrb_int segment_len = nul_pos ? (nul_pos - fmt_ptr) : remaining; + + /* Process this segment (up to NUL or end of string) */ + if (segment_len > 0) { + char *segment; + char *buf; + size_t n; + + /* Create null-terminated copy of this segment */ + /* Use mrb_temp_alloc for exception safety - GC will clean up on exception */ + segment = (char *)mrb_temp_alloc(mrb, (size_t)segment_len + 1); + memcpy(segment, fmt_ptr, (size_t)segment_len); + segment[segment_len] = '\0'; + +#ifdef _MSC_VER + /* Check for GNU extension %-flag which crashes on MSVC */ + /* Scan for %- patterns in the format string */ + for (const char *p = segment; *p != '\0'; p++) { + if (p[0] == '%' && p[1] == '-') { + mrb_raisef(mrb, E_ARGUMENT_ERROR, + "strftime format flag '%-' not supported on this platform (use '%%#' on Windows)"); + } + } +#endif + + /* Allocate buffer for formatted output */ + /* Use mrb_temp_alloc with max size for exception safety */ + buf = (char *)mrb_temp_alloc(mrb, MAX_BUFFER_SIZE); + + /* Try formatting with max buffer size */ + n = strftime(buf, MAX_BUFFER_SIZE, segment, tm); + + /* Append formatted output to result */ + mrb_str_cat(mrb, result, buf, n); + } + + /* If there was a NUL, append it to result and advance past it */ + if (nul_pos) { + mrb_str_cat(mrb, result, "\0", 1); + fmt_ptr = nul_pos + 1; + remaining -= segment_len + 1; + } + else { + break; + } + } + + return result; +} + +void +mrb_mruby_strftime_gem_init(mrb_state *mrb) +{ + struct RClass *time_class; + + time_class = mrb_class_get_id(mrb, MRB_SYM(Time)); + mrb_define_method_id(mrb, time_class, MRB_SYM(strftime), mrb_time_strftime, MRB_ARGS_REQ(1)); +} + +void +mrb_mruby_strftime_gem_final(mrb_state *mrb) +{ +} diff --git a/mrbgems/mruby-strftime/test/strftime.rb b/mrbgems/mruby-strftime/test/strftime.rb new file mode 100644 index 0000000000..6dade4ed90 --- /dev/null +++ b/mrbgems/mruby-strftime/test/strftime.rb @@ -0,0 +1,152 @@ +assert('Time#strftime') do + t = Time.now + assert_true t.respond_to?(:strftime) +end + +assert('Time#strftime with basic formats') do + t = Time.gm(2023, 12, 25, 10, 30, 45) + + assert_equal '2023', t.strftime('%Y') + assert_equal '23', t.strftime('%y') + assert_equal '12', t.strftime('%m') + assert_equal '25', t.strftime('%d') + assert_equal '10', t.strftime('%H') + assert_equal '30', t.strftime('%M') + assert_equal '45', t.strftime('%S') +end + +assert('Time#strftime with combined formats') do + t = Time.gm(2023, 12, 25, 10, 30, 45) + + assert_equal '2023-12-25', t.strftime('%Y-%m-%d') + assert_equal '10:30:45', t.strftime('%H:%M:%S') + assert_equal '2023-12-25 10:30:45', t.strftime('%Y-%m-%d %H:%M:%S') +end + +assert('Time#strftime with weekday formats') do + # 2023-12-25 is Monday + t = Time.gm(2023, 12, 25) + + result = t.strftime('%A') + assert_true result.include?('Mon') || result == 'Monday' + + result = t.strftime('%a') + assert_true result.length >= 2 + + assert_equal '1', t.strftime('%w') # Monday is day 1 +end + +assert('Time#strftime with month formats') do + t = Time.gm(2023, 12, 25) + + result = t.strftime('%B') + assert_true result.include?('Dec') || result == 'December' + + result = t.strftime('%b') + assert_true result.length >= 2 +end + +assert('Time#strftime with literal percent') do + t = Time.gm(2023, 12, 25) + + assert_equal '%', t.strftime('%%') + assert_equal '100%', t.strftime('100%%') + assert_equal '2023%12', t.strftime('%Y%%%m') +end + +assert('Time#strftime with empty format') do + t = Time.now + + assert_equal '', t.strftime('') +end + +assert('Time#strftime with no format specifiers') do + t = Time.now + + assert_equal 'hello', t.strftime('hello') + assert_equal 'test123', t.strftime('test123') +end + +assert('Time#strftime with NUL byte') do + t = Time.gm(2023, 12, 25) + + result = t.strftime("foo\0bar") + assert_equal 7, result.length + assert_equal 'f', result[0] + assert_equal 'o', result[1] + assert_equal 'o', result[2] + assert_equal "\0", result[3] + assert_equal 'b', result[4] + assert_equal 'a', result[5] + assert_equal 'r', result[6] +end + +assert('Time#strftime with NUL and format specifiers') do + t = Time.gm(2023, 12, 25) + + result = t.strftime("year\0%Y") + assert_true result.length >= 9 # "year\0" (5) + "2023" (4) + assert_true result.include?("\0") + # Check last 4 characters are "2023" + assert_equal '2023', result[-4, 4] +end + +assert('Time#strftime with multiple NULs') do + t = Time.now + + result = t.strftime("\0\0") + assert_equal 2, result.length + assert_equal "\0\0", result +end + +assert('Time#strftime with NUL at beginning') do + t = Time.gm(2023, 1, 1) + + result = t.strftime("\0%Y") + assert_equal 5, result.length + assert_equal "\0", result[0] + assert_equal '2023', result[1, 4] +end + +assert('Time#strftime with NUL at end') do + t = Time.gm(2023, 1, 1) + + result = t.strftime("%Y\0") + assert_equal 5, result.length + assert_equal '2023', result[0, 4] + assert_equal "\0", result[4] +end + +assert('Time#strftime preserves timezone') do + t_utc = Time.utc(2023, 1, 1, 12, 0, 0) + t_local = Time.local(2023, 1, 1, 12, 0, 0) + + # Both should format their time correctly + assert_equal '12', t_utc.strftime('%H') + assert_equal '12', t_local.strftime('%H') +end + +assert('Time#strftime with various time components') do + t = Time.gm(2023, 6, 15, 14, 23, 7) + + # Use portable format specifiers that work on all platforms + assert_equal '06', t.strftime('%m') # Month with leading zero (portable) + assert_equal '15', t.strftime('%d') + assert_equal '14', t.strftime('%H') + assert_equal '23', t.strftime('%M') + assert_equal '07', t.strftime('%S') +end + +assert('Time#strftime argument type error') do + t = Time.now + + assert_raise(TypeError) { t.strftime(123) } + assert_raise(TypeError) { t.strftime(nil) } +end + +assert('Time#strftime argument count error') do + t = Time.now + + assert_raise(ArgumentError) { t.strftime } + assert_raise(ArgumentError) { t.strftime('%Y', '%m') } +end diff --git a/mrbgems/mruby-string-ext/README.md b/mrbgems/mruby-string-ext/README.md new file mode 100644 index 0000000000..9b8b8e5ae9 --- /dev/null +++ b/mrbgems/mruby-string-ext/README.md @@ -0,0 +1,886 @@ +# mruby-string-ext + +This mrbgem adds extended string functionalities to mruby. + +## Methods + +### `String#clear` + +Makes string empty. + +```ruby +string.clear +``` + +Example: + +```ruby +a = "abcde" +a.clear #=> "" +``` + +### `String#lstrip` + +Returns a copy of the string with leading whitespace removed. + +```ruby +str.lstrip +``` + +Example: + +```ruby +" hello ".lstrip #=> "hello " +``` + +### `String#lstrip!` + +Removes leading whitespace from the string, returning `nil` if no change was made. Modifies the receiver in place. + +```ruby +str.lstrip! +``` + +Example: + +```ruby +a = " hello " +a.lstrip! #=> "hello " +b = "hello" +b.lstrip! #=> nil +``` + +### `String#rstrip` + +Returns a copy of the string with trailing whitespace removed. + +```ruby +str.rstrip +``` + +Example: + +```ruby +" hello ".rstrip #=> " hello" +``` + +### `String#rstrip!` + +Removes trailing whitespace from the string, returning `nil` if no change was made. Modifies the receiver in place. + +```ruby +str.rstrip! +``` + +Example: + +```ruby +a = " hello " +a.rstrip! #=> " hello" +b = "hello" +b.rstrip! #=> nil +``` + +### `String#strip` + +Returns a copy of the string with leading and trailing whitespace removed. + +```ruby +str.strip +``` + +Example: + +```ruby +" hello ".strip #=> "hello" +"\tgoodbye\r\n".strip #=> "goodbye" +``` + +### `String#strip!` + +Removes leading and trailing whitespace from the string. Returns `nil` if the string was not altered. Modifies the receiver in place. + +```ruby +str.strip! +``` + +Example: + +```ruby +a = " hello " +a.strip! #=> "hello" +b = "goodbye" +b.strip! #=> nil +``` + +### `String#partition` + +Searches for the first occurrence of the separator `sep` and returns a three-element array: the part before the separator, the separator itself, and the part after the separator. If the separator is not found, returns the original string and two empty strings. + +```ruby +string.partition(sep) +``` + +Example: + +```ruby +"hello".partition("l") #=> ["he", "l", "lo"] +"hello".partition("x") #=> ["hello", "", ""] +``` + +### `String#rpartition` + +Searches for the last occurrence of the separator `sep` and returns a three-element array: the part before the separator, the separator itself, and the part after the separator. If the separator is not found, returns two empty strings and the original string. + +```ruby +string.rpartition(sep) +``` + +Example: + +```ruby +"hello ello".rpartition("l") #=> ["hello el", "l", "o"] +"hello".rpartition("x") #=> ["", "", "hello"] +``` + +### `String#slice!` + +Deletes the specified portion from the string, and returns the portion deleted. Modifies the receiver in place. + +```ruby +str.slice!(fixnum) +str.slice!(fixnum, fixnum) +str.slice!(range) +str.slice!(other_str) +``` + +Example: + +```ruby +string = "hello world" +string.slice!(4) #=> "o" +# string is now "hell world" +string.slice!(2..3) #=> "ll" +# string is now "he world" +string.slice!("l") #=> "l" +# string is now "he word" +string.slice!("nomatch") #=> nil +``` + +### `String#insert` + +Inserts `other_str` before the character at the given `index`, modifying `str`. Negative indices count from the end of the string. + +```ruby +str.insert(index, other_str) +``` + +Example: + +```ruby +"abcd".insert(0, 'X') #=> "Xabcd" +"abcd".insert(3, 'X') #=> "abcXd" +"abcd".insert(4, 'X') #=> "abcdX" +"abcd".insert(-3, 'X') #=> "abXcd" +"abcd".insert(-1, 'X') #=> "abcdX" +``` + +### `String#ljust` + +If `integer` is greater than the length of `str`, returns a new string of length `integer` with `str` left justified and padded with `padstr`; otherwise, returns `str`. + +```ruby +str.ljust(integer, padstr=' ') +``` + +Example: + +```ruby +"hello".ljust(4) #=> "hello" +"hello".ljust(20) #=> "hello " +"hello".ljust(20, '1234') #=> "hello123412341234123" +``` + +### `String#rjust` + +If `integer` is greater than the length of `str`, returns a new string of length `integer` with `str` right justified and padded with `padstr`; otherwise, returns `str`. + +```ruby +str.rjust(integer, padstr=' ') +``` + +Example: + +```ruby +"hello".rjust(4) #=> "hello" +"hello".rjust(20) #=> " hello" +"hello".rjust(20, '1234') #=> "123412341234123hello" +``` + +### `String#center` + +Centers `str` in `width`. If `width` is greater than the length of `str`, returns a new String of length `width` with `str` centered and padded with `padstr`; otherwise, returns `str`. + +```ruby +str.center(width, padstr=' ') +``` + +Example: + +```ruby +"hello".center(4) #=> "hello" +"hello".center(20) #=> " hello " +"hello".center(20, '123') #=> "1231231hello12312312" +``` + +### `String#chars` + +If a block is given, calls the block for each character. Otherwise, returns an array of characters in the string. + +```ruby +str.chars #=> array +str.chars {|char| block } #=> str +``` + +Example: + +```ruby +"hello".chars #=> ["h", "e", "l", "l", "o"] +``` + +### `String#each_char` + +Calls the given block for each character of the string. If no block is given, returns an enumerator. + +```ruby +str.each_char {|char| block } #=> str +str.each_char #=> an_enumerator +``` + +Example: + +```ruby +s = "" +"hello".each_char {|c| s << c << '*' } # s is now "h*e*l*l*o*" +``` + +### `String#codepoints` (alias `each_codepoint`) + +If a block is given, calls the block with the Integer ordinal of each character in the string. If no block is given, returns an array of these ordinals. + +```ruby +str.codepoints #=> array +str.codepoints {|codepoint| block } #=> str +``` + +Example: + +```ruby +"h\u00E9llo".codepoints #=> [104, 233, 108, 108, 111] (if UTF-8 aware) +"hello".codepoints #=> [104, 101, 108, 108, 111] +``` + +### `String#prepend` + +Prepends the given string(s) to `str`. Modifies `str` in place. + +```ruby +str.prepend(other_str, ...) +``` + +Example: + +```ruby +a = "world" +a.prepend("hello ") #=> "hello world" +a #=> "hello world" +a.prepend("Greeting: ", "Bob! ") #=> "Greeting: Bob! hello world" +``` + +### `String#lines` + +Returns an array of strings, where each string is a line from the original string. Lines are separated by `\n`. If a block is given, it works the same as `each_line`. + +```ruby +string.lines #=> array +string.lines {|s| block} #=> string +``` + +Example: + +```ruby +a = "abc\ndef" +a.lines #=> ["abc\n", "def"] +"hello\nworld".lines {|line| puts line } # prints "hello\n" then "world" +``` + +### `String#upto` + +Iterates through successive values, starting at `str` and ending at `other_str` inclusive (unless `exclusive` is true). The `String#succ` method is used to generate each value. + +```ruby +str.upto(other_str, exclusive=false) {|s| block } #=> str +str.upto(other_str, exclusive=false) #=> an_enumerator +``` + +Example: + +```ruby +"a8".upto("b1") {|s| print s, ' ' } #=> prints: a8 a9 b0 b1 +"9".upto("11").to_a #=> ["9", "10", "11"] +"07".upto("11").to_a #=> ["07", "08", "09", "10", "11"] +"a".upto("c", true).to_a #=> ["a", "b"] +``` + +### `String#swapcase` + +Returns a copy of `str` with uppercase alphabetic characters converted to lowercase and lowercase characters converted to uppercase. Effective only in ASCII region. + +```ruby +str.swapcase #=> new_str +``` + +Example: + +```ruby +"Hello".swapcase #=> "hELLO" +"cYbEr_PuNk11".swapcase #=> "CyBeR_pUnK11" +``` + +### `String#swapcase!` + +Equivalent to `String#swapcase`, but modifies the receiver in place. Returns `str`, or `nil` if no changes were made. + +```ruby +str.swapcase! #=> str or nil +``` + +Example: + +```ruby +a = "Hello" +a.swapcase! #=> "hELLO" +a #=> "hELLO" +b = "123" +b.swapcase! #=> nil +``` + +### `String#concat` (alias `String#<<`) + +Appends the argument(s) to the string. If an argument is an Integer, it's considered a codepoint and converted to a character. Modifies the string in place. + +```ruby +str.concat(other_str, ...) #=> str +str << obj #=> str +``` + +Example: + +```ruby +s = 'foo' +s.concat('bar', 'baz') # => "foobarbaz" +s # => "foobarbaz" +s = 'foo' +s << 'bar' << 32 << 'baz' # => "foobar baz" (32 is space) +``` + +### `String#append_as_bytes` + +Works like `concat` but considers arguments as binary strings. Integer arguments are treated as byte values (0-255) and converted to characters. + +```ruby +str.append_as_bytes(*obj) #=> str +``` + +Example: + +```ruby +s = "test" +s.append_as_bytes(32, "bytes", 33) #=> "test bytes!" (32 is space, 33 is !) +s.append_as_bytes(256) #=> RangeError (byte 256 out of range) +``` + +### `String#start_with?` + +Returns true if `str` starts with one of the `prefixes` given. + +```ruby +str.start_with?([prefixes]+) #=> true or false +``` + +Example: + +```ruby +"hello".start_with?("hell") #=> true +"hello".start_with?("heaven", "hell") #=> true +"hello".start_with?("heaven", "paradise") #=> false +``` + +### `String#end_with?` + +Returns true if `str` ends with one of the `suffixes` given. + +```ruby +str.end_with?([suffixes]+) #=> true or false +``` + +Example: + +```ruby +"hello".end_with?("llo") #=> true +"hello".end_with?("heaven", "llo") #=> true +"hello".end_with?("heaven", "paradise") #=> false +``` + +### `String#tr` + +Returns a copy of `str` with the characters in `from_str` replaced by the corresponding characters in `to_str`. +Supports `c1-c2` range notation and `^` for negation in `from_str`. + +```ruby +str.tr(from_str, to_str) #=> new_str +``` + +Example: + +```ruby +"hello".tr('el', 'ip') #=> "hippo" +"hello".tr('aeiou', '*') #=> "h*ll*" +"hello".tr('a-y', 'b-z') #=> "ifmmp" +"hello".tr('^aeiou', '*') #=> "*e**o" +``` + +### `String#tr!` + +Translates `str` in place, using the same rules as `String#tr`. Returns `str`, or `nil` if no changes were made. + +```ruby +str.tr!(from_str, to_str) #=> str or nil +``` + +Example: + +```ruby +a = "hello" +a.tr!('el', 'ip') #=> "hippo" +a #=> "hippo" +b = "hello" +b.tr!('xyz', '123') #=> nil +``` + +### `String#tr_s` + +Processes a copy of `str` as described under `String#tr`, then removes duplicate characters in regions that were affected by the translation (squeeze). + +```ruby +str.tr_s(from_str, to_str) #=> new_str +``` + +Example: + +```ruby +"hello".tr_s('l', 'r') #=> "hero" +"hello".tr_s('el', '*') #=> "h*o" +"hello".tr_s('el', 'hx') #=> "hhxo" +``` + +### `String#tr_s!` + +Performs `String#tr_s` processing on `str` in place, returning `str`, or `nil` if no changes were made. + +```ruby +str.tr_s!(from_str, to_str) #=> str or nil +``` + +Example: + +```ruby +a = "hello" +a.tr_s!('l', 'r') #=> "hero" +a #=> "hero" +b = "hello" +b.tr_s!('x', 'y') #=> nil +``` + +### `String#squeeze` + +Builds a set of characters from the `other_str` parameter(s). Returns a new string where runs of the same character that occur in this set are replaced by a single character. If no arguments are given, all runs of identical characters are replaced. + +```ruby +str.squeeze([other_str]) #=> new_str +``` + +Example: + +```ruby +"yellow moon".squeeze #=> "yelow mon" +" now is the".squeeze(" ") #=> " now is the" +"putters shoot balls".squeeze("m-z") #=> "puters shot balls" +``` + +### `String#squeeze!` + +Squeezes `str` in place, returning either `str`, or `nil` if no changes were made. + +```ruby +str.squeeze!([other_str]) #=> str or nil +``` + +Example: + +```ruby +a = "yellow moon" +a.squeeze! #=> "yelow mon" +a #=> "yelow mon" +b = "abc" +b.squeeze! #=> nil +``` + +### `String#delete` + +Returns a copy of `str` with all characters in the intersection of its arguments removed. Arguments are selectors like for `String#count`. + +```ruby +str.delete([other_str]+) #=> new_str +``` + +Example: + +```ruby +"hello".delete "l" #=> "heo" +"hello".delete "aeiou" #=> "hll" +"hello".delete "aeiou", "^l" #=> "l" (deletes vowels, but not 'l') +``` + +### `String#delete!` + +Performs a `delete` operation in place, returning `str`, or `nil` if `str` was not modified. + +```ruby +str.delete!([other_str]+) #=> str or nil +``` + +Example: + +```ruby +a = "hello" +a.delete!("l") #=> "heo" +a #=> "heo" +b = "hello" +b.delete!("xyz") #=> nil +``` + +### `String#count` + +Each `other_str` parameter defines a set of characters to count. The intersection of these sets defines the characters to count in `str`. + +```ruby +str.count([other_str]+) #=> integer +``` + +Example: + +```ruby +"hello world".count("lo") #=> 5 +"hello world".count("lo", "o") #=> 2 (chars 'l' and 'o', but only 'o' is in both sets) +"hello world".count("h") #=> 1 +"hello world".count("a-e", "h-l") #=> 3 (chars 'h', 'e', 'l') +"hello world".count("^l") #=> 8 (all chars except 'l') +``` + +### `String#hex` + +Treats leading characters of `str` as a string of hexadecimal digits (with an optional sign and an optional `0x`) and returns the corresponding number. + +```ruby +str.hex #=> integer +``` + +Example: + +```ruby +"0x0a".hex #=> 10 +"10".hex #=> 16 +"-10".hex #=> -16 +"ff".hex #=> 255 +"hello".hex #=> 0 (if no valid hex digits at start) +"0xhello".hex #=> 0 +``` + +### `String#oct` + +Treats leading characters of `str` as a string of octal digits (with an optional sign) and returns the corresponding number. + +```ruby +str.oct #=> integer +``` + +Example: + +```ruby +"10".oct #=> 8 +"010".oct #=> 8 +"-10".oct #=> -8 +"077".oct #=> 63 +"hello".oct #=> 0 (if no valid octal digits at start) +"0o10".oct #=> 8 (common prefix, depends on MRuby version) +``` + +### `String#chr` (on String instances) + +Returns a one-character string at the beginning of the string. + +```ruby +string.chr #=> string +``` + +Example: + +```ruby +a = "abcde" +a.chr #=> "a" +"".chr #=> "" (or error, mruby specific behavior might differ from CRuby) +``` + +### `Integer#chr` + +Returns a string containing the character represented by the `int`'s value according to `encoding`. + +```ruby +int.chr([encoding]) #=> string +``` + +Example: + +```ruby +65.chr #=> "A" +230.chr #=> "\xE6" (in ASCII-8BIT) +230.chr("ASCII-8BIT") #=> "\xE6" +# 230.chr("UTF-8") #=> "\u00E6" (if MRB_UTF8_STRING enabled) +``` + +### `String#succ` (alias `String#next`) + +Returns the successor to `str`. Increments the rightmost alphanumeric characters. + +```ruby +str.succ #=> new_str +``` + +Example: + +```ruby +"a".succ #=> "b" +"z".succ #=> "aa" +"9".succ #=> "10" +"a9".succ #=> "b0" +"Az".succ #=> "Ba" +"zz".succ #=> "aaa" +``` + +### `String#succ!` (alias `String#next!`) + +Equivalent to `String#succ`, but modifies the receiver in place. + +```ruby +str.succ! #=> str +``` + +Example: + +```ruby +a = "a9" +a.succ! #=> "b0" +a #=> "b0" +``` + +### `String#ord` + +Returns the Integer ordinal (codepoint) of the first character in `str`. + +```ruby +str.ord #=> integer +``` + +Example: + +```ruby +"a".ord #=> 97 +"A".ord #=> 65 +"\u20AC".ord #=> 8364 (if UTF-8 and character is euro sign) +"".ord #=> ArgumentError (empty string) +``` + +### `String#delete_prefix` + +Returns a copy of `str` with leading `prefix` deleted. + +```ruby +str.delete_prefix(prefix) -> new_str +``` + +Example: + +```ruby +"hello".delete_prefix("hel") #=> "lo" +"hello".delete_prefix("llo") #=> "hello" +``` + +### `String#delete_prefix!` + +Deletes leading `prefix` from `str`, returning `nil` if no change was made. Modifies the receiver in place. + +```ruby +str.delete_prefix!(prefix) -> self or nil +``` + +Example: + +```ruby +a = "hello" +a.delete_prefix!("hel") #=> "lo" +a #=> "lo" +b = "hello" +b.delete_prefix!("llo") #=> nil +``` + +### `String#delete_suffix` + +Returns a copy of `str` with trailing `suffix` deleted. + +```ruby +str.delete_suffix(suffix) -> new_str +``` + +Example: + +```ruby +"hello".delete_suffix("llo") #=> "he" +"hello".delete_suffix("hel") #=> "hello" +``` + +### `String#delete_suffix!` + +Deletes trailing `suffix` from `str`, returning `nil` if no change was made. Modifies the receiver in place. + +```ruby +str.delete_suffix!(suffix) -> self or nil +``` + +Example: + +```ruby +a = "hello" +a.delete_suffix!("llo") #=> "he" +a #=> "he" +b = "hello" +b.delete_suffix!("hel") #=> nil +``` + +### `String#casecmp` + +Case-insensitive version of `String#<=>`. Returns -1, 0, or +1. Returns `nil` if `other_str` is not a String. + +```ruby +str.casecmp(other_str) #=> -1, 0, +1 or nil +``` + +Example: + +```ruby +"abcdef".casecmp("abcde") #=> 1 +"aBcDeF".casecmp("abcdef") #=> 0 +"abcdef".casecmp("abcdefg") #=> -1 +"abcdef".casecmp("ABCDEF") #=> 0 +``` + +### `String#casecmp?` + +Returns `true` if `str` and `other_str` are equal after case folding, `false` if they are not equal, and `nil` if `other_str` is not a String. + +```ruby +str.casecmp?(other_str) #=> true, false, or nil +``` + +Example: + +```ruby +"aBcDeF".casecmp?("abcdef") #=> true +"aBcDeF".casecmp?("abcdeg") #=> false +``` + +### `String#+@` (Unary Plus) + +Returns `self` if `self` is not frozen. Otherwise returns a mutable (not frozen) duplicate of `self`. + +```ruby ++string #=> new_string or self +``` + +Example: + +```ruby +a = "hello" +b = +a +a.equal?(b) #=> true (both point to the same object) + +c = "world".freeze +d = +c +c.equal?(d) #=> false (d is a mutable copy) +d.frozen? #=> false +``` + +### `String#-@` (Unary Minus) + +Returns a frozen, possibly pre-existing (interned) copy of the string. + +```ruby +-string #=> frozen_string +``` + +Example: + +```ruby +a = "hello" +b = -a +a.equal?(b) #=> false (b is a different, frozen object) +b.frozen? #=> true + +c = "world".freeze +d = -c +c.equal?(d) #=> true (if c was already suitably frozen/interned) +``` + +### `String#ascii_only?` + +Returns `true` if `str` contains only ASCII characters. + +```ruby +str.ascii_only? #=> true or false +``` + +Example: + +```ruby +"abc".ascii_only? #=> true +"abc\x80".ascii_only? #=> false (if \x80 is considered non-ASCII) +"日本語".ascii_only? #=> false +``` + +### `String#b` + +Returns a copy of `str` with its encoding set to ASCII-8BIT (binary). + +```ruby +str.b #=> new_str_in_binary_encoding +``` + +Example: + +```ruby +# Assuming str was UTF-8 +str_utf8 = "\u00E9" # "é" +str_bin = str_utf8.b +# str_bin might be "\xC3\xA9" if that's the UTF-8 byte representation +# str_bin.encoding will be ASCII-8BIT +``` diff --git a/mrbgems/mruby-string-ext/mrblib/string.rb b/mrbgems/mruby-string-ext/mrblib/string.rb index 4ae0fb7ac6..15d930a934 100644 --- a/mrbgems/mruby-string-ext/mrblib/string.rb +++ b/mrbgems/mruby-string-ext/mrblib/string.rb @@ -1,348 +1,51 @@ class String ## - # call-seq: - # string.clear -> string - # - # Makes string empty. - # - # a = "abcde" - # a.clear #=> "" - # - def clear - self.replace("") - end - - ## - # call-seq: - # str.lstrip -> new_str - # - # Returns a copy of str with leading whitespace removed. See also - # String#rstrip and String#strip. - # - # " hello ".lstrip #=> "hello " - # "hello".lstrip #=> "hello" - # - def lstrip - a = 0 - z = self.size - 1 - a += 1 while a <= z and " \f\n\r\t\v".include?(self[a]) - (z >= 0) ? self[a..z] : "" - end - - ## - # call-seq: - # str.rstrip -> new_str - # - # Returns a copy of str with trailing whitespace removed. See also - # String#lstrip and String#strip. - # - # " hello ".rstrip #=> " hello" - # "hello".rstrip #=> "hello" - # - def rstrip - a = 0 - z = self.size - 1 - z -= 1 while a <= z and " \f\n\r\t\v\0".include?(self[z]) - (z >= 0) ? self[a..z] : "" - end - - ## - # call-seq: - # str.strip -> new_str - # - # Returns a copy of str with leading and trailing whitespace removed. - # - # " hello ".strip #=> "hello" - # "\tgoodbye\r\n".strip #=> "goodbye" - # - def strip - a = 0 - z = self.size - 1 - a += 1 while a <= z and " \f\n\r\t\v".include?(self[a]) - z -= 1 while a <= z and " \f\n\r\t\v\0".include?(self[z]) - (z >= 0) ? self[a..z] : "" - end - - ## - # call-seq: - # str.lstrip! -> self or nil - # - # Removes leading whitespace from str, returning nil if no - # change was made. See also String#rstrip! and - # String#strip!. - # - # " hello ".lstrip #=> "hello " - # "hello".lstrip! #=> nil - # - def lstrip! - raise FrozenError, "can't modify frozen String" if frozen? - s = self.lstrip - (s == self) ? nil : self.replace(s) - end - - ## - # call-seq: - # str.rstrip! -> self or nil - # - # Removes trailing whitespace from str, returning nil if - # no change was made. See also String#lstrip! and - # String#strip!. - # - # " hello ".rstrip #=> " hello" - # "hello".rstrip! #=> nil - # - def rstrip! - raise FrozenError, "can't modify frozen String" if frozen? - s = self.rstrip - (s == self) ? nil : self.replace(s) - end - - ## - # call-seq: - # str.strip! -> str or nil - # - # Removes leading and trailing whitespace from str. Returns - # nil if str was not altered. - # - def strip! - raise FrozenError, "can't modify frozen String" if frozen? - s = self.strip - (s == self) ? nil : self.replace(s) - end - - def partition(sep) - raise TypeError, "type mismatch: #{sep.class} given" unless sep.is_a? String - n = index(sep) - unless n.nil? - m = n + sep.size - [ slice(0, n), sep, slice(m, size - m) ] - else - [ self[0..-1], "", "" ] - end - end - - def rpartition(sep) - raise TypeError, "type mismatch: #{sep.class} given" unless sep.is_a? String - n = rindex(sep) - unless n.nil? - m = n + sep.size - [ slice(0, n), sep, slice(m, size - m) ] - else - [ "", "", self ] - end - end - - ## - # call-seq: - # str.slice!(fixnum) -> new_str or nil - # str.slice!(fixnum, fixnum) -> new_str or nil - # str.slice!(range) -> new_str or nil - # str.slice!(other_str) -> new_str or nil - # - # Deletes the specified portion from str, and returns the portion - # deleted. - # - # string = "this is a string" - # string.slice!(2) #=> "i" - # string.slice!(3..6) #=> " is " - # string.slice!("r") #=> "r" - # string #=> "thsa sting" - # - def slice!(arg1, arg2=nil) - raise FrozenError, "can't modify frozen String" if frozen? - raise ArgumentError, "wrong number of arguments (expected 1..2)" if arg1.nil? && arg2.nil? - - if !arg1.nil? && !arg2.nil? - idx = arg1 - idx += self.size if arg1 < 0 - if idx >= 0 && idx <= self.size && arg2 > 0 - str = self[idx, arg2] - else - return nil - end - else - validated = false - if arg1.kind_of?(Range) - beg = arg1.begin - ed = arg1.end - beg += self.size if beg < 0 - ed += self.size if ed < 0 - ed -= 1 if arg1.exclude_end? - validated = true - elsif arg1.kind_of?(String) - validated = true - else - idx = arg1 - idx += self.size if arg1 < 0 - validated = true if idx >=0 && arg1 < self.size - end - if validated - str = self[arg1] - else - return nil - end - end - unless str.nil? || str == "" - if !arg1.nil? && !arg2.nil? - idx = arg1 >= 0 ? arg1 : self.size+arg1 - str2 = self[0...idx] + self[idx+arg2..-1].to_s - else - if arg1.kind_of?(Range) - idx = beg >= 0 ? beg : self.size+beg - idx2 = ed>= 0 ? ed : self.size+ed - str2 = self[0...idx] + self[idx2+1..-1].to_s - elsif arg1.kind_of?(String) - idx = self.index(arg1) - str2 = self[0...idx] + self[idx+arg1.size..-1] unless idx.nil? - else - idx = arg1 >= 0 ? arg1 : self.size+arg1 - str2 = self[0...idx] + self[idx+1..-1].to_s - end - end - self.replace(str2) unless str2.nil? - end - str - end - - ## - # call-seq: - # str.insert(index, other_str) -> str - # - # Inserts other_str before the character at the given - # index, modifying str. Negative indices count from the - # end of the string, and insert after the given character. - # The intent is insert aString so that it starts at the given - # index. - # - # "abcd".insert(0, 'X') #=> "Xabcd" - # "abcd".insert(3, 'X') #=> "abcXd" - # "abcd".insert(4, 'X') #=> "abcdX" - # "abcd".insert(-3, 'X') #=> "abXcd" - # "abcd".insert(-1, 'X') #=> "abcdX" - # - def insert(idx, str) - if idx == -1 - return self << str - elsif idx < 0 - idx += 1 + # Call the given block for each character of + # `self`. + def each_char(&block) + return to_enum(:each_char) unless block + pos = 0 + while pos < self.size + block.call(self[pos]) + pos += 1 end - self[idx, 0] = str self end ## - # call-seq: - # str.ljust(integer, padstr=' ') -> new_str - # - # If integer is greater than the length of str, returns a new - # String of length integer with str left justified - # and padded with padstr; otherwise, returns str. - # - # "hello".ljust(4) #=> "hello" - # "hello".ljust(20) #=> "hello " - # "hello".ljust(20, '1234') #=> "hello123412341234123" - def ljust(idx, padstr = ' ') - raise ArgumentError, 'zero width padding' if padstr == '' - return self if idx <= self.size - pad_repetitions = idx / padstr.size - padding = (padstr * pad_repetitions)[0, idx-self.size] - self + padding - end - - ## - # call-seq: - # str.rjust(integer, padstr=' ') -> new_str + # call-seq: + # str.chars -> array + # str.chars {|char| block } -> str # - # If integer is greater than the length of str, returns a new - # String of length integer with str right justified - # and padded with padstr; otherwise, returns str. + # Returns an array of characters in str when called without a block. + # When called with a block, passes each character to the block. # - # "hello".rjust(4) #=> "hello" - # "hello".rjust(20) #=> " hello" - # "hello".rjust(20, '1234') #=> "123412341234123hello" - def rjust(idx, padstr = ' ') - raise ArgumentError, 'zero width padding' if padstr == '' - return self if idx <= self.size - pad_repetitions = idx / padstr.size - padding = (padstr * pad_repetitions)[0, idx-self.size] - padding + self - end - - ## - # call-seq: - # str.center(width, padstr=' ') -> new_str + # "hello".chars #=> ["h", "e", "l", "l", "o"] + # "hello".chars {|c| print c } #=> "hello" # - # Centers +str+ in +width+. If +width+ is greater than the length of +str+, - # returns a new String of length +width+ with +str+ centered and padded with - # +padstr+; otherwise, returns +str+. - # - # "hello".center(4) #=> "hello" - # "hello".center(20) #=> " hello " - # "hello".center(20, '123') #=> "1231231hello12312312" - def center(width, padstr = ' ') - raise ArgumentError, 'zero width padding' if padstr == '' - return self if width <= self.size - width -= self.size - pad1 = width / 2 - pad2 = width - pad1 - (padstr*pad1)[0,pad1] + self + (padstr*pad2)[0,pad2] - end - def chars(&block) if block_given? - self.split('').each do |i| - block.call(i) - end + __chars.each(&block) self else - self.split('') + __chars end end - ## - # Call the given block for each character of - # +self+. - def each_char(&block) - return to_enum :each_char unless block - pos = 0 - while pos < self.size - block.call(self[pos]) - pos += 1 - end - self - end - def codepoints(&block) + cp = __codepoints() if block_given? - self.split('').each do|x| - block.call(x.ord) + cp.each do|x| + block.call(x) end self else - self.split('').map{|x| x.ord} + cp end end alias each_codepoint codepoints - ## - # call-seq: - # str.prepend(other_str) -> str - # - # Prepend---Prepend the given string to str. - # - # a = "world" - # a.prepend("hello ") #=> "hello world" - # a #=> "hello world" - def prepend(*args) - len = args.size - while len > 0 - len -= 1 - self[0, 0] = args[len] - end - self - end - ## # call-seq: # string.lines -> array of string @@ -353,15 +56,17 @@ def prepend(*args) # a = "abc\ndef" # a.lines #=> ["abc\n", "def"] # - # If a block is given, it works the same as each_line. + # If a block is given, it works the same as `each_line`. def lines(&blk) lines = self.__lines if blk lines.each do |line| blk.call(line) end + self + else + lines end - lines end ## @@ -369,10 +74,10 @@ def lines(&blk) # str.upto(other_str, exclusive=false) {|s| block } -> str # str.upto(other_str, exclusive=false) -> an_enumerator # - # Iterates through successive values, starting at str and - # ending at other_str inclusive, passing each value in turn to - # the block. The String#succ method is used to generate - # each value. If optional second argument exclusive is omitted or is false, + # Iterates through successive values, starting at *str* and + # ending at *other_str* inclusive, passing each value in turn to + # the block. The `String#succ` method is used to generate + # each value. If optional second argument exclusive is omitted or is false, # the last value will be included; otherwise it will be excluded. # # If no block is given, an enumerator is returned instead. @@ -387,7 +92,7 @@ def lines(&blk) # a8 a9 b0 b1 b2 b3 b4 b5 b6 # a8 a9 b0 b1 b2 b3 b4 b5 b6 # - # If str and other_str contains only ascii numeric characters, + # If *str* and *other_str* contains only ascii numeric characters, # both are recognized as decimal numbers. In addition, the width of # string (e.g. leading zeros) is handled appropriately. # @@ -458,4 +163,34 @@ def __upto_endless(&block) end self end + + ## + # call-seq: + # str.scrub -> new_str + # str.scrub(repl) -> new_str + # str.scrub {|bytes| block } -> new_str + # + # Returns a copy of +self+ with each maximal run of invalid UTF-8 bytes + # replaced by +repl+ (U+FFFD if +repl+ is omitted), or by the value + # returned from the block when one is given. The block receives the + # invalid bytes as a String. + # + # "abc\x80def".scrub #=> "abc\u{FFFD}def" + # "abc\x80def".scrub("?") #=> "abc?def" + # "\xE3\x81".scrub #=> "\u{FFFD}" + # "\x80\x81".scrub { |b| b.bytes.map { |c| "<%02X>" % c }.join } + # #=> "<80><81>" + def scrub(repl = nil, &block) + return __scrub(repl) unless block + chunks = __scrub_chunks + return chunks[0] if chunks.length == 1 + result = chunks[0].dup + i = 1 + while i < chunks.length + result << yield(chunks[i]).to_s + result << chunks[i + 1] if i + 1 < chunks.length + i += 2 + end + result + end end diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index 3c3b1048dd..4878db56ea 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -10,41 +10,16 @@ #define ENC_BINARY "BINARY" #define ENC_UTF8 "UTF-8" -#define ENC_COMP_P(enc, enc_lit) \ - str_casecmp_p(RSTRING_PTR(enc), RSTRING_LEN(enc), enc_lit, sizeof(enc_lit"")-1) - -#ifdef MRB_NO_FLOAT -# define mrb_float_p(o) FALSE -#endif - -static mrb_bool -str_casecmp_p(const char *s1, mrb_int len1, const char *s2, mrb_int len2) -{ - const char *e1, *e2; - - if (len1 != len2) return FALSE; - e1 = s1 + len1; - e2 = s2 + len2; - while (s1 < e1 && s2 < e2) { - if (*s1 != *s2 && TOUPPER(*s1) != TOUPPER(*s2)) return FALSE; - ++s1; - ++s2; - } - return TRUE; -} - static mrb_value int_chr_binary(mrb_state *mrb, mrb_value num) { mrb_int cp = mrb_as_int(mrb, num); - char c; - mrb_value str; if (cp < 0 || 0xff < cp) { mrb_raisef(mrb, E_RANGE_ERROR, "%v out of char range", num); } - c = (char)cp; - str = mrb_str_new(mrb, &c, 1); + char c = (char)cp; + mrb_value str = mrb_str_new(mrb, &c, 1); RSTR_SET_ASCII_FLAG(mrb_str_ptr(str)); return str; } @@ -57,36 +32,16 @@ int_chr_utf8(mrb_state *mrb, mrb_value num) char utf8[4]; mrb_int len; mrb_value str; - uint32_t ascii_flag = 0; - if (cp < 0 || 0x10FFFF < cp) { + /* Reject negative, above U+10FFFF, and UTF-16 surrogates (RFC 3629). */ + if (cp < 0 || 0x10FFFF < cp || (0xD800 <= cp && cp <= 0xDFFF)) { mrb_raisef(mrb, E_RANGE_ERROR, "%v out of char range", num); } - if (cp < 0x80) { - utf8[0] = (char)cp; - len = 1; - ascii_flag = MRB_STR_ASCII; - } - else if (cp < 0x800) { - utf8[0] = (char)(0xC0 | (cp >> 6)); - utf8[1] = (char)(0x80 | (cp & 0x3F)); - len = 2; - } - else if (cp < 0x10000) { - utf8[0] = (char)(0xE0 | (cp >> 12)); - utf8[1] = (char)(0x80 | ((cp >> 6) & 0x3F)); - utf8[2] = (char)(0x80 | ( cp & 0x3F)); - len = 3; - } - else { - utf8[0] = (char)(0xF0 | (cp >> 18)); - utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F)); - utf8[2] = (char)(0x80 | ((cp >> 6) & 0x3F)); - utf8[3] = (char)(0x80 | ( cp & 0x3F)); - len = 4; - } + len = mrb_utf8_to_buf(utf8, (uint32_t)cp); str = mrb_str_new(mrb, utf8, len); - mrb_str_ptr(str)->flags |= ascii_flag; + if (len == 1) { + RSTR_SET_ASCII_FLAG(mrb_str_ptr(str)); + } return str; } #endif @@ -95,20 +50,19 @@ int_chr_utf8(mrb_state *mrb, mrb_value num) * call-seq: * str.swapcase! -> str or nil * - * Equivalent to String#swapcase, but modifies the receiver in - * place, returning str, or nil if no changes were made. + * Equivalent to `String#swapcase`, but modifies the receiver in + * place, returning *str*, or `nil` if no changes were made. * Note: case conversion is effective only in ASCII region. */ static mrb_value -mrb_str_swapcase_bang(mrb_state *mrb, mrb_value str) +str_swapcase_bang(mrb_state *mrb, mrb_value str) { - char *p, *pend; int modify = 0; struct RString *s = mrb_str_ptr(str); mrb_str_modify(mrb, s); - p = RSTRING_PTR(str); - pend = p + RSTRING_LEN(str); + char *p = RSTRING_PTR(str); + char *pend = p + RSTRING_LEN(str); while (p < pend) { if (ISUPPER(*p)) { *p = TOLOWER(*p); @@ -129,7 +83,7 @@ mrb_str_swapcase_bang(mrb_state *mrb, mrb_value str) * call-seq: * str.swapcase -> new_str * - * Returns a copy of str with uppercase alphabetic characters converted + * Returns a copy of *str* with uppercase alphabetic characters converted * to lowercase and lowercase characters converted to uppercase. * Note: case conversion is effective only in ASCII region. * @@ -137,29 +91,51 @@ mrb_str_swapcase_bang(mrb_state *mrb, mrb_value str) * "cYbEr_PuNk11".swapcase #=> "CyBeR_pUnK11" */ static mrb_value -mrb_str_swapcase(mrb_state *mrb, mrb_value self) +str_swapcase(mrb_state *mrb, mrb_value self) { - mrb_value str; - - str = mrb_str_dup(mrb, self); - mrb_str_swapcase_bang(mrb, str); + mrb_value str = mrb_str_dup(mrb, self); + str_swapcase_bang(mrb, str); return str; } static void -str_concat(mrb_state *mrb, mrb_value self, mrb_value str) +str_concat(mrb_state *mrb, mrb_value self, mrb_value str, mrb_bool binary) { - if (mrb_integer_p(str) || mrb_float_p(str)) + if (mrb_integer_p(str) || mrb_float_p(str)) { #ifdef MRB_UTF8_STRING - str = int_chr_utf8(mrb, str); + if (binary) { + str = int_chr_binary(mrb, str); + } + else { + str = int_chr_utf8(mrb, str); + } #else str = int_chr_binary(mrb, str); #endif + } else mrb_ensure_string_type(mrb, str); mrb_str_cat_str(mrb, self, str); } +static mrb_value +str_concat0(mrb_state *mrb, mrb_value self, mrb_bool binary) +{ + if (mrb_get_argc(mrb) == 1) { + str_concat(mrb, self, mrb_get_arg1(mrb), binary); + return self; + } + + mrb_value *args; + mrb_int alen; + + mrb_get_args(mrb, "*", &args, &alen); + for (mrb_int i=0; i str @@ -169,7 +145,7 @@ str_concat(mrb_state *mrb, mrb_value self, mrb_value str) * s.concat('bar', 'baz') # => "foobarbaz" * s # => "foobarbaz" * - * For each given object +object+ that is an \Integer, + * For each given object `object` that is an \Integer, * the value is considered a codepoint and converted to a character before concatenation: * * s = 'foo' @@ -177,28 +153,30 @@ str_concat(mrb_state *mrb, mrb_value self, mrb_value str) * */ static mrb_value -mrb_str_concat_m(mrb_state *mrb, mrb_value self) +str_concat_m(mrb_state *mrb, mrb_value self) { - if (mrb_get_argc(mrb) == 1) { - str_concat(mrb, self, mrb_get_arg1(mrb)); - return self; - } - - mrb_value *args; - mrb_int alen; + mrb_bool binary = RSTR_BINARY_P(mrb_str_ptr(self)); + return str_concat0(mrb, self, binary); +} - mrb_get_args(mrb, "*", &args, &alen); - for (mrb_int i=0; i str + * + * Works like `concat` but consider arguments as binary strings. + * + */ +static mrb_value +str_append_as_bytes(mrb_state *mrb, mrb_value self) +{ + return str_concat0(mrb, self, TRUE); } /* * call-seq: * str.start_with?([prefixes]+) -> true or false * - * Returns true if +str+ starts with one of the +prefixes+ given. + * Returns true if `str` starts with one of the `prefixes` given. * * "hello".start_with?("hell") #=> true * @@ -208,20 +186,19 @@ mrb_str_concat_m(mrb_state *mrb, mrb_value self) * "h".start_with?("heaven", "hell") #=> false */ static mrb_value -mrb_str_start_with(mrb_state *mrb, mrb_value self) +str_start_with(mrb_state *mrb, mrb_value self) { const mrb_value *argv; - mrb_int argc, i; + mrb_int argc; mrb_get_args(mrb, "*", &argv, &argc); - for (i = 0; i < argc; i++) { - size_t len_l, len_r; + for (mrb_int i = 0; i < argc; i++) { int ai = mrb_gc_arena_save(mrb); mrb_value sub = argv[i]; mrb_ensure_string_type(mrb, sub); mrb_gc_arena_restore(mrb, ai); - len_l = RSTRING_LEN(self); - len_r = RSTRING_LEN(sub); + size_t len_l = RSTRING_LEN(self); + size_t len_r = RSTRING_LEN(sub); if (len_l >= len_r) { if (memcmp(RSTRING_PTR(self), RSTRING_PTR(sub), len_r) == 0) { return mrb_true_value(); @@ -235,23 +212,22 @@ mrb_str_start_with(mrb_state *mrb, mrb_value self) * call-seq: * str.end_with?([suffixes]+) -> true or false * - * Returns true if +str+ ends with one of the +suffixes+ given. + * Returns true if `str` ends with one of the `suffixes` given. */ static mrb_value -mrb_str_end_with(mrb_state *mrb, mrb_value self) +str_end_with(mrb_state *mrb, mrb_value self) { const mrb_value *argv; - mrb_int argc, i; + mrb_int argc; mrb_get_args(mrb, "*", &argv, &argc); - for (i = 0; i < argc; i++) { - size_t len_l, len_r; + for (mrb_int i = 0; i < argc; i++) { int ai = mrb_gc_arena_save(mrb); mrb_value sub = argv[i]; mrb_ensure_string_type(mrb, sub); mrb_gc_arena_restore(mrb, ai); - len_l = RSTRING_LEN(self); - len_r = RSTRING_LEN(sub); + size_t len_l = RSTRING_LEN(self); + size_t len_r = RSTRING_LEN(sub); if (len_l >= len_r) { if (memcmp(RSTRING_PTR(self) + (len_l - len_r), RSTRING_PTR(sub), @@ -309,10 +285,9 @@ tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_patte const char *pattern = RSTRING_PTR(v_pattern); mrb_int pattern_length = RSTRING_LEN(v_pattern); mrb_bool flag_reverse = FALSE; - struct tr_pattern *pat1; mrb_int i = 0; - if(flag_reverse_enable && pattern_length >= 2 && pattern[0] == '^') { + if (flag_reverse_enable && pattern_length >= 2 && pattern[0] == '^') { flag_reverse = TRUE; i++; } @@ -320,9 +295,8 @@ tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_patte while (i < pattern_length) { /* is range pattern ? */ mrb_bool const ret_uninit = (ret->type == TR_UNINITIALIZED); - pat1 = ret_uninit - ? ret - : (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern)); + struct tr_pattern *pat1 = ret_uninit ? ret + : (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern)); if (pat1 == NULL) { if (pat0) tr_free_pattern(mrb, pat0); tr_free_pattern(mrb, ret); @@ -342,7 +316,6 @@ tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_patte else { /* in order pattern. */ mrb_int start_pos = i++; - mrb_int len; while (i < pattern_length) { if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-') @@ -350,7 +323,7 @@ tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_patte i++; } - len = i - start_pos; + mrb_int len = i - start_pos; if (len > UINT16_MAX) { if (pat0) tr_free_pattern(mrb, pat0); tr_free_pattern(mrb, ret); @@ -386,8 +359,7 @@ tr_find_character(const struct tr_pattern *pat, const char *pat_str, int ch) while (pat != NULL) { if (pat->type == TR_IN_ORDER) { - int i; - for (i = 0; i < pat->n; i++) { + for (int i = 0; i < pat->n; i++) { if (pat_str[pat->val.start_pos + i] == ch) ret = n_sum + i; } } @@ -469,7 +441,7 @@ tr_compile_pattern(const struct tr_pattern *pat, mrb_value pstr, uint8_t bitmap[ mrb_int flag_reverse = pat ? pat->flag_reverse : 0; int i; - for (i=0; i<32; i++) { + for (int i=0; i<32; i++) { bitmap[i] = 0; } while (pat != NULL) { @@ -501,26 +473,26 @@ str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squee { struct tr_pattern pat = STATIC_TR_PATTERN; struct tr_pattern rep = STATIC_TR_PATTERN; - char *s; - mrb_int len; - mrb_int i; - mrb_int j; mrb_bool flag_changed = FALSE; mrb_int lastch = -1; mrb_str_modify(mrb, mrb_str_ptr(str)); tr_parse_pattern(mrb, &pat, p1, TRUE, NULL); tr_parse_pattern(mrb, &rep, p2, FALSE, &pat); - s = RSTRING_PTR(str); - len = RSTRING_LEN(str); + char *s = RSTRING_PTR(str); + mrb_int len = RSTRING_LEN(str); + /* Hoist pointer retrieval outside loop to avoid repeated conditionals */ + const char *p1_ptr = RSTRING_PTR(p1); + const char *p2_ptr = RSTRING_PTR(p2); + mrb_int i, j; for (i=j=0; ij) s[j] = s[i]; if (n >= 0) { flag_changed = TRUE; - mrb_int c = tr_get_character(&rep, RSTRING_PTR(p2), n); + mrb_int c = tr_get_character(&rep, p2_ptr, n); if (c < 0 || (squeeze && c == lastch)) { j--; @@ -584,13 +556,12 @@ str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squee * Note: conversion is effective only in ASCII region. */ static mrb_value -mrb_str_tr(mrb_state *mrb, mrb_value str) +str_tr_m(mrb_state *mrb, mrb_value str) { - mrb_value dup; mrb_value p1, p2; mrb_get_args(mrb, "SS", &p1, &p2); - dup = mrb_str_dup(mrb, str); + mrb_value dup = mrb_str_dup(mrb, str); str_tr(mrb, dup, p1, p2, FALSE); return dup; } @@ -603,7 +574,7 @@ mrb_str_tr(mrb_state *mrb, mrb_value str) * Returns str, or nil if no changes were made. */ static mrb_value -mrb_str_tr_bang(mrb_state *mrb, mrb_value str) +str_tr_bang(mrb_state *mrb, mrb_value str) { mrb_value p1, p2; @@ -626,13 +597,12 @@ mrb_str_tr_bang(mrb_state *mrb, mrb_value str) * "hello".tr_s('el', 'hx') #=> "hhxo" */ static mrb_value -mrb_str_tr_s(mrb_state *mrb, mrb_value str) +str_tr_s(mrb_state *mrb, mrb_value str) { - mrb_value dup; mrb_value p1, p2; mrb_get_args(mrb, "SS", &p1, &p2); - dup = mrb_str_dup(mrb, str); + mrb_value dup = mrb_str_dup(mrb, str); str_tr(mrb, dup, p1, p2, TRUE); return dup; } @@ -645,7 +615,7 @@ mrb_str_tr_s(mrb_state *mrb, mrb_value str) * str, or nil if no changes were made. */ static mrb_value -mrb_str_tr_s_bang(mrb_state *mrb, mrb_value str) +str_tr_s_bang(mrb_state *mrb, mrb_value str) { mrb_value p1, p2; @@ -662,8 +632,6 @@ str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) struct tr_pattern pat_storage = STATIC_TR_PATTERN; struct tr_pattern *pat = NULL; mrb_int i, j; - char *s; - mrb_int len; mrb_bool flag_changed = FALSE; mrb_int lastch = -1; uint8_t bitmap[32]; @@ -674,8 +642,8 @@ str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) tr_compile_pattern(pat, v_pat, bitmap); tr_free_pattern(mrb, pat); } - s = RSTRING_PTR(str); - len = RSTRING_LEN(str); + char *s = RSTRING_PTR(str); + mrb_int len = RSTRING_LEN(str); if (pat) { for (i=j=0; i "puters shot balls" */ static mrb_value -mrb_str_squeeze(mrb_state *mrb, mrb_value str) +str_squeeze_m(mrb_state *mrb, mrb_value str) { mrb_value pat = mrb_nil_value(); - mrb_value dup; mrb_get_args(mrb, "|S", &pat); - dup = mrb_str_dup(mrb, str); + mrb_value dup = mrb_str_dup(mrb, str); str_squeeze(mrb, dup, pat); return dup; } @@ -739,7 +706,7 @@ mrb_str_squeeze(mrb_state *mrb, mrb_value str) * changes were made. */ static mrb_value -mrb_str_squeeze_bang(mrb_state *mrb, mrb_value str) +str_squeeze_bang(mrb_state *mrb, mrb_value str) { mrb_value pat = mrb_nil_value(); @@ -754,9 +721,6 @@ static mrb_bool str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat) { struct tr_pattern pat = STATIC_TR_PATTERN; - mrb_int i, j; - char *s; - mrb_int len; mrb_bool flag_changed = FALSE; uint8_t bitmap[32]; @@ -765,8 +729,9 @@ str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat) tr_compile_pattern(&pat, v_pat, bitmap); tr_free_pattern(mrb, &pat); - s = RSTRING_PTR(str); - len = RSTRING_LEN(str); + char *s = RSTRING_PTR(str); + mrb_int len = RSTRING_LEN(str); + mrb_int i, j; for (i=j=0; ij) s[j] = s[i]; @@ -782,20 +747,21 @@ str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat) return flag_changed; } +/* Internal helper for String#delete - returns new string with pattern characters removed */ static mrb_value -mrb_str_delete(mrb_state *mrb, mrb_value str) +str_delete_m(mrb_state *mrb, mrb_value str) { mrb_value pat; - mrb_value dup; mrb_get_args(mrb, "S", &pat); - dup = mrb_str_dup(mrb, str); + mrb_value dup = mrb_str_dup(mrb, str); str_delete(mrb, dup, pat); return dup; } +/* Internal helper for String#delete! - removes pattern characters in place */ static mrb_value -mrb_str_delete_bang(mrb_state *mrb, mrb_value str) +str_delete_bang(mrb_state *mrb, mrb_value str) { mrb_value pat; @@ -818,13 +784,9 @@ mrb_str_delete_bang(mrb_state *mrb, mrb_value str) * the end of a sequence or the end of a other_str. */ static mrb_value -mrb_str_count(mrb_state *mrb, mrb_value str) +str_count(mrb_state *mrb, mrb_value str) { mrb_value v_pat = mrb_nil_value(); - mrb_int i; - char *s; - mrb_int len; - mrb_int count = 0; struct tr_pattern pat = STATIC_TR_PATTERN; uint8_t bitmap[32]; @@ -833,22 +795,25 @@ mrb_str_count(mrb_state *mrb, mrb_value str) tr_compile_pattern(&pat, v_pat, bitmap); tr_free_pattern(mrb, &pat); - s = RSTRING_PTR(str); - len = RSTRING_LEN(str); - for (i = 0; i < len; i++) { + char *s = RSTRING_PTR(str); + mrb_int len = RSTRING_LEN(str); + mrb_int count = 0; + for (mrb_int i = 0; i < len; i++) { if (tr_bitmap_detect(bitmap, s[i])) count++; } return mrb_fixnum_value(count); } +/* Internal helper for String#hex - converts hex string to integer */ static mrb_value -mrb_str_hex(mrb_state *mrb, mrb_value self) +str_hex(mrb_state *mrb, mrb_value self) { return mrb_str_to_integer(mrb, self, 16, FALSE); } +/* Internal helper for String#oct - converts octal string to integer */ static mrb_value -mrb_str_oct(mrb_state *mrb, mrb_value self) +str_oct(mrb_state *mrb, mrb_value self) { return mrb_str_to_integer(mrb, self, 8, FALSE); } @@ -863,7 +828,7 @@ mrb_str_oct(mrb_state *mrb, mrb_value self) * a.chr #=> "a" */ static mrb_value -mrb_str_chr(mrb_state *mrb, mrb_value self) +str_chr(mrb_state *mrb, mrb_value self) { return mrb_str_substr(mrb, self, 0, 1); } @@ -872,9 +837,9 @@ mrb_str_chr(mrb_state *mrb, mrb_value self) * call-seq: * int.chr([encoding]) -> string * - * Returns a string containing the character represented by the +int+'s value - * according to +encoding+. +"ASCII-8BIT"+ (+"BINARY"+) and +"UTF-8"+ (only - * with +MRB_UTF8_STRING+) can be specified as +encoding+ (default is + * Returns a string containing the character represented by the `int`'s value + * according to `encoding`. +"ASCII-8BIT"+ (+"BINARY"+) and +"UTF-8"+ (only + * with `MRB_UTF8_STRING`) can be specified as `encoding` (default is * +"ASCII-8BIT"+). * * 65.chr #=> "A" @@ -883,19 +848,19 @@ mrb_str_chr(mrb_state *mrb, mrb_value self) * 230.chr("UTF-8") #=> "\u00E6" */ static mrb_value -mrb_int_chr(mrb_state *mrb, mrb_value num) +int_chr(mrb_state *mrb, mrb_value num) { mrb_value enc; mrb_bool enc_given; mrb_get_args(mrb, "|S?", &enc, &enc_given); if (!enc_given || - ENC_COMP_P(enc, ENC_ASCII_8BIT) || - ENC_COMP_P(enc, ENC_BINARY)) { + MRB_STR_CASECMP_P(enc, ENC_ASCII_8BIT) || + MRB_STR_CASECMP_P(enc, ENC_BINARY)) { return int_chr_binary(mrb, num); } #ifdef MRB_UTF8_STRING - else if (ENC_COMP_P(enc, ENC_UTF8)) { + else if (MRB_STR_CASECMP_P(enc, ENC_UTF8)) { return int_chr_utf8(mrb, num); } #endif @@ -912,23 +877,22 @@ mrb_int_chr(mrb_state *mrb, mrb_value num) * * Returns next sequence of the string; * - * a = "abc" - * a.succ #=> "abd" + * a = "bed" + * a.succ #=> "bee" */ static mrb_value -mrb_str_succ_bang(mrb_state *mrb, mrb_value self) +str_succ_bang(mrb_state *mrb, mrb_value self) { mrb_value result; - unsigned char *p, *e, *b, *t; const char *prepend; struct RString *s = mrb_str_ptr(self); - mrb_int l; if (RSTRING_LEN(self) == 0) return self; mrb_str_modify(mrb, s); - l = RSTRING_LEN(self); + mrb_int l = RSTRING_LEN(self); + unsigned char *p, *e, *b, *t; b = p = (unsigned char*) RSTRING_PTR(self); t = e = p + l; *(e--) = 0; @@ -994,86 +958,303 @@ mrb_str_succ_bang(mrb_state *mrb, mrb_value self) } static mrb_value -mrb_str_succ(mrb_state *mrb, mrb_value self) +str_succ(mrb_state *mrb, mrb_value self) { - mrb_value str; - - str = mrb_str_dup(mrb, self); - mrb_str_succ_bang(mrb, str); + mrb_value str = mrb_str_dup(mrb, self); + str_succ_bang(mrb, str); return str; } #ifdef MRB_UTF8_STRING extern const char mrb_utf8len_table[]; -static mrb_int -utf8code(unsigned char* p, mrb_int limit) +MRB_INLINE mrb_int +utf8code(mrb_state* mrb, const unsigned char* p, const unsigned char *e) { - mrb_int len; - - if (p[0] < 0x80) - return p[0]; + if (p[0] < 0x80) return p[0]; - len = mrb_utf8len_table[p[0]>>3]; - if (len <= limit && len > 1 && (p[1] & 0xc0) == 0x80) { + mrb_int len = mrb_utf8len_table[p[0]>>3]; + mrb_int cp = -1; + if (p+len <= e && len > 1 && (p[1] & 0xc0) == 0x80) { if (len == 2) - return ((p[0] & 0x1f) << 6) + (p[1] & 0x3f); - if ((p[2] & 0xc0) == 0x80) { + cp = ((p[0] & 0x1f) << 6) + (p[1] & 0x3f); + else if ((p[2] & 0xc0) == 0x80) { if (len == 3) - return ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) - + (p[2] & 0x3f); - if ((p[3] & 0xc0) == 0x80) { - if (len == 4) - return ((p[0] & 0x07) << 18) + ((p[1] & 0x3f) << 12) - + ((p[2] & 0x3f) << 6) + (p[3] & 0x3f); + cp = ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f); + else if (len == 4 && (p[3] & 0xc0) == 0x80) { + cp = ((p[0] & 0x07) << 18) + ((p[1] & 0x3f) << 12) + + ((p[2] & 0x3f) << 6) + (p[3] & 0x3f); } } } + /* Reject overlong sequences, UTF-16 surrogates, and code points above + U+10FFFF (RFC 3629, Unicode D93b). */ + if (cp >= 0 && + ((len == 2 && cp >= 0x80) || + (len == 3 && cp >= 0x800 && (cp < 0xD800 || 0xDFFF < cp)) || + (len == 4 && cp >= 0x10000 && cp <= 0x10FFFF))) { + return cp; + } + mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid UTF-8 byte sequence"); + /* not reached */ return -1; } static mrb_value -mrb_str_ord(mrb_state* mrb, mrb_value str) +str_ord(mrb_state* mrb, mrb_value str) { - if (RSTRING_LEN(str) == 0) + struct RString *s = mrb_str_ptr(str); + const unsigned char *p = (unsigned char*)RSTR_PTR(s); + const unsigned char *e = p + RSTR_LEN(s); + mrb_int c; + + if (p == e) { mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string"); - mrb_int c = utf8code((unsigned char*)RSTRING_PTR(str), RSTRING_LEN(str)); - if (c < 0) mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid UTF-8 byte sequence"); + } + if (RSTR_SINGLE_BYTE_P(s) || RSTR_BINARY_P(s)) { + c = p[0]; + } + else { + c = utf8code(mrb, p, e); + } return mrb_fixnum_value(c); } + +/* Returns the byte length of a valid UTF-8 char starting at p, or -1 for + any invalid sequence (illegal lead byte, truncated tail, invalid + continuation byte, overlong encoding, UTF-16 surrogate, or codepoint + above U+10FFFF). Like utf8code() but reports rather than raises. */ +static mrb_int +str_scrub_char_len(const unsigned char *p, const unsigned char *e) +{ + if (p[0] < 0x80) return 1; + mrb_int len = mrb_utf8len_table[p[0]>>3]; + if (len < 2 || len > e - p) return -1; + for (mrb_int i = 1; i < len; i++) { + if ((p[i] & 0xc0) != 0x80) return -1; + } + mrb_int cp; + if (len == 2) { + cp = ((p[0] & 0x1f) << 6) | (p[1] & 0x3f); + if (cp < 0x80) return -1; + } + else if (len == 3) { + cp = ((p[0] & 0x0f) << 12) | ((p[1] & 0x3f) << 6) | (p[2] & 0x3f); + if (cp < 0x800) return -1; + if (cp >= 0xD800 && cp <= 0xDFFF) return -1; + } + else { /* len == 4 */ + cp = ((p[0] & 0x07) << 18) | ((p[1] & 0x3f) << 12) + | ((p[2] & 0x3f) << 6) | (p[3] & 0x3f); + if (cp < 0x10000 || cp > 0x10FFFF) return -1; + } + return len; +} + +static void +str_scrub_validate_replacement(mrb_state *mrb, mrb_value repl) +{ + const unsigned char *p = (const unsigned char*)RSTRING_PTR(repl); + const unsigned char *e = p + RSTRING_LEN(repl); + while (p < e) { + mrb_int len = str_scrub_char_len(p, e); + if (len < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "replacement must be valid UTF-8"); + } + p += len; + } +} + +/* Core of String#scrub for the no-block case. Returns a new string with + each maximal run of invalid UTF-8 bytes replaced by `repl` (or U+FFFD + if `repl` is nil). Already-valid strings are returned via mrb_str_dup. */ +static mrb_value +str_scrub_core(mrb_state *mrb, mrb_value self) +{ + mrb_value repl = mrb_nil_value(); + mrb_get_args(mrb, "|S!", &repl); + + const char *replace; + mrb_int replace_len; + if (mrb_nil_p(repl)) { + replace = "\xEF\xBF\xBD"; /* U+FFFD REPLACEMENT CHARACTER */ + replace_len = 3; + } + else { + str_scrub_validate_replacement(mrb, repl); + replace = RSTRING_PTR(repl); + replace_len = RSTRING_LEN(repl); + } + + struct RString *s = mrb_str_ptr(self); + if (RSTR_SINGLE_BYTE_P(s) || RSTR_BINARY_P(s)) { + return mrb_str_dup(mrb, self); + } + + const unsigned char *p = (const unsigned char*)RSTR_PTR(s); + const unsigned char *e = p + RSTR_LEN(s); + const unsigned char *valid_start = p; + const unsigned char *q = p; + mrb_value result = mrb_nil_value(); /* lazily allocated on first invalid byte */ + + while (q < e) { + mrb_int len = str_scrub_char_len(q, e); + if (len < 0) { + if (mrb_nil_p(result)) { + result = mrb_str_new(mrb, NULL, 0); + } + mrb_str_cat(mrb, result, (const char*)valid_start, q - valid_start); + mrb_str_cat(mrb, result, replace, replace_len); + q++; + while (q < e && str_scrub_char_len(q, e) < 0) q++; + valid_start = q; + } + else { + q += len; + } + } + + if (mrb_nil_p(result)) { + return mrb_str_dup(mrb, self); /* already valid */ + } + mrb_str_cat(mrb, result, (const char*)valid_start, q - valid_start); + return result; +} + +/* Splits self into alternating valid/invalid byte runs and returns them + as an Array of strings ([valid, invalid, valid, ...], odd length). + Used by the block form of String#scrub in mrblib; the block can then + map each invalid run to a replacement of its choosing without the C + side having to call back into the VM. */ +static mrb_value +str_scrub_chunks(mrb_state *mrb, mrb_value self) +{ + mrb_value ary = mrb_ary_new(mrb); + struct RString *s = mrb_str_ptr(self); + if (RSTR_SINGLE_BYTE_P(s) || RSTR_BINARY_P(s)) { + mrb_ary_push(mrb, ary, mrb_str_dup(mrb, self)); + return ary; + } + const unsigned char *p = (const unsigned char*)RSTR_PTR(s); + const unsigned char *e = p + RSTR_LEN(s); + const unsigned char *valid_start = p; + const unsigned char *q = p; + while (q < e) { + mrb_int len = str_scrub_char_len(q, e); + if (len < 0) { + mrb_ary_push(mrb, ary, mrb_str_new(mrb, (const char*)valid_start, q - valid_start)); + const unsigned char *invalid_start = q; + q++; + while (q < e && str_scrub_char_len(q, e) < 0) q++; + mrb_ary_push(mrb, ary, mrb_str_new(mrb, (const char*)invalid_start, q - invalid_start)); + valid_start = q; + } + else { + q += len; + } + } + mrb_ary_push(mrb, ary, mrb_str_new(mrb, (const char*)valid_start, q - valid_start)); + return ary; +} + +/* Internal helper for String#codepoints - returns array of character codepoints */ +static mrb_value +str_codepoints(mrb_state *mrb, mrb_value str) +{ + struct RString *s = mrb_str_ptr(str); + const unsigned char *p = (unsigned char*)RSTR_PTR(s); + const unsigned char *e = p + RSTR_LEN(s); + + mrb->c->ci->mid = 0; + mrb_value result = mrb_ary_new(mrb); + if (RSTR_SINGLE_BYTE_P(s) || RSTR_BINARY_P(s)) { + while (p < e) { + mrb_ary_push(mrb, result, mrb_int_value(mrb, (mrb_int)*p)); + p++; + } + } + else { + while (p < e) { + mrb_int c = utf8code(mrb, p, e); + mrb_ary_push(mrb, result, mrb_int_value(mrb, c)); + p += mrb_utf8len_table[p[0]>>3]; + } + } + return result; +} #else static mrb_value -mrb_str_ord(mrb_state* mrb, mrb_value str) +str_ord(mrb_state* mrb, mrb_value str) { if (RSTRING_LEN(str) == 0) mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string"); return mrb_fixnum_value((unsigned char)RSTRING_PTR(str)[0]); } + +static mrb_value +str_codepoints(mrb_state *mrb, mrb_value self) +{ + char *p = RSTRING_PTR(self); + char *e = p + RSTRING_LEN(self); + + mrb->c->ci->mid = 0; + mrb_value result = mrb_ary_new(mrb); + while (p < e) { + mrb_ary_push(mrb, result, mrb_int_value(mrb, (mrb_int)*p)); + p++; + } + return result; +} + +/* Non-UTF-8 builds: scrub is a no-op. The replacement arg is accepted + (and validated only as a String) for API parity with the UTF-8 build. */ +static mrb_value +str_scrub_core(mrb_state *mrb, mrb_value self) +{ + mrb_value repl = mrb_nil_value(); + mrb_get_args(mrb, "|S!", &repl); + return mrb_str_dup(mrb, self); +} + +static mrb_value +str_scrub_chunks(mrb_state *mrb, mrb_value self) +{ + mrb_value ary = mrb_ary_new(mrb); + mrb_ary_push(mrb, ary, mrb_str_dup(mrb, self)); + return ary; +} #endif +static mrb_bool +str_prefix_p(mrb_state *mrb, mrb_value str, const char *prefix_ptr, mrb_int prefix_len) +{ + mrb_int str_len = RSTRING_LEN(str); + if (prefix_len > str_len) return FALSE; + return memcmp(RSTRING_PTR(str), prefix_ptr, prefix_len) == 0; +} + /* * call-seq: * str.delete_prefix!(prefix) -> self or nil * - * Deletes leading prefix from str, returning - * nil if no change was made. + * Deletes leading `prefix` from *str*, returning + * `nil` if no change was made. * * "hello".delete_prefix!("hel") #=> "lo" * "hello".delete_prefix!("llo") #=> nil */ static mrb_value -mrb_str_del_prefix_bang(mrb_state *mrb, mrb_value self) +str_del_prefix_bang(mrb_state *mrb, mrb_value self) { - mrb_int plen, slen; + mrb_int plen; const char *ptr; - char *s; - struct RString *str = RSTRING(self); mrb_get_args(mrb, "s", &ptr, &plen); - slen = RSTR_LEN(str); + struct RString *str = RSTRING(self); + mrb_int slen = RSTR_LEN(str); if (plen > slen) return mrb_nil_value(); - s = RSTR_PTR(str); - if (memcmp(s, ptr, plen) != 0) return mrb_nil_value(); + char *s = RSTR_PTR(str); + if (!str_prefix_p(mrb, self, ptr, plen)) return mrb_nil_value(); if (!mrb_frozen_p(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) { str->as.heap.ptr += plen; } @@ -1090,54 +1271,55 @@ mrb_str_del_prefix_bang(mrb_state *mrb, mrb_value self) * call-seq: * str.delete_prefix(prefix) -> new_str * - * Returns a copy of str with leading prefix deleted. + * Returns a copy of *str* with leading `prefix` deleted. * * "hello".delete_prefix("hel") #=> "lo" * "hello".delete_prefix("llo") #=> "hello" */ static mrb_value -mrb_str_del_prefix(mrb_state *mrb, mrb_value self) +str_del_prefix(mrb_state *mrb, mrb_value self) { - mrb_int plen, slen; + mrb_int plen; const char *ptr; mrb_get_args(mrb, "s", &ptr, &plen); - slen = RSTRING_LEN(self); + mrb_int slen = RSTRING_LEN(self); if (plen > slen) return mrb_str_dup(mrb, self); - if (memcmp(RSTRING_PTR(self), ptr, plen) != 0) + if (!str_prefix_p(mrb, self, ptr, plen)) return mrb_str_dup(mrb, self); return mrb_str_substr(mrb, self, plen, slen-plen); } +static mrb_bool +str_suffix_p(mrb_state *mrb, mrb_value str, const char *suffix_ptr, mrb_int suffix_len) +{ + mrb_int str_len = RSTRING_LEN(str); + if (suffix_len > str_len) return FALSE; + return memcmp(RSTRING_PTR(str) + (str_len - suffix_len), suffix_ptr, suffix_len) == 0; +} + /* * call-seq: * str.delete_suffix!(suffix) -> self or nil * - * Deletes trailing suffix from str, returning - * nil if no change was made. + * Deletes trailing `suffix` from *str*, returning + * `nil` if no change was made. * * "hello".delete_suffix!("llo") #=> "he" * "hello".delete_suffix!("hel") #=> nil */ static mrb_value -mrb_str_del_suffix_bang(mrb_state *mrb, mrb_value self) +str_del_suffix_bang(mrb_state *mrb, mrb_value self) { - mrb_int plen, slen; + mrb_int plen; const char *ptr; - char *s; - struct RString *str = RSTRING(self); mrb_get_args(mrb, "s", &ptr, &plen); - slen = RSTR_LEN(str); + struct RString *str = RSTRING(self); + mrb_check_frozen(mrb, str); + mrb_int slen = RSTR_LEN(str); if (plen > slen) return mrb_nil_value(); - s = RSTR_PTR(str); - if (memcmp(s+slen-plen, ptr, plen) != 0) return mrb_nil_value(); - if (!mrb_frozen_p(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) { - /* no need to modify string */ - } - else { - mrb_str_modify(mrb, str); - } + if (!str_suffix_p(mrb, self, ptr, plen)) return mrb_nil_value(); RSTR_SET_LEN(str, slen-plen); return self; } @@ -1146,21 +1328,21 @@ mrb_str_del_suffix_bang(mrb_state *mrb, mrb_value self) * call-seq: * str.delete_suffix(suffix) -> new_str * - * Returns a copy of str with leading suffix deleted. + * Returns a copy of *str* with leading `suffix` deleted. * * "hello".delete_suffix("hel") #=> "lo" * "hello".delete_suffix("llo") #=> "hello" */ static mrb_value -mrb_str_del_suffix(mrb_state *mrb, mrb_value self) +str_del_suffix(mrb_state *mrb, mrb_value self) { - mrb_int plen, slen; + mrb_int plen; const char *ptr; mrb_get_args(mrb, "s", &ptr, &plen); - slen = RSTRING_LEN(self); + mrb_int slen = RSTRING_LEN(self); if (plen > slen) return mrb_str_dup(mrb, self); - if (memcmp(RSTRING_PTR(self)+slen-plen, ptr, plen) != 0) + if (!str_suffix_p(mrb, self, ptr, plen)) return mrb_str_dup(mrb, self); return mrb_str_substr(mrb, self, 0, slen-plen); } @@ -1171,7 +1353,7 @@ mrb_str_del_suffix(mrb_state *mrb, mrb_value self) * call-seq: * str.casecmp(other_str) -> -1, 0, +1 or nil * - * Case-insensitive version of String#<=>. + * Case-insensitive version of `String#<=>`. * * "abcdef".casecmp("abcde") #=> 1 * "aBcDeF".casecmp("abcdef") #=> 0 @@ -1179,18 +1361,21 @@ mrb_str_del_suffix(mrb_state *mrb, mrb_value self) * "abcdef".casecmp("ABCDEF") #=> 0 */ static mrb_value -mrb_str_casecmp(mrb_state *mrb, mrb_value self) +str_casecmp(mrb_state *mrb, mrb_value self) { - mrb_value str; + mrb_value str = mrb_get_arg1(mrb); - mrb_get_args(mrb, "o", &str); if (!mrb_string_p(str)) return mrb_nil_value(); struct RString *s1 = mrb_str_ptr(self); struct RString *s2 = mrb_str_ptr(str); - mrb_int len = lesser(RSTR_LEN(s1), RSTR_LEN(s2)); + + mrb_int len1 = RSTR_LEN(s1); + mrb_int len2 = RSTR_LEN(s2); + mrb_int len = lesser(len1, len2); char *p1 = RSTR_PTR(s1); char *p2 = RSTR_PTR(s2); + if (p1 == p2) return mrb_fixnum_value(0); for (mrb_int i=0; i c2) return mrb_fixnum_value(1); if (c1 < c2) return mrb_fixnum_value(-1); } - if (RSTR_LEN(s1) == RSTR_LEN(s2)) return mrb_fixnum_value(0); - if (RSTR_LEN(s1) > RSTR_LEN(s2)) return mrb_fixnum_value(1); + if (len1 == len2) return mrb_fixnum_value(0); + if (len1 > len2) return mrb_fixnum_value(1); return mrb_fixnum_value(-1); } +#undef lesser /* * call-seq: @@ -1212,76 +1398,1066 @@ mrb_str_casecmp(mrb_state *mrb, mrb_value self) * false if they are not equal, and nil if other is not a string. */ static mrb_value -mrb_str_casecmp_p(mrb_state *mrb, mrb_value self) +str_casecmp_p(mrb_state *mrb, mrb_value self) { - mrb_value c = mrb_str_casecmp(mrb, self); + mrb_value c = str_casecmp(mrb, self); if (mrb_nil_p(c)) return c; return mrb_bool_value(mrb_fixnum(c) == 0); } +/* Internal helper for String#lines - splits string into array of lines */ static mrb_value -mrb_str_lines(mrb_state *mrb, mrb_value self) +str_lines(mrb_state *mrb, mrb_value self) { - mrb_value result; - int ai; - mrb_int len; char *b = RSTRING_PTR(self); char *p = b, *t; char *e = b + RSTRING_LEN(self); mrb->c->ci->mid = 0; - result = mrb_ary_new(mrb); - ai = mrb_gc_arena_save(mrb); + mrb_value result = mrb_ary_new(mrb); + int ai = mrb_gc_arena_save(mrb); while (p < e) { t = p; while (p < e && *p != '\n') p++; if (*p == '\n') p++; - len = (mrb_int) (p - t); + mrb_int len = (mrb_int) (p - t); mrb_ary_push(mrb, result, mrb_str_new(mrb, t, len)); mrb_gc_arena_restore(mrb, ai); } return result; } -void -mrb_mruby_string_ext_gem_init(mrb_state* mrb) +/* + * call-seq: + * +string -> new_string or self + * + * Returns `self` if `self` is not frozen. + * + * Otherwise returns `self.dup`, which is not frozen. + */ +static mrb_value +str_uplus(mrb_state *mrb, mrb_value str) +{ + if (mrb_frozen_p(mrb_obj_ptr(str))) { + return mrb_str_dup(mrb, str); + } + else { + return str; + } +} + +/* + * call-seq: + * -string -> frozen_string + * + * Returns a frozen, possibly pre-existing copy of the string. + * + */ +static mrb_value +str_uminus(mrb_state *mrb, mrb_value str) +{ + if (mrb_frozen_p(mrb_obj_ptr(str))) { + return str; + } + return mrb_obj_freeze(mrb, mrb_str_dup(mrb, str)); +} + +/* Internal helper for String#ascii_only? - checks if string contains only ASCII characters */ +static mrb_value +str_ascii_only_p(mrb_state *mrb, mrb_value str) +{ + struct RString *s = mrb_str_ptr(str); + const char *p = RSTR_PTR(s); + const char *e = p + RSTR_LEN(s); + + while (p < e) { + if (*p & 0x80) return mrb_false_value(); + p++; + } + mrb_str_ptr(str)->flags |= MRB_STR_SINGLE_BYTE; + return mrb_true_value(); +} + +/* Internal helper for String#b - returns binary encoded copy of string */ +static mrb_value +str_b(mrb_state *mrb, mrb_value self) +{ + mrb_value str = mrb_str_dup(mrb, self); + mrb_str_ptr(str)->flags |= MRB_STR_BINARY; + return str; +} + +/* + * Check if character is whitespace (space, tab, newline, carriage return, form feed, vertical tab) + */ +static inline mrb_bool +is_whitespace(char c) +{ + return (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'); +} + +/* + * Check if character is whitespace or null (for rstrip) + */ +static inline mrb_bool +is_whitespace_or_null(char c) +{ + return (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v' || c == '\0'); +} + +/* + * call-seq: + * str.lstrip -> new_str + * + * Returns a copy of str with leading whitespace removed. + * + * " hello ".lstrip #=> "hello " + * "hello".lstrip #=> "hello" + */ +static mrb_value +str_lstrip(mrb_state *mrb, mrb_value self) +{ + struct RString *s = mrb_str_ptr(self); + const char *ptr = RSTR_PTR(s); + mrb_int len = RSTR_LEN(s); + mrb_int start = 0; + + /* Find first non-whitespace character */ + while (start < len && is_whitespace(ptr[start])) { + start++; + } + + /* Return empty string if all whitespace */ + if (start >= len) { + return mrb_str_new_lit(mrb, ""); + } + + /* Return substring from first non-whitespace to end */ + return mrb_str_substr(mrb, self, start, len - start); +} + +/* + * call-seq: + * str.rstrip -> new_str + * + * Returns a copy of str with trailing whitespace removed. + * + * " hello ".rstrip #=> " hello" + * "hello".rstrip #=> "hello" + */ +static mrb_value +str_rstrip(mrb_state *mrb, mrb_value self) { - struct RClass * s = mrb->string_class; - - mrb_define_method(mrb, s, "dump", mrb_str_dump, MRB_ARGS_NONE()); - mrb_define_method(mrb, s, "swapcase!", mrb_str_swapcase_bang, MRB_ARGS_NONE()); - mrb_define_method(mrb, s, "swapcase", mrb_str_swapcase, MRB_ARGS_NONE()); - mrb_define_method(mrb, s, "concat", mrb_str_concat_m, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, s, "<<", mrb_str_concat_m, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, s, "count", mrb_str_count, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, s, "tr", mrb_str_tr, MRB_ARGS_REQ(2)); - mrb_define_method(mrb, s, "tr!", mrb_str_tr_bang, MRB_ARGS_REQ(2)); - mrb_define_method(mrb, s, "tr_s", mrb_str_tr_s, MRB_ARGS_REQ(2)); - mrb_define_method(mrb, s, "tr_s!", mrb_str_tr_s_bang, MRB_ARGS_REQ(2)); - mrb_define_method(mrb, s, "squeeze", mrb_str_squeeze, MRB_ARGS_OPT(1)); - mrb_define_method(mrb, s, "squeeze!", mrb_str_squeeze_bang, MRB_ARGS_OPT(1)); - mrb_define_method(mrb, s, "delete", mrb_str_delete, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, s, "delete!", mrb_str_delete_bang, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, s, "start_with?", mrb_str_start_with, MRB_ARGS_REST()); - mrb_define_method(mrb, s, "end_with?", mrb_str_end_with, MRB_ARGS_REST()); - mrb_define_method(mrb, s, "hex", mrb_str_hex, MRB_ARGS_NONE()); - mrb_define_method(mrb, s, "oct", mrb_str_oct, MRB_ARGS_NONE()); - mrb_define_method(mrb, s, "chr", mrb_str_chr, MRB_ARGS_NONE()); - mrb_define_method(mrb, s, "succ", mrb_str_succ, MRB_ARGS_NONE()); - mrb_define_method(mrb, s, "succ!", mrb_str_succ_bang, MRB_ARGS_NONE()); - mrb_define_method(mrb, s, "next", mrb_str_succ, MRB_ARGS_NONE()); - mrb_define_method(mrb, s, "next!", mrb_str_succ_bang, MRB_ARGS_NONE()); - mrb_define_method(mrb, s, "ord", mrb_str_ord, MRB_ARGS_NONE()); - mrb_define_method(mrb, s, "delete_prefix!", mrb_str_del_prefix_bang, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, s, "delete_prefix", mrb_str_del_prefix, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, s, "delete_suffix!", mrb_str_del_suffix_bang, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, s, "delete_suffix", mrb_str_del_suffix, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, s, "casecmp", mrb_str_casecmp, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, s, "casecmp?", mrb_str_casecmp_p, MRB_ARGS_REQ(1)); - - mrb_define_method(mrb, s, "__lines", mrb_str_lines, MRB_ARGS_NONE()); - - mrb_define_method(mrb, mrb->integer_class, "chr", mrb_int_chr, MRB_ARGS_OPT(1)); + struct RString *s = mrb_str_ptr(self); + const char *ptr = RSTR_PTR(s); + mrb_int len = RSTR_LEN(s); + mrb_int end = len; + + /* Find last non-whitespace character */ + while (end > 0 && is_whitespace_or_null(ptr[end - 1])) { + end--; + } + + /* Return empty string if all whitespace */ + if (end <= 0) { + return mrb_str_new_lit(mrb, ""); + } + + /* Return substring from start to last non-whitespace */ + return mrb_str_substr(mrb, self, 0, end); +} + +/* + * call-seq: + * str.strip -> new_str + * + * Returns a copy of str with leading and trailing whitespace removed. + * + * " hello ".strip #=> "hello" + * "\tgoodbye\r\n".strip #=> "goodbye" + */ +static mrb_value +str_strip(mrb_state *mrb, mrb_value self) +{ + struct RString *s = mrb_str_ptr(self); + const char *ptr = RSTR_PTR(s); + mrb_int len = RSTR_LEN(s); + mrb_int start = 0; + mrb_int end = len; + + /* Find first non-whitespace character */ + while (start < len && is_whitespace(ptr[start])) { + start++; + } + + /* Find last non-whitespace character */ + while (end > start && is_whitespace_or_null(ptr[end - 1])) { + end--; + } + + /* Return empty string if all whitespace */ + if (start >= end) { + return mrb_str_new_lit(mrb, ""); + } + + /* Return substring from first to last non-whitespace */ + return mrb_str_substr(mrb, self, start, end - start); +} + +/* + * call-seq: + * str.lstrip! -> self or nil + * + * Removes leading whitespace from str, returning nil if no change was made. + * + * " hello ".lstrip! #=> "hello " + * "hello".lstrip! #=> nil + */ +static mrb_value +str_lstrip_bang(mrb_state *mrb, mrb_value self) +{ + struct RString *s = mrb_str_ptr(self); + char *ptr = RSTR_PTR(s); + mrb_int len = RSTR_LEN(s); + mrb_int start = 0; + + mrb_check_frozen(mrb, mrb_obj_ptr(self)); + mrb_str_modify(mrb, s); + + /* Find first non-whitespace character */ + while (start < len && is_whitespace(ptr[start])) { + start++; + } + + /* No change needed */ + if (start == 0) { + return mrb_nil_value(); + } + + /* Move remaining characters to beginning */ + if (start < len) { + memmove(ptr, ptr + start, len - start); + RSTR_SET_LEN(s, len - start); + ptr[len - start] = '\0'; + } + else { + /* All whitespace - make empty */ + RSTR_SET_LEN(s, 0); + ptr[0] = '\0'; + } + + return self; +} + +/* + * call-seq: + * str.rstrip! -> self or nil + * + * Removes trailing whitespace from str, returning nil if no change was made. + * + * " hello ".rstrip! #=> " hello" + * "hello".rstrip! #=> nil + */ +static mrb_value +str_rstrip_bang(mrb_state *mrb, mrb_value self) +{ + struct RString *s = mrb_str_ptr(self); + char *ptr = RSTR_PTR(s); + mrb_int len = RSTR_LEN(s); + mrb_int end = len; + + mrb_check_frozen(mrb, mrb_obj_ptr(self)); + mrb_str_modify(mrb, s); + + /* Find last non-whitespace character */ + while (end > 0 && is_whitespace_or_null(ptr[end - 1])) { + end--; + } + + /* No change needed */ + if (end == len) { + return mrb_nil_value(); + } + + /* Truncate string */ + RSTR_SET_LEN(s, end); + ptr[end] = '\0'; + + return self; +} + +/* + * call-seq: + * str.strip! -> self or nil + * + * Removes leading and trailing whitespace from str, returning nil if no change was made. + * + * " hello ".strip! #=> "hello" + * "hello".strip! #=> nil + */ +static mrb_value +str_strip_bang(mrb_state *mrb, mrb_value self) +{ + struct RString *s = mrb_str_ptr(self); + char *ptr = RSTR_PTR(s); + mrb_int len = RSTR_LEN(s); + mrb_int start = 0; + mrb_int end = len; + mrb_bool changed = FALSE; + + mrb_check_frozen(mrb, mrb_obj_ptr(self)); + mrb_str_modify(mrb, s); + + /* Find first non-whitespace character */ + while (start < len && is_whitespace(ptr[start])) { + start++; + } + + /* Find last non-whitespace character */ + while (end > start && is_whitespace_or_null(ptr[end - 1])) { + end--; + } + + /* Check if any changes needed */ + if (start > 0) { + changed = TRUE; + if (start < end) { + memmove(ptr, ptr + start, end - start); + } + } + + if (end != len) { + changed = TRUE; + } + + if (!changed) { + return mrb_nil_value(); + } + + /* Set new length */ + RSTR_SET_LEN(s, end - start); + ptr[end - start] = '\0'; + + return self; +} + +/* Internal helper to count UTF-8 characters in a string using mruby's standard function */ +static mrb_int +str_char_count(mrb_value str) +{ +#ifdef MRB_UTF8_STRING + struct RString *s = mrb_str_ptr(str); + + if (RSTR_SINGLE_BYTE_P(s) || RSTR_BINARY_P(s)) { + /* ASCII/Binary: each byte is a character */ + return RSTR_LEN(s); + } + + /* UTF-8: use mruby's standard UTF-8 character counting function */ + return mrb_utf8_strlen(RSTR_PTR(s), RSTR_LEN(s)); +#else + /* Non-UTF8 build: treat as single bytes */ + return RSTRING_LEN(str); +#endif +} + +/* Internal fast path for String#chars - returns array of individual characters */ +static mrb_value +str_chars_ary(mrb_state *mrb, mrb_value self) +{ + struct RString *s = mrb_str_ptr(self); + const unsigned char *p = (unsigned char*)RSTR_PTR(s); + const unsigned char *e = p + RSTR_LEN(s); + + /* Estimate character count for array pre-allocation */ + mrb_int estimated_chars = RSTR_LEN(s); + if (!RSTR_SINGLE_BYTE_P(s) && !RSTR_BINARY_P(s)) { + estimated_chars = estimated_chars / 2; /* rough estimate for UTF-8 */ + } + mrb_value result = mrb_ary_new_capa(mrb, estimated_chars); + + if (RSTR_SINGLE_BYTE_P(s) || RSTR_BINARY_P(s)) { + /* ASCII/Binary: each byte is a character */ + while (p < e) { + mrb_value char_str = mrb_str_new(mrb, (char*)p, 1); + mrb_ary_push(mrb, result, char_str); + p++; + } + } + else { +#ifdef MRB_UTF8_STRING + /* UTF-8: handle multi-byte characters */ + while (p < e) { + mrb_int char_len = mrb_utf8len_table[p[0] >> 3]; + if (char_len == 0 || char_len > 4 || p + char_len > e) { + /* Invalid UTF-8, treat as single byte */ + char_len = 1; + } + else { + /* Validate UTF-8 sequence */ + mrb_bool valid = TRUE; + if (char_len > 1) { + for (mrb_int i = 1; i < char_len; i++) { + if ((p[i] & 0xC0) != 0x80) { + valid = FALSE; + break; + } + } + } + if (!valid) { + char_len = 1; + } + } + mrb_value char_str = mrb_str_new(mrb, (char*)p, char_len); + mrb_ary_push(mrb, result, char_str); + p += char_len; + } +#else + /* Non-UTF8 build: treat as single bytes */ + while (p < e) { + mrb_value char_str = mrb_str_new(mrb, (char*)p, 1); + mrb_ary_push(mrb, result, char_str); + p++; + } +#endif + } + + return result; +} + +/* + * call-seq: + * str.ljust(integer, padstr=' ') -> new_str + * + * If integer is greater than the length of str, returns a new + * String of length integer with str left justified and padded with padstr; + * otherwise, returns str. + */ +static mrb_value +str_ljust_core(mrb_state *mrb, mrb_value self) +{ + mrb_int width; + mrb_value padstr = mrb_str_new_lit(mrb, " "); + + mrb_get_args(mrb, "i|S", &width, &padstr); + + if (RSTRING_LEN(padstr) == 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "zero width padding"); + } + + mrb_int char_len = str_char_count(self); + if (width <= char_len) { + return mrb_str_dup(mrb, self); + } + + mrb_int padsize = width - char_len; + mrb_int pad_char_len = str_char_count(padstr); + if (pad_char_len == 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "zero width padding"); + } + + /* Build padding string by repeating padstr */ + mrb_value padding = mrb_str_new_lit(mrb, ""); + mrb_int chars_needed = padsize; + while (chars_needed > 0) { + if (chars_needed >= pad_char_len) { + mrb_str_cat_str(mrb, padding, padstr); + chars_needed -= pad_char_len; + } + else { + /* Need partial padding - use substr to get exact characters */ + mrb_value partial = mrb_str_substr(mrb, padstr, 0, chars_needed); + mrb_str_cat_str(mrb, padding, partial); + chars_needed = 0; + } + } + + return mrb_str_cat_str(mrb, mrb_str_dup(mrb, self), padding); +} + +/* + * call-seq: + * str.rjust(integer, padstr=' ') -> new_str + * + * If integer is greater than the length of str, returns a new + * String of length integer with str right justified and padded with padstr; + * otherwise, returns str. + */ +static mrb_value +str_rjust_core(mrb_state *mrb, mrb_value self) +{ + mrb_int width; + mrb_value padstr = mrb_str_new_lit(mrb, " "); + + mrb_get_args(mrb, "i|S", &width, &padstr); + + if (RSTRING_LEN(padstr) == 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "zero width padding"); + } + + mrb_int char_len = str_char_count(self); + if (width <= char_len) { + return mrb_str_dup(mrb, self); + } + + mrb_int padsize = width - char_len; + mrb_int pad_char_len = str_char_count(padstr); + if (pad_char_len == 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "zero width padding"); + } + + /* Build padding string by repeating padstr */ + mrb_value padding = mrb_str_new_lit(mrb, ""); + mrb_int chars_needed = padsize; + while (chars_needed > 0) { + if (chars_needed >= pad_char_len) { + mrb_str_cat_str(mrb, padding, padstr); + chars_needed -= pad_char_len; + } + else { + /* Need partial padding - use substr to get exact characters */ + mrb_value partial = mrb_str_substr(mrb, padstr, 0, chars_needed); + mrb_str_cat_str(mrb, padding, partial); + chars_needed = 0; + } + } + + return mrb_str_cat_str(mrb, padding, self); +} + +/* + * call-seq: + * str.center(width, padstr=' ') -> new_str + * + * Centers str in width. If width is greater than the length of str, + * returns a new String of length width with str centered and padded with + * padstr; otherwise, returns str. + */ +static mrb_value +str_center_core(mrb_state *mrb, mrb_value self) +{ + mrb_int width; + mrb_value padstr = mrb_str_new_lit(mrb, " "); + + mrb_get_args(mrb, "i|S", &width, &padstr); + + if (RSTRING_LEN(padstr) == 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "zero width padding"); + } + + mrb_int char_len = str_char_count(self); + if (width <= char_len) { + return mrb_str_dup(mrb, self); + } + + mrb_int total_pad = width - char_len; + mrb_int left_pad = total_pad / 2; + mrb_int right_pad = total_pad - left_pad; + + mrb_int pad_char_len = str_char_count(padstr); + if (pad_char_len == 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "zero width padding"); + } + + /* Build left padding */ + mrb_value left_padding = mrb_str_new_lit(mrb, ""); + mrb_int chars_needed = left_pad; + while (chars_needed > 0) { + if (chars_needed >= pad_char_len) { + mrb_str_cat_str(mrb, left_padding, padstr); + chars_needed -= pad_char_len; + } + else { + mrb_value partial = mrb_str_substr(mrb, padstr, 0, chars_needed); + mrb_str_cat_str(mrb, left_padding, partial); + chars_needed = 0; + } + } + + /* Build right padding */ + mrb_value right_padding = mrb_str_new_lit(mrb, ""); + chars_needed = right_pad; + while (chars_needed > 0) { + if (chars_needed >= pad_char_len) { + mrb_str_cat_str(mrb, right_padding, padstr); + chars_needed -= pad_char_len; + } + else { + mrb_value partial = mrb_str_substr(mrb, padstr, 0, chars_needed); + mrb_str_cat_str(mrb, right_padding, partial); + chars_needed = 0; + } + } + + mrb_value result = mrb_str_cat_str(mrb, left_padding, self); + return mrb_str_cat_str(mrb, result, right_padding); +} + +#ifdef MRB_UTF8_STRING +/* + * Given a character index, find the byte offset in a UTF-8 string. + * Returns -1 if the character index is out of bounds. + */ +static mrb_int +str_char_to_byte_offset(mrb_value str, mrb_int char_index) +{ + struct RString *s = mrb_str_ptr(str); + const char *p = RSTR_PTR(s); + mrb_int byte_len = RSTR_LEN(s); + + if (RSTR_SINGLE_BYTE_P(s) || RSTR_BINARY_P(s)) { + return char_index; + } + + if (char_index < 0) return -1; + + mrb_int byte_offset = 0; + mrb_int current_char_index = 0; + while (byte_offset < byte_len && current_char_index < char_index) { + mrb_int char_len = mrb_utf8len(p + byte_offset, p + byte_len - byte_offset); + if (char_len == 0) break; + byte_offset += char_len; + current_char_index++; + } + + if (current_char_index < char_index) return -1; + return byte_offset; +} + +/* + * Given a starting character index and a character length, find the byte length. + */ +static mrb_int +str_chars_to_byte_len(mrb_value str, mrb_int char_start, mrb_int char_len) +{ + struct RString *s = mrb_str_ptr(str); + const char *p = RSTR_PTR(s); + mrb_int str_byte_len = RSTR_LEN(s); + + if (RSTR_SINGLE_BYTE_P(s) || RSTR_BINARY_P(s)) { + return char_len; + } + + mrb_int start_byte_offset = str_char_to_byte_offset(str, char_start); + if (start_byte_offset == -1) return 0; + + mrb_int byte_offset = start_byte_offset; + mrb_int current_char_len = 0; + while (byte_offset < str_byte_len && current_char_len < char_len) { + mrb_int cl = mrb_utf8len(p + byte_offset, p + str_byte_len - byte_offset); + if (cl == 0) break; + byte_offset += cl; + current_char_len++; + } + + return byte_offset - start_byte_offset; +} +#endif + +static mrb_value +mrb_str_slice_bang(mrb_state *mrb, mrb_value self) +{ + mrb_check_frozen(mrb, mrb_obj_ptr(self)); + + mrb_value arg1, arg2; + mrb_int argc = mrb_get_args(mrb, "o|o", &arg1, &arg2); + + struct RString *str = mrb_str_ptr(self); + const char *ptr = RSTRING_PTR(self); + +#ifdef MRB_UTF8_STRING + mrb_int str_len = str_char_count(self); +#else + mrb_int str_len = RSTRING_LEN(self); +#endif + + mrb_int beg, len; + + if (argc == 1) { + if (mrb_string_p(arg1)) { + mrb_int pos = mrb_str_index(mrb, self, RSTRING_PTR(arg1), RSTRING_LEN(arg1), 0); + if (pos == -1) return mrb_nil_value(); +#ifdef MRB_UTF8_STRING + beg = str_char_count(mrb_str_substr(mrb, self, 0, pos)); + len = str_char_count(arg1); +#else + beg = pos; + len = RSTRING_LEN(arg1); +#endif + } + else if (mrb_range_p(arg1)) { + if (mrb_range_beg_len(mrb, arg1, &beg, &len, str_len, TRUE) != MRB_RANGE_OK) { + return mrb_nil_value(); + } + } + else { + beg = mrb_as_int(mrb, arg1); + if (beg < 0) beg += str_len; + if (beg < 0 || beg >= str_len) return mrb_nil_value(); + len = 1; + } + } + else { // argc == 2 + beg = mrb_as_int(mrb, arg1); + len = mrb_as_int(mrb, arg2); + if (beg < 0) beg += str_len; + if (len < 0) return mrb_nil_value(); + if (beg < 0 || beg > str_len) return mrb_nil_value(); + } + + if (beg > str_len) return mrb_nil_value(); + if (beg + len > str_len) { + len = str_len - beg; + } + if (len < 0) len = 0; + +#ifdef MRB_UTF8_STRING + mrb_int byte_beg = str_char_to_byte_offset(self, beg); + mrb_int byte_len = str_chars_to_byte_len(self, beg, len); +#else + mrb_int byte_beg = beg; + mrb_int byte_len = len; +#endif + + if (byte_beg < 0 || byte_beg > RSTRING_LEN(self) || byte_beg + byte_len > RSTRING_LEN(self)) { + return mrb_nil_value(); + } + + mrb_value result = mrb_str_new(mrb, RSTRING_PTR(self) + byte_beg, byte_len); + + mrb_str_modify(mrb, str); + ptr = RSTRING_PTR(self); + memmove((char*)ptr + byte_beg, ptr + byte_beg + byte_len, RSTRING_LEN(self) - byte_beg - byte_len); + RSTR_SET_LEN(str, RSTRING_LEN(self) - byte_len); + + return result; +} + +/* + * call-seq: + * string.clear -> string + * + * Makes string empty. + * + * a = "abcde" + * a.clear #=> "" + */ +static mrb_value +str_clear(mrb_state *mrb, mrb_value self) +{ + struct RString *s = mrb_str_ptr(self); + mrb_str_modify(mrb, s); + RSTR_SET_LEN(s, 0); + return self; +} + +/* + * call-seq: + * str.partition(sep) -> [head, sep, tail] + * + * Searches for the first occurrence of `sep` in `str`. If `sep` is found, + * returns a 3-element array containing the part of `str` before `sep`, + * `sep` itself, and the part of `str` after `sep`. + * + * If `sep` is not found, returns a 3-element array containing `str`, + * an empty string, and an empty string. + * + * "hello world".partition(" ") #=> ["hello", " ", "world"] + * "hello world".partition("o") #=> ["hell", "o", " world"] + * "hello world".partition("x") #=> ["hello world", "", ""] + */ +static mrb_value +str_partition(mrb_state *mrb, mrb_value self) +{ + mrb_value sep; + mrb_get_args(mrb, "S", &sep); + + mrb_int self_len = RSTRING_LEN(self); + mrb_int sep_len = RSTRING_LEN(sep); + const char *self_ptr = RSTRING_PTR(self); + const char *sep_ptr = RSTRING_PTR(sep); + + mrb_value result_ary = mrb_ary_new_capa(mrb, 3); + + if (sep_len == 0) { + mrb_ary_push(mrb, result_ary, mrb_str_new_lit(mrb, "")); + mrb_ary_push(mrb, result_ary, mrb_str_new_lit(mrb, "")); + mrb_ary_push(mrb, result_ary, mrb_str_dup(mrb, self)); + return result_ary; + } + + const char *found_ptr = NULL; + for (mrb_int i = 0; i <= self_len - sep_len; ++i) { + if (memcmp(self_ptr + i, sep_ptr, sep_len) == 0) { + found_ptr = self_ptr + i; + break; + } + } + + if (found_ptr) { + mrb_int pre_len = found_ptr - self_ptr; + mrb_int post_len = self_len - pre_len - sep_len; + + mrb_ary_push(mrb, result_ary, mrb_str_new(mrb, self_ptr, pre_len)); + mrb_ary_push(mrb, result_ary, mrb_str_dup(mrb, sep)); + mrb_ary_push(mrb, result_ary, mrb_str_new(mrb, found_ptr + sep_len, post_len)); + } + else { + mrb_ary_push(mrb, result_ary, mrb_str_dup(mrb, self)); + mrb_ary_push(mrb, result_ary, mrb_str_new_lit(mrb, "")); + mrb_ary_push(mrb, result_ary, mrb_str_new_lit(mrb, "")); + } + + return result_ary; +} + +/* + * call-seq: + * str.rpartition(sep) -> [head, sep, tail] + * + * Searches for the last occurrence of `sep` in `str`. If `sep` is found, + * returns a 3-element array containing the part of `str` before `sep`, + * `sep` itself, and the part of `str` after `sep`. + * + * If `sep` is not found, returns a 3-element array containing an empty string, + * an empty string, and `str`. + * + * "hello world".rpartition(" ") #=> ["hello", " ", "world"] + * "hello world".rpartition("o") #=> ["hello w", "o", "rld"] + * "hello world".rpartition("x") #=> ["", "", "hello world"] + */ +static mrb_value +str_rpartition(mrb_state *mrb, mrb_value self) +{ + mrb_value sep; + mrb_get_args(mrb, "S", &sep); + + mrb_int self_len = RSTRING_LEN(self); + mrb_int sep_len = RSTRING_LEN(sep); + const char *self_ptr = RSTRING_PTR(self); + const char *sep_ptr = RSTRING_PTR(sep); + + mrb_value result_ary = mrb_ary_new_capa(mrb, 3); + + if (sep_len == 0) { + mrb_ary_push(mrb, result_ary, mrb_str_dup(mrb, self)); + mrb_ary_push(mrb, result_ary, mrb_str_new_lit(mrb, "")); + mrb_ary_push(mrb, result_ary, mrb_str_new_lit(mrb, "")); + return result_ary; + } + + const char *found_ptr = NULL; + for (mrb_int i = self_len - sep_len; i >= 0; --i) { + if (memcmp(self_ptr + i, sep_ptr, sep_len) == 0) { + found_ptr = self_ptr + i; + break; + } + } + + if (found_ptr) { + mrb_int pre_len = found_ptr - self_ptr; + mrb_int post_len = self_len - pre_len - sep_len; + + mrb_ary_push(mrb, result_ary, mrb_str_new(mrb, self_ptr, pre_len)); + mrb_ary_push(mrb, result_ary, mrb_str_dup(mrb, sep)); + mrb_ary_push(mrb, result_ary, mrb_str_new(mrb, found_ptr + sep_len, post_len)); + } + else { + mrb_ary_push(mrb, result_ary, mrb_str_new_lit(mrb, "")); + mrb_ary_push(mrb, result_ary, mrb_str_new_lit(mrb, "")); + mrb_ary_push(mrb, result_ary, mrb_str_dup(mrb, self)); + } + + return result_ary; +} + +/* + * call-seq: + * str.insert(index, other_str) -> str + * + * Inserts *other_str* before the character at the given + * *index*, modifying *str*. Negative indices count from the + * end of the string, and insert after the given character. + * The intent is insert *aString* so that it starts at the given + * *index*. + * + * "abcd".insert(0, 'X') #=> "Xabcd" + * "abcd".insert(3, 'X') #=> "abcXd" + * "abcd".insert(4, 'X') #=> "abcdX" + * "abcd".insert(-3, 'X') #=> "abXcd" + * "abcd".insert(-1, 'X') #=> "abcdX" + */ +static mrb_value +str_insert(mrb_state *mrb, mrb_value self) +{ + mrb_int idx; + mrb_value str_to_insert; + mrb_get_args(mrb, "iS", &idx, &str_to_insert); + + struct RString *s = mrb_str_ptr(self); + mrb_int self_len = RSTR_LEN(s); + mrb_int insert_len = RSTRING_LEN(str_to_insert); + + mrb_check_frozen(mrb, s); + + if (idx < 0) { + idx = self_len + idx + 1; + } + + if (idx < 0 || idx > self_len) { + mrb_raisef(mrb, E_INDEX_ERROR, "index %S out of string", mrb_int_value(mrb, idx)); + } + + mrb_str_modify(mrb, s); + mrb_str_resize(mrb, self, self_len + insert_len); + + char *p = RSTRING_PTR(self); + memmove(p + idx + insert_len, p + idx, self_len - idx); + memcpy(p + idx, RSTRING_PTR(str_to_insert), insert_len); + + return self; +} + +/* + * call-seq: + * str.prepend(*other_str) -> str + * + * Prepend---Prepend the given strings to *str*. + * + * a = "world" + * a.prepend("hello ") #=> "hello world" + * a #=> "hello world" + * + * Multiple arguments are prepended in order: + * + * a = "world" + * a.prepend("hello ", "beautiful ") #=> "hello beautiful world" + */ +static mrb_value +str_prepend(mrb_state *mrb, mrb_value self) +{ + mrb_value *argv; + mrb_int argc; + mrb_get_args(mrb, "*", &argv, &argc); + + if (argc == 0) { + return self; + } + + struct RString *s = mrb_str_ptr(self); + mrb_check_frozen(mrb, s); + + /* Calculate total length needed for all prepended strings */ + mrb_int total_prepend_len = 0; + for (mrb_int i = 0; i < argc; i++) { + mrb_ensure_string_type(mrb, argv[i]); + total_prepend_len += RSTRING_LEN(argv[i]); + } + + if (total_prepend_len == 0) { + return self; + } + + mrb_int self_len = RSTRING_LEN(self); + mrb_str_modify(mrb, s); + mrb_str_resize(mrb, self, self_len + total_prepend_len); + + char *p = RSTRING_PTR(self); + + /* Move original content to the end. The original self data now lives + at p + total_prepend_len, which we use as the source for any + self-referencing arguments (e.g., s.prepend(s, s)) to avoid reading + data that has already been overwritten by earlier copies. */ + memmove(p + total_prepend_len, p, self_len); + + /* Copy prepended strings in order */ + mrb_int offset = 0; + for (mrb_int i = 0; i < argc; i++) { + const char *src; + mrb_int arg_len; + if (mrb_obj_eq(mrb, self, argv[i])) { + src = p + total_prepend_len; + arg_len = self_len; + } + else { + src = RSTRING_PTR(argv[i]); + arg_len = RSTRING_LEN(argv[i]); + } + if (arg_len > 0) { + memcpy(p + offset, src, arg_len); + offset += arg_len; + } + } + + return self; +} + +/* ---------------------------*/ +static const mrb_mt_entry string_ext_rom_entries[] = { + MRB_MT_ENTRY(mrb_str_dump, MRB_SYM(dump), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_swapcase_bang, MRB_SYM_B(swapcase), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_str_slice_bang, MRB_SYM_B(slice), MRB_ARGS_ARG(1,1)), + MRB_MT_ENTRY(str_swapcase, MRB_SYM(swapcase), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_clear, MRB_SYM(clear), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_concat_m, MRB_OPSYM(lshift), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(str_concat_m, MRB_SYM(concat), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(str_append_as_bytes, MRB_SYM(append_as_bytes), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(str_count, MRB_SYM(count), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(str_tr_m, MRB_SYM(tr), MRB_ARGS_REQ(2)), + MRB_MT_ENTRY(str_partition, MRB_SYM(partition), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(str_rpartition, MRB_SYM(rpartition), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(str_insert, MRB_SYM(insert), MRB_ARGS_REQ(2)), + MRB_MT_ENTRY(str_prepend, MRB_SYM(prepend), MRB_ARGS_REST()), + MRB_MT_ENTRY(str_tr_bang, MRB_SYM_B(tr), MRB_ARGS_REQ(2)), + MRB_MT_ENTRY(str_tr_s, MRB_SYM(tr_s), MRB_ARGS_REQ(2)), + MRB_MT_ENTRY(str_tr_s_bang, MRB_SYM_B(tr_s), MRB_ARGS_REQ(2)), + MRB_MT_ENTRY(str_squeeze_m, MRB_SYM(squeeze), MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(str_squeeze_bang, MRB_SYM_B(squeeze), MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(str_delete_m, MRB_SYM(delete), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(str_delete_bang, MRB_SYM_B(delete), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(str_start_with, MRB_SYM_Q(start_with), MRB_ARGS_REST()), + MRB_MT_ENTRY(str_end_with, MRB_SYM_Q(end_with), MRB_ARGS_REST()), + MRB_MT_ENTRY(str_hex, MRB_SYM(hex), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_oct, MRB_SYM(oct), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_chr, MRB_SYM(chr), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_succ, MRB_SYM(succ), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_succ_bang, MRB_SYM_B(succ), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_succ, MRB_SYM(next), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_succ_bang, MRB_SYM_B(next), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_ord, MRB_SYM(ord), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_del_prefix_bang, MRB_SYM_B(delete_prefix), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(str_del_prefix, MRB_SYM(delete_prefix), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(str_del_suffix_bang, MRB_SYM_B(delete_suffix), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(str_del_suffix, MRB_SYM(delete_suffix), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(str_casecmp, MRB_SYM(casecmp), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(str_casecmp_p, MRB_SYM_Q(casecmp), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(str_uplus, MRB_OPSYM(plus), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_uminus, MRB_OPSYM(minus), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_ascii_only_p, MRB_SYM_Q(ascii_only), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_b, MRB_SYM(b), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_lines, MRB_SYM(__lines), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_codepoints, MRB_SYM(__codepoints), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_scrub_core, MRB_SYM(__scrub), MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(str_scrub_chunks, MRB_SYM(__scrub_chunks), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_lstrip, MRB_SYM(lstrip), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_rstrip, MRB_SYM(rstrip), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_strip, MRB_SYM(strip), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_lstrip_bang, MRB_SYM_B(lstrip), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_rstrip_bang, MRB_SYM_B(rstrip), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_strip_bang, MRB_SYM_B(strip), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_chars_ary, MRB_SYM(__chars), MRB_ARGS_NONE()), + MRB_MT_ENTRY(str_ljust_core, MRB_SYM(ljust), MRB_ARGS_ARG(1,1)), + MRB_MT_ENTRY(str_rjust_core, MRB_SYM(rjust), MRB_ARGS_ARG(1,1)), + MRB_MT_ENTRY(str_center_core, MRB_SYM(center), MRB_ARGS_ARG(1,1)), +}; + +void +mrb_mruby_string_ext_gem_init(mrb_state* mrb) +{ + struct RClass *s = mrb->string_class; + + MRB_MT_INIT_ROM(mrb, s, string_ext_rom_entries); + mrb_define_method_id(mrb, mrb->integer_class, MRB_SYM(chr), int_chr, MRB_ARGS_NONE()|MRB_ARGS_OPT(1)); } void diff --git a/mrbgems/mruby-string-ext/test/numeric.rb b/mrbgems/mruby-string-ext/test/numeric.rb index dfcb9ebf4d..1d876dedef 100644 --- a/mrbgems/mruby-string-ext/test/numeric.rb +++ b/mrbgems/mruby-string-ext/test/numeric.rb @@ -1,5 +1,3 @@ -# coding: utf-8 - assert('Integer#chr') do assert_equal("A", 65.chr) assert_equal("B", 0x42.chr) @@ -24,6 +22,11 @@ assert_equal("«", 171.chr("utf-8")) assert_equal("あ", 12354.chr("Utf-8")) assert_raise(RangeError) { -1.chr("utf-8") } - assert_raise(RangeError) { 0x110000.chr.chr("UTF-8") } + assert_raise(RangeError) { 0x110000.chr("UTF-8") } + # UTF-16 surrogates are not valid Unicode scalar values (RFC 3629, #2708) + assert_raise(RangeError) { 0xD800.chr("UTF-8") } + assert_raise(RangeError) { 0xDFFF.chr("UTF-8") } + assert_equal "\u{D7FF}", 0xD7FF.chr("UTF-8") + assert_equal "\u{E000}", 0xE000.chr("UTF-8") end end diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 7be673aa60..c0fd381db3 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -1,4 +1,3 @@ -# coding: utf-8 ## # String(Ext) Test @@ -198,6 +197,9 @@ def assert_upto(exp, receiver, *args) assert_equal ["aaaa", "b", ""], "aaaab".partition("b") assert_equal ["", "b", "aaaa"], "baaaa".partition("b") assert_equal ["", "", ""], "".partition("a") + assert_equal ["hello", " ", "world"], "hello world".partition(" ") + assert_equal ["hell", "o", " world"], "hello world".partition("o") + assert_equal ["hello world", "", ""], "hello world".partition("x") end assert('String#rpartition') do @@ -208,6 +210,9 @@ def assert_upto(exp, receiver, *args) assert_equal ["aaaa", "b", ""], "aaaab".rpartition("b") assert_equal ["", "b", "aaaa"], "baaaa".rpartition("b") assert_equal ["", "", ""], "".rpartition("a") + assert_equal ["hello", " ", "world"], "hello world".rpartition(" ") + assert_equal ["hello w", "o", "rld"], "hello world".rpartition("o") + assert_equal ["", "", "hello world"], "hello world".rpartition("x") end assert('String#hex') do @@ -453,9 +458,55 @@ def assert_upto(exp, receiver, *args) end assert('String#prepend') do + # Basic prepend test a = "world" assert_equal "hello world", a.prepend("hello ") assert_equal "hello world", a + + # Multiple arguments test + b = "world" + assert_equal "hello beautiful world", b.prepend("hello ", "beautiful ") + assert_equal "hello beautiful world", b + + # Empty string test + c = "test" + assert_equal "test", c.prepend("") + assert_equal "test", c + + # No arguments test + d = "test" + assert_equal "test", d.prepend() + assert_equal "test", d + + # Prepend to empty string + e = "" + assert_equal "hello", e.prepend("hello") + assert_equal "hello", e + + # Multiple empty strings + f = "world" + assert_equal "world", f.prepend("", "", "") + assert_equal "world", f + + # Mixed empty and non-empty + g = "world" + assert_equal "hello world", g.prepend("", "hello ", "") + assert_equal "hello world", g + + # Self-referencing arguments (GHSA-3hgj-g76g-878c) + h = "A" * 100 + h.prepend(h, h) + assert_equal 300, h.length + assert_equal "A" * 300, h + + # Mixed self-reference and literal + i = "AB" + i.prepend("XYZ", i) + assert_equal "XYZABAB", i + + j = "AB" + j.prepend(j, "X", j) + assert_equal "ABXABAB", j end assert('String#ljust') do @@ -616,6 +667,19 @@ def assert_upto(exp, receiver, *args) assert_equal expect, got end if UTF8STRING +assert('String#ord(UTF-8) rejects ill-formed sequences', '#2708') do + # overlong encodings (RFC 3629) + assert_raise(ArgumentError) { "\xC0\x80".ord } # 2-byte overlong NUL + assert_raise(ArgumentError) { "\xE0\x80\x80".ord } # 3-byte overlong NUL + assert_raise(ArgumentError) { "\xF0\x80\x80\x80".ord } # 4-byte overlong NUL + assert_raise(ArgumentError) { "\xE0\x9F\xBF".ord } # overlong U+07FF as 3 bytes + # UTF-16 surrogates encoded as UTF-8 + assert_raise(ArgumentError) { "\xED\xA0\x80".ord } # U+D800 + assert_raise(ArgumentError) { "\xED\xBF\xBF".ord } # U+DFFF + # above U+10FFFF + assert_raise(ArgumentError) { "\xF4\x90\x80\x80".ord } # U+110000 +end if UTF8STRING + assert('String#chr') do assert_equal "a", "abcde".chr assert_equal "h", "hello!".chr @@ -713,3 +777,74 @@ def assert_upto(exp, receiver, *args) assert_equal "he", "hello".delete_suffix!("llo") assert_nil "hello".delete_suffix!("he") end + +assert('String#+@') do + a = +"abc" + assert_false(a.frozen?) + a = +(a.freeze) + assert_false(a.frozen?) +end + +assert('String#-@') do + a = -"abc" + assert_true(a.frozen?) + a = -(a.freeze) + assert_true(a.frozen?) +end + +assert('String#scrub default replacement (U+FFFD)') do + # scrub has UTF-8 semantics; on builds without MRB_UTF8_STRING it + # degrades to a no-op (verified separately below). + skip unless "あ".length == 1 + assert_equal "\u{FFFD}", "\xE3\x81".scrub + assert_equal "abc\u{FFFD}def", "abc\x80def".scrub + assert_equal "\u{FFFD}", "\x80\x81\x82".scrub # run collapsed + assert_equal "", "".scrub + assert_equal "hello", "hello".scrub # already valid + assert_equal "あい", "あい".scrub # already valid multibyte +end + +assert('String#scrub rejects malformed sequences') do + skip unless "あ".length == 1 + # overlong, UTF-16 surrogate, codepoint above U+10FFFF + assert_equal "\u{FFFD}", "\xC0\xAF".scrub # overlong "/" + assert_equal "\u{FFFD}", "\xED\xA0\x80".scrub # surrogate U+D800 + assert_equal "\u{FFFD}", "\xF4\x90\x80\x80".scrub # > U+10FFFF +end + +assert('String#scrub with replacement string') do + skip unless "あ".length == 1 + assert_equal "abc?def", "abc\x80def".scrub("?") + assert_equal "abcdef", "abc\x80def".scrub("") + assert_equal "abcdef", "abc\x80def".scrub("") +end + +assert('String#scrub raises on invalid replacement') do + skip unless "あ".length == 1 + assert_raise(ArgumentError) { "abc\x80".scrub("\xFF") } +end + +assert('String#scrub with block') do + skip unless "あ".length == 1 + assert_equal "abc<80>def", + "abc\x80def".scrub { |b| "<" + b.bytes.first.to_s(16) + ">" } + # Block not called when string is already valid + called = false + "hello".scrub { |_| called = true; "X" } + assert_false called + # Multiple invalid runs each get their own block invocation + result = "a\x80b\x81c".scrub { |b| "[#{b.bytes.first}]" } + assert_equal "a[128]b[129]c", result + # Non-String block return values are coerced via to_s (mruby leniency; + # CRuby raises TypeError instead). Locking this in so the choice is + # explicit and doesn't drift accidentally. + assert_equal "abc42def", "abc\x80def".scrub { 42 } +end + +assert('String#scrub is a no-op without MRB_UTF8_STRING') do + skip if "あ".length == 1 + # Method is still defined and returns a (string-equal) copy. + assert_equal "abc\x80def", "abc\x80def".scrub + assert_equal "abc\x80def", "abc\x80def".scrub("?") + assert_equal "abc\x80def", "abc\x80def".scrub { |_| "?" } +end diff --git a/mrbgems/mruby-struct/README.md b/mrbgems/mruby-struct/README.md new file mode 100644 index 0000000000..366610ae58 --- /dev/null +++ b/mrbgems/mruby-struct/README.md @@ -0,0 +1,105 @@ +# mruby-struct + +This mrbgem provides the `Struct` class, a convenient way to bundle a number of attributes together, much like Ruby's core `Struct` class. It allows you to create simple classes (structs) with a defined set of accessor methods for these attributes. + +## Functionality + +- Define new Struct classes with a specific set of members. +- Create instances of these Structs with positional or keyword arguments. +- Support for keyword initialization mode (`keyword_init` option). +- Access and assign to struct members using accessor methods or by index/symbol. +- Iterate over members and values. +- Convert structs to Arrays or Hashes. + +## Basic Usage + +```ruby +# Define a new Struct class +Point = Struct.new(:x, :y) + +# Create an instance of the Point struct +origin = Point.new(0, 0) + +# Access attributes +puts origin.x # Output: 0 +puts origin[:y] # Output: 0 + +# Set attributes +origin.x = 10 +origin[:y] = 20 + +puts origin.inspect # Output: # + +# Another example +Customer = Struct.new("Customer", :name, :address) +joe = Customer.new("Joe Smith", "123 Maple, Anytown NC") + +puts joe.name # Output: Joe Smith +puts joe.address # Output: 123 Maple, Anytown NC +``` + +## Keyword Initialization + +As of mruby 3.4, Struct supports keyword initialization through the `keyword_init` option: + +```ruby +# Create a Struct with keyword initialization enabled +Person = Struct.new(:name, :age, keyword_init: true) + +# Must use keyword arguments when keyword_init: true +person = Person.new(name: "Alice", age: 30) +puts person.name # Output: Alice +puts person.age # Output: 30 + +# Can create with partial keywords (missing values are nil) +person2 = Person.new(name: "Bob") +puts person2.name # Output: Bob +puts person2.age # Output: nil + +# Empty initialization is allowed +person3 = Person.new +puts person3.name # Output: nil + +# Positional arguments will raise an error when keyword_init: true +# Person.new("Charlie", 25) # ArgumentError: wrong arguments, expected keyword arguments +``` + +### Keyword Initialization Modes + +The `keyword_init` option supports three modes: + +1. **`keyword_init: true`** - Only keyword arguments are accepted +2. **`keyword_init: false`** - Only positional arguments are accepted (hashes are treated as values) +3. **`keyword_init: nil` (default)** - Flexible mode: accepts both positional arguments and keyword arguments (single hash) + +```ruby +# Flexible mode (default behavior) +FlexPoint = Struct.new(:x, :y) +p1 = FlexPoint.new(1, 2) # Positional arguments +p2 = FlexPoint.new(x: 3, y: 4) # Keyword arguments (single hash) + +# Keyword-only mode +KeywordPoint = Struct.new(:x, :y, keyword_init: true) +p3 = KeywordPoint.new(x: 5, y: 6) # Only keyword arguments allowed + +# Positional-only mode +PositionalPoint = Struct.new(:x, :y, keyword_init: false) +p4 = PositionalPoint.new(7, 8) # Only positional arguments +p5 = PositionalPoint.new({x: 9, y: 10}) # Hash is treated as first value +``` + +## Available Methods + +Instances of classes created with `Struct.new` have several useful methods, including: + +- `members`: Returns an array of symbols representing the names of the instance variables. +- `each`: Calls a block for each attribute, passing the value. +- `each_pair`: Calls a block for each attribute, passing the name (symbol) and value. +- `select`: Returns an array containing values for which the block returns true. +- `to_a` / `values`: Returns an array containing the values of the struct. +- `to_h`: Returns a hash mapping member names (symbols) to their values. +- `length` / `size`: Returns the number of members in the struct. +- `dig`: Extracts a nested value specified by a sequence of keys. +- `==`, `eql?`: For comparing struct instances. + +For more details on specific methods, please refer to the mruby documentation or the core Ruby `Struct` class documentation, as `mruby-struct` aims for compatibility. diff --git a/mrbgems/mruby-struct/mrblib/struct.rb b/mrbgems/mruby-struct/mrblib/struct.rb index f80c545183..aa8e3f8a67 100644 --- a/mrbgems/mruby-struct/mrblib/struct.rb +++ b/mrbgems/mruby-struct/mrblib/struct.rb @@ -6,74 +6,45 @@ class Struct include Enumerable ## - # Calls the given block for each element of +self+ + # Calls the given block for each element of `self` # and pass the respective element. # # ISO 15.2.18.4.4 def each(&block) - self.class.members.each{|field| + self.class.members.each {|field| block.call(self[field]) } self end ## - # Calls the given block for each element of +self+ + # Calls the given block for each element of `self` # and pass the name and value of the respective # element. # # ISO 15.2.18.4.5 def each_pair(&block) - self.class.members.each{|field| + self.class.members.each {|field| block.call(field.to_sym, self[field]) } self end ## - # Calls the given block for each element of +self+ + # Calls the given block for each element of `self` # and returns an array with all elements of which # block is not false. # # ISO 15.2.18.4.7 def select(&block) ary = [] - self.class.members.each{|field| + self.class.members.each {|field| val = self[field] ary.push(val) if block.call(val) } ary end - def _inspect(recur_list) - return "#" if recur_list[self.object_id] - recur_list[self.object_id] = true - name = self.class.to_s - if name[0] == "#" - str = "#" - end - - ## - # call-seq: - # struct.to_s -> string - # struct.inspect -> string - # - # Describe the contents of this struct in a string. - # - # 15.2.18.4.10(x) - # - def inspect - self._inspect({}) - end - ## # 15.2.18.4.11(x) # @@ -83,9 +54,9 @@ def inspect # call-seq: # hsh.dig(key,...) -> object # - # Extracts the nested value specified by the sequence of key - # objects by calling +dig+ at each step, returning +nil+ if any - # intermediate step is +nil+. + # Extracts the nested value specified by the sequence of *key* + # objects by calling `dig` at each step, returning `nil` if any + # intermediate step is `nil`. # def dig(idx,*args) n = self[idx] diff --git a/mrbgems/mruby-struct/src/struct.c b/mrbgems/mruby-struct/src/struct.c index beecea6cbe..4bd0129a0c 100644 --- a/mrbgems/mruby-struct/src/struct.c +++ b/mrbgems/mruby-struct/src/struct.c @@ -14,7 +14,6 @@ #include #include #include -#include #define RSTRUCT_LEN(st) RARRAY_LEN(st) #define RSTRUCT_PTR(st) RARRAY_PTR(st) @@ -27,47 +26,46 @@ struct_class(mrb_state *mrb) return mrb_class_get_id(mrb, MRB_SYM(Struct)); } -static inline mrb_value -struct_ivar_get(mrb_state *mrb, mrb_value cls, mrb_sym id) +static void +struct_corrupted(mrb_state *mrb) { - struct RClass* c = mrb_class_ptr(cls); - struct RClass* sclass = struct_class(mrb); - mrb_value ans; - - for (;;) { - ans = mrb_iv_get(mrb, mrb_obj_value(c), id); - if (!mrb_nil_p(ans)) return ans; - c = c->super; - if (c == sclass || c == 0) - return mrb_nil_value(); - } + mrb_raise(mrb, E_TYPE_ERROR, "corrupted struct"); } static mrb_value -struct_s_members(mrb_state *mrb, struct RClass *klass) +struct_s_members(mrb_state *mrb, struct RClass *c) { - mrb_value members = struct_ivar_get(mrb, mrb_obj_value(klass), MRB_SYM(__members__)); + struct RClass* sclass = struct_class(mrb); - if (mrb_nil_p(members)) { - mrb_raise(mrb, E_TYPE_ERROR, "uninitialized struct"); - } - if (!mrb_array_p(members)) { - mrb_raise(mrb, E_TYPE_ERROR, "corrupted struct"); + for (;;) { + mrb_value members = mrb_iv_get(mrb, mrb_obj_value(c), MRB_SYM(__members__)); + + if (!mrb_nil_p(members)) { + if (!mrb_array_p(members)) { + struct_corrupted(mrb); + } + return members; + } + c = c->super; + if (c == sclass || c == 0) { + mrb_raise(mrb, E_TYPE_ERROR, "uninitialized struct"); + } } - return members; } static mrb_value struct_members(mrb_state *mrb, mrb_value s) { - mrb_value members = struct_s_members(mrb, mrb_obj_class(mrb, s)); - if (!mrb_struct_p(s) || RSTRUCT_LEN(s) == 0) { - mrb_raise(mrb, E_TYPE_ERROR, "corrupted struct"); + if (!mrb_struct_p(s)) { + struct_corrupted(mrb); } - if (RSTRUCT_LEN(s) != RARRAY_LEN(members)) { + mrb_value members = struct_s_members(mrb, mrb_obj_class(mrb, s)); + mrb_int len = RSTRUCT_LEN(s); + mrb_int mlen = RARRAY_LEN(members); + if (len > 0 && len != mlen) { mrb_raisef(mrb, E_TYPE_ERROR, "struct size differs (%i required %i given)", - RARRAY_LEN(members), RSTRUCT_LEN(s)); + mlen, len); } return members; } @@ -75,20 +73,13 @@ struct_members(mrb_state *mrb, mrb_value s) static mrb_value mrb_struct_s_members_m(mrb_state *mrb, mrb_value klass) { - mrb_value members, ary; - - members = struct_s_members(mrb, mrb_class_ptr(klass)); - ary = mrb_ary_new_capa(mrb, RARRAY_LEN(members)); + mrb_value members = struct_s_members(mrb, mrb_class_ptr(klass)); + mrb_value ary = mrb_ary_new_capa(mrb, RARRAY_LEN(members)); mrb_ary_replace(mrb, ary, members); return ary; } -static void -mrb_struct_modify(mrb_state *mrb, mrb_value strct) -{ - mrb_check_frozen(mrb, mrb_basic_ptr(strct)); - mrb_write_barrier(mrb, mrb_basic_ptr(strct)); -} +#define mrb_struct_modify(mrb,s) mrb_check_frozen((mrb), mrb_basic_ptr(s)) /* 15.2.18.4.6 */ /* @@ -109,13 +100,25 @@ mrb_struct_members(mrb_state *mrb, mrb_value obj) return mrb_struct_s_members_m(mrb, mrb_obj_value(mrb_obj_class(mrb, obj))); } +static mrb_int +num_members(mrb_state *mrb, mrb_value self) +{ + mrb_value members = struct_members(mrb, self); + return RARRAY_LEN(members); +} + static mrb_value mrb_struct_ref(mrb_state *mrb, mrb_value obj) { + mrb_int argc = mrb_get_argc(mrb); + if (argc != 0) { + mrb_argnum_error(mrb, argc, 0, 0); + } mrb_int i = mrb_integer(mrb_proc_cfunc_env_get(mrb, 0)); + mrb_int len = num_members(mrb, obj); mrb_value *ptr = RSTRUCT_PTR(obj); - if (!ptr) return mrb_nil_value(); + if (!ptr || len <= i) return mrb_nil_value(); return ptr[i]; } @@ -125,15 +128,13 @@ mrb_id_attrset(mrb_state *mrb, mrb_sym id) #define ONSTACK_ALLOC_MAX 32 #define ONSTACK_STRLEN_MAX (ONSTACK_ALLOC_MAX - 1) /* '=' character */ - const char *name; char *buf; mrb_int len; - mrb_sym mid; char onstack[ONSTACK_ALLOC_MAX]; - name = mrb_sym_name_len(mrb, id, &len); + const char *name = mrb_sym_name_len(mrb, id, &len); if (len > ONSTACK_STRLEN_MAX) { - buf = (char *)mrb_malloc(mrb, (size_t)len+1); + buf = (char*)mrb_malloc(mrb, (size_t)len+1); } else { buf = onstack; @@ -141,7 +142,7 @@ mrb_id_attrset(mrb_state *mrb, mrb_sym id) memcpy(buf, name, (size_t)len); buf[len] = '='; - mid = mrb_intern(mrb, buf, len+1); + mrb_sym mid = mrb_intern(mrb, buf, len+1); if (buf != onstack) { mrb_free(mrb, buf); } @@ -152,17 +153,9 @@ static mrb_value mrb_struct_set_m(mrb_state *mrb, mrb_value obj) { mrb_int i = mrb_integer(mrb_proc_cfunc_env_get(mrb, 0)); - mrb_value *ptr; mrb_value val = mrb_get_arg1(mrb); - mrb_struct_modify(mrb, obj); - ptr = RSTRUCT_PTR(obj); - if (ptr == NULL || i >= RSTRUCT_LEN(obj)) { - mrb_ary_set(mrb, obj, i, val); - } - else { - ptr[i] = val; - } + mrb_ary_set(mrb, obj, i, val); return val; } @@ -170,11 +163,10 @@ static void make_struct_define_accessors(mrb_state *mrb, mrb_value members, struct RClass *c) { const mrb_value *ptr_members = RARRAY_PTR(members); - mrb_int i; mrb_int len = RARRAY_LEN(members); int ai = mrb_gc_arena_save(mrb); - for (i=0; i ) -> StructClass - * StructClass.new(arg, ...) -> obj - * StructClass[arg, ...] -> obj + * Struct.new([aString] [, aSym]+, keyword_init: false) -> StructClass + * StructClass.new(arg, ...) -> obj + * StructClass[arg, ...] -> obj * - * Creates a new class, named by aString, containing accessor - * methods for the given symbols. If the name aString is + * Creates a new class, named by *aString*, containing accessor + * methods for the given symbols. If the name *aString* is * omitted, an anonymous structure class will be created. Otherwise, * the name of this struct will appear as a constant in class - * Struct, so it must be unique for all - * Structs in the system and should start with a capital + * `Struct`, so it must be unique for all + * `Struct`s in the system and should start with a capital * letter. Assigning a structure class to a constant effectively gives * the class the name of the constant. * - * Struct::new returns a new Class object, + * `Struct::new` returns a new `Class` object, * which can then be used to create specific instances of the new * structure. The number of actual parameters must be * less than or equal to the number of attributes defined for this - * class; unset parameters default to nil. Passing too many - * parameters will raise an ArgumentError. + * class; unset parameters default to `nil`. Passing too many + * parameters will raise an `ArgumentError`. + * + * If `keyword_init` is true, the struct will accept keyword + * arguments for initialization instead of positional arguments: + * + * Person = Struct.new(:name, :age, keyword_init: true) + * Person.new(name: "Alice", age: 30) + * #=> # * * The remaining methods listed in this section (class and instance) * are defined for this generated class. @@ -256,97 +256,166 @@ make_struct(mrb_state *mrb, mrb_value name, mrb_value members, struct RClass *kl * # Create a structure named by its constant * Customer = Struct.new(:name, :address) #=> Customer * Customer.new("Dave", "123 Main") #=> # + * + * # Create a structure with keyword initialization + * User = Struct.new(:id, :email, keyword_init: true) + * User.new(id: 1, email: "user@example.com") + * #=> # */ static mrb_value mrb_struct_s_def(mrb_state *mrb, mrb_value klass) { - mrb_value name, rest; - const mrb_value *pargv; - mrb_int argcnt; - mrb_int i; - mrb_value b, st; - mrb_sym id; + mrb_value name = mrb_nil_value(); + mrb_value b; const mrb_value *argv; mrb_int argc; + mrb_value keyword_init_val = mrb_nil_value(); - name = mrb_nil_value(); mrb_get_args(mrb, "*&", &argv, &argc, &b); - if (argc == 0) { /* special case to avoid crash */ - mrb_argnum_error(mrb, argc, 1, -1); + if (argc == 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (given 0, expected 1+)"); } - else { - pargv = argv; - argcnt = argc; - if (argc > 0) { - name = argv[0]; - if (mrb_symbol_p(name)) { - /* 1stArgument:symbol -> name=nil rest=argv[0..n] */ - name = mrb_nil_value(); - } - else { - pargv++; - argcnt--; - } - } - rest = mrb_ary_new_from_values(mrb, argcnt, pargv); - for (i=0; i 0 && mrb_hash_p(argv[argc-1])) { + mrb_value options = argv[argc-1]; + mrb_value keyword_init_sym = mrb_symbol_value(MRB_SYM(keyword_init)); + + if (mrb_hash_key_p(mrb, options, keyword_init_sym)) { + keyword_init_val = mrb_hash_get(mrb, options, keyword_init_sym); + argc--; /* Don't treat the options hash as a member name */ } + } - return st; + const mrb_value *pargv = argv; + mrb_int argcnt = argc; + if (argc > 0 && !mrb_symbol_p(argv[0])) { + /* 1stArgument:!symbol -> name=argv[0] rest=argv[0..n] */ + name = argv[0]; + pargv++; + argcnt--; + } + mrb_value members = mrb_ary_new_from_values(mrb, argcnt, pargv); + for (mrb_int i=0; i 0) { + mrb_value keys_str = mrb_ary_join(mrb, invalid_keys, mrb_str_new_lit(mrb, ", ")); + mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown keywords: %S", keys_str); + } + + return self; +} + static mrb_value mrb_struct_initialize(mrb_state *mrb, mrb_value self) { const mrb_value *argv; mrb_int argc; - mrb_get_args(mrb, "*!", &argv, &argc); - return mrb_struct_initialize_withArg(mrb, argc, argv, self); + mrb_get_args(mrb, "*", &argv, &argc); + + mrb_value klass = mrb_obj_value(mrb_obj_class(mrb, self)); + mrb_value keyword_init = mrb_iv_get(mrb, klass, MRB_IVSYM(__keyword_init__)); + + if (mrb_test(keyword_init)) { /* keyword_init: true or other truthy value */ + if (argc > 1 || (argc == 1 && !mrb_hash_p(argv[0]))) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong arguments, expected keyword arguments"); + } + mrb_value hash = (argc == 1) ? argv[0] : mrb_hash_new(mrb); + return mrb_struct_init_with_keywords(mrb, hash, self); + } + else if (mrb_equal(mrb, keyword_init, mrb_false_value())) { /* keyword_init: false */ + return mrb_struct_init_with_args(mrb, argc, argv, self); + } + else { /* keyword_init: nil (default) */ + if (argc == 1 && mrb_hash_p(argv[0])) { + return mrb_struct_init_with_keywords(mrb, argv[0], self); + } + else { + return mrb_struct_init_with_args(mrb, argc, argv, self); + } + } } /* 15.2.18.4.9 */ @@ -361,7 +430,7 @@ mrb_struct_init_copy(mrb_state *mrb, mrb_value copy) mrb_raise(mrb, E_TYPE_ERROR, "wrong argument class"); } if (!mrb_struct_p(s)) { - mrb_raise(mrb, E_TYPE_ERROR, "corrupted struct"); + struct_corrupted(mrb); } mrb_ary_replace(mrb, copy, s); return copy; @@ -370,35 +439,41 @@ mrb_struct_init_copy(mrb_state *mrb, mrb_value copy) static mrb_value struct_aref_sym(mrb_state *mrb, mrb_value obj, mrb_sym id) { - mrb_value members, *ptr; - const mrb_value *ptr_members; - mrb_int i, len; - - members = struct_members(mrb, obj); - ptr_members = RARRAY_PTR(members); - len = RARRAY_LEN(members); - ptr = RSTRUCT_PTR(obj); - for (i=0; i= RSTRUCT_LEN(s)) return mrb_nil_value(); return RSTRUCT_PTR(s)[idx]; } @@ -409,9 +484,9 @@ struct_aref_int(mrb_state *mrb, mrb_value s, mrb_int i) * struct[fixnum] -> anObject * * Attribute Reference---Returns the value of the instance variable - * named by symbol, or indexed (0..length-1) by - * fixnum. Will raise NameError if the named - * variable does not exist, or IndexError if the index is + * named by *symbol*, or indexed (0..length-1) by + * *fixnum*. Will raise `NameError` if the named + * variable does not exist, or `IndexError` if the index is * out of range. * * Customer = Struct.new(:name, :address, :zip) @@ -439,18 +514,12 @@ mrb_struct_aref(mrb_state *mrb, mrb_value s) static mrb_value mrb_struct_aset_sym(mrb_state *mrb, mrb_value s, mrb_sym id, mrb_value val) { - mrb_value members, *ptr; - const mrb_value *ptr_members; - mrb_int i, len; - - members = struct_members(mrb, s); - len = RARRAY_LEN(members); - ptr = RSTRUCT_PTR(s); - ptr_members = RARRAY_PTR(members); - for (i=0; i obj * * Attribute Assignment---Assigns to the instance variable named by - * symbol or fixnum the value obj and - * returns it. Will raise a NameError if the named - * variable does not exist, or an IndexError if the index + * *symbol* or *fixnum* the value *obj* and + * returns it. Will raise a `NameError` if the named + * variable does not exist, or an `IndexError` if the index * is out of range. * * Customer = Struct.new(:name, :address, :zip) @@ -483,7 +552,6 @@ mrb_struct_aset_sym(mrb_state *mrb, mrb_value s, mrb_sym id, mrb_value val) static mrb_value mrb_struct_aset(mrb_state *mrb, mrb_value s) { - mrb_int i; mrb_value idx; mrb_value val; @@ -497,18 +565,9 @@ mrb_struct_aset(mrb_state *mrb, mrb_value s) return mrb_struct_aset_sym(mrb, s, mrb_symbol(idx), val); } - i = mrb_as_int(mrb, idx); - if (i < 0) i = RSTRUCT_LEN(s) + i; - if (i < 0) { - mrb_raisef(mrb, E_INDEX_ERROR, - "offset %i too small for struct(size:%i)", i, RSTRUCT_LEN(s)); - } - if (RSTRUCT_LEN(s) <= i) { - mrb_raisef(mrb, E_INDEX_ERROR, - "offset %i too large for struct(size:%i)", i, RSTRUCT_LEN(s)); - } - mrb_struct_modify(mrb, s); - return RSTRUCT_PTR(s)[i] = val; + mrb_int i = struct_index(mrb, mrb_as_int(mrb, idx), num_members(mrb, s)); + mrb_ary_set(mrb, s, i, val); + return val; } /* 15.2.18.4.1 */ @@ -516,10 +575,10 @@ mrb_struct_aset(mrb_state *mrb, mrb_value s) * call-seq: * struct == other_struct -> true or false * - * Equality---Returns true if other_struct is + * Equality---Returns `true` if *other_struct* is * equal to this one: they must be of the same class as generated by - * Struct::new, and the values of all instance variables - * must be equal (according to Object#==). + * `Struct::new`, and the values of all instance variables + * must be equal (according to `Object#==`). * * Customer = Struct.new(:name, :address, :zip) * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) @@ -533,8 +592,6 @@ static mrb_value mrb_struct_equal(mrb_state *mrb, mrb_value s) { mrb_value s2 = mrb_get_arg1(mrb); - mrb_value *ptr, *ptr2; - mrb_int i, len; if (mrb_obj_equal(mrb, s, s2)) { return mrb_true_value(); @@ -543,15 +600,23 @@ mrb_struct_equal(mrb_state *mrb, mrb_value s) return mrb_false_value(); } if (RSTRUCT_LEN(s) != RSTRUCT_LEN(s2)) { - mrb_bug(mrb, "inconsistent struct"); /* should never happen */ + return mrb_false_value(); } - ptr = RSTRUCT_PTR(s); - ptr2 = RSTRUCT_PTR(s2); - len = RSTRUCT_LEN(s); - for (i=0; i true or false * * Two structures are equal if they are the same object, or if all their - * fields are equal (using eql?). + * fields are equal (using `eql?`). */ static mrb_value mrb_struct_eql(mrb_state *mrb, mrb_value s) @@ -579,8 +644,14 @@ mrb_struct_eql(mrb_state *mrb, mrb_value s) return mrb_false_value(); } if (RSTRUCT_LEN(s) != RSTRUCT_LEN(s2)) { - mrb_bug(mrb, "inconsistent struct"); /* should never happen */ + return mrb_false_value(); } + + /* Check for recursion */ + if (MRB_RECURSIVE_BINARY_FUNC_P(mrb, MRB_SYM_Q(eql), s, s2)) { + return mrb_false_value(); + } + ptr = RSTRUCT_PTR(s); ptr2 = RSTRUCT_PTR(s2); len = RSTRUCT_LEN(s); @@ -628,14 +699,11 @@ mrb_struct_to_a(mrb_state *mrb, mrb_value self) static mrb_value mrb_struct_to_h(mrb_state *mrb, mrb_value self) { - mrb_value members, ret; - mrb_int i; + mrb_value members = struct_members(mrb, self); + mrb_value ret = mrb_hash_new_capa(mrb, RARRAY_LEN(members)); - members = struct_members(mrb, self); - ret = mrb_hash_new_capa(mrb, RARRAY_LEN(members)); - - for (i = 0; i < RARRAY_LEN(members); ++i) { - mrb_hash_set(mrb, ret, RARRAY_PTR(members)[i], RSTRUCT_PTR(self)[i]); + for (mrb_int i = 0; i < RARRAY_LEN(members); i++) { + mrb_hash_set(mrb, ret, RARRAY_PTR(members)[i], mrb_ary_ref(mrb, self, i)); } return ret; @@ -653,43 +721,88 @@ mrb_struct_values_at(mrb_state *mrb, mrb_value self) } /* - * A Struct is a convenient way to bundle a number of + * call-seq: + * struct.to_s -> string + * struct.inspect -> string + * + * Returns a string representation of Data + */ +static mrb_value +mrb_struct_to_s(mrb_state *mrb, mrb_value self) +{ + mrb->c->ci->mid = MRB_SYM(inspect); + mrb_value ret = mrb_str_new_lit(mrb, "#"); + return ret; + } + mrb_value members = struct_members(mrb, self); + mrb_int mlen = RARRAY_LEN(members); + mrb_value *mems = RARRAY_PTR(members); + for (mrb_int i=0; i0) mrb_str_cat_lit(mrb, ret, ", "); + mrb_str_cat(mrb, ret, name, len); + mrb_str_cat_lit(mrb, ret, "="); + mrb_str_cat_str(mrb, ret, mrb_inspect(mrb, mrb_ary_ref(mrb, self, i))); + mrb_gc_arena_restore(mrb, ai); + } + mrb_str_cat_lit(mrb, ret, ">"); + + return ret; +} + +/* + * A `Struct` is a convenient way to bundle a number of * attributes together, using accessor methods, without having to write * an explicit class. * - * The Struct class is a generator of specific classes, + * The `Struct` class is a generator of specific classes, * each one of which is defined to hold a set of variables and their * accessors. In these examples, we'll call the generated class - * "CustomerClass," and we'll show an example instance of that - * class as "CustomerInst." + * "*Customer*Class," and we'll show an example instance of that + * class as "*Customer*Inst." * - * In the descriptions that follow, the parameter symbol refers + * In the descriptions that follow, the parameter *symbol* refers * to a symbol, which is either a quoted string or a - * Symbol (such as :name). + * `Symbol` (such as `:name`). */ +/* ---------------------------*/ +static const mrb_mt_entry struct_rom_entries[] = { + MRB_MT_ENTRY(mrb_struct_equal, MRB_OPSYM(eq), MRB_ARGS_REQ(1)), /* 15.2.18.4.1 */ + MRB_MT_ENTRY(mrb_struct_aref, MRB_OPSYM(aref), MRB_ARGS_REQ(1)), /* 15.2.18.4.2 */ + MRB_MT_ENTRY(mrb_struct_aset, MRB_OPSYM(aset), MRB_ARGS_REQ(2)), /* 15.2.18.4.3 */ + MRB_MT_ENTRY(mrb_struct_members, MRB_SYM(members), MRB_ARGS_NONE()), /* 15.2.18.4.6 */ + MRB_MT_ENTRY(mrb_struct_initialize, MRB_SYM(initialize), MRB_ARGS_ANY()), /* 15.2.18.4.8 */ + MRB_MT_ENTRY(mrb_struct_init_copy, MRB_SYM(initialize_copy), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), /* 15.2.18.4.9 */ + MRB_MT_ENTRY(mrb_struct_eql, MRB_SYM_Q(eql), MRB_ARGS_REQ(1)), /* 15.2.18.4.12(x) */ + MRB_MT_ENTRY(mrb_struct_to_s, MRB_SYM(to_s), MRB_ARGS_NONE()), /* 15.2.18.4.11(x) */ + MRB_MT_ENTRY(mrb_struct_to_s, MRB_SYM(inspect), MRB_ARGS_NONE()), /* 15.2.18.4.10(x) */ + MRB_MT_ENTRY(mrb_struct_len, MRB_SYM(size), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_struct_len, MRB_SYM(length), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_struct_to_a, MRB_SYM(to_a), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_struct_to_a, MRB_SYM(values), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_struct_to_h, MRB_SYM(to_h), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_struct_values_at, MRB_SYM(values_at), MRB_ARGS_ANY()), +}; + void mrb_mruby_struct_gem_init(mrb_state* mrb) { - struct RClass *st; - st = mrb_define_class(mrb, "Struct", mrb->object_class); - MRB_SET_INSTANCE_TT(st, MRB_TT_ARRAY); - - mrb_define_class_method(mrb, st, "new", mrb_struct_s_def, MRB_ARGS_ANY()); /* 15.2.18.3.1 */ - - mrb_define_method(mrb, st, "==", mrb_struct_equal, MRB_ARGS_REQ(1)); /* 15.2.18.4.1 */ - mrb_define_method(mrb, st, "[]", mrb_struct_aref, MRB_ARGS_REQ(1)); /* 15.2.18.4.2 */ - mrb_define_method(mrb, st, "[]=", mrb_struct_aset, MRB_ARGS_REQ(2)); /* 15.2.18.4.3 */ - mrb_define_method(mrb, st, "members", mrb_struct_members, MRB_ARGS_NONE()); /* 15.2.18.4.6 */ - mrb_define_method(mrb, st, "initialize", mrb_struct_initialize, MRB_ARGS_ANY()); /* 15.2.18.4.8 */ - mrb_define_method(mrb, st, "initialize_copy", mrb_struct_init_copy, MRB_ARGS_REQ(1)); /* 15.2.18.4.9 */ - mrb_define_method(mrb, st, "eql?", mrb_struct_eql, MRB_ARGS_REQ(1)); /* 15.2.18.4.12(x) */ - - mrb_define_method(mrb, st, "size", mrb_struct_len, MRB_ARGS_NONE()); - mrb_define_method(mrb, st, "length", mrb_struct_len, MRB_ARGS_NONE()); - mrb_define_method(mrb, st, "to_a", mrb_struct_to_a, MRB_ARGS_NONE()); - mrb_define_method(mrb, st, "values", mrb_struct_to_a, MRB_ARGS_NONE()); - mrb_define_method(mrb, st, "to_h", mrb_struct_to_h, MRB_ARGS_NONE()); - mrb_define_method(mrb, st, "values_at", mrb_struct_values_at, MRB_ARGS_ANY()); + struct RClass *st = mrb_define_class_id(mrb, MRB_SYM(Struct), mrb->object_class); + MRB_SET_INSTANCE_TT(st, MRB_TT_STRUCT); + MRB_UNDEF_ALLOCATOR(st); + + mrb_define_class_method_id(mrb, st, MRB_SYM(new), mrb_struct_s_def, MRB_ARGS_ANY()); /* 15.2.18.3.1 */ + + MRB_MT_INIT_ROM(mrb, st, struct_rom_entries); } void diff --git a/mrbgems/mruby-struct/test/struct.rb b/mrbgems/mruby-struct/test/struct.rb index db0fa56d86..b2a3451869 100644 --- a/mrbgems/mruby-struct/test/struct.rb +++ b/mrbgems/mruby-struct/test/struct.rb @@ -119,7 +119,7 @@ c = Struct.new(:m1, :m2, :m3, :m4, :m5, :recur) cc = c.new(1,2,3,4,5,nil) cc.recur = cc - assert_equal "#>", cc.inspect + assert_equal "#>", cc.inspect end assert('Struct#length, Struct#size') do @@ -166,13 +166,13 @@ begin Struct.new("Test", :a) a = Struct::Test.new("a") - Struct.remove_const :Test + Struct.__send__(:remove_const,:Test) Struct.new("Test", :a, :b) assert_raise(TypeError) do - a.initialize_copy(Struct::Test.new("a", "b")) + a.__send__(:initialize_copy, Struct::Test.new("a", "b")) end ensure - Struct.remove_const :Test + Struct.__send__(:remove_const,:Test) end end @@ -197,7 +197,7 @@ end assert 'Struct#freeze' do - c = Struct.new :m + c = Struct.new(:m) o = c.new o.m = :test @@ -208,3 +208,96 @@ assert_raise(FrozenError) { o[:m] = :modify } assert_equal :test, o.m end + +assert 'method visibility with Struct' do + c = Struct.new(:r, :g, :b) do + def good! + "GOOD!" + end + + private + def bad! + "BAD!" + end + end + + assert_equal "GOOD!" do + c.new.good! + end + + assert_raise NoMethodError do + c.new.bad! + end +end + +assert "Struct initialize with keyword arguments" do + c = Struct.new(:foo, :bar) + + o = c.new(foo: 1, bar: 2) + assert_equal 1, o.foo + assert_equal 2, o.bar + + o2 = c.new(bar: 1, foo: 2) + assert_equal 2, o2.foo + assert_equal 1, o2.bar + + o3 = c.new(foo: :test) + assert_equal :test, o3.foo + assert_equal nil, o3.bar + + o4 = c.new + assert_equal nil, o4.foo + assert_equal nil, o4.bar + + assert_raise_with_message_pattern(ArgumentError, "unknown keywords: roo, baq") do + c.new(foo: 1, roo: nil, baq: :test) + end +end + +assert "Struct initialize when :keyword_init is true" do + c = Struct.new(:foo, :bar, keyword_init: true) + + o = c.new(foo: 1, bar: 2) + assert_equal 1, o.foo + assert_equal 2, o.bar + + o2 = c.new + assert_equal nil, o2.foo + assert_equal nil, o2.bar + + assert_raise(ArgumentError) do + c.new(1, 2) + end + + assert_raise(ArgumentError) do + c.new({foo: 1}, {bar: 2}) + end +end + +assert "Struct initialize when :keyword_init is false" do + c = Struct.new(:foo, :bar, keyword_init: false) + + o = c.new(1, 2) + assert_equal 1, o.foo + assert_equal 2, o.bar + + o2 = c.new(foo: 1, bar: 2) + assert_equal({foo: 1, bar: 2}, o2.foo) + assert_equal nil, o2.bar + + o3 = c.new + assert_equal nil, o3.foo + assert_equal nil, o3.bar +end + +assert "Struct initialize when :keyword_init is non-boolean value (treat as true)" do + c = Struct.new(:foo, :bar, keyword_init: 12) + + o = c.new(foo: 1, bar: 2) + assert_equal 1, o.foo + assert_equal 2, o.bar + + assert_raise(ArgumentError) do + c.new(1, 2) + end +end diff --git a/mrbgems/mruby-symbol-ext/README.md b/mrbgems/mruby-symbol-ext/README.md new file mode 100644 index 0000000000..3d797f0dcf --- /dev/null +++ b/mrbgems/mruby-symbol-ext/README.md @@ -0,0 +1,50 @@ +# mruby-symbol-ext + +This gem extends mruby's `Symbol` class with additional useful methods. + +## Methods + +Here are the methods added to the `Symbol` class: + +### `capitalize` + +- **call-seq:** `sym.capitalize -> symbol` +- Returns a new symbol with the first character converted to uppercase and the remainder to lowercase. Equivalent to `sym.to_s.capitalize.intern`. + +### `downcase` + +- **call-seq:** `sym.downcase -> symbol` +- Returns a new symbol with all characters converted to lowercase. Equivalent to `sym.to_s.downcase.intern`. + +### `upcase` + +- **call-seq:** `sym.upcase -> symbol` +- Returns a new symbol with all characters converted to uppercase. Equivalent to `sym.to_s.upcase.intern`. + +### `casecmp(other_symbol)` + +- **call-seq:** `sym.casecmp(other) -> -1, 0, +1 or nil` +- Performs a case-insensitive comparison between two symbols. Returns -1, 0, or +1 if `other_symbol` is a symbol. Returns `nil` if `other_symbol` is not a symbol. + +### `casecmp?(other_symbol)` + +- **call-seq:** `sym.casecmp?(other) -> true, false, or nil` +- Returns `true` if the receiver and `other_symbol` are equal after case folding, `false` if they are not equal. Returns `nil` if `other_symbol` is not a symbol. + +### `empty?` + +- **call-seq:** `sym.empty? -> true or false` +- Returns `true` if the symbol's string representation is empty (i.e., `:""`), `false` otherwise. + +### `length` / `size` + +- **call-seq:** + - `sym.length -> integer` + - `sym.size -> integer` +- Returns the length of the symbol's string representation. `size` is an alias for `length`. + +### `Symbol.all_symbols` (Conditional) + +- **call-seq:** `Symbol.all_symbols => array` +- Returns an array of all symbols currently in mruby's symbol table. +- **Note:** This method is only available if mruby is compiled with the `MRB_USE_ALL_SYMBOLS` define. diff --git a/mrbgems/mruby-symbol-ext/mrblib/symbol.rb b/mrbgems/mruby-symbol-ext/mrblib/symbol.rb index 99fa275d56..821cb1d2ab 100644 --- a/mrbgems/mruby-symbol-ext/mrblib/symbol.rb +++ b/mrbgems/mruby-symbol-ext/mrblib/symbol.rb @@ -7,7 +7,7 @@ class Symbol # call-seq: # sym.capitalize -> symbol # - # Same as sym.to_s.capitalize.intern. + # Same as `sym.to_s.capitalize.intern`. def capitalize (self.to_s.capitalize! || self).to_sym @@ -17,7 +17,7 @@ def capitalize # call-seq: # sym.downcase -> symbol # - # Same as sym.to_s.downcase.intern. + # Same as `sym.to_s.downcase.intern`. def downcase (self.to_s.downcase! || self).to_sym @@ -27,7 +27,7 @@ def downcase # call-seq: # sym.upcase -> symbol # - # Same as sym.to_s.upcase.intern. + # Same as `sym.to_s.upcase.intern`. def upcase (self.to_s.upcase! || self).to_sym @@ -37,7 +37,7 @@ def upcase # call-seq: # sym.casecmp(other) -> -1, 0, +1 or nil # - # Case-insensitive version of Symbol#<=>. + # Case-insensitive version of `Symbol#<=>`. def casecmp(other) return nil unless other.kind_of?(Symbol) @@ -69,4 +69,10 @@ def empty? self.length == 0 end + def slice *args + to_s.slice(*args) + end + + alias [] slice + end diff --git a/mrbgems/mruby-symbol-ext/src/symbol.c b/mrbgems/mruby-symbol-ext/src/symbol.c index 4340d74a3d..ed4521ea05 100644 --- a/mrbgems/mruby-symbol-ext/src/symbol.c +++ b/mrbgems/mruby-symbol-ext/src/symbol.c @@ -1,10 +1,8 @@ #include #include +#include #include #include -#ifdef MRB_USE_ALL_SYMBOLS -# include -#endif /* * call-seq: @@ -25,13 +23,13 @@ static mrb_value mrb_sym_all_symbols(mrb_state *mrb, mrb_value self) { - mrb_sym i, lim; mrb_value ary = mrb_ary_new_capa(mrb, mrb->symidx); - for (i=1; i<=MRB_PRESYM_MAX; i++) { + for (mrb_sym i=1; i<=MRB_PRESYM_MAX; i++) { mrb_ary_push(mrb, ary, mrb_symbol_value(i)); } - for (i=1, lim=mrb->symidx+1; isymidx + 1; + for (mrb_sym i=1; i integer * - * Same as sym.to_s.length. + * Same as `sym.to_s.length`. */ static mrb_value mrb_sym_length(mrb_state *mrb, mrb_value self) @@ -59,15 +57,19 @@ mrb_sym_length(mrb_state *mrb, mrb_value self) return mrb_fixnum_value(len); } +static const mrb_mt_entry symbol_ext_rom_entries[] = { + MRB_MT_ENTRY(mrb_sym_length, MRB_SYM(length), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_sym_length, MRB_SYM(size), MRB_ARGS_NONE()), +}; + void mrb_mruby_symbol_ext_gem_init(mrb_state* mrb) { struct RClass *s = mrb->symbol_class; #ifdef MRB_USE_ALL_SYMBOLS - mrb_define_class_method(mrb, s, "all_symbols", mrb_sym_all_symbols, MRB_ARGS_NONE()); + mrb_define_class_method_id(mrb, s, MRB_SYM(all_symbols), mrb_sym_all_symbols, MRB_ARGS_NONE()); #endif - mrb_define_method(mrb, s, "length", mrb_sym_length, MRB_ARGS_NONE()); - mrb_define_method(mrb, s, "size", mrb_sym_length, MRB_ARGS_NONE()); + MRB_MT_INIT_ROM(mrb, s, symbol_ext_rom_entries); } void diff --git a/mrbgems/mruby-symbol-ext/test/symbol.rb b/mrbgems/mruby-symbol-ext/test/symbol.rb index a4fb27733b..2dd26467de 100644 --- a/mrbgems/mruby-symbol-ext/test/symbol.rb +++ b/mrbgems/mruby-symbol-ext/test/symbol.rb @@ -1,4 +1,3 @@ -# coding: utf-8 ## # Symbol(Ext) Test @@ -54,3 +53,15 @@ assert('Symbol#intern') do assert_equal :test, :test.intern end + +assert('Symbol#slice') do + assert_equal 'a', :abc.slice(0) + assert_equal 'ab', :abc.slice(0, 2) + assert_nil :abc.slice(4, 4) +end + +assert('Symbol#[]') do + assert_equal 'a', :abc[0] + assert_equal 'ab', :abc[0, 2] + assert_nil :abc[4, 4] +end diff --git a/mrbgems/mruby-task/README.md b/mrbgems/mruby-task/README.md new file mode 100644 index 0000000000..04a9881f75 --- /dev/null +++ b/mrbgems/mruby-task/README.md @@ -0,0 +1,857 @@ +# mruby-task + +mruby-task is an mrbgem that provides cooperative multitasking with preemptive +scheduling for mruby. It enables concurrent execution of multiple tasks within +a single mruby VM instance using a priority-based scheduler with tick-based +time slicing. + +## Purpose + +The primary purpose of `mruby-task` is to enable mruby applications to: + +- Execute multiple tasks concurrently within a single VM. +- Schedule tasks based on priority (0-255, where 0 is highest priority). +- Provide cooperative yielding with `Task.pass`. +- Support preemptive scheduling via timer-based interrupts. +- Synchronize tasks using `sleep`, `join` and `Task::Queue`. +- Suspend and resume tasks programmatically. +- Coordinate producers and consumers via `Task::Queue` without polling. + +## Architecture + +### Task Scheduler + +The scheduler uses four priority-sorted queues: + +- **DORMANT**: Tasks that have not started or have finished execution. +- **READY**: Tasks ready to run, ordered by priority. +- **WAITING**: Tasks waiting for sleep timeout or join completion. +- **SUSPENDED**: Tasks manually suspended via `#suspend`. + +### Tick-Based Preemption + +A platform-specific timer generates periodic ticks (default: 4ms). Each task +receives a timeslice (default: 3 ticks = 12ms) before being preempted. The +scheduler automatically switches to the next ready task when: + +- A task's timeslice expires. +- A task calls `sleep`, `Task.pass`, or `join`. +- A task finishes execution. + +## Functionality + +### Creating Tasks + +Tasks are created with `Task.new` and begin execution immediately: + +```ruby +# Create a task with default priority (128) +task = Task.new do + puts "Hello from task!" + sleep 1 + puts "Task resumed" +end + +# Create a named task with custom priority +task = Task.new(name: "worker", priority: 64) do + loop do + process_data + Task.pass # Yield to other tasks + end +end + +# Start the scheduler (blocks until all tasks complete or idle) +Task.run +``` + +### Task Class Methods + +- **`Task.new(name: nil, priority: 128) { block }`**: Creates and starts a new task. Lower priority values run first (0 is highest priority). The `name` parameter must be a String if provided. The `priority` must be an Integer between 0-255. + + ```ruby + task = Task.new(name: "background", priority: 200) do + # Task code here + end + ``` + +- **`Task.current`**: Returns the currently executing task. + + ```ruby + current = Task.current + puts "Running: #{current.name}" + ``` + +- **`Task.list`**: Returns an array of all tasks (including dormant tasks). + + ```ruby + Task.list.each do |task| + puts "#{task.name}: #{task.status}" + end + ``` + +- **`Task.pass`**: Cooperatively yields execution to other ready tasks. + + ```ruby + loop do + do_work + Task.pass # Let other tasks run + end + ``` + +- **`Task.get(name)`**: Finds a task by name. Returns `nil` if not found. + + ```ruby + worker = Task.get("worker") + worker.suspend if worker + ``` + +- **`Task.stat`**: Returns a hash containing scheduler statistics: + - `:tick` (Integer): Current tick count + - `:wakeup_tick` (Integer): Next scheduled wakeup tick + - `:dormant`, `:ready`, `:waiting`, `:suspended`: Each is a hash with: + - `:count` (Integer): Number of tasks in this queue + - `:tasks` (Array): Array of task objects in this queue + + ```ruby + stats = Task.stat + puts "Tick: #{stats[:tick]}" + puts "Ready tasks: #{stats[:ready][:count]}" + stats[:ready][:tasks].each { |t| puts t.name } + ``` + +- **`Task.run`**: Starts the scheduler main loop. Blocks until no tasks remain ready or waiting. + + ```ruby + Task.new { do_async_work } + Task.run # Run scheduler until tasks complete + ``` + +- **`Task.tick`**: Returns the current tick count in milliseconds. This is the elapsed time since the scheduler started, measured in tick units. + + ```ruby + start_tick = Task.tick + do_work + elapsed = Task.tick - start_tick + puts "Work took #{elapsed} ms" + ``` + +### Task Instance Methods + +- **`#status`**: Returns the task status as a symbol (`:DORMANT`, `:READY`, `:RUNNING`, `:WAITING`, `:SUSPENDED`). + + ```ruby + puts task.status # => :READY + ``` + +- **`#name`** / **`#name=`**: Get or set the task name. Returns `"(noname)"` for unnamed tasks. Note: `name=` accepts any value, but `Task.new` requires a String. + + ```ruby + task.name = "worker-1" + puts task.name # => "worker-1" + + task = Task.new { } + puts task.name # => "(noname)" + ``` + +- **`#priority`** / **`#priority=`**: Get or set the task priority (0-255). Changing priority requeues the task. + + ```ruby + task.priority = 100 # Lower priority + ``` + +- **`#suspend`**: Suspends the task, moving it to the SUSPENDED queue. The task will not run until `#resume` is called. + + ```ruby + task.suspend + # Later... + task.resume + ``` + +- **`#resume`**: Resumes a suspended task, moving it to the READY queue. + + ```ruby + task.resume + ``` + +- **`#terminate`**: Terminates the task immediately, moving it to DORMANT state. + + ```ruby + task.terminate + ``` + +- **`#join`**: Blocks the current task until the target task completes. + + ```ruby + worker = Task.new { do_long_operation } + worker.join # Wait for completion + puts "Worker finished" + ``` + +### Task::Error + +`Task::Error` is the base error class for queue-related errors. It inherits +from `StandardError`. + +```ruby +begin + q = Task::Queue.new + q.close + q.push(1) # raises Task::Error: queue closed +rescue Task::Error => e + puts e.message # => "queue closed" +end +``` + +Errors raised by `Task::Queue`: + +| Situation | Error class | Message | +| ---------------------------------- | ------------- | ---------------- | +| `push` on a closed queue | `Task::Error` | `"queue closed"` | +| `pop(true)` on an empty open queue | `Task::Error` | `"queue empty"` | + +Internal consistency errors (programming errors, not queue logic) still use +`RuntimeError`: + +| Situation | Error class | +| ------------------------------------------------------- | -------------- | +| Blocking `pop` called from root context | `RuntimeError` | +| Blocking `pop` called from inside a C function boundary | `RuntimeError` | + +### Task::Queue + +`Task::Queue` is a thread-safe FIFO queue for inter-task communication, analogous to CRuby's `Queue`. A task that calls `pop` on an empty queue is automatically moved to the WAITING state and rescheduled when another task pushes an item. +No polling or explicit sleep is required. + +#### Creating a Queue + +```ruby +q = Task::Queue.new +``` + +#### Pushing Items + +```ruby +q.push(item) # add to the back of the queue; raises Task::Error if closed +q << item # alias for push +q.enq(item) # alias for push +``` + +#### Popping Items + +```ruby +item = q.pop # block until an item is available +item = q.pop(true) # non-blocking: raises Task::Error if empty +item = q.deq # alias for pop +item = q.shift # alias for pop +``` + +Behavior when the queue is **closed**: + +- `pop` on a non-empty closed queue returns the remaining items normally. +- `pop` on an empty closed queue returns `nil` immediately (no blocking). + +#### Inspecting the Queue + +```ruby +q.size # => Integer: number of items currently in the queue +q.length # alias for size +q.empty? # => true if the queue has no items +q.num_waiting # => Integer: number of tasks currently blocked in pop +``` + +#### Clearing and Closing + +```ruby +q.clear # remove all items; returns self +q.close # close the queue; returns self +q.closed? # => true if the queue has been closed +``` + +After `close`: + +- `push` raises `Task::Error`. +- Tasks blocked in `pop` are woken and receive `nil` for an empty queue. +- `pop` on remaining items still returns them normally; returns `nil` when empty. + +#### Producer/Consumer Example + +```ruby +q = Task::Queue.new + +Task.new(name: "producer") do + 10.times do |i| + q.push(i) + sleep 0.1 + end + q.close +end + +Task.new(name: "consumer") do + loop do + item = q.pop # blocks until an item arrives or the queue closes + break if item.nil? + puts "got #{item}" + end +end + +Task.run +``` + +### Kernel Methods (Sleep) + +The task scheduler provides task-aware sleep methods that cooperatively yield +to other tasks: + +- **`sleep(seconds)`**: Sleeps for the specified duration. Accepts integers or floats (when `MRB_NO_FLOAT` is not defined). + + ```ruby + sleep 1 # Sleep for 1 second + sleep 0.5 # Sleep for 500ms (with float support) + sleep # Sleep indefinitely (no arguments) + ``` + +- **`usleep(microseconds)`**: Sleeps for the specified number of microseconds. + + ```ruby + usleep 500000 # Sleep for 500ms + usleep 1000 # Sleep for 1ms + ``` + +- **`sleep_ms(milliseconds)`**: Sleeps for the specified number of milliseconds. + + ```ruby + sleep_ms 100 # Sleep for 100ms + ``` + +**Note**: These methods override `mruby-sleep` when both gems are present. They +provide task-aware cooperative sleep when called from within a task, or +blocking sleep when called outside task context. + +## Configuration + +### Build Configuration + +Enable the task scheduler by including the gem in your build config: + +```ruby +MRuby::Build.new do |conf| + # ... other configuration ... + + conf.gem :core => 'mruby-task' + + # ... other gems ... +end +``` + +This automatically defines `MRB_USE_TASK_SCHEDULER`. + +### Timing Configuration + +Timing parameters can be configured via C defines: + +```c +#define MRB_TICK_UNIT 4 // Tick period in milliseconds (default: 4ms) +#define MRB_TIMESLICE_TICK_COUNT 3 // Ticks per timeslice (default: 3) +``` + +Default timeslice: `MRB_TICK_UNIT * MRB_TIMESLICE_TICK_COUNT = 12ms` + +### Stack Configuration + +Task stack and call info sizes: + +```c +#define TASK_STACK_INIT_SIZE 64 // Initial stack entries (default: 64) +#define TASK_CI_INIT_SIZE 8 // Initial callinfo entries (default: 8) +``` + +These grow automatically as needed, similar to Fiber. + +## Platform Requirements + +### HAL (Hardware Abstraction Layer) + +The task scheduler uses a Hardware Abstraction Layer (HAL) to support +different platforms. Platform-specific timer and interrupt handling +lives in `mruby-task/ports//task_hal.c`, and the active +port is selected at build configuration time. + +#### Built-in Ports + +**`ports/posix/`** - For POSIX systems (Linux, macOS, BSD, Unix) + +- Uses `SIGALRM` and `setitimer()` for the timer +- Uses `sigprocmask()` for interrupt protection +- Uses `SA_RESTART` to prevent `EINTR` on system calls +- Supports multiple VMs per process +- **WASM/Emscripten support**: When compiled with Emscripten + (`__EMSCRIPTEN__` defined), the SIGALRM timer is automatically + disabled. JavaScript handles tick calls via `setInterval`, + preventing double-increment of the tick counter + +**`ports/win/`** - For Windows + +- Uses the multimedia timer API (`timeSetEvent`/`timeKillEvent`) +- Uses `CRITICAL_SECTION` for interrupt protection +- Supports multiple VMs per process + +#### Port Selection + +The build system auto-detects `:posix` on Linux/macOS/BSD and `:win` +on Windows. For explicit control (cross-compilation, Cosmopolitan, +etc.) set `conf.ports` in your build configuration: + +```ruby +MRuby::Build.new do |conf| + conf.ports :posix # or :win, or your own port name + conf.gem core: 'mruby-task' +end +``` + +When `conf.ports` is set, the corresponding `ports//` +directory in every gem is compiled in; directories for other port +names are skipped. This is how `build_config/cosmopolitan.rb` reuses +the POSIX HAL on Cosmopolitan. + +**Multi-VM support:** + +- Both built-in ports support multiple `mrb_state` instances +- A single system timer ticks all registered VMs +- Maximum VMs: configurable via `MRB_TASK_MAX_VMS` (default: 8) + +#### Adding a New HAL + +To support a new platform (an RTOS, a UI runloop like GLib or Cocoa, +a bare-metal target), add a new directory +`mruby-task/ports//task_hal.c` and contribute it upstream. The +HAL must implement the six functions declared in +`mruby-task/include/task_hal.h`: + +```c +/** + * Initialize timer and register VM. + * Called during gem initialization. Must set up a periodic timer + * that calls mrb_tick(mrb) every MRB_TICK_UNIT milliseconds. + */ +void mrb_hal_task_init(mrb_state *mrb); + +/** + * Cleanup timer and unregister VM. + * Called during gem finalization. + */ +void mrb_hal_task_final(mrb_state *mrb); + +/** + * Enable timer interrupts (exit critical section). + * Must be reentrant for nested calls. + */ +void mrb_task_enable_irq(void); + +/** + * Disable timer interrupts (enter critical section). + * Must be reentrant for nested calls. + */ +void mrb_task_disable_irq(void); + +/** + * Put CPU in low-power/idle mode. + * Called when no tasks are ready but some are waiting; should sleep + * roughly MRB_TICK_UNIT milliseconds and allow the timer to fire. + */ +void mrb_hal_task_idle_cpu(mrb_state *mrb); + +/** + * Sleep for the given number of microseconds of wall-clock time. + * Must allow timer interrupts/callbacks during the sleep and should + * complete the full duration even if interrupted. + */ +void mrb_hal_task_sleep_us(mrb_state *mrb, mrb_int usec); +``` + +Users selecting your port set `conf.ports :`; the built-in +POSIX and Windows ports are then skipped, so there is no symbol +clash. See `mruby-task/ports/posix/task_hal.c` and +`mruby-task/ports/win/task_hal.c` for reference implementations. + +## C API + +The task scheduler provides a C API for integrating with C code and embedding environments. All exported functions are marked with `MRB_API` for external linkage. + +### Core Scheduler API + +```c +/* Tick handler - called by timer interrupt */ +MRB_API void mrb_tick(mrb_state *mrb); + +/* Main scheduler loop - blocks until all tasks complete */ +MRB_API mrb_value mrb_task_run(mrb_state *mrb); + +/* Single-step task execution for event loop integration */ +MRB_API mrb_value mrb_task_run_once(mrb_state *mrb); +``` + +**`mrb_task_run_once()`** executes one ready task and returns. This is designed for WASM/JavaScript event loop integration where the scheduler should yield control back to the browser between task executions. + +### Task Creation API + +```c +/* Create a task from a proc */ +MRB_API mrb_value mrb_create_task(mrb_state *mrb, struct RProc *proc, + mrb_value name, mrb_value priority, + mrb_value top_self); +``` + +Creates a new task from a `RProc` object. The `name` should be a String or `mrb_nil_value()`, `priority` should be an Integer (0-255) or `mrb_nil_value()` for default priority (128), and `top_self` sets the task's self object (or `mrb_nil_value()` to use default). + +### Task Control API + +```c +/* Suspend a task - prevents it from running until resumed */ +MRB_API void mrb_suspend_task(mrb_state *mrb, mrb_value task); + +/* Resume a suspended task - moves it back to ready/waiting queue */ +MRB_API void mrb_resume_task(mrb_state *mrb, mrb_value task); + +/* Terminate a task immediately - moves to dormant state */ +MRB_API void mrb_terminate_task(mrb_state *mrb, mrb_value task); + +/* Stop a task - marks as stopped without moving to dormant */ +MRB_API mrb_bool mrb_stop_task(mrb_state *mrb, mrb_value task); + +/* Get task result value */ +MRB_API mrb_value mrb_task_value(mrb_state *mrb, mrb_value task); + +/* Get task status symbol */ +MRB_API mrb_value mrb_task_status(mrb_state *mrb, mrb_value task); +``` + +**Note**: These functions raise `E_RUNTIME_ERROR` if called during synchronous execution (when `scheduler_lock > 0`). + +### Synchronous Execution API + +```c +/* Execute a proc synchronously without context switching */ +MRB_API mrb_value mrb_execute_proc_synchronously(mrb_state *mrb, + mrb_value proc, + mrb_int argc, + const mrb_value *argv); +``` + +This function creates a temporary task, executes it to completion, and returns the result. During execution, the scheduler is locked (`scheduler_lock++`), preventing any asynchronous task operations. This is designed for **picoruby-wasm** to execute Ruby code synchronously from JavaScript without triggering task switches. + +**Key characteristics**: + +- Blocks until the proc completes execution +- No context switching occurs during execution +- Other tasks cannot be created, suspended, or resumed while locked +- Temporary task is automatically freed after execution +- If the proc raises an exception, it's returned as the result + +### Task Context Management API + +```c +/* Initialize task context with a new proc */ +MRB_API void mrb_task_init_context(mrb_state *mrb, mrb_value task, + struct RProc *proc); + +/* Reset task context to initial state */ +MRB_API void mrb_task_reset_context(mrb_state *mrb, mrb_value task); + +/* Set proc for task (without full reinitialization) */ +MRB_API void mrb_task_proc_set(mrb_state *mrb, mrb_value task, + struct RProc *proc); +``` + +These functions are designed for **picoruby-sandbox** to reuse task objects for multiple executions without reallocating memory. `mrb_task_init_context()` fully reinitializes the context, while `mrb_task_proc_set()` only updates the proc pointer. + +### Example: Event Loop Integration (WASM) + +```c +/* JavaScript calls this function periodically via setInterval */ +void js_tick_callback(void) { + mrb_tick(mrb); +} + +/* Main loop - called from JavaScript event loop */ +mrb_value js_run_task_once(void) { + return mrb_task_run_once(mrb); +} + +/* Execute Ruby code synchronously from JavaScript */ +mrb_value js_eval_sync(const char *code) { + struct RProc *proc = mrb_generate_code(mrb, code); + return mrb_execute_proc_synchronously(mrb, mrb_obj_value(proc), 0, NULL); +} +``` + +### Example: Task Creation from C + +```c +/* Create a background task */ +static mrb_value my_background_proc(mrb_state *mrb, mrb_value self) { + /* Task code here */ + return mrb_nil_value(); +} + +void create_background_task(mrb_state *mrb) { + struct RProc *proc = mrb_proc_new_cfunc(mrb, my_background_proc); + mrb_value name = mrb_str_new_cstr(mrb, "background"); + mrb_value priority = mrb_fixnum_value(128); + mrb_value task = mrb_create_task(mrb, proc, name, priority, mrb_nil_value()); +} +``` + +## Examples + +### Basic Multitasking + +```ruby +Task.new(name: "task1") do + 3.times do |i| + puts "Task 1: #{i}" + sleep 0.1 + end +end + +Task.new(name: "task2") do + 3.times do |i| + puts "Task 2: #{i}" + sleep 0.1 + end +end + +Task.run # Run until both tasks complete +``` + +### Priority Scheduling + +```ruby +# High priority task (runs first) +Task.new(priority: 0) do + puts "High priority" + sleep 0.1 +end + +# Low priority task (runs after high priority yields) +Task.new(priority: 255) do + puts "Low priority" +end + +Task.run +``` + +### Cooperative Yielding + +```ruby +Task.new(name: "cooperative") do + loop do + do_some_work + Task.pass # Yield to other tasks + break if done? + end +end + +Task.new(name: "other") do + do_other_work +end + +Task.run +``` + +### Task Synchronization + +```ruby +worker = Task.new(name: "worker") do + puts "Working..." + sleep 1 + puts "Work done" + 42 # Return value +end + +Task.new(name: "main") do + puts "Waiting for worker..." + worker.join + puts "Worker completed!" +end + +Task.run +``` + +### Task Control + +```ruby +task = Task.new do + loop do + puts "Running..." + sleep 0.5 + end +end + +# From another task or after Task.run returns: +task.suspend # Pause execution +sleep 1 +task.resume # Resume execution +sleep 1 +task.terminate # Stop permanently +``` + +### Producer/Consumer with Task::Queue + +Using `Task::Queue` eliminates polling. The consumer blocks on `pop` and wakes +automatically when the producer pushes an item. + +```ruby +q = Task::Queue.new +results = [] + +Task.new(name: "producer") do + ["a", "b", "c"].each do |v| + q.push(v) + sleep 0.1 + end + q.close +end + +Task.new(name: "consumer") do + loop do + item = q.pop # blocks here until an item is available or the queue closes + break if item.nil? + results << item + end +end + +Task.run +puts results.inspect # => ["a", "b", "c"] +``` + +Multiple producers and consumers work naturally: + +```ruby +q = Task::Queue.new + +3.times { |i| Task.new { q.push(i) } } + +received = [] +3.times { Task.new { received << q.pop } } + +Task.run +puts received.sort.inspect # => [0, 1, 2] +``` + +## Limitations and Compatibility + +### Relationship with Fiber + +Tasks and Fibers both use `mrb_context` but are **not compatible**: + +- Tasks are scheduled automatically by the preemptive scheduler. +- Fibers require explicit `Fiber.yield` and `resume` calls. +- Do not mix Tasks and Fibers in the same application. + +### Relationship with mruby-sleep + +When `mruby-task` is enabled: + +- The `sleep`, `usleep`, and `sleep_ms` methods are task-aware. +- Inside a task, they cooperatively yield to the scheduler. +- Outside a task (or when scheduler is idle), they block. +- `mruby-sleep` should be excluded from your build when using `mruby-task`. + +### Thread Safety + +The task scheduler is **not thread-safe**. All tasks run in a single OS thread. +For multi-core concurrency, use OS threads with separate mruby VMs per thread. + +### Exceptions + +Uncaught exceptions in a task will terminate that task but not affect other +tasks. The exception is not propagated to the scheduler. + +### GC Integration + +Task contexts are registered with the garbage collector. Tasks and their +stacks/callinfo are properly marked and freed. + +## Testing + +The gem includes tests that verify: + +- Task creation and execution +- Priority scheduling +- Sleep and wakeup +- Join synchronization +- Suspend and resume +- Task.pass cooperative yielding +- Task::Queue push/pop FIFO order +- Task::Queue blocking pop and wakeup on push +- Task::Queue close semantics +- Task::Queue num_waiting count + +Run tests with: + +```bash +rake CONFIG=host-debug test:lib +``` + +## Implementation Details + +### Task States + +Each task can be in one of five states: + +- `DORMANT (0x00)`: Not started or finished +- `READY (0x02)`: Ready to run +- `RUNNING (0x03)`: Currently executing +- `WAITING (0x04)`: Waiting (sleep, join, queue) +- `SUSPENDED (0x08)`: Manually suspended + +### Wait Reasons + +When a task is in WAITING state, the reason indicates why: + +- `NONE (0x00)`: No specific reason +- `SLEEP (0x01)`: Sleeping for time +- `MUTEX (0x02)`: Waiting for mutex (reserved, not yet implemented) +- `JOIN (0x04)`: Waiting for another task +- `QUEUE (0x08)`: Waiting for an item to be pushed to a `Task::Queue` + +### Scheduler Algorithm + +1. Get the highest-priority task from the READY queue +2. Set task status to RUNNING and switch context (`mrb->c = &task->c`) +3. Execute task via `mrb_vm_exec()` until: + - Task yields (sleep, pass, join) + - Timeslice expires (preemption) + - Task completes or terminates +4. Handle completion (wake joined tasks, move to DORMANT) +5. Run incremental GC if needed +6. Requeue task to READY (if still running) or appropriate queue +7. Repeat from step 1 + +### Scheduler Lock + +The scheduler includes a lock counter (`mrb->task.scheduler_lock`) that prevents asynchronous task operations during synchronous execution: + +- When `scheduler_lock > 0`, asynchronous APIs (`mrb_create_task`, `mrb_suspend_task`, `mrb_resume_task`) raise `E_RUNTIME_ERROR` +- This ensures that synchronous execution (via `mrb_execute_proc_synchronously`) completes without interference +- The lock is incremented when entering synchronous execution and decremented when exiting +- Maximum lock depth is 254 to prevent overflow + +## Future Enhancements + +Planned features not yet implemented: + +- **Mutex support**: Thread-safe synchronization primitives +- **Task::SizedQueue**: Bounded queue with backpressure (push blocks when full) +- **Task.raise**: Throw exceptions to other tasks +- **Task#value**: Retrieve task return value (like Thread#value) +- **Per-task timeslice configuration** + +## License + +MIT License (same as mruby) + +## See Also + +- `mruby-fiber`: Cooperative fibers with manual control +- `mruby-sleep`: Blocking sleep (superseded by mruby-task) diff --git a/mrbgems/mruby-task/examples/inspection.rb b/mrbgems/mruby-task/examples/inspection.rb new file mode 100644 index 0000000000..08068f5359 --- /dev/null +++ b/mrbgems/mruby-task/examples/inspection.rb @@ -0,0 +1,65 @@ +# Task Inspection Example +# Demonstrates task status and inspect methods + +puts "=== Task Inspection Demo ===" +puts + +# Create some tasks +worker1 = Task.new(name: "worker-1") do + puts "Worker 1: Starting" + puts "Worker 1: My status is #{Task.current.status}" + puts "Worker 1: My inspect is #{Task.current.inspect}" + sleep 0.5 + puts "Worker 1: Finished" +end + +worker2 = Task.new(name: "worker-2") do + sleep 1 + puts "Worker 2: Finished" +end + +# Inspect before running +puts "Initial task states:" +puts " worker1: #{worker1.inspect}" +puts " status=#{worker1.status}" +puts " worker2: #{worker2.inspect}" +puts " status=#{worker2.status}" +puts + +# Create an observer task +observer = Task.new(name: "observer") do + sleep 0.2 + + puts "\nObserver checking task states:" + puts " worker1: #{worker1.inspect}" + puts " status=#{worker1.status}" + puts " worker2: #{worker2.inspect}" + puts " status=#{worker2.status}" + + sleep 0.5 + + # Suspend worker2 + worker2.suspend + puts "\nObserver suspended worker2:" + puts " worker2: #{worker2.inspect}" + puts " status=#{worker2.status}" + + sleep 0.3 + + # Resume and terminate + worker2.resume + sleep 0.1 + worker2.terminate + + puts "\nObserver terminated worker2:" + puts " worker2: #{worker2.inspect}" + puts " status=#{worker2.status}" +end + +# Run the scheduler +Task.run + +puts "\n=== Final states ===" +puts " worker1: #{worker1.inspect} status=#{worker1.status}" +puts " worker2: #{worker2.inspect} status=#{worker2.status}" +puts " observer: #{observer.inspect} status=#{observer.status}" diff --git a/mrbgems/mruby-task/examples/priority.rb b/mrbgems/mruby-task/examples/priority.rb new file mode 100644 index 0000000000..ee3ec97ed5 --- /dev/null +++ b/mrbgems/mruby-task/examples/priority.rb @@ -0,0 +1,41 @@ +# Priority Scheduling Example +# Lower priority values = higher priority (0 is highest) + +puts "=== Priority Scheduling Demo ===" +puts + +# Create tasks with different priorities +low_priority = Task.new(name: "low-priority", priority: 200) do + 5.times do |i| + puts " [Low Priority] iteration #{i}" + sleep 0.1 + end +end + +high_priority = Task.new(name: "high-priority", priority: 50) do + 5.times do |i| + puts "[High Priority] iteration #{i}" + sleep 0.1 + end +end + +medium_priority = Task.new(name: "medium-priority", priority: 128) do + 5.times do |i| + puts " [Medium Priority] iteration #{i}" + sleep 0.1 + end +end + +puts "Created 3 tasks with different priorities:" +puts " High: priority=50" +puts " Medium: priority=128" +puts " Low: priority=200" +puts +puts "Tasks will run in priority order (highest first)" +puts + +# Run the scheduler +Task.run + +puts +puts "=== All tasks completed ===" diff --git a/mrbgems/mruby-task/examples/producer_consumer.rb b/mrbgems/mruby-task/examples/producer_consumer.rb new file mode 100644 index 0000000000..4a1427f8b4 --- /dev/null +++ b/mrbgems/mruby-task/examples/producer_consumer.rb @@ -0,0 +1,58 @@ +# Producer/Consumer Example +# Demonstrates task coordination using shared state + +puts "=== Producer/Consumer Demo ===" +puts + +# Shared buffer +$buffer = [] +$max_items = 10 +$produced = 0 + +# Producer task +producer = Task.new(name: "producer") do + $max_items.times do |i| + # Produce an item + item = "item-#{i}" + $buffer << item + $produced += 1 + puts "Producer: created #{item} (buffer size: #{$buffer.size})" + + sleep 0.3 + end + puts "Producer: finished producing #{$max_items} items" +end + +# Consumer task +consumer = Task.new(name: "consumer") do + consumed = 0 + + while consumed < $max_items + if $buffer.empty? + puts "Consumer: buffer empty, waiting..." + sleep 0.2 + else + item = $buffer.shift + consumed += 1 + puts "Consumer: consumed #{item} (#{consumed}/#{$max_items})" + sleep 0.5 + end + end + + puts "Consumer: finished consuming #{consumed} items" +end + +# Monitor task +monitor = Task.new(name: "monitor") do + loop do + sleep 1 + puts "Monitor: produced=#{$produced}, buffer=#{$buffer.size}, remaining=#{$max_items - $produced}" + break if $produced >= $max_items && $buffer.empty? + end +end + +# Run the scheduler +Task.run + +puts "\n=== All tasks completed ===" +puts "Final state: produced=#{$produced}, buffer=#{$buffer.size}" diff --git a/mrbgems/mruby-task/examples/queue.rb b/mrbgems/mruby-task/examples/queue.rb new file mode 100644 index 0000000000..0308de8bff --- /dev/null +++ b/mrbgems/mruby-task/examples/queue.rb @@ -0,0 +1,54 @@ +# Task::Queue Example +# Demonstrates producer/consumer coordination using Task::Queue. +# No polling required - consumers block until items are available. + +puts "=== Task::Queue Producer/Consumer Demo ===" +puts + +TOTAL_ITEMS = 10 +q = Task::Queue.new +produced = 0 +consumed = 0 + +producer = Task.new(name: "producer") do + TOTAL_ITEMS.times do |i| + item = "item-#{i}" + q.push(item) + produced += 1 + puts "Producer: pushed #{item}" + sleep 0.1 + end + q.close + puts "Producer: closed queue after #{produced} items" +end + +consumer1 = Task.new(name: "consumer-1") do + loop do + item = q.pop # blocks until an item is available or queue closes + break if item.nil? + consumed += 1 + puts "Consumer-1: got #{item}" + sleep 0.15 + end + puts "Consumer-1: done" +end + +consumer2 = Task.new(name: "consumer-2") do + loop do + item = q.pop + break if item.nil? + consumed += 1 + puts "Consumer-2: got #{item}" + sleep 0.2 + end + puts "Consumer-2: done" +end + +Task.run + +puts +puts "=== Summary ===" +puts "Produced: #{produced}" +puts "Consumed: #{consumed}" +puts "Queue size: #{q.size}" +puts "Queue closed: #{q.closed?}" diff --git a/mrbgems/mruby-task/examples/simple.rb b/mrbgems/mruby-task/examples/simple.rb new file mode 100644 index 0000000000..52a8de5719 --- /dev/null +++ b/mrbgems/mruby-task/examples/simple.rb @@ -0,0 +1,27 @@ +# Simple Task Example +# Basic usage of mruby-task + +puts "=== Simple Task Demo ===" +puts + +# Create a simple task +Task.new(name: "hello") do + 3.times do |i| + puts "Hello from task, iteration #{i}" + sleep 0.5 + end +end + +# Create another task +Task.new(name: "world") do + 3.times do |i| + puts "World from task, iteration #{i}" + sleep 0.7 + end +end + +# Run the scheduler - blocks until all tasks complete +puts "Starting tasks..." +Task.run + +puts "All tasks completed!" diff --git a/mrbgems/mruby-task/examples/statistics.rb b/mrbgems/mruby-task/examples/statistics.rb new file mode 100644 index 0000000000..3fd4fa98aa --- /dev/null +++ b/mrbgems/mruby-task/examples/statistics.rb @@ -0,0 +1,59 @@ +# Task Statistics Example +# Demonstrates Task.stat for monitoring scheduler state + +def print_stats(label) + stat = Task.stat + puts "\n#{label}" + puts " Tick: #{stat[:tick]}, Next wakeup: #{stat[:wakeup_tick]}" + puts " Ready: #{stat[:ready][:count]} tasks" + puts " Waiting: #{stat[:waiting][:count]} tasks" + puts " Suspended: #{stat[:suspended][:count]} tasks" + puts " Dormant: #{stat[:dormant][:count]} tasks" +end + +puts "=== Task Statistics Demo ===" + +print_stats("Initial state:") + +# Create several tasks +workers = [] +5.times do |i| + workers << Task.new(name: "worker-#{i}") do + sleep 0.5 + puts "Worker #{i} completed" + end +end + +print_stats("After creating 5 tasks:") + +# Create a monitor task +monitor = Task.new(name: "monitor") do + sleep 0.1 + print_stats("Monitor: After 0.1s:") + + # Suspend a couple tasks + workers[2].suspend + workers[3].suspend + + print_stats("Monitor: After suspending 2 tasks:") + + sleep 0.5 + + print_stats("Monitor: After 0.5s more:") + + # Resume suspended tasks + workers[2].resume + workers[3].resume + + print_stats("Monitor: After resuming tasks:") + + # Wait for all workers + workers.each(&:join) +end + +# Run the scheduler +Task.run + +print_stats("Final state:") + +puts "\n=== Demo complete ===" diff --git a/mrbgems/mruby-task/examples/suspend_resume.rb b/mrbgems/mruby-task/examples/suspend_resume.rb new file mode 100644 index 0000000000..c90832ecc6 --- /dev/null +++ b/mrbgems/mruby-task/examples/suspend_resume.rb @@ -0,0 +1,47 @@ +# Suspend and Resume Example +# Demonstrates manual task control + +puts "=== Suspend/Resume Demo ===" +puts + +$counter = 0 + +# Create a worker task +worker = Task.new(name: "worker") do + 10.times do |i| + puts "Worker: iteration #{i}, counter=#{$counter}" + $counter += 1 + sleep 0.2 + end + puts "Worker: finished!" +end + +# Create a controller task +controller = Task.new(name: "controller") do + sleep 0.5 + + puts "\nController: Suspending worker..." + worker.suspend + puts "Controller: Worker status = #{worker.status}" + + sleep 1 + + puts "\nController: Incrementing counter while worker is suspended..." + 5.times do + $counter += 10 + puts "Controller: counter=#{$counter}" + sleep 0.2 + end + + puts "\nController: Resuming worker..." + worker.resume + puts "Controller: Worker status = #{worker.status}" + + # Wait for worker to finish + worker.join +end + +# Run the scheduler +Task.run + +puts "\n=== Final counter value: #{$counter} ===" diff --git a/mrbgems/mruby-task/examples/task_pass.rb b/mrbgems/mruby-task/examples/task_pass.rb new file mode 100644 index 0000000000..3a629f1d64 --- /dev/null +++ b/mrbgems/mruby-task/examples/task_pass.rb @@ -0,0 +1,21 @@ +$global = 0 +task = Task.new do + 10.times do |i| + puts "Task #{i} is running" + $global += 1 + sleep 1 + end +end + +while true + sleep 0.5 + if $global < 6 + puts "Global variable is less than 6, waiting..." + Task.pass + else + break + end +end + +puts "Global variable reached 6, exiting loop" +Task.run diff --git a/mrbgems/mruby-task/include/task.h b/mrbgems/mruby-task/include/task.h new file mode 100644 index 0000000000..6a464eb9c1 --- /dev/null +++ b/mrbgems/mruby-task/include/task.h @@ -0,0 +1,171 @@ +/* +** task.h - Task scheduler +** +** See Copyright Notice in mruby.h +*/ + +#ifndef MRUBY_TASK_H +#define MRUBY_TASK_H + +#include + +/* + * Task status values (bit-mapped) + */ +enum { + MRB_TASK_STATUS_DORMANT = 0x00, /* Not started or finished */ + MRB_TASK_STATUS_READY = 0x02, /* Ready to run */ + MRB_TASK_STATUS_RUNNING = 0x03, /* Currently executing */ + MRB_TASK_STATUS_WAITING = 0x04, /* Waiting for condition */ + MRB_TASK_STATUS_SUSPENDED = 0x08, /* Manually suspended */ +}; + +/* + * Task wait reason + */ +enum { + MRB_TASK_REASON_NONE = 0x00, /* No specific reason */ + MRB_TASK_REASON_SLEEP = 0x01, /* Sleeping for time */ + MRB_TASK_REASON_MUTEX = 0x02, /* Waiting for mutex (reserved) */ + MRB_TASK_REASON_JOIN = 0x04, /* Waiting for another task */ + MRB_TASK_REASON_QUEUE = 0x08, /* Waiting for queue item */ +}; + +struct mrb_task_queue; + +/* + * Task structure - represents a single task in the scheduler + * + * Memory-optimized layout: + * - Removed priority_preemption (always equals priority): 1 byte + * - Removed started flag (inferred from c.status): 1 byte + * - Unified wakeup_tick/join/mutex into single union: 4 bytes + * - Removed redundant proc field (stored in c.ci->proc): 8 bytes + * Total savings: ~18 bytes per task (14% reduction) + */ +typedef struct mrb_task { + struct mrb_task *next; /* Linked list pointer */ + uint8_t priority; /* Priority (0-255, 0=highest) */ + uint8_t status; /* Current status (TASKSTATUS enum) */ + uint8_t reason; /* Wait reason (TASKREASON enum) */ + volatile uint8_t timeslice; /* Remaining ticks while RUNNING */ + mrb_value name; /* Optional task name */ + + /* Wait-specific data - mutually exclusive based on reason field */ + union { + uint32_t wakeup_tick; /* Tick count to wake up (REASON_SLEEP) */ + const struct mrb_task *join; /* Task being waited on (REASON_JOIN) */ + void *mutex; /* Mutex pointer (REASON_MUTEX, reserved) */ + struct mrb_task_queue *queue; /* Queue being waited on (REASON_QUEUE) */ + } wait; + + mrb_value self; /* Ruby Task object reference */ + + mrb_value result; /* Task return value */ + + struct mrb_context c; /* Execution context (stack, callinfo, etc) */ +} mrb_task; + +/* + * Task queue configuration + * (mrb_task_state is defined in mruby.h) + */ +#define MRB_NUM_TASK_QUEUE 4 + +/* Queue indices */ +#define MRB_TASK_QUEUE_DORMANT 0 +#define MRB_TASK_QUEUE_READY 1 +#define MRB_TASK_QUEUE_WAITING 2 +#define MRB_TASK_QUEUE_SUSPENDED 3 + +/* Configuration */ +#ifndef MRB_TICK_UNIT +#define MRB_TICK_UNIT 4 /* Tick period in milliseconds */ +#endif + +#ifndef MRB_TIMESLICE_TICK_COUNT +#define MRB_TIMESLICE_TICK_COUNT 3 /* Number of ticks per timeslice */ +#endif + +#define TASK_STACK_INIT_SIZE 64 /* Initial task stack size */ +#define TASK_CI_INIT_SIZE 4 /* Initial task callinfo size */ + +/* + * HAL (Hardware Abstraction Layer) functions are declared in task_hal.h. + */ + +/* + * GC integration + */ +void mrb_task_mark_all(mrb_state *mrb); + +/* + * Core task scheduler API + */ +MRB_API void mrb_tick(mrb_state *mrb); +MRB_API mrb_value mrb_task_run(mrb_state *mrb); +MRB_API mrb_value mrb_task_run_once(mrb_state *mrb); + +/* + * Task creation API + */ +MRB_API mrb_value mrb_create_task(mrb_state *mrb, struct RProc *proc, mrb_value name, mrb_value priority, mrb_value top_self); + +/* + * Synchronous execution API (for picoruby-wasm) + */ +MRB_API mrb_value mrb_execute_proc_synchronously(mrb_state *mrb, mrb_value proc, mrb_int argc, const mrb_value *argv); + +/* + * Task control API + * Note: mrb_task_run is the main scheduler loop (for picoruby-sandbox and picoruby-wasm) + */ +MRB_API void mrb_suspend_task(mrb_state *mrb, mrb_value task); +MRB_API void mrb_resume_task(mrb_state *mrb, mrb_value task); +MRB_API void mrb_terminate_task(mrb_state *mrb, mrb_value task); +MRB_API mrb_bool mrb_stop_task(mrb_state *mrb, mrb_value task); +MRB_API mrb_value mrb_task_value(mrb_state *mrb, mrb_value task); +MRB_API mrb_value mrb_task_status(mrb_state *mrb, mrb_value self); + +/* + * Task context management API (for picoruby-sandbox) + */ +MRB_API void mrb_task_init_context(mrb_state *mrb, mrb_value task, struct RProc *proc); +MRB_API void mrb_task_reset_context(mrb_state *mrb, mrb_value task); +MRB_API void mrb_task_proc_set(mrb_state *mrb, mrb_value task, struct RProc *proc); + +/* + * Internal helpers - used by task.c and task_queue.c + */ +#include +#include "task_hal.h" + +/* Scheduler state accessors (require a local mrb variable in scope) */ +#define q_dormant_ (mrb->task.queues[MRB_TASK_QUEUE_DORMANT]) +#define q_ready_ (mrb->task.queues[MRB_TASK_QUEUE_READY]) +#define q_waiting_ (mrb->task.queues[MRB_TASK_QUEUE_WAITING]) +#define q_suspended_ (mrb->task.queues[MRB_TASK_QUEUE_SUSPENDED]) +#define tick_ (mrb->task.tick) +#define wakeup_tick_ (mrb->task.wakeup_tick) +#define switching_ (mrb->task.switching) + +/* Recover the mrb_task that owns the current mruby context */ +#define MRB2TASK(mrb) ((mrb_task *)((uint8_t *)(mrb)->c - offsetof(mrb_task, c))) + +/* Raise if the scheduler is locked (synchronous execution in progress) */ +static inline void +task_check_scheduler_lock(mrb_state *mrb) +{ + if (mrb->task.scheduler_lock > 0) { + mrb_raise(mrb, E_RUNTIME_ERROR, "Cannot use asynchronous Task API during synchronous execution"); + } +} + +/* Priority-queue insert/delete - defined in task.c */ +void mrb_task_q_insert(mrb_state *mrb, mrb_task *t); +void mrb_task_q_delete(mrb_state *mrb, mrb_task *t); + +/* Task::Queue class registration - defined in task_queue.c */ +void mrb_init_task_queue(mrb_state *mrb, struct RClass *task_class); + +#endif /* MRUBY_TASK_H */ diff --git a/mrbgems/mruby-task/include/task_hal.h b/mrbgems/mruby-task/include/task_hal.h new file mode 100644 index 0000000000..b79827ed29 --- /dev/null +++ b/mrbgems/mruby-task/include/task_hal.h @@ -0,0 +1,162 @@ +/* +** task_hal.h - Task scheduler Hardware Abstraction Layer (HAL) +** +** See Copyright Notice in mruby.h +** +** This header defines the HAL interface that platform-specific implementations +** must provide. The HAL separates platform-specific timer and interrupt handling +** from the core task scheduler logic. +*/ + +#ifndef MRUBY_TASK_HAL_H +#define MRUBY_TASK_HAL_H + +#include + +/* + * Configuration - can be overridden in platform-specific build configs + */ + +/* Tick period in milliseconds - how often the timer fires */ +#ifndef MRB_TICK_UNIT +#define MRB_TICK_UNIT 4 +#endif + +/* Number of timer ticks per task timeslice */ +#ifndef MRB_TIMESLICE_TICK_COUNT +#define MRB_TIMESLICE_TICK_COUNT 3 +#endif + +/* Maximum number of concurrent mrb_state instances with task scheduler */ +#ifndef MRB_TASK_MAX_VMS +#define MRB_TASK_MAX_VMS 8 +#endif + +/* + * HAL Interface Functions + * + * The port directory under mruby-task/ports// provides + * these functions. See README.md for adding a new port. + */ + +/** + * Initialize hardware timer and interrupt system + * + * Called once during mruby-task gem initialization. Should set up a periodic + * timer that calls mrb_tick(mrb) every MRB_TICK_UNIT milliseconds. + * + * Requirements: + * - Initialize platform-specific timer hardware/APIs + * - Set up timer to fire every MRB_TICK_UNIT milliseconds + * - Register mrb_state for multi-VM support if needed + * - Initialize interrupt protection mechanisms (mutexes, signal masks, etc.) + * - Timer should call mrb_tick() on each tick for all registered VMs + * + * @param mrb The mruby state to associate with the timer + */ +void mrb_hal_task_init(mrb_state *mrb); + +/** + * Cleanup timer and interrupt resources + * + * Called during mruby-task gem finalization. Should clean up all resources + * allocated by mrb_hal_task_init(). + * + * Requirements: + * - Stop and cleanup platform timer + * - Unregister mrb_state from multi-VM support + * - Free any allocated HAL resources + * - If last VM, cleanup global HAL state + * + * @param mrb The mruby state to disassociate from the timer + */ +void mrb_hal_task_final(mrb_state *mrb); + +/** + * Enable timer interrupts (exit critical section) + * + * Called by the task scheduler when it's safe to allow timer interrupts. + * Should enable timer interrupts/callbacks that were disabled by + * mrb_task_disable_irq(). + * + * Requirements: + * - Must be reentrant (can be called multiple times) + * - Should use nesting counter or equivalent for nested critical sections + * - On POSIX: unmask signals + * - On Windows: leave critical section + * - On embedded: enable timer interrupts + */ +void mrb_task_enable_irq(void); + +/** + * Disable timer interrupts (enter critical section) + * + * Called by the task scheduler before modifying shared task state. + * Should disable timer interrupts/callbacks to prevent concurrent access. + * + * Requirements: + * - Must be reentrant (can be called multiple times) + * - Should use nesting counter or equivalent for nested critical sections + * - On POSIX: block signals + * - On Windows: enter critical section + * - On embedded: disable timer interrupts + */ +void mrb_task_disable_irq(void); + +/** + * Put CPU in low-power/idle mode + * + * Called by the scheduler when no tasks are ready to run but some tasks + * are waiting or suspended. Should briefly idle the CPU or sleep for + * approximately MRB_TICK_UNIT milliseconds to allow timer to fire. + * + * Requirements: + * - Should return when timer fires or after ~MRB_TICK_UNIT milliseconds + * - Must allow timer interrupts to occur during idle + * - On POSIX: usleep() or nanosleep() + * - On Windows: Sleep() + * - On embedded: platform-specific sleep/wait-for-interrupt instruction + * + * @param mrb The mruby state (for context, may be unused) + */ +void mrb_hal_task_idle_cpu(mrb_state *mrb); + +/** + * Sleep for specified microseconds in wall-clock time + * + * Called by sleep functions when in root context (not in a task). + * Should sleep for the specified number of microseconds in real wall-clock + * time, allowing timer interrupts to occur during the sleep. + * + * Requirements: + * - Sleep for approximately usec microseconds in wall-clock time + * - Must allow timer interrupts/callbacks during sleep + * - Should handle interruptions gracefully and complete full sleep duration + * - On POSIX: use clock_gettime + nanosleep loop for accuracy + * - On Windows: use Sleep() with millisecond conversion + * - On embedded: platform-specific delay with interrupt support + * + * @param mrb The mruby state (for context, may be unused) + * @param usec Number of microseconds to sleep + */ +void mrb_hal_task_sleep_us(mrb_state *mrb, mrb_int usec); + +/* + * Core scheduler function (implemented in task.c, called by HAL) + */ + +/** + * Tick handler - advances scheduler time and wakes sleeping tasks + * + * HAL timer callback must call this function every MRB_TICK_UNIT milliseconds + * for each registered mrb_state. This function: + * - Increments the global tick counter + * - Decrements running task's timeslice + * - Wakes tasks whose sleep time has expired + * - Triggers context switches when needed + * + * @param mrb The mruby state to tick + */ +void mrb_tick(mrb_state *mrb); + +#endif /* MRUBY_TASK_HAL_H */ diff --git a/mrbgems/mruby-task/mrbgem.rake b/mrbgems/mruby-task/mrbgem.rake new file mode 100644 index 0000000000..484ac60c69 --- /dev/null +++ b/mrbgems/mruby-task/mrbgem.rake @@ -0,0 +1,49 @@ +MRuby::Gem::Specification.new('mruby-task') do |spec| + spec.license = 'MIT' + spec.authors = 'mruby developers' + spec.summary = 'Cooperative multitasking with preemptive scheduling' + + # Enable task scheduler globally (required for vm.c integration) + spec.build.defines << 'MRB_USE_TASK_SCHEDULER' + + if spec.for_windows? + spec.linker.libraries << "winmm" + end + + ports = spec.build.effective_ports + + # ports/glib/ needs glib-2.0 (GSource, GMainContext, GRecMutex) and + # gthread-2.0 (GThread). On modern distros gthread-2.0 is a transparent + # alias for glib-2.0; on older ones it's a separate .pc that pulls in + # -lpthread, so query it separately. + if ports.include?('glib') + unless spec.search_package('glib-2.0') && spec.search_package('gthread-2.0') + abort <<~MSG + [mruby-task] conf.ports :glib selected but pkg-config could not find + glib-2.0 / gthread-2.0. Install the GLib development headers + (Debian/Ubuntu: libglib2.0-dev; Fedora: glib2-devel; Arch: glib2; + macOS Homebrew: glib). For non-default install locations, set + PKG_CONFIG_PATH before invoking rake. + MSG + end + end + + # Optional demo tool that exercises the GLib HAL end-to-end (basic + # scheduling, priority ordering, timeslice preemption, auto-execution + # under a foreign GLib main loop). Default off; opt in from your + # build_config with: + # + # conf.cc.defines << 'MRB_TASK_BUILD_DEMO' + # conf.ports :glib + # conf.gem core: 'mruby-task' + # + # When enabled, `rake` produces bin/mruby-task-demo from + # tools/mruby-task-demo/. The define is only inspected here -- the + # demo's C source does not condition on it. + if spec.build.cc.defines.include?('MRB_TASK_BUILD_DEMO') + unless ports.include?('glib') + abort '[mruby-task] MRB_TASK_BUILD_DEMO requires conf.ports :glib' + end + spec.bins = %w(mruby_task_demo) + end +end diff --git a/mrbgems/mruby-task/mrblib/queue.rb b/mrbgems/mruby-task/mrblib/queue.rb new file mode 100644 index 0000000000..792dab885e --- /dev/null +++ b/mrbgems/mruby-task/mrblib/queue.rb @@ -0,0 +1,30 @@ +class Task + class Queue + # WAIT_RETRY is defined in C (task_queue.c gem init) + + def push(obj) + __push(obj) + self + end + alias enq push + alias << push + + # Blocks until an item is available (default), or raises if non_block is true. + # Returns nil if the queue is closed and empty. + # + # The loop is not a busy-wait. When __pop_try finds the queue empty it moves + # the current task to WAITING and sets switching_=TRUE before returning + # WAIT_RETRY. The VM detects switching_ at the next opcode boundary and + # exits mrb_vm_exec, handing control back to the scheduler. This task does + # not run again until a push (or close) moves it back to READY. The loop + # body therefore executes at most once per wakeup event. + def pop(non_block = false) + loop do + v = __pop_try(non_block) + return v unless v.equal?(WAIT_RETRY) + end + end + alias deq pop + alias shift pop + end +end diff --git a/mrbgems/mruby-task/ports/glib/task_hal.c b/mrbgems/mruby-task/ports/glib/task_hal.c new file mode 100644 index 0000000000..a8a1dbe6a4 --- /dev/null +++ b/mrbgems/mruby-task/ports/glib/task_hal.c @@ -0,0 +1,536 @@ +/* +** task_hal.c - GLib HAL for mruby-task +** +** See Copyright Notice in mruby.h +** +** Drives the mruby-task scheduler from an embedding application's GLib +** main loop. No Task.run is required from Ruby; the HAL fires +** mrb_task_run_once and mrb_tick automatically as the host loop iterates, +** which is the integration pattern a GTK or webview app uses in practice. +** +** Two GSources collaborate: +** +** 1. VM-run source on the thread-default GMainContext. Its callback +** runs mrb_task_run_once on every registered VM. +** +** 2. Tick source on a dedicated GMainContext, iterated by a private +** GThread. Its callback runs mrb_tick on every registered VM under +** a recursive mutex. The separate thread is what gives us +** preemption: it can fire mrb_tick while the VM thread is blocked +** inside mrb_vm_exec. +** +** Both are manual GSources -- NULL prepare/check, dispatch driven purely +** by ready_time updates via g_source_set_ready_time. g_timeout_source's +** auto-reschedule would race with park-when-idle. +** +** State machine, evaluated after every dispatch under the IRQ lock: +** +** has_ready (any q_ready_) : vm_run = 0, tick = +1 interval +** has_sleep (q_waiting_) : vm_run = -1 (parked), tick = soonest sleeper +** neither : both = -1 (parked) +** +** In has_sleep state the ticker is the sole waker: it fires at the +** sleeper deadline, catches the scheduler clock up via mrb_tick, and +** sets vm_run = 0 once a task is promoted to ready. In neither state, +** mrb_task_enable_irq is the wake: any Ruby-side scheduler activity +** (Task.new from a bind callback, etc.) sets vm_run = 0 from outside. +** +** Tickless catch-up: in has_sleep state the ticker can be parked for +** arbitrarily long. On fire we compute (now - last_fire_us) + remainder, +** divide by MRB_TICK_INTERVAL_US, and call mrb_tick that many times in +** one go. The leftover < interval is carried in remainder_us to the +** next fire, keeping the scheduler clock aligned with monotonic time. +** Net effect: a loop of long sleeps costs one wakeup per sleep period. +** +** Threading: +** - Per-thread state lives in heap-allocated mrb_task_thread_state, +** reachable via thread-local `ts`. The ticker thread receives a +** pointer at spawn; it never touches another thread's TLS. +** - On the main thread no GMainContext push is needed; the default +** context is used implicitly. +** - On any other thread that opens an mrb_state, the caller MUST first +** call g_main_context_push_thread_default(). This is the standard +** GLib convention used by libsoup, GIO async, GTask, etc. +** +** Locking: +** - irq_lock (GRecMutex) covers every mutation of mrb->task state. +** mrb_task_disable_irq / mrb_task_enable_irq are lock / unlock, +** with the enable side additionally setting vm_run = 0. +** - The ticker holds the lock across its mrb_tick batch. +** - arm_locked runs with the lock held so a concurrent ticker can't +** promote a sleeper between our inspection and our arm decision. +** - mrb_vm_exec runs WITHOUT the lock; the ticker can preempt it +** mid-execution by setting switching_. +** +** Supported platforms: any system with GLib 2.x and GThread (Linux, +** BSD, macOS, Windows with MinGW or MSVC, ...). +*/ + +#include +#include +#include "task.h" +#include "task_hal.h" +#include +#include +#include + +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define MRB_TASK_TLS _Thread_local +#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_C) || defined(__xlC__) || defined(__IBMC__) +# define MRB_TASK_TLS __thread +#elif defined(_MSC_VER) || defined(__BORLANDC__) +# define MRB_TASK_TLS __declspec(thread) +#else +# error "mruby-task GLib HAL: no thread-local storage qualifier known for this compiler" +#endif + +#define MRB_TICK_INTERVAL_US ((gint64)MRB_TICK_UNIT * 1000) + +typedef struct mrb_task_thread_state { + mrb_state *vm_list[MRB_TASK_MAX_VMS]; + int vm_count; + GRecMutex irq_lock; + + GMainContext *vm_ctx; + GSource *vm_run_src; + + GMainContext *tick_ctx; + GMainLoop *tick_loop; + GSource *tick_src; + GThread *ticker; + + /* Tickless catch-up: last_fire_us is the monotonic anchor; remainder_us + * is the sub-interval carry from the previous fire. */ + gint64 last_fire_us; + uint32_t remainder_us; +} mrb_task_thread_state; + +static MRB_TASK_TLS mrb_task_thread_state *ts; + +static gboolean +deadline_source_dispatch(GSource *source, GSourceFunc callback, gpointer user_data) +{ + (void)source; + if (!callback) { + return G_SOURCE_REMOVE; + } + return callback(user_data); +} + +static GSourceFuncs deadline_source_funcs = { + NULL, NULL, deadline_source_dispatch, NULL, NULL, NULL, +}; + +static void +free_thread_state(mrb_task_thread_state *s) +{ + if (!s) { + return; + } + if (s->ticker) { + if (s->tick_loop) { + g_main_loop_quit(s->tick_loop); + } + (void)g_thread_join(s->ticker); + s->ticker = NULL; + } + if (s->tick_src) { + g_source_destroy(s->tick_src); + g_source_unref(s->tick_src); + s->tick_src = NULL; + } + if (s->tick_loop) { + g_main_loop_unref(s->tick_loop); + s->tick_loop = NULL; + } + if (s->tick_ctx) { + g_main_context_unref(s->tick_ctx); + s->tick_ctx = NULL; + } + if (s->vm_run_src) { + g_source_destroy(s->vm_run_src); + g_source_unref(s->vm_run_src); + s->vm_run_src = NULL; + } + if (s->vm_ctx) { + g_main_context_unref(s->vm_ctx); + s->vm_ctx = NULL; + } + g_rec_mutex_clear(&s->irq_lock); + g_free(s); +} + +/* + * Walk every VM's queues, decide the arm state, apply it. Called with + * the IRQ lock held. q_waiting_ is walked directly to find the soonest + * SLEEP-reason wakeup; this is authoritative regardless of what + * mrb->task.wakeup_tick currently reads. + * + * Idempotent. Deadlines are computed relative to s->last_fire_us (the + * last actual ticker fire) rather than g_get_monotonic_time(), so + * repeated calls between fires yield the same ready_time and don't + * drift the cadence. set_ready_time is skipped when the value + * wouldn't change, to avoid the eventfd-write side effect. + */ +static void +arm_locked(mrb_task_thread_state *s) +{ + gint64 monotonic_now = g_get_monotonic_time(); + int32_t min_wake_offset = 0; + gboolean has_ready = FALSE; + gboolean has_sleep = FALSE; + gint64 new_vm_ready; + gint64 new_tick_ready; + gint64 cur_vm_ready; + gint64 cur_tick_ready; + int i; + + for (i = 0; i < s->vm_count; i++) { + mrb_state *vm = s->vm_list[i]; + if (!vm) continue; + + if (vm->task.queues[MRB_TASK_QUEUE_READY] != NULL) { + has_ready = TRUE; + continue; + } + + mrb_task *w = vm->task.queues[MRB_TASK_QUEUE_WAITING]; + while (w) { + if (w->reason == MRB_TASK_REASON_SLEEP) { + uint32_t wake = w->wait.wakeup_tick; + uint32_t tick = vm->task.tick; + int32_t offset = (int32_t)(wake - tick); + if (!has_sleep || offset < min_wake_offset) { + min_wake_offset = offset; + has_sleep = TRUE; + } + } + w = w->next; + } + } + + /* Anchor the catch-up clock when the ticker transitions from parked + * to active, so elapsed time counts from "now" rather than from + * before the park. */ + if ((has_ready || has_sleep) && + g_source_get_ready_time(s->tick_src) == -1) { + s->last_fire_us = monotonic_now; + s->remainder_us = 0; + } + + if (has_ready) { + new_vm_ready = 0; + new_tick_ready = s->last_fire_us + MRB_TICK_INTERVAL_US; + } + else if (has_sleep) { + new_vm_ready = -1; + if (min_wake_offset <= 0) { + new_tick_ready = 0; /* overdue, fire immediately */ + } + else { + new_tick_ready = s->last_fire_us + + (gint64)min_wake_offset * MRB_TICK_INTERVAL_US; + } + } + else { + new_vm_ready = -1; + new_tick_ready = -1; + } + + cur_vm_ready = g_source_get_ready_time(s->vm_run_src); + cur_tick_ready = g_source_get_ready_time(s->tick_src); + + if (new_vm_ready != cur_vm_ready) { + g_source_set_ready_time(s->vm_run_src, new_vm_ready); + } + if (new_tick_ready != cur_tick_ready) { + g_source_set_ready_time(s->tick_src, new_tick_ready); + } +} + +static gpointer +ticker_thread(gpointer data) +{ + mrb_task_thread_state *s = (mrb_task_thread_state *)data; + g_main_context_push_thread_default(s->tick_ctx); + g_main_loop_run(s->tick_loop); + g_main_context_pop_thread_default(s->tick_ctx); + return NULL; +} + +/* + * Tick GSource callback on the ticker thread. Computes catch-up ticks + * from elapsed monotonic time plus carried remainder, calls mrb_tick + * that many times under irq_lock, then lets arm_locked decide the + * next state (steady-state cadence vs tickless deadline vs full park). + */ +static gboolean +tick_source_cb(gpointer user_data) +{ + mrb_task_thread_state *s = (mrb_task_thread_state *)user_data; + int i; + gint64 now = g_get_monotonic_time(); + gint64 total_us = (now - s->last_fire_us) + (gint64)s->remainder_us; + gint64 raw_ticks = total_us / (gint64)MRB_TICK_INTERVAL_US; + uint32_t catch_up_ticks = (raw_ticks > (gint64)UINT32_MAX) + ? UINT32_MAX + : (uint32_t)raw_ticks; + + s->remainder_us = (uint32_t)(total_us - + (gint64)catch_up_ticks * (gint64)MRB_TICK_INTERVAL_US); + s->last_fire_us = now; + + g_rec_mutex_lock(&s->irq_lock); + for (i = 0; i < s->vm_count; i++) { + mrb_state *vm = s->vm_list[i]; + if (!vm) continue; + for (uint32_t k = 0; k < catch_up_ticks; k++) { + mrb_tick(vm); + } + } + arm_locked(s); + g_rec_mutex_unlock(&s->irq_lock); + + return G_SOURCE_CONTINUE; +} + +/* + * VM-run GSource callback on the VM thread. Snapshots vm_list under + * the IRQ lock, runs mrb_task_run_once on each entry outside the + * lock, then re-acquires to decide the next arm state. The snapshot + * protects against list-shape changes while we're iterating (e.g., a + * task body that registers or removes another mrb_state on this + * thread). + */ +static gboolean +vm_run_source_cb(gpointer user_data) +{ + mrb_state *snapshot[MRB_TASK_MAX_VMS]; + int snapshot_count; + int i; + (void)user_data; + + if (!ts) { + return G_SOURCE_CONTINUE; + } + + g_rec_mutex_lock(&ts->irq_lock); + snapshot_count = ts->vm_count; + memcpy(snapshot, ts->vm_list, (size_t)snapshot_count * sizeof(mrb_state *)); + g_rec_mutex_unlock(&ts->irq_lock); + + for (i = 0; i < snapshot_count; i++) { + if (snapshot[i]) { + (void)mrb_task_run_once(snapshot[i]); + } + } + + g_rec_mutex_lock(&ts->irq_lock); + arm_locked(ts); + g_rec_mutex_unlock(&ts->irq_lock); + + return G_SOURCE_CONTINUE; +} + +void +mrb_hal_task_init(mrb_state *mrb) +{ + int i; + int idx = -1; + gboolean first_on_thread = FALSE; + guint attach_id; + GError *err = NULL; + gchar *err_msg; + + for (i = 0; i < MRB_NUM_TASK_QUEUE; i++) { + mrb->task.queues[i] = NULL; + } + mrb->task.tick = 0; + mrb->task.wakeup_tick = UINT32_MAX; + mrb->task.switching = FALSE; + + if (ts == NULL) { + ts = g_new0(mrb_task_thread_state, 1); + g_rec_mutex_init(&ts->irq_lock); + ts->last_fire_us = g_get_monotonic_time(); + first_on_thread = TRUE; + } + + g_rec_mutex_lock(&ts->irq_lock); + + for (i = 0; i < ts->vm_count; i++) { + if (ts->vm_list[i] == mrb) { + idx = i; + break; + } + } + + if (idx < 0) { + if (ts->vm_count >= MRB_TASK_MAX_VMS) { + g_rec_mutex_unlock(&ts->irq_lock); + if (first_on_thread) { + free_thread_state(ts); + ts = NULL; + } + mrb_raisef(mrb, E_RUNTIME_ERROR, + "too many mrb_states with task scheduler on this thread " + "(max: %d)", + MRB_TASK_MAX_VMS); + } + ts->vm_list[ts->vm_count++] = mrb; + } + + g_rec_mutex_unlock(&ts->irq_lock); + + if (first_on_thread) { + ts->vm_ctx = g_main_context_ref_thread_default(); + g_assert_nonnull(ts->vm_ctx); + + ts->vm_run_src = g_source_new(&deadline_source_funcs, sizeof(GSource)); + g_assert_nonnull(ts->vm_run_src); + g_source_set_callback(ts->vm_run_src, vm_run_source_cb, NULL, NULL); + g_source_set_ready_time(ts->vm_run_src, -1); + + attach_id = g_source_attach(ts->vm_run_src, ts->vm_ctx); + if (attach_id == 0) { + free_thread_state(ts); + ts = NULL; + mrb_raise(mrb, E_RUNTIME_ERROR, + "mruby-task GLib HAL: g_source_attach failed for VM-run source"); + } + + ts->tick_ctx = g_main_context_new(); + g_assert_nonnull(ts->tick_ctx); + + ts->tick_loop = g_main_loop_new(ts->tick_ctx, FALSE); + g_assert_nonnull(ts->tick_loop); + + ts->tick_src = g_source_new(&deadline_source_funcs, sizeof(GSource)); + g_assert_nonnull(ts->tick_src); + g_source_set_callback(ts->tick_src, tick_source_cb, ts, NULL); + g_source_set_ready_time(ts->tick_src, -1); + + attach_id = g_source_attach(ts->tick_src, ts->tick_ctx); + if (attach_id == 0) { + free_thread_state(ts); + ts = NULL; + mrb_raise(mrb, E_RUNTIME_ERROR, + "mruby-task GLib HAL: g_source_attach failed for tick source"); + } + + ts->ticker = g_thread_try_new("mruby-task-tick", ticker_thread, ts, &err); + if (ts->ticker == NULL) { + /* Copy GLib's error message into a stack buffer before any mruby + * allocation, so mrb_raise's longjmp can't strand the GLib heap. */ + char buf[256]; + err_msg = g_strdup_printf( + "mruby-task GLib HAL: failed to spawn ticker thread: %s", + err ? err->message : "unknown error"); + g_strlcpy(buf, err_msg, sizeof(buf)); + g_free(err_msg); + if (err) { + g_error_free(err); + } + free_thread_state(ts); + ts = NULL; + mrb_raise(mrb, E_RUNTIME_ERROR, buf); + } + } +} + +void +mrb_hal_task_final(mrb_state *mrb) +{ + int i, j; + gboolean last_on_thread = FALSE; + + if (ts == NULL) { + return; + } + + g_rec_mutex_lock(&ts->irq_lock); + + for (i = 0; i < ts->vm_count; i++) { + if (ts->vm_list[i] == mrb) { + for (j = i; j < ts->vm_count - 1; j++) { + ts->vm_list[j] = ts->vm_list[j + 1]; + } + ts->vm_list[ts->vm_count - 1] = NULL; + ts->vm_count--; + break; + } + } + + if (ts->vm_count == 0) { + last_on_thread = TRUE; + } + + g_rec_mutex_unlock(&ts->irq_lock); + + if (last_on_thread) { + free_thread_state(ts); + ts = NULL; + } +} + +void +mrb_task_disable_irq(void) +{ + if (ts) { + g_rec_mutex_lock(&ts->irq_lock); + } +} + +/* Hooked into the scheduler's IRQ-release path. After any state + * change, re-evaluate the arm state of both sources via arm_locked. + * This is what wires up preemption: arm_locked sets tick_src's + * ready_time so the ticker thread fires mrb_tick on cadence. + * + * Without this, only vm_run_source_cb (the foreign-loop dispatch + * callback) ever calls arm_locked, so a Task.run-driven scheduler + * never arms the ticker -- preemption never happens, sleepers in + * q_waiting_ are never woken, and CPU-bound tasks spin forever. + * Calling arm_locked here covers both Task.run and foreign-loop + * drivers symmetrically. + * + * arm_locked also sets vm_run_src ready_time to 0 only when there's + * a ready task, which prevents the spurious wake during mrb_task_run's + * idle queue check (the disable_irq/check/enable_irq pattern over a + * read-only check produces no state change, so vm_run_src stays + * parked at -1). */ +void +mrb_task_enable_irq(void) +{ + if (!ts) { + return; + } + arm_locked(ts); + g_rec_mutex_unlock(&ts->irq_lock); +} + +/* Called only from mrb_task_run's idle loop. Iterates the VM context + * to dispatch any pending vm_run_src (e.g., the ticker just woke us + * because a sleeper became ready), then returns. Block-wait is OK now + * because mrb_task_enable_irq's q_ready_ guard prevents the spurious- + * wake loop that would otherwise spin this iteration. */ +void +mrb_hal_task_idle_cpu(mrb_state *mrb) +{ + (void)mrb; + if (ts && ts->vm_ctx) { + (void)g_main_context_iteration(ts->vm_ctx, TRUE); + } + else { + g_usleep(MRB_TICK_UNIT * 1000); + } +} + +void +mrb_hal_task_sleep_us(mrb_state *mrb, mrb_int usec) +{ + (void)mrb; + if (usec <= 0) { + return; + } + g_usleep((gulong)usec); +} diff --git a/mrbgems/mruby-task/ports/posix/task_hal.c b/mrbgems/mruby-task/ports/posix/task_hal.c new file mode 100644 index 0000000000..e653da3140 --- /dev/null +++ b/mrbgems/mruby-task/ports/posix/task_hal.c @@ -0,0 +1,234 @@ +/* +** task_hal.c - POSIX HAL implementation for mruby-task +** +** See Copyright Notice in mruby.h +** +** POSIX implementation using SIGALRM and setitimer() for timer, +** and sigprocmask() for interrupt protection. +** +** Supported platforms: Linux, macOS, BSD, Unix +** +** Note: When compiled for Emscripten/WASM, the SIGALRM timer is disabled +** because JavaScript handles tick calls via setInterval. Using both would +** cause tick_ to increment twice as fast, making sleep wake up early. +*/ + +#include +#include "task_hal.h" +#ifndef __EMSCRIPTEN__ +#include +#include +#endif +#include +#include +#include + +/* Time conversion constants */ +#define NSEC_PER_MSEC 1000000ULL +#define NSEC_PER_SEC 1000000000ULL +#define USEC_PER_MSEC 1000ULL + +#ifndef __EMSCRIPTEN__ +/* Multi-VM support */ +static mrb_state *vm_list[MRB_TASK_MAX_VMS]; +static volatile sig_atomic_t vm_count = 0; +static sigset_t alarm_mask; + +/* SIGALRM signal handler - ticks all registered VMs */ +static void +sigalrm_handler(int sig) +{ + int i; + (void)sig; + /* Tick all registered VMs */ + for (i = 0; i < vm_count; i++) { + if (vm_list[i]) { + mrb_tick(vm_list[i]); + } + } +} +#endif /* __EMSCRIPTEN__ */ + +/* + * HAL Interface Implementation + */ + +void +mrb_hal_task_init(mrb_state *mrb) +{ + int i; + + /* Initialize task state */ + for (i = 0; i < 4; i++) { + mrb->task.queues[i] = NULL; + } + mrb->task.tick = 0; + mrb->task.wakeup_tick = UINT32_MAX; + mrb->task.switching = FALSE; + +#ifndef __EMSCRIPTEN__ + /* POSIX: Set up SIGALRM timer for tick handling */ + struct sigaction sa; + struct itimerval timer; + int vm_index = -1; + + /* Block SIGALRM during registration to avoid race */ + sigemptyset(&alarm_mask); + sigaddset(&alarm_mask, SIGALRM); + sigprocmask(SIG_BLOCK, &alarm_mask, NULL); + + /* Check if this VM is already registered */ + for (i = 0; i < vm_count; i++) { + if (vm_list[i] == mrb) { + vm_index = i; + break; + } + } + + /* Register new VM if not already present */ + if (vm_index < 0) { + if (vm_count >= MRB_TASK_MAX_VMS) { + sigprocmask(SIG_UNBLOCK, &alarm_mask, NULL); + mrb_raisef(mrb, E_RUNTIME_ERROR, + "too many mrb_states with task scheduler (max: %d)", + MRB_TASK_MAX_VMS); + } + vm_list[vm_count] = mrb; + vm_count++; + } + + /* Set up signal handler and timer only for first VM */ + if (vm_count == 1) { + /* Set up signal handler - SA_RESTART to avoid breaking IO operations */ + sa.sa_handler = sigalrm_handler; + sa.sa_flags = SA_RESTART; + sigemptyset(&sa.sa_mask); + sigaction(SIGALRM, &sa, NULL); + + /* Start timer */ + timer.it_value.tv_sec = 0; + timer.it_value.tv_usec = MRB_TICK_UNIT * 1000; + timer.it_interval.tv_sec = 0; + timer.it_interval.tv_usec = MRB_TICK_UNIT * 1000; + setitimer(ITIMER_REAL, &timer, NULL); + } + + /* Unblock SIGALRM */ + sigprocmask(SIG_UNBLOCK, &alarm_mask, NULL); +#endif +} + +void +mrb_task_enable_irq(void) +{ +#ifndef __EMSCRIPTEN__ + sigprocmask(SIG_UNBLOCK, &alarm_mask, NULL); +#endif +} + +void +mrb_task_disable_irq(void) +{ +#ifndef __EMSCRIPTEN__ + sigprocmask(SIG_BLOCK, &alarm_mask, NULL); +#endif +} + +void +mrb_hal_task_idle_cpu(mrb_state *mrb) +{ + (void)mrb; + /* On POSIX, just pause briefly */ + usleep(MRB_TICK_UNIT * 1000); +} + +void +mrb_hal_task_sleep_us(mrb_state *mrb, mrb_int usec) +{ + struct timespec start, now, sleep_time; + int ret; + + (void)mrb; + + /* Validate input to prevent overflow */ + if (usec < 0) { + return; + } + + ret = clock_gettime(CLOCK_MONOTONIC, &start); + if (ret != 0) { + /* Fallback to simple usleep if clock_gettime fails */ + usleep(usec); + return; + } + + uint64_t target_ns = (uint64_t)usec * USEC_PER_MSEC; + + /* Loop until enough real time has elapsed */ + while (1) { + ret = clock_gettime(CLOCK_MONOTONIC, &now); + if (ret != 0) { + break; /* Clock failure - exit loop */ + } + + uint64_t elapsed_ns = (uint64_t)(now.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (uint64_t)(now.tv_nsec - start.tv_nsec); + + if (elapsed_ns >= target_ns) { + break; + } + + /* Sleep for remaining time, but at least 1ms to allow timer interrupts */ + uint64_t remaining_ns = target_ns - elapsed_ns; + if (remaining_ns > NSEC_PER_MSEC) { + sleep_time.tv_sec = remaining_ns / NSEC_PER_SEC; + sleep_time.tv_nsec = remaining_ns % NSEC_PER_SEC; + } + else { + sleep_time.tv_sec = 0; + sleep_time.tv_nsec = NSEC_PER_MSEC; + } + + nanosleep(&sleep_time, NULL); /* Interrupted by signals - that's OK */ + } +} + +void +mrb_hal_task_final(mrb_state *mrb) +{ +#ifndef __EMSCRIPTEN__ + struct itimerval timer; + int i, j; + + /* Block SIGALRM during unregistration */ + sigprocmask(SIG_BLOCK, &alarm_mask, NULL); + + /* Find and remove this VM from the list */ + for (i = 0; i < vm_count; i++) { + if (vm_list[i] == mrb) { + /* Shift remaining VMs down */ + for (j = i; j < vm_count - 1; j++) { + vm_list[j] = vm_list[j + 1]; + } + vm_list[vm_count - 1] = NULL; + vm_count--; + break; + } + } + + /* Stop timer if last VM */ + if (vm_count == 0) { + timer.it_value.tv_sec = 0; + timer.it_value.tv_usec = 0; + timer.it_interval.tv_sec = 0; + timer.it_interval.tv_usec = 0; + setitimer(ITIMER_REAL, &timer, NULL); + } + + /* Unblock SIGALRM */ + sigprocmask(SIG_UNBLOCK, &alarm_mask, NULL); +#else + /* WASM: No timer cleanup needed */ + (void)mrb; +#endif +} diff --git a/mrbgems/mruby-task/ports/win/task_hal.c b/mrbgems/mruby-task/ports/win/task_hal.c new file mode 100644 index 0000000000..83e6474412 --- /dev/null +++ b/mrbgems/mruby-task/ports/win/task_hal.c @@ -0,0 +1,169 @@ +/* +** task_hal.c - Windows HAL implementation for mruby-task +** +** See Copyright Notice in mruby.h +** +** Windows implementation using multimedia timer (timeSetEvent/timeKillEvent) +** for periodic timer, and CRITICAL_SECTION for interrupt protection. +** +** Supported platforms: Windows (all versions with multimedia timer support) +*/ + +#include +#include "task_hal.h" +#include +#include +#include + +/* Multi-VM support */ +static mrb_state *vm_list[MRB_TASK_MAX_VMS]; +static volatile LONG vm_count = 0; +static CRITICAL_SECTION irq_lock; +static MMRESULT timer_id = 0; + +/* Multimedia timer callback - called periodically by Windows */ +static void CALLBACK +timer_callback(UINT uID, UINT uMsg, DWORD_PTR dwUser, DWORD_PTR dw1, DWORD_PTR dw2) +{ + int i; + (void)uID; (void)uMsg; (void)dwUser; (void)dw1; (void)dw2; + + /* Tick all registered VMs */ + EnterCriticalSection(&irq_lock); + for (i = 0; i < vm_count; i++) { + if (vm_list[i]) { + mrb_tick(vm_list[i]); + } + } + LeaveCriticalSection(&irq_lock); +} + +/* + * HAL Interface Implementation + */ + +void +mrb_hal_task_init(mrb_state *mrb) +{ + int i; + LONG idx; + + /* Initialize task state */ + for (i = 0; i < 4; i++) { + mrb->task.queues[i] = NULL; + } + mrb->task.tick = 0; + mrb->task.wakeup_tick = UINT32_MAX; + mrb->task.switching = FALSE; + + /* Initialize critical section on first VM */ + if (vm_count == 0) { + InitializeCriticalSection(&irq_lock); + } + + EnterCriticalSection(&irq_lock); + + /* Check if this VM is already registered */ + idx = -1; + for (i = 0; i < vm_count; i++) { + if (vm_list[i] == mrb) { + idx = i; + break; + } + } + + /* Register new VM if not already present */ + if (idx < 0) { + if (vm_count >= MRB_TASK_MAX_VMS) { + LeaveCriticalSection(&irq_lock); + mrb_raisef(mrb, E_RUNTIME_ERROR, + "too many mrb_states with task scheduler (max: %d)", + MRB_TASK_MAX_VMS); + } + vm_list[vm_count] = mrb; + InterlockedIncrement(&vm_count); + } + + /* Start timer for first VM */ + if (vm_count == 1) { + /* Request 1ms timer resolution */ + timeBeginPeriod(1); + + /* Create periodic timer with MRB_TICK_UNIT interval */ + timer_id = timeSetEvent( + MRB_TICK_UNIT, /* interval in milliseconds */ + 1, /* resolution in milliseconds */ + timer_callback, /* callback function */ + 0, /* user data */ + TIME_PERIODIC | TIME_KILL_SYNCHRONOUS + ); + } + + LeaveCriticalSection(&irq_lock); +} + +void +mrb_task_enable_irq(void) +{ + LeaveCriticalSection(&irq_lock); +} + +void +mrb_task_disable_irq(void) +{ + EnterCriticalSection(&irq_lock); +} + +void +mrb_hal_task_idle_cpu(mrb_state *mrb) +{ + (void)mrb; + /* On Windows, just sleep briefly */ + Sleep(MRB_TICK_UNIT); +} + +void +mrb_hal_task_sleep_us(mrb_state *mrb, mrb_int usec) +{ + (void)mrb; + + /* Windows Sleep() takes milliseconds, convert from microseconds */ + if (usec >= 0) { + Sleep((DWORD)(usec / 1000)); + } +} + +void +mrb_hal_task_final(mrb_state *mrb) +{ + int i, j; + + EnterCriticalSection(&irq_lock); + + /* Find and remove this VM from the list */ + for (i = 0; i < vm_count; i++) { + if (vm_list[i] == mrb) { + /* Shift remaining VMs down */ + for (j = i; j < vm_count - 1; j++) { + vm_list[j] = vm_list[j + 1]; + } + vm_list[vm_count - 1] = NULL; + InterlockedDecrement(&vm_count); + break; + } + } + + /* Stop timer if last VM */ + if (vm_count == 0) { + if (timer_id != 0) { + timeKillEvent(timer_id); + timeEndPeriod(1); + timer_id = 0; + } + LeaveCriticalSection(&irq_lock); + DeleteCriticalSection(&irq_lock); + } + else { + LeaveCriticalSection(&irq_lock); + } +} diff --git a/mrbgems/mruby-task/src/task.c b/mrbgems/mruby-task/src/task.c new file mode 100644 index 0000000000..4e66a4053a --- /dev/null +++ b/mrbgems/mruby-task/src/task.c @@ -0,0 +1,1628 @@ +/* +** task.c - Task scheduler +** +** See Copyright Notice in mruby.h +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "task.h" + +/* Get task pointer from self with validation */ +#define TASK_GET_PTR_OR_RAISE(var, self) \ + do { \ + (var) = (mrb_task*)mrb_data_get_ptr(mrb, (self), &mrb_task_type); \ + if (!(var)) { \ + mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid task"); \ + } \ + } while (0) + +/* Convert microseconds to tick count */ +#define USEC_TO_TICKS(usec) (((usec) / 1000) / MRB_TICK_UNIT) + +/* Maximum value for scheduler_lock (uint8_t max) */ +#define MRB_TASK_SCHEDULER_LOCK_MAX 255 + +/* + * Task data type for GC + */ +static void +mrb_task_free(mrb_state *mrb, void *ptr) +{ + mrb_task *t = (mrb_task*)ptr; + if (t) { + /* Unregister from GC protection (unless it's the main task during shutdown) */ + if (t != mrb->task.main_task) { + mrb_gc_unregister(mrb, t->self); + } + + /* Free context resources - always free if allocated */ + /* Main task never has allocated context (stbase/cibase are NULL) */ + if (t->c.stbase) { + mrb_free(mrb, t->c.stbase); + } + if (t->c.cibase) { + mrb_free(mrb, t->c.cibase); + } + + /* Free the task structure itself */ + mrb_free(mrb, t); + } +} + +static const struct mrb_data_type mrb_task_type = { + "Task", mrb_task_free, +}; + +/* + * GC marking function for all tasks + * Called from gc.c during root_scan_phase + */ +void +mrb_task_mark_all(mrb_state *mrb) +{ + int qi; + for (qi = 0; qi < 4; qi++) { + mrb_task *t = mrb->task.queues[qi]; + while (t) { + struct mrb_context *c = &t->c; + mrb_callinfo *ci; + size_t i, e; + + /* Mark task's stack */ + if (c->stbase) { + if (c->ci) { + e = (c->ci->stack ? c->ci->stack - c->stbase : 0); + e += mrb_ci_nregs(c->ci); + } + else { + e = 0; + } + if (c->stbase + e > c->stend) e = c->stend - c->stbase; + for (i = 0; i < e; i++) { + mrb_gc_mark_value(mrb, c->stbase[i]); + } + /* Clear the dead slots above the live range, matching + mark_context_stack() in gc.c. A preempted task whose live range + later shrinks (a frame returned) would otherwise leave stale + object pointers in those slots; the objects get swept while the + pointers survive, and a subsequent mark of the resumed task trips + the MRB_TT_FREE assertion in mrb_gc_mark (issue #6870). */ + size_t stend = c->stend - c->stbase; + for (; i < stend; i++) { + SET_NIL_VALUE(c->stbase[i]); + } + } + + /* Mark call stack */ + if (c->cibase && c->ci) { + for (ci = c->cibase; ci <= c->ci; ci++) { + if (ci->proc) { + mrb_gc_mark(mrb, (struct RBasic*)ci->proc); + } + if (ci->u.target_class) { + mrb_gc_mark(mrb, (struct RBasic*)ci->u.target_class); + } + } + } + + /* Mark fiber */ + mrb_gc_mark(mrb, (struct RBasic*)c->fib); + + /* Mark task-specific values */ + mrb_gc_mark_value(mrb, t->self); + mrb_gc_mark_value(mrb, t->result); + mrb_gc_mark_value(mrb, t->name); + + t = t->next; + } + } +} + +/* + * Queue operations + */ + +/* Get queue head pointer based on task status */ +static mrb_task** +q_get_queue(mrb_state *mrb, mrb_task *t) +{ + switch (t->status) { + case MRB_TASK_STATUS_DORMANT: + return &q_dormant_; + case MRB_TASK_STATUS_READY: + case MRB_TASK_STATUS_RUNNING: + return &q_ready_; + case MRB_TASK_STATUS_WAITING: + return &q_waiting_; + case MRB_TASK_STATUS_SUSPENDED: + return &q_suspended_; + default: + return &q_dormant_; + } +} + +/* Insert task into queue based on priority (higher priority = lower number = earlier in queue) */ +void +mrb_task_q_insert(mrb_state *mrb, mrb_task *t) +{ + mrb_task **q = q_get_queue(mrb, t); + mrb_task *curr = *q; + mrb_task *prev = NULL; + + /* Find insertion point - insert before first task with lower priority */ + while (curr != NULL && curr->priority <= t->priority) { + prev = curr; + curr = curr->next; + } + + /* Insert task */ + t->next = curr; + if (prev == NULL) { + *q = t; /* Insert at head */ + } + else { + prev->next = t; /* Insert after prev */ + } +} + +/* Delete task from its current queue */ +void +mrb_task_q_delete(mrb_state *mrb, mrb_task *t) +{ + mrb_task **q = q_get_queue(mrb, t); + mrb_task *curr = *q; + mrb_task *prev = NULL; + + /* Find and remove task */ + while (curr != NULL) { + if (curr == t) { + if (prev == NULL) { + *q = curr->next; /* Remove from head */ + } + else { + prev->next = curr->next; /* Remove from middle/end */ + } + t->next = NULL; + return; + } + prev = curr; + curr = curr->next; + } +} + +/* Cleanup terminated task and move to dormant queue if needed */ +static inline mrb_bool +task_cleanup_if_stopped(mrb_state *mrb, mrb_task *t) +{ + if (t->status == MRB_TASK_STATUS_DORMANT || t->c.status == MRB_TASK_STOPPED) { + /* Task is terminated but still in queue - remove it */ + mrb_task_disable_irq(); + mrb_task_q_delete(mrb, t); + if (t->status != MRB_TASK_STATUS_DORMANT) { + t->status = MRB_TASK_STATUS_DORMANT; + mrb_task_q_insert(mrb, t); + } + mrb_task_enable_irq(); + return TRUE; + } + return FALSE; +} + +/* + * Task lifecycle + */ + +/* Allocate new task */ +static mrb_task* +task_alloc(mrb_state *mrb) +{ + mrb_task *t = (mrb_task*)mrb_malloc(mrb, sizeof(mrb_task)); + memset(t, 0, sizeof(mrb_task)); + return t; +} + +/* Initialize task context (stack and callinfo) - similar to Fiber */ +static void +task_init_context(mrb_state *mrb, mrb_task *t, const struct RProc *proc) +{ + static const struct mrb_context mrb_context_zero = { 0 }; + struct mrb_context *c = &t->c; + + *c = mrb_context_zero; + + /* Initialize VM stack */ + size_t slen = TASK_STACK_INIT_SIZE; + if (proc->body.irep->nregs > slen) { + slen += proc->body.irep->nregs; + } + c->stbase = (mrb_value*)mrb_malloc(mrb, slen * sizeof(mrb_value)); + c->stend = c->stbase + slen; + + /* Initialize stack values to nil */ + { + mrb_value *s = c->stbase + 1; + mrb_value *send = c->stend; + while (s < send) { + SET_NIL_VALUE(*s); + s++; + } + } + + /* Set receiver to top self */ + c->stbase[0] = mrb_top_self(mrb); + + /* Initialize callinfo stack */ + static const mrb_callinfo ci_zero = { 0 }; + c->cibase = (mrb_callinfo*)mrb_malloc(mrb, TASK_CI_INIT_SIZE * sizeof(mrb_callinfo)); + c->ciend = c->cibase + TASK_CI_INIT_SIZE; + c->ci = c->cibase; + c->cibase[0] = ci_zero; + + /* Setup callinfo */ + mrb_callinfo *ci = c->ci; + mrb_vm_ci_target_class_set(ci, MRB_PROC_TARGET_CLASS(proc)); + mrb_vm_ci_proc_set(ci, proc); + ci->stack = c->stbase; + ci->pc = proc->body.irep->iseq; /* Initialize PC to start of bytecode */ + + c->status = MRB_TASK_CREATED; +} + +/* + * Scheduler core + */ + +/* Wake up tasks waiting on join for a completed task */ +static void +wake_up_join_waiters(mrb_state *mrb, mrb_task *completed_task) +{ + mrb_task_disable_irq(); + mrb_task *curr = q_waiting_; + while (curr != NULL) { + mrb_task *next = curr->next; + if (curr->reason == MRB_TASK_REASON_JOIN && curr->wait.join == completed_task) { + mrb_task_q_delete(mrb, curr); + curr->status = MRB_TASK_STATUS_READY; + curr->reason = MRB_TASK_REASON_NONE; + curr->wait.join = NULL; + mrb_task_q_insert(mrb, curr); + /* If a higher-priority waiter is resumed from task context, + * request a context switch after leaving the critical section. */ + if (mrb->c != mrb->root_c && !switching_) { + mrb_task *running = MRB2TASK(mrb); + if (curr->priority < running->priority) { + switching_ = TRUE; + } + } + } + curr = next; + } + mrb_task_enable_irq(); +} + +/* Change task state with IRQ protection and queue management */ +static void +task_change_state(mrb_state *mrb, mrb_task *t, uint8_t new_status) +{ + mrb_task_disable_irq(); + mrb_task_q_delete(mrb, t); + t->status = new_status; + mrb_task_q_insert(mrb, t); + mrb_task_enable_irq(); +} + +typedef struct execute_task_vm_args { + mrb_task *t; + const struct RProc *proc; + const mrb_code *pc; +} execute_task_vm_args; + +static mrb_value +execute_task_vm(mrb_state *mrb, void *ud) +{ + execute_task_vm_args *args = (execute_task_vm_args*)ud; + + mrb->task.exception_as_result = TRUE; + args->t->result = mrb_vm_exec(mrb, args->proc, args->pc); + if (mrb->exc) { + args->t->result = mrb_obj_value(mrb->exc); + mrb->exc = NULL; + } + mrb->task.exception_as_result = FALSE; + return args->t->result; +} + +/* Execute a single task - core task execution logic */ +static void +execute_task(mrb_state *mrb, mrb_task *t) +{ + struct mrb_context *prev_c; + mrb_callinfo *prev_ci; + uint8_t prev_cci; + + /* Set task as running */ + t->timeslice = MRB_TIMESLICE_TICK_COUNT; + t->status = MRB_TASK_STATUS_RUNNING; + + /* Switch to task context */ + prev_c = mrb->c; + prev_ci = prev_c->ci; + prev_cci = prev_c->ci->cci; + t->c.prev = mrb->c; + mrb->c = &t->c; + + /* Clear switching flag */ + switching_ = FALSE; + + /* Save proc and PC to locals before calling mrb_vm_exec */ + const struct RProc *proc = t->c.ci->proc; + const mrb_code *pc = t->c.ci->pc; + + /* With C function boundary checks, proc should never be NULL on resume */ + if (!proc) { + mrb_raise(mrb, E_RUNTIME_ERROR, "task context corrupted: no proc on resume"); + } + + /* Set vmexec flag to prevent fiber_terminate from being called */ + t->c.vmexec = TRUE; + + /* Execute task - PC is saved in ci->pc from previous run. + Unhandled task exceptions are converted to the task result by + mrb_vm_exec() in task mode, so the scheduler protect frame stays intact. */ + execute_task_vm_args args = { t, proc, pc }; + mrb_bool error = FALSE; + t->result = mrb_protect_error(mrb, execute_task_vm, &args, &error); + mrb->task.exception_as_result = FALSE; + + /* Clear vmexec flag */ + t->c.vmexec = FALSE; + + /* Clear switching flag */ + switching_ = FALSE; + + /* Restore context */ + mrb->c = prev_c; + t->c.prev = NULL; + prev_c->ci = prev_ci; + prev_ci->cci = prev_cci; + + /* If an abnormal path inside mrb_vm_exec() bypassed + exception_as_result and unwound via MRB_THROW (e.g. a + CINFO_SKIP frame), mrb_protect_error caught it and stored the + exception object in t->result. Force the task to terminate + cleanly so the scheduler keeps running instead of aborting - + re-raising into the scheduler would abort in pattern 1, where + no outer jmpbuf exists. The exception remains observable via + mrb_task_value() / Task#value. */ + if (error) { + t->c.status = MRB_TASK_STOPPED; + } + + /* Handle task termination */ + if (t->c.status == MRB_TASK_STOPPED) { + switching_ = FALSE; + mrb_task_disable_irq(); + mrb_task_q_delete(mrb, t); + t->status = MRB_TASK_STATUS_DORMANT; + mrb_task_q_insert(mrb, t); + mrb_task_enable_irq(); + + /* Wake up tasks waiting on join */ + wake_up_join_waiters(mrb, t); + } + else if (t->status == MRB_TASK_STATUS_RUNNING) { + /* Task yielded but still running - move to ready queue */ + t->status = MRB_TASK_STATUS_READY; + } +} + +/* Tick handler - called by timer interrupt */ +MRB_API void +mrb_tick(mrb_state *mrb) +{ + mrb_task *t; + + /* Increment global tick counter */ + tick_++; + + /* Decrease timeslice for running task */ + t = q_ready_; + if (t && t->status == MRB_TASK_STATUS_RUNNING && t->timeslice > 0) { + t->timeslice--; + if (t->timeslice == 0) { + switching_ = TRUE; /* Trigger context switch */ + } + } + + /* Wake up sleeping tasks whose wakeup time has passed. + * + * UINT32_MAX is the "no sleepers" sentinel. Without the explicit + * check, (int32_t)(UINT32_MAX - tick_) evaluates negative for the + * first half of the 32-bit range, so the queue walk fires every + * tick with nothing to do. Short-circuiting saves the walk in the + * common no-sleepers case -- benefits every HAL, not just + * tickless ones. */ + if (wakeup_tick_ != UINT32_MAX && + (int32_t)(wakeup_tick_ - tick_) <= 0) { + mrb_task *curr = q_waiting_; + mrb_task *next; + uint32_t next_wakeup = UINT32_MAX; + + while (curr != NULL) { + next = curr->next; + + if (curr->reason == MRB_TASK_REASON_SLEEP) { + if ((int32_t)(curr->wait.wakeup_tick - tick_) <= 0) { + /* Time to wake up */ + mrb_task_q_delete(mrb, curr); + curr->status = MRB_TASK_STATUS_READY; + curr->reason = MRB_TASK_REASON_NONE; + mrb_task_q_insert(mrb, curr); + switching_ = TRUE; + } + else if (curr->wait.wakeup_tick < next_wakeup) { + next_wakeup = curr->wait.wakeup_tick; + } + } + + curr = next; + } + + wakeup_tick_ = next_wakeup; + } +} + +/* Body of the main scheduler loop. Wrapped by mrb_task_run() under + mrb_protect_error so an exception raised from a task body unwinds + cleanly without leaving `loop_running` set. */ +static mrb_value +task_run_body(mrb_state *mrb, void *ud) +{ + mrb_task *t; + (void)ud; + + while (1) { + t = q_ready_; + + /* No task ready - check if all tasks are done */ + if (!t) { + mrb_task_disable_irq(); + mrb_bool exiting = !q_ready_ && !q_waiting_ && !q_suspended_; + mrb_task_enable_irq(); + if (exiting) { + /* All tasks are dormant - scheduler done */ + break; + } + /* If there are tasks waiting or suspended, idle */ + mrb_hal_task_idle_cpu(mrb); + continue; + } + + /* Safety check - don't execute terminated tasks */ + if (task_cleanup_if_stopped(mrb, t)) { + continue; + } + + /* Execute task using core logic */ + execute_task(mrb, t); + + /* Move to end of ready queue if still running (round-robin) */ + if (t->status == MRB_TASK_STATUS_READY) { + task_change_state(mrb, t, MRB_TASK_STATUS_READY); + } + + /* Run incremental GC if active */ + if (mrb->gc.state != MRB_GC_STATE_ROOT) { + mrb_incremental_gc(mrb); + } + } + return mrb_nil_value(); +} + +/* Main scheduler loop */ +MRB_API mrb_value +mrb_task_run(mrb_state *mrb) +{ + if (mrb->task.loop_running) { + return mrb_nil_value(); + } + mrb->task.loop_running = TRUE; + + mrb_bool error = FALSE; + mrb_value result = mrb_protect_error(mrb, task_run_body, NULL, &error); + mrb->task.loop_running = FALSE; + if (error) { + mrb_exc_raise(mrb, result); + } + return result; +} + +/* Single-step task execution for WASM event loop integration */ +MRB_API mrb_value +mrb_task_run_once(mrb_state *mrb) +{ + mrb_task *t = q_ready_; + + /* No task ready */ + if (!t) { + return mrb_nil_value(); + } + + /* Safety check - don't execute terminated tasks */ + if (task_cleanup_if_stopped(mrb, t)) { + return mrb_true_value(); + } + + /* Execute task using core logic */ + execute_task(mrb, t); + + /* Move to end of ready queue if still ready (round-robin) */ + if (t->status == MRB_TASK_STATUS_READY) { + task_change_state(mrb, t, MRB_TASK_STATUS_READY); + } + + /* Run incremental GC if active */ + if (mrb->gc.state != MRB_GC_STATE_ROOT) { + mrb_incremental_gc(mrb); + } + + return mrb_true_value(); +} + +/* + * Sleep operations + */ + +static void +sleep_us_impl(mrb_state *mrb, uint32_t usec) +{ + mrb_task *t; + + /* Check if we're in a task context */ + if (mrb->c == mrb->root_c) { + /* Not in task context - sleep in real wall-clock time using HAL */ + mrb_hal_task_sleep_us(mrb, usec); + /* Clear switching flag - we're in root context, not switching to a task */ + switching_ = FALSE; + return; + } + + /* Check for C function boundary - cannot do cooperative context switch */ + mrb_callinfo *ci; + for (ci = mrb->c->ci; ci >= mrb->c->cibase; ci--) { + if (ci->cci > 0) { + /* Inside C function - fall back to blocking sleep without context switch */ + mrb_hal_task_sleep_us(mrb, usec); + switching_ = FALSE; + return; + } + } + + /* In task context - get current running task */ + t = MRB2TASK(mrb); + + mrb_task_disable_irq(); + + /* Remove from ready queue */ + mrb_task_q_delete(mrb, t); + + /* Move to waiting queue */ + t->status = MRB_TASK_STATUS_WAITING; + t->reason = MRB_TASK_REASON_SLEEP; + /* Convert microseconds to ticks (tick unit is in milliseconds) */ + t->wait.wakeup_tick = tick_ + USEC_TO_TICKS(usec); + + /* Update next wakeup time if this task wakes earlier. + * + * When wakeup_tick_ is UINT32_MAX (no prior sleepers), the + * unsigned subtraction wraps to a small positive value and the + * int32_t cast stays non-negative, so the update is skipped and + * the global stays stale until the next mrb_tick self-heals it. + * Handle the sentinel explicitly so tickless HALs that read this + * field get a consistent value as soon as the sleeper is + * installed. */ + if (wakeup_tick_ == UINT32_MAX || + (int32_t)(t->wait.wakeup_tick - wakeup_tick_) < 0) { + wakeup_tick_ = t->wait.wakeup_tick; + } + mrb_task_q_insert(mrb, t); + + mrb_task_enable_irq(); + + /* Trigger context switch */ + switching_ = TRUE; +} + +static void +sleep_ms_impl(mrb_state *mrb, uint32_t ms) +{ + sleep_us_impl(mrb, ms * 1000); +} + +static mrb_value +mrb_f_sleep(mrb_state *mrb, mrb_value self) +{ + mrb_float sec = 0; + mrb_int n = mrb_get_args(mrb, "|f", &sec); + + if (n == 0) { + /* No argument - suspend indefinitely */ + mrb_task *t = q_ready_; + if (t) { + mrb_task_disable_irq(); + mrb_task_q_delete(mrb, t); + t->status = MRB_TASK_STATUS_SUSPENDED; + mrb_task_q_insert(mrb, t); + mrb_task_enable_irq(); + switching_ = TRUE; + } + return mrb_nil_value(); + } + + if (sec < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "time interval must be positive"); + } + + mrb_int ms = (mrb_int)(sec * 1000); + sleep_ms_impl(mrb, (uint32_t)ms); + + return mrb_fixnum_value((mrb_int)sec); +} + +static mrb_value +mrb_f_sleep_ms(mrb_state *mrb, mrb_value self) +{ + mrb_int ms; + + mrb_get_args(mrb, "i", &ms); + + if (ms < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "time interval must be positive"); + } + + sleep_ms_impl(mrb, (uint32_t)ms); + + return mrb_nil_value(); +} + +static mrb_value +mrb_f_usleep(mrb_state *mrb, mrb_value self) +{ + mrb_int usec; + + mrb_get_args(mrb, "i", &usec); + + if (usec < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "time interval must be positive"); + } + + sleep_us_impl(mrb, (uint32_t)usec); + + return mrb_fixnum_value(usec); +} + +/* Common task creation logic shared by Task.new and mrb_create_task */ +static mrb_task* +task_create_common(mrb_state *mrb, const struct RProc *proc, + mrb_value name, uint8_t priority) +{ + mrb_task *t = task_alloc(mrb); + t->priority = priority; + t->status = MRB_TASK_STATUS_READY; + t->reason = MRB_TASK_REASON_NONE; + t->name = name; + + mrb_value task_obj = mrb_obj_value(mrb_data_object_alloc(mrb, mrb_class_get(mrb, "Task"), + t, &mrb_task_type)); + t->self = task_obj; + mrb_gc_register(mrb, task_obj); + task_init_context(mrb, t, proc); + + mrb_task_disable_irq(); + mrb_task_q_insert(mrb, t); + mrb_task_enable_irq(); + + if (q_ready_ && q_ready_->status == MRB_TASK_STATUS_RUNNING) { + if (t->priority < q_ready_->priority) { + switching_ = TRUE; + } + } + + return t; +} + +/* + * Task class methods + */ + +static mrb_value +mrb_task_s_new(mrb_state *mrb, mrb_value self) +{ + mrb_value blk; + mrb_value name_val = mrb_nil_value(); + mrb_int priority = 128; /* Default middle priority */ + mrb_value kw_values[2] = {mrb_undef_value(), mrb_undef_value()}; + mrb_sym kw_names[2] = {MRB_SYM(name), MRB_SYM(priority)}; + const mrb_kwargs kwargs = { + 2, 0, kw_names, kw_values, NULL + }; + + /* Get block and optional keyword arguments */ + mrb_get_args(mrb, "&:", &blk, &kwargs); + + if (mrb_nil_p(blk)) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "tried to create task without a block"); + } + + const struct RProc *proc = mrb_proc_ptr(blk); + + /* Parse keyword arguments */ + if (!mrb_undef_p(kw_values[0])) { + if (!mrb_string_p(kw_values[0])) { + mrb_raise(mrb, E_TYPE_ERROR, "name must be a String"); + } + name_val = kw_values[0]; + } + if (!mrb_undef_p(kw_values[1])) { + if (!mrb_integer_p(kw_values[1])) { + mrb_raise(mrb, E_TYPE_ERROR, "priority must be an Integer"); + } + priority = mrb_integer(kw_values[1]); + if (priority < 0 || priority > 255) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "priority must be 0-255"); + } + } + + mrb_task *t = task_create_common(mrb, proc, name_val, (uint8_t)priority); + return t->self; +} + +static mrb_value +mrb_task_s_current(mrb_state *mrb, mrb_value self) +{ + /* Check if we're in root context */ + if (mrb->c == mrb->root_c) { + /* Return main task wrapper (lazy-allocate if needed) */ + if (!mrb->task.main_task) { + struct RClass *task_class = mrb_class_ptr(self); + struct RData *data = mrb_data_object_alloc(mrb, task_class, NULL, &mrb_task_type); + mrb_task *t = (mrb_task*)mrb_calloc(mrb, 1, sizeof(mrb_task)); + + /* Initialize as main task - special status that's never scheduled */ + t->priority = 0; + t->status = MRB_TASK_STATUS_RUNNING; /* Always running */ + t->name = mrb_str_new_cstr(mrb, "main"); + t->self = mrb_obj_value(data); + data->data = t; + data->type = &mrb_task_type; + + /* Register for GC protection */ + mrb_gc_register(mrb, t->self); + + /* Note: t->c is not used - root context is in mrb->root_c */ + mrb->task.main_task = t; + } + return mrb->task.main_task->self; + } + + /* Use pointer arithmetic to get task from context - O(1) */ + mrb_task *t = MRB2TASK(mrb); + return t->self; +} + +static mrb_value +mrb_task_s_list(mrb_state *mrb, mrb_value self) +{ + mrb_value ary = mrb_ary_new(mrb); + + /* Iterate all queues and collect tasks */ + for (int i = 0; i < MRB_NUM_TASK_QUEUE; i++) { + mrb_task *t = mrb->task.queues[i]; + while (t != NULL) { + mrb_ary_push(mrb, ary, t->self); + t = t->next; + } + } + + return ary; +} + +/* + * Run one task iteration - helper for Task.pass from root context + * Waits for ready tasks if needed (cooperative yielding) + */ +static void +task_run_one_iteration(mrb_state *mrb) +{ + mrb_task *t = q_ready_; + + /* No ready task - just return (sleep from root provides delays) */ + if (!t) { + return; + } + + /* Skip terminated tasks */ + if (task_cleanup_if_stopped(mrb, t)) { + return; + } + + /* Execute ready task */ + execute_task(mrb, t); +} + +static mrb_value +mrb_task_s_pass(mrb_state *mrb, mrb_value self) +{ + if (mrb->c == mrb->root_c) { + /* Called from root context - run one task iteration */ + task_run_one_iteration(mrb); + } + else { + /* Check for C function boundary - cannot yield from C function */ + mrb_callinfo *ci; + for (ci = mrb->c->ci; ci >= mrb->c->cibase; ci--) { + if (ci->cci > 0) { + mrb_raise(mrb, E_RUNTIME_ERROR, "can't pass across C function boundary"); + } + } + + /* In task context - trigger context switch */ + switching_ = TRUE; + } + + return mrb_nil_value(); +} + +/* Helper to build statistics for a task queue */ +static mrb_value +mrb_stat_sub(mrb_state *mrb, mrb_task *queue) +{ + mrb_value stat = mrb_hash_new(mrb); + mrb_value tasks = mrb_ary_new(mrb); + mrb_task *curr = queue; + int count = 0; + + /* Walk the queue and collect task objects */ + while (curr) { + count++; + mrb_ary_push(mrb, tasks, curr->self); + curr = curr->next; + } + + /* Build statistics hash */ + mrb_hash_set(mrb, stat, mrb_symbol_value(MRB_SYM(count)), mrb_fixnum_value(count)); + mrb_hash_set(mrb, stat, mrb_symbol_value(MRB_SYM(tasks)), tasks); + + return stat; +} + +static mrb_value +mrb_task_s_stat(mrb_state *mrb, mrb_value self) +{ + mrb_value data = mrb_hash_new(mrb); + + mrb_task_disable_irq(); + + /* Add global scheduler state */ + mrb_hash_set(mrb, data, mrb_symbol_value(MRB_SYM(tick)), mrb_fixnum_value(tick_)); + mrb_hash_set(mrb, data, mrb_symbol_value(MRB_SYM(wakeup_tick)), mrb_fixnum_value(wakeup_tick_)); + + /* Add statistics for each queue */ + mrb_hash_set(mrb, data, mrb_symbol_value(MRB_SYM(dormant)), mrb_stat_sub(mrb, q_dormant_)); + mrb_hash_set(mrb, data, mrb_symbol_value(MRB_SYM(ready)), mrb_stat_sub(mrb, q_ready_)); + mrb_hash_set(mrb, data, mrb_symbol_value(MRB_SYM(waiting)), mrb_stat_sub(mrb, q_waiting_)); + mrb_hash_set(mrb, data, mrb_symbol_value(MRB_SYM(suspended)), mrb_stat_sub(mrb, q_suspended_)); + + mrb_task_enable_irq(); + + return data; +} + +static mrb_value +mrb_task_s_run(mrb_state *mrb, mrb_value self) +{ + return mrb_task_run(mrb); +} + +static mrb_value +mrb_task_s_get(mrb_state *mrb, mrb_value self) +{ + mrb_value name; + + mrb_get_args(mrb, "S", &name); + + /* Search all queues for task with matching name */ + for (int i = 0; i < MRB_NUM_TASK_QUEUE; i++) { + mrb_task *t = mrb->task.queues[i]; + while (t != NULL) { + if (mrb_equal(mrb, t->name, name)) { + return t->self; + } + t = t->next; + } + } + + return mrb_nil_value(); +} + +/* + * Task instance methods + */ + +MRB_API mrb_value +mrb_task_status(mrb_state *mrb, mrb_value self) +{ + mrb_task *t; + + TASK_GET_PTR_OR_RAISE(t, self); + + /* Return status as symbol matching original implementation */ + return mrb_symbol_value( + (t->status == MRB_TASK_STATUS_RUNNING) ? MRB_SYM(RUNNING) : + (t->status == MRB_TASK_STATUS_READY) ? MRB_SYM(READY) : + (t->status == MRB_TASK_STATUS_WAITING) ? MRB_SYM(WAITING) : + (t->status == MRB_TASK_STATUS_SUSPENDED) ? MRB_SYM(SUSPENDED) : + (t->status == MRB_TASK_STATUS_DORMANT) ? MRB_SYM(DORMANT) : + MRB_SYM(UNKNOWN)); +} + +static mrb_value +mrb_task_inspect(mrb_state *mrb, mrb_value self) +{ + mrb_task *t; + char buf[256]; + const char *name_str; + const char *status_str; + + TASK_GET_PTR_OR_RAISE(t, self); + + /* Get status string directly from task status field */ + switch (t->status) { + case MRB_TASK_STATUS_RUNNING: + status_str = "RUNNING"; + break; + case MRB_TASK_STATUS_READY: + status_str = "READY"; + break; + case MRB_TASK_STATUS_WAITING: + status_str = "WAITING"; + break; + case MRB_TASK_STATUS_SUSPENDED: + status_str = "SUSPENDED"; + break; + case MRB_TASK_STATUS_DORMANT: + status_str = "DORMANT"; + break; + default: + status_str = "UNKNOWN"; + break; + } + + /* Get name as C string - avoid mrb_funcall to prevent VM state issues */ + if (mrb_string_p(t->name)) { + name_str = RSTRING_PTR(t->name); + } + else if (mrb_symbol_p(t->name)) { + name_str = mrb_sym_name(mrb, mrb_symbol(t->name)); + } + else { + /* Treat nil, undef, or any other type as unnamed */ + name_str = "(unnamed)"; + } + + /* Format: # */ + snprintf(buf, sizeof(buf), "#", + (void *)t, + name_str, + status_str); + + return mrb_str_new_cstr(mrb, buf); +} + +static mrb_value +mrb_task_name(mrb_state *mrb, mrb_value self) +{ + mrb_task *t; + + TASK_GET_PTR_OR_RAISE(t, self); + + /* Return "(noname)" if name is not set */ + if (mrb_nil_p(t->name)) { + return mrb_str_new_lit(mrb, "(noname)"); + } + + return t->name; +} + +static mrb_value +mrb_task_set_name(mrb_state *mrb, mrb_value self) +{ + mrb_task *t; + mrb_value name; + + TASK_GET_PTR_OR_RAISE(t, self); + + mrb_get_args(mrb, "o", &name); + t->name = name; + + return name; +} + +static mrb_value +mrb_task_priority(mrb_state *mrb, mrb_value self) +{ + mrb_task *t; + + TASK_GET_PTR_OR_RAISE(t, self); + + return mrb_fixnum_value(t->priority); +} + +static mrb_value +mrb_task_set_priority(mrb_state *mrb, mrb_value self) +{ + mrb_task *t; + mrb_int priority; + + TASK_GET_PTR_OR_RAISE(t, self); + + mrb_get_args(mrb, "i", &priority); + + if (priority < 0 || priority > 255) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "priority must be 0-255"); + } + + mrb_task_disable_irq(); + t->priority = (uint8_t)priority; + + /* Re-sort in queue if task is ready */ + if (t->status == MRB_TASK_STATUS_READY || t->status == MRB_TASK_STATUS_RUNNING) { + mrb_task_q_delete(mrb, t); + mrb_task_q_insert(mrb, t); + } + mrb_task_enable_irq(); + + return mrb_fixnum_value(priority); +} + +/* + * Forward declarations for internal functions + */ +static void suspend_task_internal(mrb_state *mrb, mrb_task *t); +static void resume_task_internal(mrb_state *mrb, mrb_task *t); +static void terminate_task_internal(mrb_state *mrb, mrb_task *t); + +static mrb_value +mrb_task_suspend(mrb_state *mrb, mrb_value self) +{ + mrb_task *t; + + TASK_GET_PTR_OR_RAISE(t, self); + task_check_scheduler_lock(mrb); + + suspend_task_internal(mrb, t); + return self; +} + +static mrb_value +mrb_task_resume(mrb_state *mrb, mrb_value self) +{ + mrb_task *t; + + TASK_GET_PTR_OR_RAISE(t, self); + task_check_scheduler_lock(mrb); + + resume_task_internal(mrb, t); + return self; +} + +static mrb_value +mrb_task_terminate(mrb_state *mrb, mrb_value self) +{ + mrb_task *t; + + TASK_GET_PTR_OR_RAISE(t, self); + task_check_scheduler_lock(mrb); + + terminate_task_internal(mrb, t); + return self; +} + +static mrb_value +mrb_task_join(mrb_state *mrb, mrb_value self) +{ + mrb_task *t, *current; + + TASK_GET_PTR_OR_RAISE(t, self); + + /* Get current task using pointer arithmetic */ + if (mrb->c == mrb->root_c) { + mrb_raise(mrb, E_RUNTIME_ERROR, "join can only be called from running task"); + } + current = MRB2TASK(mrb); + + /* Can't join self */ + if (t == current) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "can't join self"); + } + + /* If task is already dormant, return immediately */ + if (t->status == MRB_TASK_STATUS_DORMANT) { + return t->result; + } + + /* Wait for task to complete */ + mrb_task_disable_irq(); + mrb_task_q_delete(mrb, current); + current->status = MRB_TASK_STATUS_WAITING; + current->reason = MRB_TASK_REASON_JOIN; + current->wait.join = t; + mrb_task_q_insert(mrb, current); + mrb_task_enable_irq(); + + /* Trigger context switch */ + switching_ = TRUE; + + return t->result; +} + +/* + * Synchronous execution + */ + +/* Execute a proc synchronously without context switching + * + * This function creates a temporary task, executes it to completion, + * and returns the result. The scheduler_lock prevents any asynchronous + * task operations during execution. + */ +MRB_API mrb_value +mrb_execute_proc_synchronously(mrb_state *mrb, mrb_value proc_val, mrb_int argc, const mrb_value *argv) +{ + struct RProc *proc = mrb_proc_ptr(proc_val); + int ai = mrb_gc_arena_save(mrb); + + /* + * argc/argv are reserved for future use (e.g., passing arguments to + * event handlers or callback functions). Currently all callers pass + * 0 and NULL. + */ + (void)argc; + (void)argv; + + /* 1. Lock scheduler and save context */ + if (mrb->task.scheduler_lock >= MRB_TASK_SCHEDULER_LOCK_MAX) { + mrb_raise(mrb, E_RUNTIME_ERROR, "scheduler lock overflow"); + } + mrb->task.scheduler_lock++; + struct mrb_context *original_c = mrb->c; + + /* 2. Create a temporary task */ + mrb_task *t = task_alloc(mrb); + t->priority = 0; /* Highest priority */ + t->status = MRB_TASK_STATUS_DORMANT; + t->reason = MRB_TASK_REASON_NONE; + t->name = mrb_str_new_lit(mrb, "(sync)"); + + /* Initialize task context */ + task_init_context(mrb, t, proc); + + /* Create wrapper object (not registered with GC as we'll free it manually) */ + struct RClass *task_class = mrb_class_get(mrb, "Task"); + mrb_value task_obj = mrb_obj_value(mrb_data_object_alloc(mrb, task_class, t, &mrb_task_type)); + t->self = task_obj; + + /* 3. Move task from DORMANT to READY */ + mrb_task_disable_irq(); + t->status = MRB_TASK_STATUS_READY; + mrb_task_q_insert(mrb, t); + mrb_task_enable_irq(); + + /* 4. Execute the task in a dedicated loop (no context switching) */ + t->status = MRB_TASK_STATUS_RUNNING; + mrb->c = &t->c; + + while (t->c.status != MRB_TASK_STOPPED) { + t->result = mrb_vm_exec(mrb, mrb->c->ci->proc, mrb->c->ci->pc); + } + + /* If there's an unhandled exception after VM stops, save it as result */ + if (mrb->exc) { + t->result = mrb_obj_value(mrb->exc); + } + + /* 5. Get result and clean up */ + mrb_value result = t->result; + if (mrb_obj_ptr(result) == mrb->exc) { + mrb->exc = NULL; /* Clear exception */ + } + + /* 6. Free the temporary task's resources */ + mrb_task_disable_irq(); + mrb_task_q_delete(mrb, t); + mrb_task_enable_irq(); + + /* Prevent double-free: clear Data object's type before freeing task */ + DATA_TYPE(task_obj) = NULL; + + /* Free context resources directly (bypass GC since we own this task) */ + if (t->c.stbase) { + mrb_free(mrb, t->c.stbase); + t->c.stbase = NULL; + } + if (t->c.cibase) { + mrb_free(mrb, t->c.cibase); + t->c.cibase = NULL; + } + mrb_free(mrb, t); + + /* 7. Restore context and unlock */ + mrb->c = original_c; + mrb->task.scheduler_lock--; + + mrb_gc_arena_restore(mrb, ai); + mrb_gc_protect(mrb, result); + + return result; +} + +/* + * Task.tick class method + */ +static mrb_value +mrb_task_s_tick(mrb_state *mrb, mrb_value self) +{ + return mrb_int_value(mrb, tick_ * MRB_TICK_UNIT); +} + +/* + * Create a task from a proc + * This is called from mrc_create_task() in mrc_utils.c + */ +MRB_API mrb_value +mrb_create_task(mrb_state *mrb, struct RProc *proc, mrb_value name, mrb_value priority, mrb_value top_self) +{ + task_check_scheduler_lock(mrb); + + /* Validate/default priority */ + mrb_int prio = 128; /* Default priority */ + if (!mrb_nil_p(priority)) { + if (!mrb_integer_p(priority)) { + mrb_raise(mrb, E_TYPE_ERROR, "priority must be an Integer"); + } + prio = mrb_integer(priority); + if (prio < 0 || prio > 255) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "priority must be 0-255"); + } + } + + /* Validate/default name */ + mrb_value name_val = mrb_nil_p(name) ? mrb_str_new_lit(mrb, "(noname)") : name; + + mrb_task *t = task_create_common(mrb, proc, name_val, (uint8_t)prio); + + /* Set top_self if provided */ + if (!mrb_nil_p(top_self)) { + t->c.ci->stack[0] = top_self; + } + + return t->self; +} + +/* + * Internal: Suspend a task (no validation, no scheduler_lock check) + */ +static void +suspend_task_internal(mrb_state *mrb, mrb_task *t) +{ + /* + * WAITING task should also be suspended: + * Suspend trigger may occur while the task is sleeping (WAITING). + * DORMANT task should also be suspended: + * e.g., IRB in PicoRuby suspends a DORMANT task to use it again. + */ + if (t->status == MRB_TASK_STATUS_SUSPENDED) return; + + /* + * Determine if context switch is needed BEFORE changing state. + * Context switch is needed when suspending a RUNNING task or + * the current ready task. + */ + mrb_bool need_switch = (t == q_ready_ || t->status == MRB_TASK_STATUS_RUNNING); + + task_change_state(mrb, t, MRB_TASK_STATUS_SUSPENDED); + + if (need_switch) { + switching_ = TRUE; + } +} + +/* + * Suspend a task + */ +MRB_API void +mrb_suspend_task(mrb_state *mrb, mrb_value task) +{ + task_check_scheduler_lock(mrb); + + mrb_task *t = (mrb_task*)mrb_data_check_get_ptr(mrb, task, &mrb_task_type); + if (!t) return; + + suspend_task_internal(mrb, t); +} + +/* + * Internal: Resume a task (no validation, no scheduler_lock check) + */ +static void +resume_task_internal(mrb_state *mrb, mrb_task *t) +{ + if (t->status != MRB_TASK_STATUS_SUSPENDED) return; + + /* Determine target state based on reason */ + uint8_t target_status = (t->reason == MRB_TASK_REASON_NONE) ? + MRB_TASK_STATUS_READY : MRB_TASK_STATUS_WAITING; + + task_change_state(mrb, t, target_status); + + /* Trigger context switch if resumed task has higher priority */ + if (target_status == MRB_TASK_STATUS_READY && q_ready_ && + q_ready_->status == MRB_TASK_STATUS_RUNNING) { + if (t->priority < q_ready_->priority) { + switching_ = TRUE; + } + } + + /* Update wakeup_tick if task has sleep reason. + * + * Two fixes here vs the original: + * - The UINT32_MAX sentinel case (see comments in + * sleep_us_impl). + * - The read-modify-write on wakeup_tick_ races with + * mrb_tick, which also rewrites this field. sleep_us_impl + * already wraps its update in the IRQ pair; we need the + * same here to match the locking discipline. */ + if (t->reason == MRB_TASK_REASON_SLEEP) { + mrb_task_disable_irq(); + if (wakeup_tick_ == UINT32_MAX || + (int32_t)(t->wait.wakeup_tick - wakeup_tick_) < 0) { + wakeup_tick_ = t->wait.wakeup_tick; + } + mrb_task_enable_irq(); + } +} + +/* + * Resume a task + */ +MRB_API void +mrb_resume_task(mrb_state *mrb, mrb_value task) +{ + task_check_scheduler_lock(mrb); + + mrb_task *t = (mrb_task*)mrb_data_check_get_ptr(mrb, task, &mrb_task_type); + if (!t) return; + + resume_task_internal(mrb, t); +} + +/* + * Internal: Terminate a task (no validation, no scheduler_lock check) + */ +static void +terminate_task_internal(mrb_state *mrb, mrb_task *t) +{ + if (t->status == MRB_TASK_STATUS_DORMANT) return; + + mrb_task_disable_irq(); + mrb_task_q_delete(mrb, t); + t->status = MRB_TASK_STATUS_DORMANT; + t->c.status = MRB_TASK_STOPPED; + mrb_task_q_insert(mrb, t); + mrb_task_enable_irq(); + + wake_up_join_waiters(mrb, t); + + /* If terminating self, trigger context switch */ + if (t == q_ready_) { + switching_ = TRUE; + } +} + +/* + * Terminate a task + */ +MRB_API void +mrb_terminate_task(mrb_state *mrb, mrb_value task) +{ + task_check_scheduler_lock(mrb); + + mrb_task *t = (mrb_task*)mrb_data_check_get_ptr(mrb, task, &mrb_task_type); + if (!t) return; + + terminate_task_internal(mrb, t); +} + +/* + * Stop a task (mark as stopped but don't move to dormant) + */ +MRB_API mrb_bool +mrb_stop_task(mrb_state *mrb, mrb_value task) +{ + task_check_scheduler_lock(mrb); + + mrb_task *t = (mrb_task*)mrb_data_check_get_ptr(mrb, task, &mrb_task_type); + if (!t) return FALSE; + + if (t->c.status == MRB_TASK_STOPPED) { + return FALSE; /* Already stopped */ + } + t->c.status = MRB_TASK_STOPPED; + return TRUE; +} + +/* + * Get task result value + */ +MRB_API mrb_value +mrb_task_value(mrb_state *mrb, mrb_value task) +{ + mrb_task *t = (mrb_task*)mrb_data_check_get_ptr(mrb, task, &mrb_task_type); + if (!t) return mrb_nil_value(); + + return t->result; +} + +/* + * Initialize task context with a new proc + */ +MRB_API void +mrb_task_init_context(mrb_state *mrb, mrb_value task, struct RProc *proc) +{ + task_check_scheduler_lock(mrb); + + mrb_task *t = (mrb_task*)mrb_data_check_get_ptr(mrb, task, &mrb_task_type); + if (!t) return; + + struct mrb_context *c = &t->c; + + /* Cleanup existing context if any */ + if (c->stbase) { + mrb_free(mrb, c->stbase); + c->stbase = NULL; + } + if (c->cibase) { + mrb_free(mrb, c->cibase); + c->cibase = NULL; + } + + /* Re-initialize context */ + task_init_context(mrb, t, proc); +} + +/* + * Reset task context to initial state + */ +MRB_API void +mrb_task_reset_context(mrb_state *mrb, mrb_value task) +{ + task_check_scheduler_lock(mrb); + + mrb_task *t = (mrb_task*)mrb_data_check_get_ptr(mrb, task, &mrb_task_type); + if (!t) return; + + struct mrb_context *c = &t->c; + c->ci = c->cibase; + c->status = MRB_TASK_CREATED; + if (c->ci) { + mrb_vm_ci_target_class_set(c->ci, mrb->object_class); + } +} + +/* + * Set proc for task + */ +MRB_API void +mrb_task_proc_set(mrb_state *mrb, mrb_value task, struct RProc *proc) +{ + task_check_scheduler_lock(mrb); + + mrb_task *t = (mrb_task*)mrb_data_check_get_ptr(mrb, task, &mrb_task_type); + if (!t) return; + + /* Handle environment resize if needed */ + if (t->c.cibase && t->c.cibase->u.env) { + struct REnv *e = mrb_vm_ci_env(t->c.cibase); + if (e && MRB_ENV_LEN(e) < proc->body.irep->nlocals) { + MRB_ENV_SET_LEN(e, proc->body.irep->nlocals); + } + } + + if (t->c.ci) { + mrb_vm_ci_proc_set(t->c.ci, proc); + } +} + +/* + * Initialization + */ + +void +mrb_mruby_task_gem_init(mrb_state *mrb) +{ + struct RClass *task_class; + + /* Initialize HAL (timer and interrupts) */ + mrb_hal_task_init(mrb); + + /* Initialize main task to NULL and scheduler_lock to 0 */ + mrb->task.main_task = NULL; + mrb->task.scheduler_lock = 0; + mrb->task.loop_running = FALSE; + mrb->task.exception_as_result = FALSE; + + task_class = mrb_define_class_id(mrb, MRB_SYM(Task), mrb->object_class); + MRB_SET_INSTANCE_TT(task_class, MRB_TT_DATA); + + /* Task::Error - base error class for task synchronization errors */ + mrb_define_class_under_id(mrb, task_class, MRB_SYM(Error), mrb->eStandardError_class); + + /* Task::Queue */ + mrb_init_task_queue(mrb, task_class); + + /* Class methods */ + mrb_define_class_method_id(mrb, task_class, MRB_SYM(new), mrb_task_s_new, MRB_ARGS_KEY(2,0)|MRB_ARGS_BLOCK()); + mrb_define_class_method_id(mrb, task_class, MRB_SYM(current), mrb_task_s_current, MRB_ARGS_NONE()); + mrb_define_class_method_id(mrb, task_class, MRB_SYM(list), mrb_task_s_list, MRB_ARGS_NONE()); + mrb_define_class_method_id(mrb, task_class, MRB_SYM(pass), mrb_task_s_pass, MRB_ARGS_NONE()); + mrb_define_class_method_id(mrb, task_class, MRB_SYM(stat), mrb_task_s_stat, MRB_ARGS_NONE()); + mrb_define_class_method_id(mrb, task_class, MRB_SYM(get), mrb_task_s_get, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, task_class, MRB_SYM(run), mrb_task_s_run, MRB_ARGS_NONE()); + mrb_define_class_method_id(mrb, task_class, MRB_SYM(tick), mrb_task_s_tick, MRB_ARGS_NONE()); + + /* Instance methods */ + mrb_define_method_id(mrb, task_class, MRB_SYM(status), mrb_task_status, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, task_class, MRB_SYM(inspect), mrb_task_inspect, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, task_class, MRB_SYM(name), mrb_task_name, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, task_class, MRB_SYM_E(name), mrb_task_set_name, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, task_class, MRB_SYM(priority), mrb_task_priority, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, task_class, MRB_SYM_E(priority), mrb_task_set_priority, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, task_class, MRB_SYM(suspend), mrb_task_suspend, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, task_class, MRB_SYM(resume), mrb_task_resume, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, task_class, MRB_SYM(terminate), mrb_task_terminate, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, task_class, MRB_SYM(join), mrb_task_join, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, task_class, MRB_SYM(value), mrb_task_value, MRB_ARGS_NONE()); + + /* Kernel methods (module functions like CRuby) + * Note: sleep and usleep override mruby-sleep's implementation to be task-aware + * (cooperative sleep within tasks, blocking sleep otherwise) + */ + mrb_define_module_function_id(mrb, mrb->kernel_module, MRB_SYM(sleep), mrb_f_sleep, MRB_ARGS_OPT(1)); + mrb_define_module_function_id(mrb, mrb->kernel_module, MRB_SYM(usleep), mrb_f_usleep, MRB_ARGS_REQ(1)); + mrb_define_module_function_id(mrb, mrb->kernel_module, MRB_SYM(sleep_ms), mrb_f_sleep_ms, MRB_ARGS_REQ(1)); +} + +void +mrb_mruby_task_gem_final(mrb_state *mrb) +{ + /* Clear main task pointer - GC will handle freeing the object */ + if (mrb->task.main_task) { + mrb_gc_unregister(mrb, mrb->task.main_task->self); + mrb->task.main_task = NULL; + } + + mrb_hal_task_final(mrb); +} diff --git a/mrbgems/mruby-task/src/task_queue.c b/mrbgems/mruby-task/src/task_queue.c new file mode 100644 index 0000000000..11175ff6fc --- /dev/null +++ b/mrbgems/mruby-task/src/task_queue.c @@ -0,0 +1,253 @@ +/* +** task_queue.c - Task::Queue implementation +*/ + +#include +#include +#include +#include +#include +#include +#include "task.h" + +typedef struct mrb_task_queue { + uint8_t closed; +} mrb_task_queue; + +static void +mrb_task_queue_free(mrb_state *mrb, void *ptr) +{ + mrb_free(mrb, ptr); +} + +static const struct mrb_data_type mrb_task_queue_type = { + "Task::Queue", mrb_task_queue_free, +}; + +static mrb_value wait_retry_; +static struct RClass *task_error_class_; + +/* Wake the highest-priority task waiting on this queue */ +static void +queue_wake_one_waiter(mrb_state *mrb, mrb_task_queue *q) +{ + mrb_task_disable_irq(); + mrb_task *curr = q_waiting_; + while (curr) { + mrb_task *next = curr->next; + if (curr->reason == MRB_TASK_REASON_QUEUE && curr->wait.queue == q) { + mrb_task_q_delete(mrb, curr); + curr->status = MRB_TASK_STATUS_READY; + curr->reason = MRB_TASK_REASON_NONE; + curr->wait.queue = NULL; + mrb_task_q_insert(mrb, curr); + switching_ = TRUE; + break; + } + curr = next; + } + mrb_task_enable_irq(); +} + +/* Wake all tasks waiting on this queue (used by close) */ +static void +queue_wake_all_waiters(mrb_state *mrb, mrb_task_queue *q) +{ + mrb_bool woke_any = FALSE; + mrb_task_disable_irq(); + mrb_task *curr = q_waiting_; + while (curr) { + mrb_task *next = curr->next; + if (curr->reason == MRB_TASK_REASON_QUEUE && curr->wait.queue == q) { + mrb_task_q_delete(mrb, curr); + curr->status = MRB_TASK_STATUS_READY; + curr->reason = MRB_TASK_REASON_NONE; + curr->wait.queue = NULL; + mrb_task_q_insert(mrb, curr); + woke_any = TRUE; + } + curr = next; + } + if (woke_any) { + switching_ = TRUE; + } + mrb_task_enable_irq(); +} + +static mrb_value +queue_initialize(mrb_state *mrb, mrb_value self) +{ + mrb_task_queue *q = (mrb_task_queue*)mrb_malloc(mrb, sizeof(mrb_task_queue)); + q->closed = 0; + mrb_data_init(self, q, &mrb_task_queue_type); + mrb_iv_set(mrb, self, mrb_intern_lit(mrb, "@items"), mrb_ary_new(mrb)); + return self; +} + +static mrb_value +queue_push(mrb_state *mrb, mrb_value self) +{ + mrb_value obj; + mrb_get_args(mrb, "o", &obj); + + mrb_task_queue *q = (mrb_task_queue*)mrb_data_get_ptr(mrb, self, &mrb_task_queue_type); + if (!q) mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid queue"); + if (q->closed) mrb_raise(mrb, task_error_class_, "queue closed"); + + mrb_value items = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@items")); + mrb_ary_push(mrb, items, obj); + queue_wake_one_waiter(mrb, q); + return self; +} + +/* + * __pop_try: try to pop one item. Returns: + * - the item if available + * - nil if closed and empty + * - raises Task::Error if non_block and empty + * - Task::Queue::WAIT_RETRY sentinel if the current task was put to WAITING + * + * Ruby-level pop loops on WAIT_RETRY. + */ +static mrb_value +queue_pop_try(mrb_state *mrb, mrb_value self) +{ + mrb_bool non_block = FALSE; + mrb_get_args(mrb, "|b", &non_block); + + mrb_task_queue *q = (mrb_task_queue*)mrb_data_get_ptr(mrb, self, &mrb_task_queue_type); + if (!q) mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid queue"); + + mrb_value items = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@items")); + + /* Item available - return it */ + if (RARRAY_LEN(items) > 0) { + return mrb_ary_shift(mrb, items); + } + + /* Closed and empty */ + if (q->closed) { + return mrb_nil_value(); + } + + /* Non-blocking and empty */ + if (non_block) { + mrb_raise(mrb, task_error_class_, "queue empty"); + } + + /* Blocking pop only works inside a task */ + if (mrb->c == mrb->root_c) { + mrb_raise(mrb, E_RUNTIME_ERROR, "blocking pop can only be called from within a task"); + } + + /* Blocking pop requires the scheduler to be running */ + task_check_scheduler_lock(mrb); + + /* Guard against yielding from inside a C function boundary */ + mrb_callinfo *ci; + for (ci = mrb->c->ci; ci >= mrb->c->cibase; ci--) { + if (ci->cci > 0) { + mrb_raise(mrb, E_RUNTIME_ERROR, "blocking pop cannot be called from within a C function boundary"); + } + } + + /* Move current task to WAITING */ + mrb_task *current = MRB2TASK(mrb); + mrb_task_disable_irq(); + mrb_task_q_delete(mrb, current); + current->status = MRB_TASK_STATUS_WAITING; + current->reason = MRB_TASK_REASON_QUEUE; + current->wait.queue = q; + mrb_task_q_insert(mrb, current); + mrb_task_enable_irq(); + switching_ = TRUE; + + /* Return sentinel; the Ruby pop loop will retry after wakeup */ + return wait_retry_; +} + +static mrb_value +queue_size(mrb_state *mrb, mrb_value self) +{ + mrb_value items = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@items")); + return mrb_int_value(mrb, RARRAY_LEN(items)); +} + +static mrb_value +queue_empty_p(mrb_state *mrb, mrb_value self) +{ + mrb_value items = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@items")); + return mrb_bool_value(RARRAY_LEN(items) == 0); +} + +static mrb_value +queue_clear(mrb_state *mrb, mrb_value self) +{ + mrb_value items = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@items")); + mrb_ary_clear(mrb, items); + return self; +} + +static mrb_value +queue_close(mrb_state *mrb, mrb_value self) +{ + mrb_task_queue *q = (mrb_task_queue*)mrb_data_get_ptr(mrb, self, &mrb_task_queue_type); + if (!q) mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid queue"); + if (!q->closed) { + q->closed = 1; + queue_wake_all_waiters(mrb, q); + } + return self; +} + +static mrb_value +queue_closed_p(mrb_state *mrb, mrb_value self) +{ + mrb_task_queue *q = (mrb_task_queue*)mrb_data_get_ptr(mrb, self, &mrb_task_queue_type); + if (!q) mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid queue"); + return mrb_bool_value(q->closed); +} + +static mrb_value +queue_num_waiting(mrb_state *mrb, mrb_value self) +{ + mrb_task_queue *q = (mrb_task_queue*)mrb_data_get_ptr(mrb, self, &mrb_task_queue_type); + if (!q) mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid queue"); + uint32_t count = 0; + mrb_task_disable_irq(); + mrb_task *curr = q_waiting_; + while (curr) { + if (curr->reason == MRB_TASK_REASON_QUEUE && curr->wait.queue == q) { + count++; + } + curr = curr->next; + } + mrb_task_enable_irq(); + return mrb_int_value(mrb, (mrb_int)count); +} + +void +mrb_init_task_queue(mrb_state *mrb, struct RClass *task_class) +{ + struct RClass *queue_class; + + queue_class = mrb_define_class_under_id(mrb, task_class, MRB_SYM(Queue), mrb->object_class); + MRB_SET_INSTANCE_TT(queue_class, MRB_TT_DATA); + + task_error_class_ = mrb_class_get_under_id(mrb, task_class, MRB_SYM(Error)); + + /* Allocate and store WAIT_RETRY sentinel (rooted by the class constant table) */ + wait_retry_ = mrb_obj_new(mrb, mrb->object_class, 0, NULL); + mrb_define_const_id(mrb, queue_class, MRB_SYM(WAIT_RETRY), wait_retry_); + + mrb_define_method_id(mrb, queue_class, MRB_SYM(initialize), queue_initialize, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, queue_class, MRB_SYM(__push), queue_push, MRB_ARGS_REQ(1)); + mrb_define_method_id(mrb, queue_class, MRB_SYM(__pop_try), queue_pop_try, MRB_ARGS_OPT(1)); + mrb_define_method_id(mrb, queue_class, MRB_SYM(size), queue_size, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, queue_class, MRB_SYM(length), queue_size, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, queue_class, MRB_SYM_Q(empty), queue_empty_p, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, queue_class, MRB_SYM(clear), queue_clear, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, queue_class, MRB_SYM(close), queue_close, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, queue_class, MRB_SYM_Q(closed), queue_closed_p, MRB_ARGS_NONE()); + mrb_define_method_id(mrb, queue_class, MRB_SYM(num_waiting), queue_num_waiting, MRB_ARGS_NONE()); +} diff --git a/mrbgems/mruby-task/test/queue.rb b/mrbgems/mruby-task/test/queue.rb new file mode 100644 index 0000000000..33c5f99e2f --- /dev/null +++ b/mrbgems/mruby-task/test/queue.rb @@ -0,0 +1,163 @@ +# Task::Queue tests + +assert("Task::Queue.new creates a queue") do + q = Task::Queue.new + assert_kind_of Task::Queue, q +end + +assert("Task::Queue push and non-blocking pop return item in FIFO order") do + q = Task::Queue.new + q.push(1) + q.push(2) + q.push(3) + assert_equal 1, q.pop(true) + assert_equal 2, q.pop(true) + assert_equal 3, q.pop(true) +end + +assert("Task::Queue << alias works") do + q = Task::Queue.new + q << :a + q << :b + assert_equal :a, q.pop(true) + assert_equal :b, q.pop(true) +end + +assert("Task::Queue enq/deq aliases work") do + q = Task::Queue.new + q.enq(10) + assert_equal 10, q.deq(true) +end + +assert("Task::Queue shift alias works") do + q = Task::Queue.new + q.push(:x) + assert_equal :x, q.shift(true) +end + +assert("Task::Queue size and length") do + q = Task::Queue.new + assert_equal 0, q.size + assert_equal 0, q.length + q.push(1) + assert_equal 1, q.size + q.push(2) + assert_equal 2, q.length + q.pop(true) + assert_equal 1, q.size +end + +assert("Task::Queue empty?") do + q = Task::Queue.new + assert_true q.empty? + q.push(1) + assert_false q.empty? + q.pop(true) + assert_true q.empty? +end + +assert("Task::Queue clear") do + q = Task::Queue.new + q.push(1) + q.push(2) + q.clear + assert_true q.empty? + assert_equal 0, q.size +end + +assert("Task::Queue pop(true) raises Task::Error when empty") do + q = Task::Queue.new + assert_raise(Task::Error) { q.pop(true) } +end + +assert("Task::Queue close and closed?") do + q = Task::Queue.new + assert_false q.closed? + q.close + assert_true q.closed? +end + +assert("Task::Queue push raises Task::Error after close") do + q = Task::Queue.new + q.close + assert_raise(Task::Error) { q.push(1) } +end + +assert("Task::Queue pop(true) returns nil when closed and empty") do + q = Task::Queue.new + q.close + assert_equal nil, q.pop(true) +end + +assert("Task::Queue pops remaining items after close, then nil") do + q = Task::Queue.new + q.push(1) + q.push(2) + q.close + assert_equal 1, q.pop(true) + assert_equal 2, q.pop(true) + assert_equal nil, q.pop(true) +end + +assert("Task::Queue double close is no-op") do + q = Task::Queue.new + q.close + assert_nothing_raised { q.close } + assert_true q.closed? +end + +assert("Task::Queue num_waiting is 0 with no blocked tasks") do + q = Task::Queue.new + assert_equal 0, q.num_waiting +end + +assert("Task::Queue blocking pop wakes on push") do + q = Task::Queue.new + results = [] + + Task.new { results << q.pop } + Task.new { q.push(99) } + Task.run + + assert_equal [99], results +end + +assert("Task::Queue multiple producers and consumers") do + q = Task::Queue.new + received = [] + + Task.new { q.push(1) } + Task.new { q.push(2) } + Task.new { q.push(3) } + Task.new { received << q.pop } + Task.new { received << q.pop } + Task.new { received << q.pop } + Task.run + + assert_equal [1, 2, 3], received.sort +end + +assert("Task::Queue blocking pop returns nil when queue is closed") do + q = Task::Queue.new + results = [] + + Task.new { results << q.pop } + Task.new { q.close } + Task.run + + assert_equal [nil], results +end + +assert("Task::Queue num_waiting reflects blocked task count") do + q = Task::Queue.new + counts = [] + + Task.new { q.pop } + Task.new do + counts << q.num_waiting # consumer should be waiting + q.push(:done) + end + Task.run + + assert_equal [1], counts +end diff --git a/mrbgems/mruby-task/test/task.rb b/mrbgems/mruby-task/test/task.rb new file mode 100644 index 0000000000..5bbe844e30 --- /dev/null +++ b/mrbgems/mruby-task/test/task.rb @@ -0,0 +1,210 @@ +# Sleep/usleep tests (from mruby-sleep) +# Note: Use minimal sleep times to avoid test slowdown + +assert("sleep accepts non-negative values") do + assert_nothing_raised { sleep(0) } +end + +assert("sleep accepts non-negative float values") do + skip unless Object.const_defined?(:Float) + assert_nothing_raised { sleep(0.0) } + assert_nothing_raised { sleep(-0.0) } +end + +assert("sleep raises ArgumentError for negative integer") do + assert_raise(ArgumentError) { sleep(-1) } +end + +assert("sleep raises ArgumentError for negative float") do + skip unless Object.const_defined?(:Float) + assert_raise(ArgumentError) { sleep(-0.1) } +end + +assert("usleep accepts non-negative values") do + assert_nothing_raised { usleep(0) } +end + +assert("usleep raises ArgumentError for negative value") do + assert_raise(ArgumentError) { usleep(-100) } +end + +# Task creation tests + +assert("Task.new creates a task") do + task = Task.new { } + assert_kind_of Task, task +end + +assert("Task.new accepts name") do + task = Task.new(name: "test") { } + assert_equal "test", task.name +end + +assert("Task.new accepts priority") do + task = Task.new(priority: 100) { } + assert_equal 100, task.priority +end + +assert("Task.new raises without block") do + assert_raise(ArgumentError) { Task.new } +end + +# Task state tests + +assert("Task#status returns symbol") do + task = Task.new { } + status = task.status + assert_true [:READY, :RUNNING, :WAITING, :SUSPENDED, :DORMANT, :UNKNOWN].include?(status) +end + +assert("new task has READY status") do + task = Task.new { } + assert_equal :READY, task.status +end + +assert("Task#inspect returns formatted string") do + task = Task.new(name: "test") { } + inspect_str = task.inspect + assert_kind_of String, inspect_str + assert_true inspect_str.include?("Task") + assert_true inspect_str.include?("test") +end + +assert("Task#inspect shows status") do + task = Task.new { } + inspect_str = task.inspect + assert_true inspect_str.include?("READY") || inspect_str.include?("DORMANT") +end + +# Task control methods + +assert("Task#suspend doesn't raise") do + task = Task.new { } + assert_nothing_raised { task.suspend } + # Clean up: a suspended task left in q_suspended_ keeps a later + # Task.run from terminating (the scheduler idles waiting on it + # instead of exiting). + task.terminate +end + +assert("Task#resume doesn't raise") do + task = Task.new { } + assert_nothing_raised { task.resume } +end + +assert("Task#terminate doesn't raise") do + task = Task.new { } + assert_nothing_raised { task.terminate } +end + +# Task.current tests + +assert("Task.current in root context") do + # In root context, Task.current might be nil or a special value + current = Task.current + assert_true current.nil? || current.kind_of?(Task) +end + +# Task.pass tests + +assert("Task.pass yields control") do + assert_nothing_raised { Task.pass } +end + +# Task.stat tests + +assert("Task.stat returns hash") do + stat = Task.stat + assert_kind_of Hash, stat +end + +assert("Task.stat includes tick") do + stat = Task.stat + assert_true stat.has_key?(:tick) + assert_kind_of Integer, stat[:tick] +end + +assert("Task.stat includes wakeup_tick") do + stat = Task.stat + assert_true stat.has_key?(:wakeup_tick) + assert_kind_of Integer, stat[:wakeup_tick] +end + +assert("Task.stat includes queue counts") do + stat = Task.stat + [:ready, :waiting, :suspended, :dormant].each do |queue| + assert_true stat.has_key?(queue), "Missing queue: #{queue}" + assert_kind_of Hash, stat[queue] + assert_true stat[queue].has_key?(:count) + assert_kind_of Integer, stat[queue][:count] + assert_true stat[queue].has_key?(:tasks) + assert_kind_of Array, stat[queue][:tasks] + end +end + +assert("Task.stat tracks task counts") do + stat_before = Task.stat + ready_before = stat_before[:ready][:count] + + task1 = Task.new { sleep 0 } + task2 = Task.new { sleep 0 } + + stat_after = Task.stat + ready_after = stat_after[:ready][:count] + + assert_equal ready_before + 2, ready_after +end + +# Priority tests + +assert("Task.new accepts different priorities") do + low = Task.new(priority: 200) { } + high = Task.new(priority: 50) { } + med = Task.new(priority: 128) { } + + assert_equal 200, low.priority + assert_equal 50, high.priority + assert_equal 128, med.priority +end + +# Name handling + +assert("Task with string name") do + task = Task.new(name: "string_name") { } + assert_equal "string_name", task.name +end + +assert("Task without name returns (noname)") do + task = Task.new { } + assert_equal "(noname)", task.name +end + +# Edge cases + +assert("Task.new with block doesn't execute immediately") do + executed = false + task = Task.new { executed = true } + # Block should not execute until scheduler runs + assert_false executed +end + +assert("Task.run inside Task.run is a noop") do + assert_nothing_raised do + Task.new { Task.run } + Task.run + end +end + +assert("Task#value returns exception object for unhandled task errors") do + child = nil + + Task.new do + child = Task.new { raise "boom" } + end + + Task.run + + result = child.value + assert_kind_of RuntimeError, result + assert_equal "boom", result.message +end diff --git a/mrbgems/mruby-task/tools/mruby_task_demo/mruby_task_demo.c b/mrbgems/mruby-task/tools/mruby_task_demo/mruby_task_demo.c new file mode 100644 index 0000000000..a8e7126394 --- /dev/null +++ b/mrbgems/mruby-task/tools/mruby_task_demo/mruby_task_demo.c @@ -0,0 +1,329 @@ +/* +** mruby-task-demo.c +** +** Three-thread test of the mruby-task GLib HAL. Each thread owns its +** own mrb_state and its own GMainContext (the HAL is thread-local and +** picks up the thread-default context at mrb_open time, then spawns +** its own ticker thread internally). +** +** T1 pure foreign-loop driver. Tasks are registered, then +** g_main_loop_run is what dispatches the scheduler via the +** HAL's vm_run_src. No Task.run anywhere. +** +** T2 mix. Phase 1 registers tasks and calls Task.run to drain +** them synchronously. Phase 2 registers more tasks and lets +** g_main_loop_run drive them. Exercises both drivers on the +** same mrb_state in sequence. +** +** T3 Task.run only. Registers tasks and calls Task.run. The demo +** thread never enters g_main_loop_run -- Task.run is the +** scheduler driver, and the HAL's idle hook iterates vm_ctx +** from inside Task.run so the ticker's cross-thread wakes +** still get dispatched. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static gint64 start_us; + +static void +log_line(const char *msg) +{ + gint64 ms = (g_get_monotonic_time() - start_us) / 1000; + printf("[t=%5" PRId64 " ms] %s\n", ms, msg); + fflush(stdout); +} + +static mrb_value +rb_log(mrb_state *mrb, mrb_value self) +{ + const char *msg; + (void)self; + mrb_get_args(mrb, "z", &msg); + log_line(msg); + return mrb_nil_value(); +} + +static void +run_ruby(mrb_state *mrb, const char *code) +{ + mrb_load_string(mrb, code); + if (mrb->exc) { + mrb_value exc = mrb_obj_value(mrb->exc); + mrb_value str = mrb_funcall(mrb, exc, "to_s", 0); + fprintf(stderr, "Ruby error: %s\n", RSTRING_PTR(str)); + fflush(stderr); + mrb->exc = NULL; + } +} + +static void +banner(const char *msg) +{ + printf("===== %s =====\n", msg); + fflush(stdout); +} + +/* + * T1 -- pure foreign-loop driver. Runs on the main thread (no + * separate GThread for T1; the main thread is the foreign loop). + * + * Pulse task with mixed sleep styles, three staggered sleepers, and + * spinner + stopper for timeslice preemption. g_main_loop_run is the + * only scheduler driver. + */ +static void +run_glib_only(void) +{ + GMainContext *ctx; + GMainLoop *loop; + GSource *timeout; + mrb_state *mrb; + + ctx = g_main_context_new(); + g_main_context_push_thread_default(ctx); + loop = g_main_loop_new(ctx, FALSE); + + mrb = mrb_open(); + mrb_define_method(mrb, mrb->object_class, "log", rb_log, MRB_ARGS_REQ(1)); + + banner("T1 (glib-only): pulse + 3 staggered sleepers + spinner/stopper"); + + run_ruby(mrb, + "$t1_done = false\n" + "Task.new(name: 'T1.pulse') {\n" + " log 'T1.pulse: usleep 8000 x5'\n" + " 5.times { usleep 8000; log 'T1.pulse: micro' }\n" + " log 'T1.pulse: sleep_ms 120 x2'\n" + " 2.times { sleep_ms 120; log 'T1.pulse: chunk' }\n" + " log 'T1.pulse: sleep 0.3'\n" + " sleep 0.3\n" + " log 'T1.pulse: long done'\n" + "}\n" + "[30, 60, 90].each do |ms|\n" + " Task.new(name: 'T1.s' + ms.to_s) {\n" + " log 'T1.sleeper' + ms.to_s + ': sleeping'\n" + " sleep_ms ms\n" + " log 'T1.sleeper' + ms.to_s + ': woke'\n" + " }\n" + "end\n" + "Task.new(name: 'T1.spinner', priority: 200) {\n" + " log 'T1.spinner: entering tight loop'\n" + " loops = 0\n" + " loop {\n" + " loops += 1\n" + " break if $t1_done\n" + " break if loops > 200_000_000\n" + " }\n" + " log 'T1.spinner: exit loops=' + loops.to_s + ' done=' + $t1_done.to_s\n" + "}\n" + "Task.new(name: 'T1.stopper', priority: 50) {\n" + " log 'T1.stopper: sleeping 100 ms'\n" + " sleep 0.1\n" + " log 'T1.stopper: setting $t1_done'\n" + " $t1_done = true\n" + "}\n" + ); + + timeout = g_timeout_source_new(700); + g_source_set_callback(timeout, (GSourceFunc)g_main_loop_quit, loop, NULL); + g_source_attach(timeout, ctx); + g_source_unref(timeout); + + log_line("T1: entering g_main_loop_run (700 ms cap)"); + g_main_loop_run(loop); + log_line("T1: g_main_loop_run returned"); + + mrb_close(mrb); + g_main_loop_unref(loop); + g_main_context_pop_thread_default(ctx); + g_main_context_unref(ctx); +} + +/* + * T2 -- mixed driver. + * + * Phase 1: register a small task set, call Task.run, which blocks + * until those tasks drain. Phase 2: register more tasks and let + * g_main_loop_run drive them. + */ +static gpointer +thread_glib_and_taskrun(gpointer data) +{ + GMainContext *ctx; + GMainLoop *loop; + GSource *timeout; + mrb_state *mrb; + (void)data; + + ctx = g_main_context_new(); + g_main_context_push_thread_default(ctx); + loop = g_main_loop_new(ctx, FALSE); + + mrb = mrb_open(); + mrb_define_method(mrb, mrb->object_class, "log", rb_log, MRB_ARGS_REQ(1)); + + banner("T2 (mix): phase 1 = yield + suspend/resume, drained by Task.run"); + + run_ruby(mrb, + "victim = Task.new(name: 'T2.victim') {\n" + " log 'T2.victim: sleeping 200 ms'\n" + " sleep 0.2\n" + " log 'T2.victim: woke'\n" + "}\n" + "Task.new(name: 'T2.controller', priority: 50) {\n" + " sleep 0.05\n" + " log 'T2.controller: suspending victim (was ' + victim.status.to_s + ')'\n" + " victim.suspend\n" + " log 'T2.controller: victim now ' + victim.status.to_s\n" + " sleep 0.1\n" + " log 'T2.controller: resuming victim (was ' + victim.status.to_s + ')'\n" + " victim.resume\n" + " log 'T2.controller: victim now ' + victim.status.to_s\n" + "}\n" + "Task.new(name: 'T2.yieldA', priority: 100) {\n" + " 3.times { |i| log 'T2.yieldA: iter ' + i.to_s; Task.pass }\n" + "}\n" + "Task.new(name: 'T2.yieldB', priority: 100) {\n" + " 3.times { |i| log 'T2.yieldB: iter ' + i.to_s; Task.pass }\n" + "}\n" + "log 'T2: calling Task.run (drains phase 1)'\n" + "Task.run\n" + "log 'T2: Task.run returned'\n" + ); + + banner("T2 (mix): phase 2 = 3 staggered sleepers, driven by g_main_loop_run"); + + run_ruby(mrb, + "[40, 80, 120].each do |ms|\n" + " Task.new(name: 'T2.s' + ms.to_s) {\n" + " log 'T2.sleeper' + ms.to_s + ': sleeping'\n" + " sleep_ms ms\n" + " log 'T2.sleeper' + ms.to_s + ': woke'\n" + " }\n" + "end\n" + "log 'T2: phase 2 registered'\n" + ); + + timeout = g_timeout_source_new(400); + g_source_set_callback(timeout, (GSourceFunc)g_main_loop_quit, loop, NULL); + g_source_attach(timeout, ctx); + g_source_unref(timeout); + + log_line("T2: entering g_main_loop_run (400 ms cap)"); + g_main_loop_run(loop); + log_line("T2: g_main_loop_run returned"); + + mrb_close(mrb); + g_main_loop_unref(loop); + g_main_context_pop_thread_default(ctx); + g_main_context_unref(ctx); + return NULL; +} + +/* + * T3 -- Task.run only. + * + * No g_main_loop_run on the demo thread. Task.run drives the + * scheduler; mrb_hal_task_idle_cpu iterates vm_ctx from inside + * Task.run's idle loop so the ticker's cross-thread set_ready_time + * still wakes the demo thread when sleepers come due. Returns when + * all queues drain. + */ +static gpointer +thread_taskrun_only(gpointer data) +{ + GMainContext *ctx; + mrb_state *mrb; + (void)data; + + ctx = g_main_context_new(); + g_main_context_push_thread_default(ctx); + + mrb = mrb_open(); + mrb_define_method(mrb, mrb->object_class, "log", rb_log, MRB_ARGS_REQ(1)); + + banner("T3 (Task.run only): zombie/executioner + spinner/stopper + sleepers"); + + run_ruby(mrb, + "$t3_done = false\n" + "$t3_zombie_ticks = 0\n" + "zombie = Task.new(name: 'T3.zombie') {\n" + " log 'T3.zombie: alive (will tick every 50 ms forever)'\n" + " loop {\n" + " sleep_ms 50\n" + " $t3_zombie_ticks += 1\n" + " log 'T3.zombie: tick ' + $t3_zombie_ticks.to_s\n" + " }\n" + " log 'T3.zombie: NEVER REACHED'\n" + "}\n" + "Task.new(name: 'T3.executioner', priority: 50) {\n" + " sleep_ms 175\n" + " log 'T3.executioner: terminating zombie (was ' + zombie.status.to_s + ')'\n" + " zombie.terminate\n" + " log 'T3.executioner: zombie is now ' + zombie.status.to_s\n" + "}\n" + "Task.new(name: 'T3.spinner', priority: 200) {\n" + " log 'T3.spinner: entering tight loop'\n" + " loops = 0\n" + " loop {\n" + " loops += 1\n" + " break if $t3_done\n" + " break if loops > 200_000_000\n" + " }\n" + " log 'T3.spinner: exit loops=' + loops.to_s + ' done=' + $t3_done.to_s\n" + "}\n" + "Task.new(name: 'T3.stopper', priority: 50) {\n" + " log 'T3.stopper: sleeping 100 ms'\n" + " sleep 0.1\n" + " log 'T3.stopper: setting $t3_done'\n" + " $t3_done = true\n" + "}\n" + "[20, 40, 60].each do |ms|\n" + " Task.new(name: 'T3.s' + ms.to_s) {\n" + " log 'T3.sleeper' + ms.to_s + ': sleeping'\n" + " sleep_ms ms\n" + " log 'T3.sleeper' + ms.to_s + ': woke'\n" + " }\n" + "end\n" + "log 'T3: calling Task.run'\n" + "Task.run\n" + "log 'T3: Task.run returned (all queues empty)'\n" + ); + + mrb_close(mrb); + g_main_context_pop_thread_default(ctx); + g_main_context_unref(ctx); + return NULL; +} + +int main(int argc, char **argv) +{ + GThread *t2, *t3; + (void)argc; + (void)argv; + + start_us = g_get_monotonic_time(); + + log_line("main: spawning T2 + T3; running T1 (glib-only) on main thread"); + + t2 = g_thread_new("T2.mix", thread_glib_and_taskrun, NULL); + t3 = g_thread_new("T3.taskrun", thread_taskrun_only, NULL); + + run_glib_only(); + + g_thread_join(t2); + g_thread_join(t3); + + log_line("main: T2 and T3 joined"); + printf("All scenarios completed.\n"); + return 0; +} diff --git a/mrbgems/mruby-test-inline-struct/test/inline.c b/mrbgems/mruby-test-inline-struct/test/inline.c index f3940476b8..c92caa5da2 100644 --- a/mrbgems/mruby-test-inline-struct/test/inline.c +++ b/mrbgems/mruby-test-inline-struct/test/inline.c @@ -56,14 +56,13 @@ istruct_test_test_receive(mrb_state *mrb, mrb_value self) static mrb_value istruct_test_test_receive_direct(mrb_state *mrb, mrb_value self) { - mrb_value is; + mrb_value is = mrb_get_arg1(mrb); struct RClass *klass = mrb_class_get(mrb, "InlineStructTest"); - mrb_get_args(mrb, "o", &is); /* if you need to protect istruct retrieval from untrusted code, you need to care about class replacing. See mrbgem/mruby-random/src/random.c for detail */ if (mrb_istruct_p(is) && mrb_obj_is_kind_of(mrb, is, klass)) { - char *ptr = (char*)mrb_istruct_ptr(is);; + char *ptr = (char*)mrb_istruct_ptr(is); return mrb_bool_value(ptr[0] == 's'); } mrb_raise(mrb, E_TYPE_ERROR, "InlineStructTest"); diff --git a/mrbgems/mruby-test-inline-struct/test/inline.rb b/mrbgems/mruby-test-inline-struct/test/inline.rb index f959a17c40..d8d83919ff 100644 --- a/mrbgems/mruby-test-inline-struct/test/inline.rb +++ b/mrbgems/mruby-test-inline-struct/test/inline.rb @@ -98,21 +98,21 @@ def test_ivar_get # 64-bit mode if InlineStructTest.length == 24 - assert('InlineStructTest length [64 bit]') do + assert('InlineStructTest length [64-bit]') do assert_equal InlineStructTest.length, 3 * 8 end end # 32-bit mode if InlineStructTest.length == 12 - assert('InlineStructTest length [32 bit]') do + assert('InlineStructTest length [32-bit]') do assert_equal InlineStructTest.length, 3 * 4 end end # 16-bit mode if InlineStructTest.length == 6 - assert('InlineStructTest length [16 bit]') do + assert('InlineStructTest length [16-bit]') do assert_equal InlineStructTest.length, 3 * 2 end end diff --git a/mrbgems/mruby-test/README.md b/mrbgems/mruby-test/README.md index 86b1ce05ee..7c6647dd9d 100644 --- a/mrbgems/mruby-test/README.md +++ b/mrbgems/mruby-test/README.md @@ -1,5 +1,4 @@ -Running Tests -============= +# Running Tests To run the tests, execute the following from the project's root directory. diff --git a/mrbgems/mruby-test/driver.c b/mrbgems/mruby-test/driver.c index 21e272536d..11ccd0d6d8 100644 --- a/mrbgems/mruby-test/driver.c +++ b/mrbgems/mruby-test/driver.c @@ -52,12 +52,13 @@ t_print(mrb_state *mrb, mrb_value self) { const mrb_value *argv; mrb_int argc; - mrb_int i; mrb_get_args(mrb, "*!", &argv, &argc); - for (i = 0; i < argc; ++i) { + int ai = mrb_gc_arena_save(mrb); + for (mrb_int i = 0; i < argc; i++) { mrb_value s = mrb_obj_as_string(mrb, argv[i]); fwrite(RSTRING_PTR(s), RSTRING_LEN(s), 1, stdout); + mrb_gc_arena_restore(mrb, ai); } fflush(stdout); @@ -76,7 +77,7 @@ str_match_bracket(const char *p, const char *pat_end, if (p == pat_end) return NULL; if (*p == '!' || *p == '^') { negated = TRUE; - ++p; + p++; } while (*p != ']') { @@ -109,7 +110,7 @@ str_match_no_brace_p(const char *pat, mrb_int pat_len, if (p == pat_end) return s == str_end; switch (*p) { case '*': - do { ++p; } while (p != pat_end && *p == '*'); + do { p++; } while (p != pat_end && *p == '*'); if (UNESCAPE(p, pat_end) == pat_end) return TRUE; if (s == str_end) return FALSE; p_tmp = p; @@ -117,15 +118,15 @@ str_match_no_brace_p(const char *pat, mrb_int pat_len, continue; case '?': if (s == str_end) return FALSE; - ++p; - ++s; + p++; + s++; continue; case '[': { const char *t; if (s == str_end) return FALSE; if ((t = str_match_bracket(p+1, pat_end, s, str_end))) { p = t; - ++s; + s++; continue; } goto L_failed; @@ -164,7 +165,7 @@ str_match_p(mrb_state *mrb, int nest = 0; mrb_bool ret = FALSE; - for (; p != pat_end; ++p) { + for (; p != pat_end; p++) { if (*p == '{' && nest++ == 0) lbrace = p; else if (*p == '}' && lbrace && --nest == 0) { rbrace = p; break; } else if (*p == '\\' && ++p == pat_end) break; @@ -172,7 +173,7 @@ str_match_p(mrb_state *mrb, if (lbrace && rbrace) { /* expand brace */ - char *ex_pat = (char *)mrb_malloc(mrb, pat_len-2); /* expanded pattern */ + char *ex_pat = (char*)mrb_malloc(mrb, pat_len-2); /* expanded pattern */ char *ex_p = ex_pat; COPY_AND_INC(ex_p, pat, lbrace-pat); @@ -180,9 +181,9 @@ str_match_p(mrb_state *mrb, while (p < rbrace) { char *orig_ex_p = ex_p; const char *t = ++p; - for (nest = 0; p < rbrace && !(*p == ',' && nest == 0); ++p) { - if (*p == '{') ++nest; - else if (*p == '}') --nest; + for (nest = 0; p < rbrace && !(*p == ',' && nest == 0); p++) { + if (*p == '{') nest++; + else if (*p == '}') nest--; else if (*p == '\\' && ++p == rbrace) break; } COPY_AND_INC(ex_p, t, p-t); @@ -212,17 +213,15 @@ m_str_match_p(mrb_state *mrb, mrb_value self) void mrb_init_test_driver(mrb_state *mrb, mrb_bool verbose) { - struct RClass *krn, *mrbtest; - - krn = mrb->kernel_module; + struct RClass *krn = mrb->kernel_module; mrb_define_method(mrb, krn, "t_print", t_print, MRB_ARGS_ANY()); mrb_define_method(mrb, krn, "_str_match?", m_str_match_p, MRB_ARGS_REQ(2)); - mrbtest = mrb_define_module(mrb, "Mrbtest"); + struct RClass *mrbtest = mrb_define_module(mrb, "Mrbtest"); #ifndef MRB_NO_FLOAT #ifdef MRB_USE_FLOAT32 -#ifdef MRB_WORDBOX_NO_FLOAT_TRUNCATE +#ifdef MRB_WORDBOX_NO_INLINE_FLOAT mrb_define_const(mrb, mrbtest, "FLOAT_TOLERANCE", mrb_float_value(mrb, 1e-5)); #else mrb_define_const(mrb, mrbtest, "FLOAT_TOLERANCE", mrb_float_value(mrb, 1e-4)); @@ -230,6 +229,8 @@ mrb_init_test_driver(mrb_state *mrb, mrb_bool verbose) #else mrb_define_const(mrb, mrbtest, "FLOAT_TOLERANCE", mrb_float_value(mrb, 1e-10)); #endif +#else + (void)mrbtest; #endif mrb_init_test_vformat(mrb); @@ -242,8 +243,6 @@ mrb_init_test_driver(mrb_state *mrb, mrb_bool verbose) void mrb_t_pass_result(mrb_state *mrb_dst, mrb_state *mrb_src) { - mrb_value res_src; - if (mrb_src->exc) { mrb_print_error(mrb_src); exit(EXIT_FAILURE); @@ -251,7 +250,7 @@ mrb_t_pass_result(mrb_state *mrb_dst, mrb_state *mrb_src) #define TEST_COUNT_PASS(name) \ do { \ - res_src = mrb_gv_get(mrb_src, mrb_intern_lit(mrb_src, "$" #name)); \ + mrb_value res_src = mrb_gv_get(mrb_src, mrb_intern_lit(mrb_src, "$" #name)); \ if (mrb_integer_p(res_src)) { \ mrb_value res_dst = mrb_gv_get(mrb_dst, mrb_intern_lit(mrb_dst, "$" #name)); \ mrb_gv_set(mrb_dst, mrb_intern_lit(mrb_dst, "$" #name), mrb_int_value(mrb_dst, mrb_integer(res_dst) + mrb_integer(res_src))); \ @@ -266,14 +265,17 @@ mrb_t_pass_result(mrb_state *mrb_dst, mrb_state *mrb_src) #undef TEST_COUNT_PASS - res_src = mrb_gv_get(mrb_src, mrb_intern_lit(mrb_src, "$asserts")); + mrb_value res_src = mrb_gv_get(mrb_src, mrb_intern_lit(mrb_src, "$asserts")); if (mrb_array_p(res_src)) { mrb_int i; mrb_value res_dst = mrb_gv_get(mrb_dst, mrb_intern_lit(mrb_dst, "$asserts")); - for (i = 0; i < RARRAY_LEN(res_src); ++i) { + int ai = mrb_gc_arena_save(mrb_dst); + for (i = 0; i < RARRAY_LEN(res_src); i++) { mrb_value val_src = RARRAY_PTR(res_src)[i]; + mrb_ensure_string_type(mrb_dst, val_src); mrb_ary_push(mrb_dst, res_dst, mrb_str_new(mrb_dst, RSTRING_PTR(val_src), RSTRING_LEN(val_src))); + mrb_gc_arena_restore(mrb_dst, ai); } } } @@ -282,15 +284,15 @@ int main(int argc, char **argv) { mrb_state *mrb; - int ret; mrb_bool verbose = FALSE; print_hint(); /* new interpreter instance */ mrb = mrb_open(); - if (mrb == NULL) { - fprintf(stderr, "Invalid mrb_state, exiting test driver"); + if (MRB_OPEN_FAILURE(mrb)) { + mrb_print_error(mrb); /* handles NULL */ + mrb_close(mrb); /* handles NULL */ return EXIT_FAILURE; } @@ -299,10 +301,14 @@ main(int argc, char **argv) verbose = TRUE; } + int ai = mrb_gc_arena_save(mrb); mrb_init_test_driver(mrb, verbose); + mrb_gc_arena_restore(mrb, ai); mrb_load_irep(mrb, mrbtest_assert_irep); + mrb_gc_arena_restore(mrb, ai); mrbgemtest_init(mrb); - ret = eval_test(mrb); + + int ret = eval_test(mrb); mrb_close(mrb); return ret; diff --git a/mrbgems/mruby-test/mrbgem.rake b/mrbgems/mruby-test/mrbgem.rake index 927447b4f7..9a6078cec6 100644 --- a/mrbgems/mruby-test/mrbgem.rake +++ b/mrbgems/mruby-test/mrbgem.rake @@ -19,9 +19,11 @@ MRuby::Gem::Specification.new('mruby-test') do |spec| file assert_c => [assert_rb, build.mrbcfile] do |t| _pp "GEN", t.name.relative_path mkdir_p File.dirname(t.name) - open(t.name, 'w') do |f| + tmpfile = t.name + ".tmp" + open(tmpfile, 'w') do |f| mrbc.run f, assert_rb, 'mrbtest_assert_irep', cdump: false end + File.rename(tmpfile, t.name) end gem_table = build.gems.generate_gem_table build @@ -36,7 +38,8 @@ MRuby::Gem::Specification.new('mruby-test') do |spec| file g.test_rbireps => [g.test_rbfiles, build.mrbcfile].flatten do |t| _pp "GEN", t.name.relative_path mkdir_p File.dirname(t.name) - open(t.name, 'w') do |f| + tmpfile = t.name + ".tmp" + open(tmpfile, 'w') do |f| g.print_gem_test_header(f) test_preload = g.test_preload and [g.dir, MRUBY_ROOT].map {|dir| File.expand_path(g.test_preload, dir) @@ -66,37 +69,33 @@ MRuby::Gem::Specification.new('mruby-test') do |spec| f.puts %Q[void mrb_t_pass_result(mrb_state *dst, mrb_state *src);] f.puts %Q[void GENERATED_TMP_mrb_#{g.funcname}_gem_test(mrb_state *mrb) {] unless g.test_rbfiles.empty? - f.puts %Q[ mrb_state *mrb2;] unless g.test_args.empty? f.puts %Q[ mrb_value test_args_hash;] end - f.puts %Q[ int ai;] + f.puts %Q[ mrb_state *mrb2 = mrb_open_core();] + f.puts %Q[ if (mrb2 == NULL) {] + f.puts %Q[ fprintf(stderr, "Invalid mrb_state, exiting \%s", __func__);] + f.puts %Q[ exit(EXIT_FAILURE);] + f.puts %Q[ }] + f.puts %Q[ int ai = mrb_gc_arena_save(mrb2);] + f.puts %Q[ mrb_const_set(mrb2, mrb_obj_value(mrb2->object_class), mrb_intern_lit(mrb2, "GEMNAME"), mrb_str_new(mrb2, "#{g.name}", #{g.name.length}));] + f.puts %Q[ mrb_gc_arena_restore(mrb2, ai);] + if test_preload.nil? + f.puts %Q[ mrb_load_irep(mrb2, mrbtest_assert_irep);] + else + f.puts %Q[ mrb_load_irep(mrb2, gem_test_irep_#{g.funcname}_preload);] + end + dep_list.each do |d| + f.puts %Q[ GENERATED_TMP_mrb_#{d.funcname}_gem_init(mrb2);] + f.puts %Q[ mrb_state_atexit(mrb2, GENERATED_TMP_mrb_#{d.funcname}_gem_final);] + f.puts %Q[ mrb_gc_arena_restore(mrb2, ai);] + end + f.puts %Q[ mrb_init_test_driver(mrb2, mrb_test(mrb_gv_get(mrb, mrb_intern_lit(mrb, "$mrbtest_verbose"))));] + f.puts %Q[ mrb_gc_arena_restore(mrb2, ai);] + f.puts %Q[ ] g.test_rbfiles.count.times do |i| - f.puts %Q[ ai = mrb_gc_arena_save(mrb);] - f.puts %Q[ mrb2 = mrb_open_core(mrb_default_allocf, NULL);] - f.puts %Q[ if (mrb2 == NULL) {] - f.puts %Q[ fprintf(stderr, "Invalid mrb_state, exiting \%s", __func__);] - f.puts %Q[ exit(EXIT_FAILURE);] - f.puts %Q[ }] - dep_list.each do |d| - f.puts %Q[ GENERATED_TMP_mrb_#{d.funcname}_gem_init(mrb2);] - f.puts %Q[ mrb_state_atexit(mrb2, GENERATED_TMP_mrb_#{d.funcname}_gem_final);] - end - f.puts %Q[ mrb_init_test_driver(mrb2, mrb_test(mrb_gv_get(mrb, mrb_intern_lit(mrb, "$mrbtest_verbose"))));] - if test_preload.nil? - f.puts %Q[ mrb_load_irep(mrb2, mrbtest_assert_irep);] - else - f.puts %Q[ mrb_load_irep(mrb2, gem_test_irep_#{g.funcname}_preload);] - end - f.puts %Q[ if (mrb2->exc) {] - f.puts %Q[ mrb_print_error(mrb2);] - f.puts %Q[ mrb_close(mrb2);] - f.puts %Q[ exit(EXIT_FAILURE);] - f.puts %Q[ }] - f.puts %Q[ mrb_const_set(mrb2, mrb_obj_value(mrb2->object_class), mrb_intern_lit(mrb2, "GEMNAME"), mrb_str_new(mrb2, "#{g.name}", #{g.name.length}));] - unless g.test_args.empty? - f.puts %Q[ test_args_hash = mrb_hash_new_capa(mrb, #{g.test_args.length}); ] + f.puts %Q[ test_args_hash = mrb_hash_new_capa(mrb2, #{g.test_args.length}); ] g.test_args.each do |arg_name, arg_value| escaped_arg_name = arg_name.gsub('\\', '\\\\\\\\').gsub('"', '\"') escaped_arg_value = arg_value.gsub('\\', '\\\\\\\\').gsub('"', '\"') @@ -104,19 +103,24 @@ MRuby::Gem::Specification.new('mruby-test') do |spec| end f.puts %Q[ mrb_const_set(mrb2, mrb_obj_value(mrb2->object_class), mrb_intern_lit(mrb2, "TEST_ARGS"), test_args_hash); ] end + f.puts %Q[ mrb_gc_arena_restore(mrb2, ai);] f.puts %Q[ mrb_#{g.funcname}_gem_test(mrb2);] if g.custom_test_init? - + f.puts %Q[ mrb_gc_arena_restore(mrb2, ai);] f.puts %Q[ mrb_load_irep(mrb2, gem_test_irep_#{g.funcname}_#{i});] + f.puts %Q[ if (mrb2->exc) {] + f.puts %Q[ mrb_print_error(mrb2);] + f.puts %Q[ mrb_close(mrb2);] + f.puts %Q[ exit(EXIT_FAILURE);] + f.puts %Q[ }] f.puts %Q[ ] - - f.puts %Q[ mrb_t_pass_result(mrb, mrb2);] - f.puts %Q[ mrb_close(mrb2);] - f.puts %Q[ mrb_gc_arena_restore(mrb, ai);] end + f.puts %Q[ mrb_t_pass_result(mrb, mrb2);] + f.puts %Q[ mrb_close(mrb2);] end f.puts %Q[}] end + File.rename(tmpfile, t.name) end end @@ -134,7 +138,8 @@ MRuby::Gem::Specification.new('mruby-test') do |spec| file clib => ["#{build.build_dir}/mrbgems/active_gems.txt", build.mrbcfile, __FILE__] do |_t| _pp "GEN", clib.relative_path mkdir_p File.dirname(clib) - open(clib, 'w') do |f| + tmpfile = clib + ".tmp" + open(tmpfile, 'w') do |f| f.puts %Q[/*] f.puts %Q[ * This file contains a list of all] f.puts %Q[ * test functions.] @@ -144,16 +149,31 @@ MRuby::Gem::Specification.new('mruby-test') do |spec| f.puts %Q[ * All manual changes will get lost.] f.puts %Q[ */] f.puts %Q[] - f.puts %Q[struct mrb_state;] - f.puts %Q[typedef struct mrb_state mrb_state;] + f.puts %Q[#include ] + f.puts %Q[#include ] + f.puts %Q[#include ] + f.puts %Q[] build.gems.each do |g| f.puts %Q[void GENERATED_TMP_mrb_#{g.funcname}_gem_test(mrb_state *mrb);] end f.puts %Q[void mrbgemtest_init(mrb_state* mrb) {] + f.puts %Q[ int ai = mrb_gc_arena_save(mrb);] build.gems.each do |g| - f.puts %Q[ GENERATED_TMP_mrb_#{g.funcname}_gem_test(mrb);] + if g.skip_test? + f.puts %Q[ do {] + f.puts %Q[ mrb_value asserts = mrb_gv_get(mrb, mrb_intern_lit(mrb, "$asserts"));] + f.puts %Q[ mrb_ary_push(mrb, asserts, mrb_str_new_lit(mrb, ] + f.puts %Q[ "Warn: Skipping tests for gem (#{ + g.name == 'mruby-test' ? 'core' : "mrbgems: #{g.name}" + })"));] + f.puts %Q[ } while (0);] + else + f.puts %Q[ GENERATED_TMP_mrb_#{g.funcname}_gem_test(mrb);] + end + f.puts %Q[ mrb_gc_arena_restore(mrb, ai);] end f.puts %Q[}] end + File.rename(tmpfile, clib) end end diff --git a/mrbgems/mruby-test/vformat.c b/mrbgems/mruby-test/vformat.c index 571d96b191..5e2c51b5a1 100644 --- a/mrbgems/mruby-test/vformat.c +++ b/mrbgems/mruby-test/vformat.c @@ -9,11 +9,8 @@ static mrb_value vf_s_format_0(mrb_state *mrb, mrb_value klass) { mrb_value fmt_str; - const char *fmt; - mrb_get_args(mrb, "S", &fmt_str); - fmt = RSTRING_CSTR(mrb, fmt_str); - + const char *fmt = RSTRING_CSTR(mrb, fmt_str); return mrb_format(mrb, fmt); } @@ -22,12 +19,10 @@ static mrb_value vf_s_format_c(mrb_state *mrb, mrb_value klass) { mrb_value fmt_str, arg_str; - const char *fmt; - char c; mrb_get_args(mrb, "SS", &fmt_str, &arg_str); - fmt = RSTRING_CSTR(mrb, fmt_str); - c = RSTRING_CSTR(mrb, arg_str)[0]; + const char *fmt = RSTRING_CSTR(mrb, fmt_str); + char c = RSTRING_CSTR(mrb, arg_str)[0]; return mrb_format(mrb, fmt, c); } @@ -37,14 +32,11 @@ static mrb_value vf_s_format_d(mrb_state *mrb, mrb_value klass) { mrb_value fmt_str; - const char *fmt; mrb_int i; - int d; mrb_get_args(mrb, "Si", &fmt_str, &i); - fmt = RSTRING_CSTR(mrb, fmt_str); - d = (int)i; - + const char *fmt = RSTRING_CSTR(mrb, fmt_str); + int d = (int)i; return mrb_format(mrb, fmt, d); } @@ -54,12 +46,10 @@ static mrb_value vf_s_format_f(mrb_state *mrb, mrb_value klass) { mrb_value fmt_str; - const char *fmt; mrb_float f; mrb_get_args(mrb, "Sf", &fmt_str, &f); - fmt = RSTRING_CSTR(mrb, fmt_str); - + const char *fmt = RSTRING_CSTR(mrb, fmt_str); return mrb_format(mrb, fmt, f); } #endif @@ -69,12 +59,10 @@ static mrb_value vf_s_format_i(mrb_state *mrb, mrb_value klass) { mrb_value fmt_str; - const char *fmt; mrb_int i; mrb_get_args(mrb, "Si", &fmt_str, &i); - fmt = RSTRING_CSTR(mrb, fmt_str); - + const char *fmt = RSTRING_CSTR(mrb, fmt_str); return mrb_format(mrb, fmt, i); } @@ -83,15 +71,12 @@ static mrb_value vf_s_format_l(mrb_state *mrb, mrb_value klass) { mrb_value fmt_str, arg_str; - const char *fmt; - const char *s; mrb_int i; - size_t len; mrb_get_args(mrb, "SSi", &fmt_str, &arg_str, &i); - fmt = RSTRING_CSTR(mrb, fmt_str); - s = RSTRING_PTR(arg_str); - len = (size_t)i; + const char *fmt = RSTRING_CSTR(mrb, fmt_str); + const char *s = RSTRING_PTR(arg_str); + size_t len = (size_t)i; if (len > (size_t)RSTRING_LEN(arg_str)) len = (size_t)RSTRING_LEN(arg_str); return mrb_format(mrb, fmt, s, len); @@ -102,12 +87,10 @@ static mrb_value vf_s_format_n(mrb_state *mrb, mrb_value klass) { mrb_value fmt_str; - const char *fmt; mrb_sym sym; mrb_get_args(mrb, "Sn", &fmt_str, &sym); - fmt = RSTRING_CSTR(mrb, fmt_str); - + const char *fmt = RSTRING_CSTR(mrb, fmt_str); return mrb_format(mrb, fmt, sym); } @@ -116,13 +99,9 @@ static mrb_value vf_s_format_s(mrb_state *mrb, mrb_value klass) { mrb_value fmt_str, arg_str; - const char *fmt; - const char *s; - mrb_get_args(mrb, "SS", &fmt_str, &arg_str); - fmt = RSTRING_CSTR(mrb, fmt_str); - s = RSTRING_CSTR(mrb, arg_str); - + const char *fmt = RSTRING_CSTR(mrb, fmt_str); + const char *s = RSTRING_CSTR(mrb, arg_str); return mrb_format(mrb, fmt, s); } @@ -131,13 +110,10 @@ static mrb_value vf_s_format_C(mrb_state *mrb, mrb_value klass) { mrb_value fmt_str, arg_cls; - const char *fmt; - struct RClass *c; mrb_get_args(mrb, "SC", &fmt_str, &arg_cls); - fmt = RSTRING_CSTR(mrb, fmt_str); - c = mrb_class_ptr(arg_cls); - + const char *fmt = RSTRING_CSTR(mrb, fmt_str); + struct RClass *c = mrb_class_ptr(arg_cls); return mrb_format(mrb, fmt, c); } @@ -146,20 +122,16 @@ static mrb_value vf_s_format_v(mrb_state *mrb, mrb_value klass) { mrb_value fmt_str, arg_v; - const char *fmt; mrb_get_args(mrb, "So", &fmt_str, &arg_v); - fmt = RSTRING_CSTR(mrb, fmt_str); - + const char *fmt = RSTRING_CSTR(mrb, fmt_str); return mrb_format(mrb, fmt, arg_v); } void mrb_init_test_vformat(mrb_state *mrb) { - struct RClass *vf; - - vf = mrb_define_module(mrb, "TestVFormat"); + struct RClass *vf = mrb_define_module(mrb, "TestVFormat"); mrb_define_class_method(mrb, vf, "z", vf_s_format_0, MRB_ARGS_REQ(1)); #define VF_DEFINE_FORMAT_METHOD(t) VF_DEFINE_FORMAT_METHOD_n(t,2) diff --git a/mrbgems/mruby-time/README.md b/mrbgems/mruby-time/README.md new file mode 100644 index 0000000000..23c06d6234 --- /dev/null +++ b/mrbgems/mruby-time/README.md @@ -0,0 +1,102 @@ +# mruby-time + +mruby-time is an mrbgem that provides a `Time` class for mruby, offering functionalities for time manipulation and representation. It is designed to be largely compatible with the Time class found in standard Ruby, following the ISO Ruby Time class specification. + +## Purpose + +The primary purpose of `mruby-time` is to enable mruby applications to: + +- Work with specific points in time. +- Handle timezones, primarily UTC and the local system timezone. +- Perform time arithmetic. +- Format time information into human-readable strings. + +## Functionality + +### Creating Time Objects + +You can create `Time` objects in several ways: + +- **`Time.now`**: Returns a `Time` object representing the current time based on the system's clock. + + ```ruby + t = Time.now + ``` + +- **`Time.at(seconds_with_fraction)`**: Creates a `Time` object for the time `seconds_with_fraction` since the Epoch (January 1, 1970, 00:00:00 UTC). You can provide seconds as an integer or a float for sub-second precision. + + ```ruby + t1 = Time.at(1678886400) # Integer seconds + t2 = Time.at(1678886400.5) # Seconds with microseconds + ``` + +- **`Time.local(year, month, day, hour, min, sec, usec)`** (or **`Time.mktime`**): Creates a `Time` object from the given components in the local timezone. Arguments beyond `year` are optional and default to minimum values (e.g., month 1, day 1, hour 0, etc.). + + ```ruby + t = Time.local(2023, 3, 15, 12, 30, 0) # March 15, 2023, 12:30:00 local time + ``` + +- **`Time.gm(year, month, day, hour, min, sec, usec)`** (or **`Time.utc`**): Creates a `Time` object from the given components in UTC. Arguments are optional similar to `Time.local`. + + ```ruby + t = Time.gm(2023, 3, 15, 12, 30, 0) # March 15, 2023, 12:30:00 UTC + ``` + +### Getting Time Components + +Once you have a `Time` object, you can extract its components: + +- `t.year`: Returns the year. +- `t.month` (or `t.mon`): Returns the month of the year (1-12). +- `t.day` (or `t.mday`): Returns the day of the month (1-31). +- `t.hour`: Returns the hour of the day (0-23). +- `t.min`: Returns the minute of the hour (0-59). +- `t.sec`: Returns the second of the minute (0-59). +- `t.usec`: Returns the microsecond of the second (0-999999). +- `t.wday`: Returns the day of the week (0 for Sunday, 1 for Monday, ..., 6 for Saturday). +- `t.yday`: Returns the day of the year (1-366). +- `t.zone`: Returns the timezone name ("UTC" or local timezone offset like "+0900"). + +### Timezone Conversions + +- `t.utc?` (or `t.gmt?`): Returns `true` if the `Time` object is in UTC. +- `t.utc` (or `t.gmtime`): Converts the `Time` object to UTC and returns `self`. +- `t.getutc` (or `t.getgm`): Returns a new `Time` object representing the same point in time as `t`, but in UTC. +- `t.localtime`: Converts the `Time` object to the local timezone and returns `self`. +- `t.getlocal`: Returns a new `Time` object representing the same point in time as `t`, but in the local timezone. + +### Time Arithmetic + +- **Addition (`+`)**: Adds a duration (in seconds) to a `Time` object, returning a new `Time` object. + + ```ruby + t1 = Time.now + t2 = t1 + 60 # 60 seconds later + ``` + +- **Subtraction (`-`)**: + - Subtracting a duration (in seconds) from a `Time` object returns a new `Time` object. + - Subtracting another `Time` object returns the difference in seconds as a Float. + + ```ruby + t1 = Time.now + t_earlier = t1 - 3600 # One hour earlier + + t2 = Time.local(2023, 1, 1) + t3 = Time.local(2023, 1, 2) + difference_seconds = t3 - t2 # Returns 86400.0 + ``` + +### Formatting Time + +- **`t.to_s`**: Returns a string representation of the time (e.g., "2023-03-15 10:30:00 +0000"). The format may vary slightly. +- **`t.inspect`**: Similar to `to_s`, provides a string representation. +- **`t.asctime`** (or **`t.ctime`**): Returns a canonical string representation (e.g., "Wed Mar 15 10:30:00 2023"). +- **`t.to_i`**: Returns the number of seconds since the Epoch as an integer. +- **`t.to_f`**: Returns the number of seconds since the Epoch as a float (including microseconds). + +## Compatibility + +`mruby-time` aims to be compatible with the standard Ruby `Time` class as defined by ISO/IEC 30170:2012 (Ruby Language Specification). However, due to the nature of embedded systems and the mruby environment, there might be minor differences or limitations, especially concerning timezone data complexity beyond UTC and local system time. + +Refer to the source code and tests for detailed behavior. diff --git a/mrbgems/mruby-time/include/mruby/time.h b/mrbgems/mruby-time/include/mruby/time.h index 1adcfd49c5..349fc4d04a 100644 --- a/mrbgems/mruby-time/include/mruby/time.h +++ b/mrbgems/mruby-time/include/mruby/time.h @@ -7,7 +7,7 @@ #ifndef MRUBY_TIME_H #define MRUBY_TIME_H -#include "mruby/common.h" +#include #include MRB_BEGIN_DECL @@ -20,6 +20,7 @@ typedef enum mrb_timezone { } mrb_timezone; MRB_API mrb_value mrb_time_at(mrb_state *mrb, time_t sec, time_t usec, mrb_timezone timezone); +MRB_API struct tm* mrb_time_get_tm(mrb_state *mrb, mrb_value time); MRB_END_DECL diff --git a/mrbgems/mruby-time/mrblib/time.rb b/mrbgems/mruby-time/mrblib/time.rb deleted file mode 100644 index df0d8ca82c..0000000000 --- a/mrbgems/mruby-time/mrblib/time.rb +++ /dev/null @@ -1,9 +0,0 @@ -class Time - def sunday?; wday == 0 end - def monday?; wday == 1 end - def tuesday?; wday == 2 end - def wednesday?; wday == 3 end - def thursday?; wday == 4 end - def friday?; wday == 5 end - def saturday?; wday == 6 end -end diff --git a/mrbgems/mruby-time/src/time.c b/mrbgems/mruby-time/src/time.c index b864e568e7..291e3d11ee 100644 --- a/mrbgems/mruby-time/src/time.c +++ b/mrbgems/mruby-time/src/time.c @@ -11,7 +11,6 @@ #include #include #include -#include #ifdef MRB_NO_STDIO #include @@ -26,6 +25,28 @@ #define NDIV(x,y) (-(-((x)+1)/(y))-1) #define TO_S_FMT "%Y-%m-%d %H:%M:%S " +/* Time unit constants */ +#define USECS_PER_SEC 1000000L +#define USECS_PER_SEC_F 1.0e6 +#define NSECS_PER_USEC 1000L +#define SECS_PER_MIN 60 +#define MINS_PER_HOUR 60 +#define HOURS_PER_DAY 24 +#define DAYS_PER_YEAR 365 +#define DAYS_PER_LEAP_YEAR 366 +#define MONTHS_PER_YEAR 12 + +/* Calendar calculation constants */ +#define TM_YEAR_BASE 1900 +#define EPOCH_YEAR_OFFSET 70 +#define LEAP_YEAR_DIVISOR 4 +#define LEAP_YEAR_NON_DIVISOR_CENTURY 100 +#define LEAP_YEAR_DIVISOR_QUAD_CENTURY 400 + +/* Windows specific time constants */ +#define WINDOWS_EPOCH_BIAS_USEC UI64(116444736000000000) /* Unix epoch bias in 100ns intervals for Windows FILETIME */ +#define HUNDRED_NS_PER_USEC 10 /* Number of 100-nanosecond intervals in a microsecond */ + #if defined(_MSC_VER) && _MSC_VER < 1800 double round(double x) { return floor(x + 0.5); @@ -44,6 +65,11 @@ double round(double x) { /** Time class configuration */ +/* Platform detection for Windows variants */ +#if defined(_MSC_VER) && _MSC_VER < 1900 || defined(__MINGW64__) || defined(__MINGW32__) +#define MRB_TIME_WINDOWS_NO_STRFTIME_Z +#endif + /* gettimeofday(2) */ /* C99 does not have gettimeofday that is required to retrieve microseconds */ /* uncomment following macro on platforms without gettimeofday(2) */ @@ -83,31 +109,38 @@ double round(double x) { /** end of Time class configuration */ -#if (defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0) && defined(CLOCK_REALTIME) +/* protection against incorrectly defined _POSIX_TIMERS */ +#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS + 0) > 0 && defined(CLOCK_REALTIME) # define USE_CLOCK_GETTIME #endif -#if !defined(NO_GETTIMEOFDAY) -# if defined(_WIN32) && !defined(USE_CLOCK_GETTIME) -# define WIN32_LEAN_AND_MEAN /* don't include winsock.h */ -# include -# define gettimeofday my_gettimeofday +#if !defined(NO_GETTIMEOFDAY) && defined(_WIN32) && !defined(USE_CLOCK_GETTIME) +/* Windows gettimeofday polyfill */ +#define WIN32_LEAN_AND_MEAN /* don't include winsock.h */ +#include +#define gettimeofday my_gettimeofday -# ifdef _MSC_VER -# define UI64(x) x##ui64 -# else -# define UI64(x) x##ull -# endif +#ifdef _MSC_VER +# define UI64(x) x##ui64 +#else +# define UI64(x) x##ull +#endif typedef long suseconds_t; -# if (!defined __MINGW64__) && (!defined __MINGW32__) +#if (!defined __MINGW64__) && (!defined __MINGW32__) struct timeval { time_t tv_sec; suseconds_t tv_usec; }; -# endif +#endif +/* + * Polyfill for gettimeofday on Windows platforms that may not have it (e.g., older MSVC). + * Retrieves the current system time as FILETIME, converts it to Unix epoch, + * and then splits it into seconds and microseconds. + * The timezone argument (tz) is not supported. + */ static int gettimeofday(struct timeval *tv, void *tz) { @@ -120,55 +153,68 @@ gettimeofday(struct timeval *tv, void *tz) unsigned __int64 u64; } t; GetSystemTimeAsFileTime(&t.ft); /* 100 ns intervals since Windows epoch */ - t.u64 -= UI64(116444736000000000); /* Unix epoch bias */ - t.u64 /= 10; /* to microseconds */ - tv->tv_sec = (time_t)(t.u64 / (1000 * 1000)); - tv->tv_usec = t.u64 % (1000 * 1000); + t.u64 -= WINDOWS_EPOCH_BIAS_USEC; /* Unix epoch bias */ + t.u64 /= HUNDRED_NS_PER_USEC; /* to microseconds */ + tv->tv_sec = (time_t)(t.u64 / USECS_PER_SEC); + tv->tv_usec = t.u64 % USECS_PER_SEC; } return 0; } -# else -# include -# endif + +#elif !defined(NO_GETTIMEOFDAY) +/* Non-Windows platforms use standard sys/time.h */ +#include #endif #ifdef NO_GMTIME_R #define gmtime_r(t,r) gmtime(t) #define localtime_r(t,r) localtime(t) #endif +/* + * USE_SYSTEM_TIMEGM: If defined, the system's `timegm` is used. + * Otherwise, a custom implementation `my_timgm` is used. + * `timegm` converts a `struct tm` (broken-down time) in UTC to a `time_t` (seconds since epoch). + * This is the reverse of `gmtime_r`. + */ #ifndef USE_SYSTEM_TIMEGM #define timegm my_timgm +/* Helper function to check for leap years. */ static unsigned int is_leapyear(unsigned int y) { - return (y % 4) == 0 && ((y % 100) != 0 || (y % 400) == 0); + return (y % LEAP_YEAR_DIVISOR) == 0 && ((y % LEAP_YEAR_NON_DIVISOR_CENTURY) != 0 || (y % LEAP_YEAR_DIVISOR_QUAD_CENTURY) == 0); } static time_t timegm(struct tm *tm) { - static const unsigned int ndays[2][12] = { - {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, - {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31} + static const unsigned int ndays[2][MONTHS_PER_YEAR] = { + {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, /* Non-leap year */ + {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31} /* Leap year */ }; - time_t r = 0; + time_t r = 0; /* Accumulator for seconds since epoch */ int i; - unsigned int *nday = (unsigned int*) ndays[is_leapyear(tm->tm_year+1900)]; - - static const int epoch_year = 70; - if(tm->tm_year >= epoch_year) { - for (i = epoch_year; i < tm->tm_year; ++i) - r += is_leapyear(i+1900) ? 366*24*60*60 : 365*24*60*60; - } else { - for (i = tm->tm_year; i < epoch_year; ++i) - r -= is_leapyear(i+1900) ? 366*24*60*60 : 365*24*60*60; + /* Get a pointer to the array of days in each month for the given year (leap or non-leap) */ + unsigned int *nday = (unsigned int*) ndays[is_leapyear(tm->tm_year+TM_YEAR_BASE)]; + + /* Calculate seconds from years since epoch */ + if (tm->tm_year >= EPOCH_YEAR_OFFSET) { /* Years from 1970 up to tm_year */ + for (i = EPOCH_YEAR_OFFSET; i < tm->tm_year; ++i) + r += is_leapyear(i+TM_YEAR_BASE) ? (DAYS_PER_LEAP_YEAR*HOURS_PER_DAY*SECS_PER_MIN*MINS_PER_HOUR) : (DAYS_PER_YEAR*HOURS_PER_DAY*SECS_PER_MIN*MINS_PER_HOUR); + } + else { /* Years before 1970 down to tm_year */ + for (i = tm->tm_year; i < EPOCH_YEAR_OFFSET; ++i) + r -= is_leapyear(i+TM_YEAR_BASE) ? (DAYS_PER_LEAP_YEAR*HOURS_PER_DAY*SECS_PER_MIN*MINS_PER_HOUR) : (DAYS_PER_YEAR*HOURS_PER_DAY*SECS_PER_MIN*MINS_PER_HOUR); } + /* Add seconds from months in the current year */ for (i = 0; i < tm->tm_mon; ++i) - r += nday[i] * 24 * 60 * 60; - r += (tm->tm_mday - 1) * 24 * 60 * 60; - r += tm->tm_hour * 60 * 60; - r += tm->tm_min * 60; + r += nday[i] * HOURS_PER_DAY * SECS_PER_MIN * MINS_PER_HOUR; + /* Add seconds from days in the current month */ + r += (tm->tm_mday - 1) * HOURS_PER_DAY * SECS_PER_MIN * MINS_PER_HOUR; + /* Add seconds from hours, minutes, and seconds in the current day */ + r += tm->tm_hour * SECS_PER_MIN * MINS_PER_HOUR; + r += tm->tm_min * SECS_PER_MIN; r += tm->tm_sec; return r; } @@ -180,32 +226,23 @@ timegm(struct tm *tm) */ #ifndef MRB_NO_STDIO -static const char mon_names[12][4] = { +static const char mon_names[MONTHS_PER_YEAR][4] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", }; -static const char wday_names[7][4] = { +static const char wday_names[7][4] = { /* Consider defining DAYS_PER_WEEK = 7 if used elsewhere */ "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", }; #endif struct mrb_time { - time_t sec; - time_t usec; - enum mrb_timezone timezone; - struct tm datetime; + time_t sec; /* Seconds since the Epoch */ + time_t nsec; /* Nanosecond fraction of the second (0-999999999) */ + enum mrb_timezone timezone; /* Timezone setting (MRB_TIMEZONE_UTC or MRB_TIMEZONE_LOCAL) */ + struct tm datetime; /* Cache for broken-down time based on sec, nsec, and timezone. Updated by time_update_datetime. */ }; -static const struct mrb_data_type mrb_time_type = { "Time", mrb_free }; - -#ifndef MRB_NO_FLOAT -void mrb_check_num_exact(mrb_state *mrb, mrb_float num); -typedef mrb_float mrb_sec; -#define mrb_sec_value(mrb, sec) mrb_float_value(mrb, sec) -#else -typedef mrb_int mrb_sec; -#define mrb_sec_value(mrb, sec) mrb_int_value(mrb, sec) -#endif +static const struct mrb_data_type time_type = { "Time", mrb_free }; /* mrb_free is the standard C free() */ #define MRB_TIME_T_UINT (~(time_t)0 > 0) #define MRB_TIME_MIN ( \ @@ -217,8 +254,13 @@ typedef mrb_int mrb_sec; (sizeof(time_t) <= 4 ? INT32_MAX : INT64_MAX) \ ) -#ifndef MRB_NO_FLOAT -/* return true if time_t is fit in mrb_int */ +/* + * Checks if a time_t value `v` can be represented as an mrb_int without overflow or precision loss. + * This is important because mruby integers (mrb_int) might be smaller than time_t on some platforms. + * - If mrb_int can fully encompass the range of time_t, it's always TRUE. + * - Otherwise, it checks if `v` falls within the representable range of mrb_int. + * - Considers if time_t is unsigned (MRB_TIME_T_UINT). + */ static mrb_bool fixable_time_t_p(time_t v) { @@ -228,58 +270,135 @@ fixable_time_t_p(time_t v) if (MRB_INT_MIN > (mrb_int)v) return FALSE; return TRUE; } -#endif +static void +time_out_of_range(mrb_state *mrb, mrb_value obj) +{ + mrb_raisef(mrb, E_RANGE_ERROR, "%v out of Time range", obj); +} + +static mrb_noreturn void +time_uninitialized(mrb_state *mrb) +{ + mrb_raise(mrb, E_ARGUMENT_ERROR, "uninitialized Time"); +} + + +#ifndef MRB_NO_FLOAT static time_t -mrb_to_time_t(mrb_state *mrb, mrb_value obj, time_t *usec) +mrb_time_t_from_float(mrb_state *mrb, mrb_value obj, time_t *usec) { time_t t; + mrb_float f = mrb_float(obj); - switch (mrb_type(obj)) { -#ifndef MRB_NO_FLOAT - case MRB_TT_FLOAT: - { - mrb_float f = mrb_float(obj); - - mrb_check_num_exact(mrb, f); - if (f >= ((mrb_float)MRB_TIME_MAX-1.0) || f < ((mrb_float)MRB_TIME_MIN+1.0)) { - goto out_of_range; - } - - if (usec) { - t = (time_t)f; - *usec = (time_t)llround((f - t) * 1.0e+6); - } - else { - t = (time_t)llround(f); - } - } - break; + mrb_check_num_exact(mrb, f); + if (f >= ((mrb_float)MRB_TIME_MAX-1.0) || f < ((mrb_float)MRB_TIME_MIN+1.0)) { + time_out_of_range(mrb, obj); + } + + if (usec) { + double tt = floor(f); + if (!isfinite(tt)) time_out_of_range(mrb, obj); + t = (time_t)tt; + *usec = (time_t)trunc((f - tt) * USECS_PER_SEC_F); + } + else { + double tt = round(f); + if (!isfinite(tt)) time_out_of_range(mrb, obj); + t = (time_t)tt; + } + return t; +} #endif /* MRB_NO_FLOAT */ - default: - case MRB_TT_INTEGER: - { - mrb_int i = mrb_integer(obj); - if ((MRB_INT_MAX > MRB_TIME_MAX && i > 0 && (time_t)i > MRB_TIME_MAX) || - (0 > MRB_TIME_MIN && MRB_TIME_MIN > MRB_INT_MIN && MRB_TIME_MIN > i)) { - goto out_of_range; - } +static time_t +mrb_time_t_from_integer(mrb_state *mrb, mrb_value obj, time_t *usec) +{ + time_t t; + mrb_int i = mrb_integer(obj); - t = (time_t)i; - if (usec) { *usec = 0; } - } - break; + if ((MRB_INT_MAX > MRB_TIME_MAX && i > 0 && (time_t)i > MRB_TIME_MAX) || + (0 > MRB_TIME_MIN && MRB_TIME_MIN > MRB_INT_MIN && MRB_TIME_MIN > i)) { + time_out_of_range(mrb, obj); } + t = (time_t)i; + if (usec) { *usec = 0; } return t; +} -out_of_range: - mrb_raisef(mrb, E_ARGUMENT_ERROR, "%v out of Time range", obj); +#ifdef MRB_USE_BIGINT +static time_t +mrb_time_t_from_bigint(mrb_state *mrb, mrb_value obj, time_t *usec) +{ + time_t t; + if (sizeof(time_t) > sizeof(mrb_int)) { + if (MRB_TIME_T_UINT) { + t = (time_t)mrb_bint_as_uint64(mrb, obj); + } + else { + t = (time_t)mrb_bint_as_int64(mrb, obj); + } + if (usec) { *usec = 0; } + } + else { + mrb_int i = mrb_bint_as_int(mrb, obj); + obj = mrb_int_value(mrb, i); + /* Call the integer handler for the converted value */ + t = mrb_time_t_from_integer(mrb, obj, usec); + } + return t; +} +#endif /* MRB_USE_BIGINT */ - /* not reached */ - if (usec) { *usec = 0; } - return 0; +static time_t +mrb_to_time_t(mrb_state *mrb, mrb_value obj, time_t *usec) +{ + switch (mrb_type(obj)) { +#ifndef MRB_NO_FLOAT + case MRB_TT_FLOAT: + return mrb_time_t_from_float(mrb, obj, usec); +#endif /* MRB_NO_FLOAT */ + +#ifdef MRB_USE_BIGINT + case MRB_TT_BIGINT: + return mrb_time_t_from_bigint(mrb, obj, usec); +#endif /* MRB_USE_BIGINT */ + + case MRB_TT_INTEGER: + return mrb_time_t_from_integer(mrb, obj, usec); + + default: + mrb_raisef(mrb, E_TYPE_ERROR, "cannot convert %Y to time", obj); + return 0; /* Should not reach here */ + } +} + +/* + * Converts a time_t value `t` into an appropriate mruby numeric value. + * - If `t` fits in mrb_int (checked by fixable_time_t_p), returns an mrb_int_value. + * - Otherwise, if MRB_USE_BIGINT is defined, returns a BigInt. + * - Otherwise, if MRB_NO_FLOAT is not defined, returns a Float. + * - Otherwise, raises an ArgumentError if the time value is too large to represent. + */ +static mrb_value +time_value_from_time_t(mrb_state *mrb, time_t t) +{ + if (!fixable_time_t_p(t)) { +#if defined(MRB_USE_BIGINT) + if (MRB_TIME_T_UINT) { + return mrb_bint_new_uint64(mrb, (uint64_t)t); + } + else { + return mrb_bint_new_int64(mrb, (int64_t)t); + } +#elif !defined(MRB_NO_FLOAT) + return mrb_float_value(mrb, (mrb_float)t); +#else + mrb_raise(mrb, E_RANGE_ERROR, "Time out of range"); +#endif + } + return mrb_int_value(mrb, (mrb_int)t); } /** Updates the datetime of a mrb_time based on it's timezone and @@ -288,8 +407,8 @@ mrb_to_time_t(mrb_state *mrb, mrb_value obj, time_t *usec) static struct mrb_time* time_update_datetime(mrb_state *mrb, struct mrb_time *self, int dealloc) { - struct tm *aid; time_t t = self->sec; + struct tm *aid; if (self->timezone == MRB_TIMEZONE_UTC) { aid = gmtime_r(&t, &self->datetime); @@ -298,109 +417,155 @@ time_update_datetime(mrb_state *mrb, struct mrb_time *self, int dealloc) aid = localtime_r(&t, &self->datetime); } if (!aid) { - mrb_sec sec = (mrb_sec)t; - if (dealloc) mrb_free(mrb, self); - mrb_raisef(mrb, E_ARGUMENT_ERROR, "%v out of Time range", mrb_sec_value(mrb, sec)); + time_out_of_range(mrb, time_value_from_time_t(mrb, t)); /* not reached */ return NULL; } #ifdef NO_GMTIME_R - self->datetime = *aid; /* copy data */ + /* + * If reentrant gmtime_r/localtime_r are not available (NO_GMTIME_R is defined), + * standard gmtime/localtime are used. These functions often return a pointer + * to a static internal buffer. To avoid this buffer being overwritten by subsequent + * calls, the data pointed to by `aid` must be copied into `self->datetime`. + */ + self->datetime = *aid; /* copy data from static buffer */ #endif return self; } static mrb_value -mrb_time_wrap(mrb_state *mrb, struct RClass *tc, struct mrb_time *tm) +time_wrap(mrb_state *mrb, struct RClass *tc, struct mrb_time *tm) { - return mrb_obj_value(Data_Wrap_Struct(mrb, tc, &mrb_time_type, tm)); + return mrb_obj_value(Data_Wrap_Struct(mrb, tc, &time_type, tm)); } /* Allocates a mrb_time object and initializes it. */ static struct mrb_time* -time_alloc_time(mrb_state *mrb, time_t sec, time_t usec, enum mrb_timezone timezone) +time_alloc_time(mrb_state *mrb, time_t sec, time_t nsec, enum mrb_timezone timezone) { - struct mrb_time *tm; - - tm = (struct mrb_time *)mrb_malloc(mrb, sizeof(struct mrb_time)); - tm->sec = sec; - tm->usec = usec; - if (MRB_TIME_T_UINT && tm->usec < 0) { - long sec2 = (long)NDIV(tm->usec,1000000); /* negative div */ - tm->usec -= sec2 * 1000000; - tm->sec += sec2; + struct mrb_time *time_obj = (struct mrb_time*)mrb_malloc(mrb, sizeof(struct mrb_time)); + time_obj->sec = sec; + time_obj->nsec = nsec; + + /* Normalize seconds and nanoseconds. */ + /* This is only necessary if time_t is signed and nsec is negative. */ + if (!MRB_TIME_T_UINT && time_obj->nsec < 0) { + /* + * If nsec is negative, adjust seconds downwards. + * NDIV calculates division rounded towards negative infinity. + * For example, NDIV(-1, 1000000000) is -1, so 1 second is subtracted. + */ + long sec_adjustment = (long)NDIV(time_obj->nsec, 1000000000L); + time_obj->nsec -= sec_adjustment * 1000000000L; /* Becomes positive or zero */ + time_obj->sec += sec_adjustment; } - else if (tm->usec >= 1000000) { - long sec2 = (long)(tm->usec / 1000000); - tm->usec -= sec2 * 1000000; - tm->sec += sec2; + /* Handle positive nanosecond overflow. */ + else if (time_obj->nsec >= 1000000000L) { + /* If nsec is 1000000000 or more, adjust seconds upwards. */ + long sec_adjustment = (long)(time_obj->nsec / 1000000000L); + time_obj->nsec -= sec_adjustment * 1000000000L; /* Reduce to < 1000000000 */ + time_obj->sec += sec_adjustment; } - tm->timezone = timezone; - time_update_datetime(mrb, tm, TRUE); + time_obj->timezone = timezone; + /* Update the datetime struct; this also handles potential deallocation on error. */ + time_update_datetime(mrb, time_obj, TRUE); - return tm; + return time_obj; } +/* + * Allocates and initializes an mrb_time structure from mruby values for seconds and microseconds. + * It first converts the mruby values to time_t using mrb_to_time_t, + * then calls time_alloc_time to perform the actual allocation and normalization. + */ static struct mrb_time* time_alloc(mrb_state *mrb, mrb_value sec, mrb_value usec, enum mrb_timezone timezone) { - time_t tsec, tusec; + time_t tsec, tusec; /* Variables to hold converted seconds and microseconds */ + time_t nsec; tsec = mrb_to_time_t(mrb, sec, &tusec); tusec += mrb_to_time_t(mrb, usec, NULL); - return time_alloc_time(mrb, tsec, tusec, timezone); + /* Normalize microseconds to avoid overflow when converting to nanoseconds */ + if (tusec >= USECS_PER_SEC || tusec <= -USECS_PER_SEC) { + time_t sec_adjustment = tusec / USECS_PER_SEC; + tusec -= sec_adjustment * USECS_PER_SEC; + tsec += sec_adjustment; + } + + nsec = tusec * NSECS_PER_USEC; + return time_alloc_time(mrb, tsec, nsec, timezone); } +/* + * Creates a new Time object from C-native time_t seconds and microseconds. + * This is a lower-level constructor compared to time_make. + */ static mrb_value -mrb_time_make_time(mrb_state *mrb, struct RClass *c, time_t sec, time_t usec, enum mrb_timezone timezone) +time_make_time(mrb_state *mrb, struct RClass *c, time_t sec, time_t usec, enum mrb_timezone timezone) { - return mrb_time_wrap(mrb, c, time_alloc_time(mrb, sec, usec, timezone)); + return time_wrap(mrb, c, time_alloc_time(mrb, sec, usec, timezone)); } +/* + * Creates a new Time object from mruby values representing seconds and microseconds. + * This is a higher-level constructor that handles mruby type conversions. + */ static mrb_value -mrb_time_make(mrb_state *mrb, struct RClass *c, mrb_value sec, mrb_value usec, enum mrb_timezone timezone) +time_make(mrb_state *mrb, struct RClass *c, mrb_value sec, mrb_value usec, enum mrb_timezone timezone) { - return mrb_time_wrap(mrb, c, time_alloc(mrb, sec, usec, timezone)); + return time_wrap(mrb, c, time_alloc(mrb, sec, usec, timezone)); } +/* + * Retrieves the current system time and creates a new mrb_time object. + * It uses different strategies based on platform capabilities: + * 1. timespec_get (C11 standard, if TIME_UTC is defined) + * 2. clock_gettime (POSIX standard, if USE_CLOCK_GETTIME is defined) + * 3. gettimeofday (Commonly available POSIX function, or our polyfill on Windows) + * 4. time(NULL) (Standard C, second precision only; microseconds are faked if called rapidly) + * The new Time object is initialized to the local timezone. + */ static struct mrb_time* current_mrb_time(mrb_state *mrb) { - struct mrb_time tmzero = {0}; - struct mrb_time *tm; - time_t sec, usec; + struct mrb_time tmzero = {0}; /* Used to initialize the new mrb_time struct */ + time_t sec, nsec; #if defined(TIME_UTC) && !defined(__ANDROID__) { struct timespec ts; timespec_get(&ts, TIME_UTC); sec = ts.tv_sec; - usec = ts.tv_nsec / 1000; + nsec = ts.tv_nsec; /* Full nanosecond precision preserved */ } #elif defined(USE_CLOCK_GETTIME) { struct timespec ts; clock_gettime(CLOCK_REALTIME, &ts); sec = ts.tv_sec; - usec = ts.tv_nsec / 1000; + nsec = ts.tv_nsec; /* Full nanosecond precision preserved */ } #elif defined(NO_GETTIMEOFDAY) { static time_t last_sec = 0, last_usec = 0; sec = time(NULL); - if (sec != last_sec) { + if (sec != last_sec) { /* Time has advanced by at least one second */ last_sec = sec; last_usec = 0; } - else { - /* add 1 usec to differentiate two times */ + else { /* Called multiple times within the same second */ + /* Add 1 usec to differentiate two Time objects created in rapid succession. + * This is a simple way to ensure distinctness when second-level precision is the best available. + * Note: This might lead to microsecond values that don't reflect actual time but ensure uniqueness. + */ last_usec += 1; } - usec = last_usec; + nsec = last_usec * NSECS_PER_USEC; /* Convert fake microseconds to nanoseconds */ } #else { @@ -408,42 +573,73 @@ current_mrb_time(mrb_state *mrb) gettimeofday(&tv, NULL); sec = tv.tv_sec; - usec = tv.tv_usec; + nsec = tv.tv_usec * NSECS_PER_USEC; /* Convert microseconds to nanoseconds */ } #endif - tm = (struct mrb_time *)mrb_malloc(mrb, sizeof(*tm)); + + struct mrb_time *tm = (struct mrb_time*)mrb_malloc(mrb, sizeof(*tm)); *tm = tmzero; - tm->sec = sec; tm->usec = usec; + tm->sec = sec; tm->nsec = nsec; tm->timezone = MRB_TIMEZONE_LOCAL; time_update_datetime(mrb, tm, TRUE); return tm; } -/* Allocates a new Time object with given millis value. */ +/* + * call-seq: + * Time.now -> time + * + * Returns a new Time object representing the current system time. + * The time is created in the local timezone. + * + * Time.now #=> 2023-12-25 10:30:45 +0900 + */ static mrb_value -mrb_time_now(mrb_state *mrb, mrb_value self) +time_now(mrb_state *mrb, mrb_value self) { - return mrb_time_wrap(mrb, mrb_class_ptr(self), current_mrb_time(mrb)); + return time_wrap(mrb, mrb_class_ptr(self), current_mrb_time(mrb)); } MRB_API mrb_value mrb_time_at(mrb_state *mrb, time_t sec, time_t usec, enum mrb_timezone zone) { - return mrb_time_make_time(mrb, mrb_class_get_id(mrb, MRB_SYM(Time)), sec, usec, zone); + time_t nsec; + + /* Normalize microseconds to avoid overflow when converting to nanoseconds */ + if (usec >= USECS_PER_SEC || usec <= -USECS_PER_SEC) { + time_t sec_adjustment = usec / USECS_PER_SEC; + usec -= sec_adjustment * USECS_PER_SEC; + sec += sec_adjustment; + } + + nsec = usec * NSECS_PER_USEC; + return time_make_time(mrb, mrb_class_get_id(mrb, MRB_SYM(Time)), sec, nsec, zone); } -/* 15.2.19.6.1 */ -/* Creates an instance of time at the given time in seconds, etc. */ +/* + * call-seq: + * Time.at(seconds) -> time + * Time.at(seconds, microseconds) -> time + * + * Creates a new Time object representing the specified number of seconds + * since the Unix epoch (1970-01-01 00:00:00 UTC). The optional second + * argument specifies additional microseconds. + * + * Time.at(0) #=> 1970-01-01 09:00:00 +0900 + * Time.at(1000000000) #=> 2001-09-09 10:46:40 +0900 + * Time.at(1.5) #=> 1970-01-01 09:00:01 +0900 (with 500000 usec) + * Time.at(0, 500000) #=> 1970-01-01 09:00:00 +0900 (with 500000 usec) + */ static mrb_value -mrb_time_at_m(mrb_state *mrb, mrb_value self) +time_at_m(mrb_state *mrb, mrb_value self) { mrb_value sec; mrb_value usec = mrb_fixnum_value(0); mrb_get_args(mrb, "o|o", &sec, &usec); - return mrb_time_make(mrb, mrb_class_ptr(self), sec, usec, MRB_TIMEZONE_LOCAL); + return time_make(mrb, mrb_class_ptr(self), sec, usec, MRB_TIMEZONE_LOCAL); } static struct mrb_time* @@ -451,26 +647,37 @@ time_mktime(mrb_state *mrb, mrb_int ayear, mrb_int amonth, mrb_int aday, mrb_int ahour, mrb_int amin, mrb_int asec, mrb_int ausec, enum mrb_timezone timezone) { - time_t nowsecs; struct tm nowtime = { 0 }; #if MRB_INT_MAX > INT_MAX -#define OUTINT(x) (((MRB_TIME_T_UINT ? 0 : INT_MIN) > (x)) || (x) > INT_MAX) +#define OUTINT(x) (((MRB_TIME_T_UINT ? 0 : INT_MIN) > (x)) || (x) > INT_MAX - TM_YEAR_BASE) #else #define OUTINT(x) 0 #endif - if (ayear < 1900 || OUTINT(ayear-1900) || - amonth < 1 || amonth > 12 || - aday < 1 || aday > 31 || - ahour < 0 || ahour > 24 || - (ahour == 24 && (amin > 0 || asec > 0)) || - amin < 0 || amin > 59 || - asec < 0 || asec > 60) + /* Check for underflow before adjusting year */ + if (ayear < MRB_INT_MIN + TM_YEAR_BASE) mrb_raise(mrb, E_ARGUMENT_ERROR, "argument out of range"); - nowtime.tm_year = (int)(ayear - 1900); - nowtime.tm_mon = (int)(amonth - 1); + /* Adjust year to be relative to TM_YEAR_BASE (1900) for struct tm */ + ayear -= TM_YEAR_BASE; + + /* Validate arguments: year (after adjustment), month, day, hour, minute, second. + * This checks for valid ranges for each component. + * For hour, it allows 24 only if minutes and seconds are zero (midnight). + * For second, it allows up to 60 to accommodate leap seconds. + */ + if (OUTINT(ayear) || + amonth < 1 || amonth > MONTHS_PER_YEAR || + aday < 1 || aday > 31 || /* Max days in a month, could be more specific but 31 is a safe upper bound for validation */ + ahour < 0 || ahour > HOURS_PER_DAY || + (ahour == HOURS_PER_DAY && (amin > 0 || asec > 0)) || /* Allow 24:00:00 */ + amin < 0 || amin > (MINS_PER_HOUR -1) || + asec < 0 || asec > SECS_PER_MIN) /* tm_sec can be 60 for leap seconds */ + mrb_raise(mrb, E_ARGUMENT_ERROR, "argument out of range"); + + nowtime.tm_year = (int)ayear; + nowtime.tm_mon = (int)(amonth - 1); /* tm_mon is 0-11 */ nowtime.tm_mday = (int)aday; nowtime.tm_hour = (int)ahour; nowtime.tm_min = (int)amin; @@ -484,80 +691,143 @@ time_mktime(mrb_state *mrb, mrb_int ayear, mrb_int amonth, mrb_int aday, else { mk = mktime; } - nowsecs = (*mk)(&nowtime); + + time_t nowsecs = (*mk)(&nowtime); + /* + * Handle mktime/timegm failure (returns -1): + * This could mean either: + * 1. Invalid date/time arguments, OR + * 2. Valid time exactly one second before Unix epoch (1969-12-31 23:59:59) + * + * To distinguish: increment seconds and test again. + * If result is 0 (epoch), original was valid epoch-1. + * Otherwise, original arguments were invalid. + */ if (nowsecs == (time_t)-1) { - nowtime.tm_sec += 1; /* maybe Epoch-1 sec */ - nowsecs = (*mk)(&nowtime); - if (nowsecs != 0) { /* check if Epoch */ - mrb_raise(mrb, E_ARGUMENT_ERROR, "Not a valid time"); + struct tm test_tm = nowtime; + test_tm.tm_sec += 1; + if ((*mk)(&test_tm) != 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid time"); } - nowsecs = (time_t)-1; /* valid Epoch-1 */ + /* Original time was valid epoch-1, keep nowsecs = -1 */ } - return time_alloc_time(mrb, nowsecs, ausec, timezone); + return time_alloc_time(mrb, nowsecs, ausec * NSECS_PER_USEC, timezone); } -/* 15.2.19.6.2 */ -/* Creates an instance of time at the given time in UTC. */ +/* + * call-seq: + * Time.gm(year, month = 1, day = 1, hour = 0, min = 0, sec = 0, usec = 0) -> time + * Time.utc(year, month = 1, day = 1, hour = 0, min = 0, sec = 0, usec = 0) -> time + * + * Creates a new Time object representing the specified date and time in UTC. + * All arguments except year are optional and default to the minimum value. + * + * Time.gm(2023) #=> 2023-01-01 00:00:00 UTC + * Time.gm(2023, 12, 25) #=> 2023-12-25 00:00:00 UTC + * Time.gm(2023, 12, 25, 10, 30) #=> 2023-12-25 10:30:00 UTC + * Time.utc(2023, 12, 25, 10, 30, 45) #=> 2023-12-25 10:30:45 UTC + */ static mrb_value -mrb_time_gm(mrb_state *mrb, mrb_value self) +time_gm(mrb_state *mrb, mrb_value self) { mrb_int ayear = 0, amonth = 1, aday = 1, ahour = 0, amin = 0, asec = 0, ausec = 0; mrb_get_args(mrb, "i|iiiiii", &ayear, &amonth, &aday, &ahour, &amin, &asec, &ausec); - return mrb_time_wrap(mrb, mrb_class_ptr(self), + return time_wrap(mrb, mrb_class_ptr(self), time_mktime(mrb, ayear, amonth, aday, ahour, amin, asec, ausec, MRB_TIMEZONE_UTC)); } -/* 15.2.19.6.3 */ -/* Creates an instance of time at the given time in local time zone. */ +/* + * call-seq: + * Time.local(year, month = 1, day = 1, hour = 0, min = 0, sec = 0, usec = 0) -> time + * Time.mktime(year, month = 1, day = 1, hour = 0, min = 0, sec = 0, usec = 0) -> time + * + * Creates a new Time object representing the specified date and time in the + * local timezone. All arguments except year are optional and default to + * the minimum value. + * + * Time.local(2023) #=> 2023-01-01 00:00:00 +0900 + * Time.local(2023, 12, 25) #=> 2023-12-25 00:00:00 +0900 + * Time.local(2023, 12, 25, 10, 30) #=> 2023-12-25 10:30:00 +0900 + * Time.mktime(2023, 12, 25, 10, 30, 45) #=> 2023-12-25 10:30:45 +0900 + */ static mrb_value -mrb_time_local(mrb_state *mrb, mrb_value self) +time_local(mrb_state *mrb, mrb_value self) { mrb_int ayear = 0, amonth = 1, aday = 1, ahour = 0, amin = 0, asec = 0, ausec = 0; mrb_get_args(mrb, "i|iiiiii", &ayear, &amonth, &aday, &ahour, &amin, &asec, &ausec); - return mrb_time_wrap(mrb, mrb_class_ptr(self), + return time_wrap(mrb, mrb_class_ptr(self), time_mktime(mrb, ayear, amonth, aday, ahour, amin, asec, ausec, MRB_TIMEZONE_LOCAL)); } static struct mrb_time* time_get_ptr(mrb_state *mrb, mrb_value time) { - struct mrb_time *tm; - - tm = DATA_GET_PTR(mrb, time, &mrb_time_type, struct mrb_time); + struct mrb_time *tm = DATA_GET_PTR(mrb, time, &time_type, struct mrb_time); if (!tm) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "uninitialized time"); + time_uninitialized(mrb); } return tm; } +MRB_API struct tm* +mrb_time_get_tm(mrb_state *mrb, mrb_value time) +{ + struct mrb_time *tm = time_get_ptr(mrb, time); + time_update_datetime(mrb, tm, FALSE); + return &tm->datetime; +} + +/* + * call-seq: + * time == other_time -> true or false + * time.eql?(other_time) -> true or false + * + * Returns true if the two Time objects represent the same moment in time. + * Comparison is done at microsecond precision. + * + * t1 = Time.at(1000000000) + * t2 = Time.at(1000000000) + * t1 == t2 #=> true + * t1.eql?(t2) #=> true + */ static mrb_value -mrb_time_eq(mrb_state *mrb, mrb_value self) +time_eq(mrb_state *mrb, mrb_value self) { mrb_value other = mrb_get_arg1(mrb); - struct mrb_time *tm1, *tm2; - mrb_bool eq_p; - - tm1 = DATA_GET_PTR(mrb, self, &mrb_time_type, struct mrb_time); - tm2 = DATA_CHECK_GET_PTR(mrb, other, &mrb_time_type, struct mrb_time); - eq_p = tm1 && tm2 && tm1->sec == tm2->sec && tm1->usec == tm2->usec; + struct mrb_time *tm1 = DATA_GET_PTR(mrb, self, &time_type, struct mrb_time); + struct mrb_time *tm2 = DATA_CHECK_GET_PTR(mrb, other, &time_type, struct mrb_time); + mrb_bool eq_p = tm1 && tm2 && tm1->sec == tm2->sec && tm1->nsec == tm2->nsec; return mrb_bool_value(eq_p); } +/* + * call-seq: + * time <=> other_time -> -1, 0, 1, or nil + * + * Compares two Time objects. Returns -1 if time is earlier than other_time, + * 0 if they are equal, 1 if time is later than other_time, or nil if + * other_time is not a Time object. + * + * t1 = Time.at(1000000000) + * t2 = Time.at(1000000001) + * t1 <=> t2 #=> -1 + * t2 <=> t1 #=> 1 + * t1 <=> t1 #=> 0 + */ static mrb_value -mrb_time_cmp(mrb_state *mrb, mrb_value self) +time_cmp(mrb_state *mrb, mrb_value self) { mrb_value other = mrb_get_arg1(mrb); - struct mrb_time *tm1, *tm2; + struct mrb_time *tm1 = DATA_GET_PTR(mrb, self, &time_type, struct mrb_time); + struct mrb_time *tm2 = DATA_CHECK_GET_PTR(mrb, other, &time_type, struct mrb_time); - tm1 = DATA_GET_PTR(mrb, self, &mrb_time_type, struct mrb_time); - tm2 = DATA_CHECK_GET_PTR(mrb, other, &mrb_time_type, struct mrb_time); if (!tm1 || !tm2) return mrb_nil_value(); if (tm1->sec > tm2->sec) { return mrb_fixnum_value(1); @@ -566,10 +836,10 @@ mrb_time_cmp(mrb_state *mrb, mrb_value self) return mrb_fixnum_value(-1); } /* tm1->sec == tm2->sec */ - if (tm1->usec > tm2->usec) { + if (tm1->nsec > tm2->nsec) { return mrb_fixnum_value(1); } - else if (tm1->usec < tm2->usec) { + else if (tm1->nsec < tm2->nsec) { return mrb_fixnum_value(-1); } return mrb_fixnum_value(0); @@ -578,56 +848,84 @@ mrb_time_cmp(mrb_state *mrb, mrb_value self) static mrb_noreturn void int_overflow(mrb_state *mrb, const char *reason) { - mrb_raisef(mrb, E_RANGE_ERROR, "time_t overflow in Time %s", reason); + mrb_raisef(mrb, E_RANGE_ERROR, "Time out of range in %s", reason); } +/* + * call-seq: + * time + numeric -> time + * + * Returns a new Time object representing time + numeric seconds. + * The numeric can be an Integer, Float, or other numeric type. + * + * t = Time.at(1000000000) + * t + 1 #=> 2001-09-09 10:46:41 +0900 + * t + 0.5 #=> 2001-09-09 10:46:40 +0900 (with 500000 usec) + * t + 3600 #=> 2001-09-09 11:46:40 +0900 (one hour later) + */ static mrb_value -mrb_time_plus(mrb_state *mrb, mrb_value self) +time_plus(mrb_state *mrb, mrb_value self) { mrb_value o = mrb_get_arg1(mrb); - struct mrb_time *tm; time_t sec, usec; - tm = time_get_ptr(mrb, self); + struct mrb_time *tm = time_get_ptr(mrb, self); sec = mrb_to_time_t(mrb, o, &usec); #ifdef MRB_HAVE_TYPE_GENERIC_CHECKED_ARITHMETIC_BUILTINS - if (__builtin_add_overflow(tm->sec, sec, &sec)) { + /* + * Add seconds and handle potential overflow. + * If __builtin_add_overflow is available (GCC/Clang extension), use it for safe addition. + * Otherwise, perform manual overflow checks before addition. + */ + if (__builtin_add_overflow(tm->sec, sec, &sec)) { /* sec result is stored back in sec */ int_overflow(mrb, "addition"); } #else - if (sec >= 0) { - if (tm->sec > MRB_TIME_MAX - sec) { + if (sec >= 0) { /* Adding a positive number */ + if (tm->sec > MRB_TIME_MAX - sec) { /* Check for positive overflow */ int_overflow(mrb, "addition"); } } - else { - if (tm->sec < MRB_TIME_MIN - sec) { + else { /* Adding a negative number (effectively subtraction) */ + if (tm->sec < MRB_TIME_MIN - sec) { /* Check for negative overflow */ int_overflow(mrb, "addition"); } } - sec = tm->sec + sec; + sec = tm->sec + sec; /* Perform the addition */ #endif - return mrb_time_make_time(mrb, mrb_obj_class(mrb, self), sec, tm->usec+usec, tm->timezone); + return time_make_time(mrb, mrb_obj_class(mrb, self), sec, tm->nsec + usec * NSECS_PER_USEC, tm->timezone); } +/* + * call-seq: + * time - other_time -> float + * time - numeric -> time + * + * If other_time is a Time object, returns the difference in seconds as a Float. + * If numeric is given, returns a new Time object representing time - numeric seconds. + * + * t1 = Time.at(1000000000) + * t2 = Time.at(1000000001) + * t2 - t1 #=> 1.0 + * t1 - 1 #=> 2001-09-09 10:46:39 +0900 + * t1 - 0.5 #=> 2001-09-09 10:46:39 +0900 (with 500000 usec) + */ static mrb_value -mrb_time_minus(mrb_state *mrb, mrb_value self) +time_minus(mrb_state *mrb, mrb_value self) { mrb_value other = mrb_get_arg1(mrb); - struct mrb_time *tm, *tm2; + struct mrb_time *tm = time_get_ptr(mrb, self); + struct mrb_time *tm2 = DATA_CHECK_GET_PTR(mrb, other, &time_type, struct mrb_time); - tm = time_get_ptr(mrb, self); - tm2 = DATA_CHECK_GET_PTR(mrb, other, &mrb_time_type, struct mrb_time); if (tm2) { #ifndef MRB_NO_FLOAT mrb_float f; - f = (mrb_sec)(tm->sec - tm2->sec) - + (mrb_sec)(tm->usec - tm2->usec) / 1.0e6; + f = (mrb_float)(tm->sec - tm2->sec) + + (mrb_float)(tm->nsec - tm2->nsec) / 1.0e9; return mrb_float_value(mrb, f); #else - mrb_int f; - f = tm->sec - tm2->sec; - if (tm->usec < tm2->usec) f--; + mrb_int f = tm->sec - tm2->sec; + if (tm->nsec < tm2->nsec) f--; return mrb_int_value(mrb, f); #endif } @@ -635,80 +933,121 @@ mrb_time_minus(mrb_state *mrb, mrb_value self) time_t sec, usec; sec = mrb_to_time_t(mrb, other, &usec); #ifdef MRB_HAVE_TYPE_GENERIC_CHECKED_ARITHMETIC_BUILTINS - if (__builtin_sub_overflow(tm->sec, sec, &sec)) { - int_overflow(mrb, "subtraction"); - } + /* + * Subtract seconds and handle potential overflow. + * If __builtin_sub_overflow is available, use it. + * Otherwise, perform manual overflow checks. Note that `sec` here is the subtrahend. + */ + if (__builtin_sub_overflow(tm->sec, sec, &sec)) { /* sec result is stored back in sec */ + int_overflow(mrb, "subtraction"); + } #else - if (sec >= 0) { - if (tm->sec < MRB_TIME_MIN + sec) { + if (sec >= 0) { /* Subtracting a positive number */ + if (tm->sec < MRB_TIME_MIN + sec) { /* Check for negative overflow */ int_overflow(mrb, "subtraction"); } } - else { - if (tm->sec > MRB_TIME_MAX + sec) { + else { /* Subtracting a negative number (effectively addition) */ + if (tm->sec > MRB_TIME_MAX + sec) { /* Check for positive overflow */ int_overflow(mrb, "subtraction"); } - } - sec = tm->sec - sec; + } + sec = tm->sec - sec; /* Perform the subtraction */ #endif - return mrb_time_make_time(mrb, mrb_obj_class(mrb, self), sec, tm->usec-usec, tm->timezone); + return time_make_time(mrb, mrb_obj_class(mrb, self), sec, tm->nsec - usec * NSECS_PER_USEC, tm->timezone); } } -/* 15.2.19.7.30 */ -/* Returns week day number of time. */ +/* + * call-seq: + * time.wday -> integer + * + * Returns the day of the week (0-6) of the time, where Sunday is 0. + * + * Time.local(2023, 12, 25).wday #=> 1 (Monday) + * Time.local(2023, 12, 24).wday #=> 0 (Sunday) + * Time.local(2023, 12, 30).wday #=> 6 (Saturday) + */ static mrb_value -mrb_time_wday(mrb_state *mrb, mrb_value self) +time_wday(mrb_state *mrb, mrb_value self) { - struct mrb_time *tm; - - tm = time_get_ptr(mrb, self); + struct mrb_time *tm = time_get_ptr(mrb, self); return mrb_fixnum_value(tm->datetime.tm_wday); } -/* 15.2.19.7.31 */ -/* Returns year day number of time. */ +/* + * call-seq: + * time.yday -> integer + * + * Returns the day of the year (1-366) of the time. + * + * Time.local(2023, 1, 1).yday #=> 1 + * Time.local(2023, 12, 31).yday #=> 365 + * Time.local(2024, 12, 31).yday #=> 366 (leap year) + */ static mrb_value -mrb_time_yday(mrb_state *mrb, mrb_value self) +time_yday(mrb_state *mrb, mrb_value self) { - struct mrb_time *tm; - - tm = time_get_ptr(mrb, self); + struct mrb_time *tm = time_get_ptr(mrb, self); return mrb_fixnum_value(tm->datetime.tm_yday + 1); } -/* 15.2.19.7.32 */ -/* Returns year of time. */ +/* + * call-seq: + * time.year -> integer + * + * Returns the year of the time. + * + * Time.local(2023, 12, 25).year #=> 2023 + * Time.at(0).year #=> 1970 + */ static mrb_value -mrb_time_year(mrb_state *mrb, mrb_value self) +time_year(mrb_state *mrb, mrb_value self) { - struct mrb_time *tm; - - tm = time_get_ptr(mrb, self); - return mrb_fixnum_value(tm->datetime.tm_year + 1900); + struct mrb_time *tm = time_get_ptr(mrb, self); + return mrb_fixnum_value(tm->datetime.tm_year + TM_YEAR_BASE); } static size_t time_zonename(mrb_state *mrb, struct mrb_time *tm, char *buf, size_t len) { -#if defined(_MSC_VER) && _MSC_VER < 1900 || defined(__MINGW64__) || defined(__MINGW32__) - struct tm datetime = {0}; - time_t utc_sec = timegm(&tm->datetime); - int offset = abs((int)(utc_sec - tm->sec) / 60); - datetime.tm_year = 100; - datetime.tm_hour = offset / 60; - datetime.tm_min = offset % 60; - buf[0] = utc_sec < tm->sec ? '-' : '+'; - return strftime(buf+1, len-1, "%H%M", &datetime) + 1; +#ifdef MRB_TIME_WINDOWS_NO_STRFTIME_Z + /* + * On some Windows versions (specifically with MSC_VER < 1900, i.e., pre-VS2015, or MinGW), + * strftime's "%z" (timezone offset) specifier might not be available or reliable. + * This block manually calculates the UTC offset. + */ + struct tm datetime = {0}; /* Temporary tm struct for strftime */ + time_t utc_sec = timegm(&tm->datetime); /* Convert current datetime (interpreted as UTC) to time_t */ + /* Calculate offset in minutes: difference between this UTC time_t and the stored local time_t */ + int offset = abs((int)(utc_sec - tm->sec) / SECS_PER_MIN); + /* Copy actual date components for accurate timezone/DST calculation */ + datetime.tm_year = tm->datetime.tm_year; + datetime.tm_mon = tm->datetime.tm_mon; + datetime.tm_mday = tm->datetime.tm_mday; + datetime.tm_hour = offset / MINS_PER_HOUR; /* Convert offset to hours and minutes */ + datetime.tm_min = offset % MINS_PER_HOUR; + buf[0] = utc_sec < tm->sec ? '-' : '+'; /* Determine sign of the offset */ + return strftime(buf+1, len-1, "%H%M", &datetime) + 1; /* Format as +HHMM or -HHMM */ #else + /* On other systems, use strftime with "%z" to get the timezone offset */ return strftime(buf, len, "%z", &tm->datetime); #endif } -/* 15.2.19.7.33 */ -/* Returns name of time's timezone. */ +/* + * call-seq: + * time.zone -> string + * + * Returns the timezone name or offset of the time. + * For UTC times, returns "UTC". For local times, returns the + * timezone offset in the format "+HHMM" or "-HHMM". + * + * Time.utc(2023, 12, 25).zone #=> "UTC" + * Time.local(2023, 12, 25).zone #=> "+0900" (example for JST) + */ static mrb_value -mrb_time_zone(mrb_state *mrb, mrb_value self) +time_zone(mrb_state *mrb, mrb_value self) { struct mrb_time *tm = time_get_ptr(mrb, self); if (tm->timezone == MRB_TIMEZONE_UTC) { @@ -719,10 +1058,19 @@ mrb_time_zone(mrb_state *mrb, mrb_value self) return mrb_str_new(mrb, buf, len); } -/* 15.2.19.7.4 */ -/* Returns a string that describes the time. */ +/* + * call-seq: + * time.asctime -> string + * time.ctime -> string + * + * Returns a string representation of the time in the classic Unix + * asctime format: "Day Mon DD HH:MM:SS YYYY". + * + * Time.local(2023, 12, 25, 10, 30, 45).asctime #=> "Mon Dec 25 10:30:45 2023" + * Time.utc(2023, 1, 1, 0, 0, 0).ctime #=> "Sun Jan 1 00:00:00 2023" + */ static mrb_value -mrb_time_asctime(mrb_state *mrb, mrb_value self) +time_asctime(mrb_state *mrb, mrb_value self) { struct mrb_time *tm = time_get_ptr(mrb, self); struct tm *d = &tm->datetime; @@ -742,252 +1090,386 @@ mrb_time_asctime(mrb_state *mrb, mrb_value self) len = snprintf(buf, sizeof(buf), "%s %s %2d %02d:%02d:%02d %.4d", wday_names[d->tm_wday], mon_names[d->tm_mon], d->tm_mday, d->tm_hour, d->tm_min, d->tm_sec, - d->tm_year + 1900); + d->tm_year + TM_YEAR_BASE); #endif return mrb_str_new(mrb, buf, len); } -/* 15.2.19.7.6 */ -/* Returns the day in the month of the time. */ +/* + * call-seq: + * time.day -> integer + * time.mday -> integer + * + * Returns the day of the month (1-31) of the time. + * + * Time.local(2023, 12, 25).day #=> 25 + * Time.local(2023, 1, 1).mday #=> 1 + */ static mrb_value -mrb_time_day(mrb_state *mrb, mrb_value self) +time_day(mrb_state *mrb, mrb_value self) { - struct mrb_time *tm; - - tm = time_get_ptr(mrb, self); + struct mrb_time *tm = time_get_ptr(mrb, self); return mrb_fixnum_value(tm->datetime.tm_mday); } -/* 15.2.19.7.7 */ -/* Returns true if daylight saving was applied for this time. */ +/* + * call-seq: + * time.dst? -> true or false + * + * Returns true if daylight saving time is in effect for this time, + * false otherwise. Only meaningful for local times. + * + * # Example depends on local timezone and DST rules + * Time.local(2023, 7, 15).dst? #=> true (summer in northern hemisphere) + * Time.local(2023, 1, 15).dst? #=> false (winter in northern hemisphere) + * Time.utc(2023, 7, 15).dst? #=> false (UTC has no DST) + */ static mrb_value -mrb_time_dst_p(mrb_state *mrb, mrb_value self) +time_dst_p(mrb_state *mrb, mrb_value self) { - struct mrb_time *tm; - - tm = time_get_ptr(mrb, self); + struct mrb_time *tm = time_get_ptr(mrb, self); return mrb_bool_value(tm->datetime.tm_isdst); } -/* 15.2.19.7.8 */ -/* 15.2.19.7.10 */ -/* Returns the Time object of the UTC(GMT) timezone. */ +/* + * call-seq: + * time.getutc -> time + * time.getgm -> time + * + * Returns a new Time object representing the same moment in UTC timezone. + * The original time object is not modified. + * + * t = Time.local(2023, 12, 25, 10, 30) #=> 2023-12-25 10:30:00 +0900 + * t.getutc #=> 2023-12-25 01:30:00 UTC + * t #=> 2023-12-25 10:30:00 +0900 (unchanged) + */ static mrb_value -mrb_time_getutc(mrb_state *mrb, mrb_value self) +time_getutc(mrb_state *mrb, mrb_value self) { - struct mrb_time *tm, *tm2; - - tm = time_get_ptr(mrb, self); - tm2 = (struct mrb_time *)mrb_malloc(mrb, sizeof(*tm)); + struct mrb_time *tm = time_get_ptr(mrb, self); + struct mrb_time *tm2 = (struct mrb_time*)mrb_malloc(mrb, sizeof(*tm)); *tm2 = *tm; - tm2->timezone = MRB_TIMEZONE_UTC; - time_update_datetime(mrb, tm2, TRUE); - return mrb_time_wrap(mrb, mrb_obj_class(mrb, self), tm2); + if (tm2->timezone != MRB_TIMEZONE_UTC) { + tm2->timezone = MRB_TIMEZONE_UTC; + time_update_datetime(mrb, tm2, TRUE); + } + return time_wrap(mrb, mrb_obj_class(mrb, self), tm2); } -/* 15.2.19.7.9 */ -/* Returns the Time object of the LOCAL timezone. */ +/* + * call-seq: + * time.getlocal -> time + * + * Returns a new Time object representing the same moment in local timezone. + * The original time object is not modified. + * + * t = Time.utc(2023, 12, 25, 1, 30) #=> 2023-12-25 01:30:00 UTC + * t.getlocal #=> 2023-12-25 10:30:00 +0900 + * t #=> 2023-12-25 01:30:00 UTC (unchanged) + */ static mrb_value -mrb_time_getlocal(mrb_state *mrb, mrb_value self) +time_getlocal(mrb_state *mrb, mrb_value self) { - struct mrb_time *tm, *tm2; - - tm = time_get_ptr(mrb, self); - tm2 = (struct mrb_time *)mrb_malloc(mrb, sizeof(*tm)); + struct mrb_time *tm = time_get_ptr(mrb, self); + struct mrb_time *tm2 = (struct mrb_time*)mrb_malloc(mrb, sizeof(*tm)); *tm2 = *tm; - tm2->timezone = MRB_TIMEZONE_LOCAL; - time_update_datetime(mrb, tm2, TRUE); - return mrb_time_wrap(mrb, mrb_obj_class(mrb, self), tm2); + if (tm2->timezone != MRB_TIMEZONE_LOCAL) { + tm2->timezone = MRB_TIMEZONE_LOCAL; + time_update_datetime(mrb, tm2, TRUE); + } + return time_wrap(mrb, mrb_obj_class(mrb, self), tm2); } -/* 15.2.19.7.15 */ -/* Returns hour of time. */ +/* + * call-seq: + * time.hour -> integer + * + * Returns the hour of the day (0-23) of the time. + * + * Time.local(2023, 12, 25, 10, 30).hour #=> 10 + * Time.local(2023, 12, 25, 0, 0).hour #=> 0 + * Time.local(2023, 12, 25, 23, 59).hour #=> 23 + */ static mrb_value -mrb_time_hour(mrb_state *mrb, mrb_value self) +time_hour(mrb_state *mrb, mrb_value self) { - struct mrb_time *tm; - - tm = time_get_ptr(mrb, self); + struct mrb_time *tm = time_get_ptr(mrb, self); return mrb_fixnum_value(tm->datetime.tm_hour); } -/* 15.2.19.7.16 */ -/* Initializes a time by setting the amount of milliseconds since the epoch.*/ +/* + * call-seq: + * Time.new -> time + * Time.new(year, month = 1, day = 1, hour = 0, min = 0, sec = 0, usec = 0) -> time + * + * Creates a new Time object. With no arguments, creates a Time representing + * the current moment. With arguments, creates a Time representing the + * specified date and time in the local timezone. + * + * Time.new #=> 2023-12-25 10:30:45 +0900 (current time) + * Time.new(2023) #=> 2023-01-01 00:00:00 +0900 + * Time.new(2023, 12, 25) #=> 2023-12-25 00:00:00 +0900 + * Time.new(2023, 12, 25, 10, 30, 45) #=> 2023-12-25 10:30:45 +0900 + */ static mrb_value -mrb_time_initialize(mrb_state *mrb, mrb_value self) +time_init(mrb_state *mrb, mrb_value self) { mrb_int ayear = 0, amonth = 1, aday = 1, ahour = 0, amin = 0, asec = 0, ausec = 0; - mrb_int n; - struct mrb_time *tm; - - n = mrb_get_args(mrb, "|iiiiiii", - &ayear, &amonth, &aday, &ahour, &amin, &asec, &ausec); - tm = (struct mrb_time*)DATA_PTR(self); - if (tm) { - mrb_free(mrb, tm); + + mrb_int n = mrb_get_args(mrb, "|iiiiiii", /* year, month, day, hour, minute, second, microsecond (all optional) */ + &ayear, &amonth, &aday, &ahour, &amin, &asec, &ausec); + struct mrb_time *tm = (struct mrb_time*)DATA_PTR(self); + + if (tm) { /* If Time object is being re-initialized (e.g. time_obj.send(:initialize, ...)) */ + mrb_free(mrb, tm); /* Free existing data */ } - mrb_data_init(self, NULL, &mrb_time_type); + mrb_data_init(self, NULL, &time_type); /* Prepare for new data */ - if (n == 0) { - tm = current_mrb_time(mrb); + if (n == 0) { /* Time.new (no arguments) */ + tm = current_mrb_time(mrb); /* Get current time */ } - else { + else { /* Time.new(year, [mon, day, hour, min, sec, usec]) */ + /* Create time from specified components in local timezone */ tm = time_mktime(mrb, ayear, amonth, aday, ahour, amin, asec, ausec, MRB_TIMEZONE_LOCAL); } - mrb_data_init(self, tm, &mrb_time_type); + mrb_data_init(self, tm, &time_type); /* Attach the new mrb_time struct to the mruby object */ return self; } -/* 15.2.19.7.17(x) */ -/* Initializes a copy of this time object. */ +/* + * call-seq: + * time.initialize_copy(other_time) -> time + * + * Initializes this time object as a copy of other_time. + * This is a private method used internally by dup and clone. + * + * t1 = Time.now + * t2 = t1.dup # calls initialize_copy internally + */ static mrb_value -mrb_time_initialize_copy(mrb_state *mrb, mrb_value copy) +time_init_copy(mrb_state *mrb, mrb_value copy) { mrb_value src = mrb_get_arg1(mrb); - struct mrb_time *t1, *t2; if (mrb_obj_equal(mrb, copy, src)) return copy; if (!mrb_obj_is_instance_of(mrb, src, mrb_obj_class(mrb, copy))) { mrb_raise(mrb, E_TYPE_ERROR, "wrong argument class"); } - t1 = (struct mrb_time *)DATA_PTR(copy); - t2 = (struct mrb_time *)DATA_PTR(src); + struct mrb_time *t1 = (struct mrb_time*)DATA_PTR(copy); + struct mrb_time *t2 = (struct mrb_time*)DATA_PTR(src); + if (!t2) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "uninitialized time"); + time_uninitialized(mrb); } if (!t1) { - t1 = (struct mrb_time *)mrb_malloc(mrb, sizeof(struct mrb_time)); - mrb_data_init(copy, t1, &mrb_time_type); + t1 = (struct mrb_time*)mrb_malloc(mrb, sizeof(struct mrb_time)); + mrb_data_init(copy, t1, &time_type); } *t1 = *t2; return copy; } -/* 15.2.19.7.18 */ -/* Sets the timezone attribute of the Time object to LOCAL. */ +/* + * call-seq: + * time.localtime -> time + * + * Converts the time to local timezone in place and returns self. + * The time value remains the same, but the timezone is changed to local. + * + * t = Time.utc(2023, 12, 25, 1, 30) #=> 2023-12-25 01:30:00 UTC + * t.localtime #=> 2023-12-25 10:30:00 +0900 + * t #=> 2023-12-25 10:30:00 +0900 (modified) + */ static mrb_value -mrb_time_localtime(mrb_state *mrb, mrb_value self) +time_localtime(mrb_state *mrb, mrb_value self) { - struct mrb_time *tm; - - tm = time_get_ptr(mrb, self); + struct mrb_time *tm = time_get_ptr(mrb, self); tm->timezone = MRB_TIMEZONE_LOCAL; time_update_datetime(mrb, tm, FALSE); return self; } -/* 15.2.19.7.19 */ -/* Returns day of month of time. */ -static mrb_value -mrb_time_mday(mrb_state *mrb, mrb_value self) -{ - struct mrb_time *tm; - tm = time_get_ptr(mrb, self); - return mrb_fixnum_value(tm->datetime.tm_mday); -} - -/* 15.2.19.7.20 */ -/* Returns minutes of time. */ +/* + * call-seq: + * time.min -> integer + * + * Returns the minute of the hour (0-59) of the time. + * + * Time.local(2023, 12, 25, 10, 30).min #=> 30 + * Time.local(2023, 12, 25, 10, 0).min #=> 0 + * Time.local(2023, 12, 25, 10, 59).min #=> 59 + */ static mrb_value -mrb_time_min(mrb_state *mrb, mrb_value self) +time_min(mrb_state *mrb, mrb_value self) { - struct mrb_time *tm; - - tm = time_get_ptr(mrb, self); + struct mrb_time *tm = time_get_ptr(mrb, self); return mrb_fixnum_value(tm->datetime.tm_min); } -/* 15.2.19.7.21 (mon) and 15.2.19.7.22 (month) */ -/* Returns month of time. */ +/* + * call-seq: + * time.mon -> integer + * time.month -> integer + * + * Returns the month of the year (1-12) of the time. + * + * Time.local(2023, 12, 25).mon #=> 12 + * Time.local(2023, 1, 1).month #=> 1 + */ static mrb_value -mrb_time_mon(mrb_state *mrb, mrb_value self) +time_mon(mrb_state *mrb, mrb_value self) { - struct mrb_time *tm; - - tm = time_get_ptr(mrb, self); + struct mrb_time *tm = time_get_ptr(mrb, self); return mrb_fixnum_value(tm->datetime.tm_mon + 1); } -/* 15.2.19.7.23 */ -/* Returns seconds in minute of time. */ +/* + * call-seq: + * time.sec -> integer + * + * Returns the second of the minute (0-60) of the time. + * Note: 60 is possible for leap seconds. + * + * Time.local(2023, 12, 25, 10, 30, 45).sec #=> 45 + * Time.local(2023, 12, 25, 10, 30, 0).sec #=> 0 + * Time.local(2023, 12, 25, 10, 30, 59).sec #=> 59 + */ static mrb_value -mrb_time_sec(mrb_state *mrb, mrb_value self) +time_sec(mrb_state *mrb, mrb_value self) { - struct mrb_time *tm; - - tm = time_get_ptr(mrb, self); + struct mrb_time *tm = time_get_ptr(mrb, self); return mrb_fixnum_value(tm->datetime.tm_sec); } #ifndef MRB_NO_FLOAT -/* 15.2.19.7.24 */ -/* Returns a Float with the time since the epoch in seconds. */ +/* + * call-seq: + * time.to_f -> float + * + * Returns the time as a Float representing the number of seconds + * since the Unix epoch (1970-01-01 00:00:00 UTC), including + * fractional seconds for microsecond precision. + * + * Time.at(0).to_f #=> 0.0 + * Time.at(1000000000.5).to_f #=> 1000000000.5 + * Time.at(0, 123456).to_f #=> 0.123456 + */ static mrb_value -mrb_time_to_f(mrb_state *mrb, mrb_value self) +time_to_f(mrb_state *mrb, mrb_value self) { - struct mrb_time *tm; - - tm = time_get_ptr(mrb, self); - return mrb_float_value(mrb, (mrb_float)tm->sec + (mrb_float)tm->usec/1.0e6); + struct mrb_time *tm = time_get_ptr(mrb, self); + return mrb_float_value(mrb, (mrb_float)tm->sec + (mrb_float)tm->nsec/1.0e9); } #endif -/* 15.2.19.7.25 */ -/* Returns an Integer with the time since the epoch in seconds. */ +/* + * call-seq: + * time.to_i -> integer + * + * Returns the time as an integer representing the number of seconds + * since the Unix epoch (1970-01-01 00:00:00 UTC). + * + * Time.at(0).to_i #=> 0 + * Time.at(1000000000).to_i #=> 1000000000 + * Time.local(2023, 1, 1).to_i #=> 1672531200 (example) + */ static mrb_value -mrb_time_to_i(mrb_state *mrb, mrb_value self) +time_to_i(mrb_state *mrb, mrb_value self) { - struct mrb_time *tm; - - tm = time_get_ptr(mrb, self); -#ifndef MRB_NO_FLOAT - if (!fixable_time_t_p(tm->sec)) { - return mrb_float_value(mrb, (mrb_float)tm->sec); - } -#endif - return mrb_int_value(mrb, (mrb_int)tm->sec); + struct mrb_time *tm = time_get_ptr(mrb, self); + return time_value_from_time_t(mrb, tm->sec); } -/* 15.2.19.7.26 */ -/* Returns the number of microseconds for time. */ +/* + * call-seq: + * time.usec -> integer + * + * Returns the microsecond component (0-999999) of the time. + * + * Time.at(1000000000.123456).usec #=> 123456 + * Time.at(1000000000, 500000).usec #=> 500000 + * Time.at(1000000000).usec #=> 0 + */ static mrb_value -mrb_time_usec(mrb_state *mrb, mrb_value self) +time_usec(mrb_state *mrb, mrb_value self) { - struct mrb_time *tm; - - tm = time_get_ptr(mrb, self); - return mrb_fixnum_value((mrb_int)tm->usec); + struct mrb_time *tm = time_get_ptr(mrb, self); + return mrb_fixnum_value((mrb_int)(tm->nsec / NSECS_PER_USEC)); } -/* 15.2.19.7.27 */ -/* Sets the timezone attribute of the Time object to UTC. */ +/* + * call-seq: + * time.nsec -> integer + * time.tv_nsec -> integer + * + * Returns the nanosecond component (0-999999999) of the time. + * + * Time.at(1000000000, 123456).nsec #=> 123456000 + * Time.at(1000000000.123456789).nsec #=> 123456789 + * Time.at(1000000000).nsec #=> 0 + */ static mrb_value -mrb_time_utc(mrb_state *mrb, mrb_value self) +time_nsec(mrb_state *mrb, mrb_value self) { - struct mrb_time *tm; + struct mrb_time *tm = time_get_ptr(mrb, self); + return mrb_fixnum_value((mrb_int)tm->nsec); +} - tm = time_get_ptr(mrb, self); +/* + * call-seq: + * time.utc -> time + * time.gmtime -> time + * + * Converts the time to UTC timezone in place and returns self. + * The time value remains the same, but the timezone is changed to UTC. + * + * t = Time.local(2023, 12, 25, 10, 30) #=> 2023-12-25 10:30:00 +0900 + * t.utc #=> 2023-12-25 01:30:00 UTC + * t #=> 2023-12-25 01:30:00 UTC (modified) + */ +static mrb_value +time_utc(mrb_state *mrb, mrb_value self) +{ + struct mrb_time *tm = time_get_ptr(mrb, self); tm->timezone = MRB_TIMEZONE_UTC; time_update_datetime(mrb, tm, FALSE); return self; } -/* 15.2.19.7.28 */ -/* Returns true if this time is in the UTC timezone false if not. */ +/* + * call-seq: + * time.utc? -> true or false + * time.gmt? -> true or false + * + * Returns true if the time is in UTC timezone, false otherwise. + * + * Time.utc(2023, 12, 25).utc? #=> true + * Time.local(2023, 12, 25).utc? #=> false + * Time.local(2023, 12, 25).gmt? #=> false + */ static mrb_value -mrb_time_utc_p(mrb_state *mrb, mrb_value self) +time_utc_p(mrb_state *mrb, mrb_value self) { - struct mrb_time *tm; - - tm = time_get_ptr(mrb, self); + struct mrb_time *tm = time_get_ptr(mrb, self); return mrb_bool_value(tm->timezone == MRB_TIMEZONE_UTC); } +/* + * call-seq: + * time.to_s -> string + * time.inspect -> string + * + * Returns a string representation of the time in the format + * "YYYY-MM-DD HH:MM:SS ZONE". + * + * Time.local(2023, 12, 25, 10, 30, 45).to_s #=> "2023-12-25 10:30:45 +0900" + * Time.utc(2023, 12, 25, 10, 30, 45).to_s #=> "2023-12-25 10:30:45 UTC" + */ static mrb_value -mrb_time_to_s(mrb_state *mrb, mrb_value self) +time_to_s(mrb_state *mrb, mrb_value self) { struct mrb_time *tm = time_get_ptr(mrb, self); char buf[64]; @@ -997,86 +1479,257 @@ mrb_time_to_s(mrb_state *mrb, mrb_value self) len = strftime(buf, sizeof(buf), TO_S_FMT "UTC", &tm->datetime); } else { +#ifdef MRB_TIME_WINDOWS_NO_STRFTIME_Z + /* Use two-step approach on Windows platforms without reliable %z support */ len = strftime(buf, sizeof(buf), TO_S_FMT, &tm->datetime); len += time_zonename(mrb, tm, buf+len, sizeof(buf)-len); +#else + /* Use combined format string on platforms with %z support */ + len = strftime(buf, sizeof(buf), TO_S_FMT "%z", &tm->datetime); +#endif } mrb_value str = mrb_str_new(mrb, buf, len); RSTR_SET_ASCII_FLAG(mrb_str_ptr(str)); return str; } +/* + * call-seq: + * time.hash -> integer + * + * Returns a hash value for the time object. Two time objects with + * the same time value will have the same hash value. + * + * t1 = Time.at(1000000000) + * t2 = Time.at(1000000000) + * t1.hash == t2.hash #=> true + */ static mrb_value -mrb_time_hash(mrb_state *mrb, mrb_value self) +time_hash(mrb_state *mrb, mrb_value self) { struct mrb_time *tm = time_get_ptr(mrb, self); uint32_t hash = mrb_byte_hash((uint8_t*)&tm->sec, sizeof(time_t)); - hash = mrb_byte_hash_step((uint8_t*)&tm->usec, sizeof(time_t), hash); + hash = mrb_byte_hash_step((uint8_t*)&tm->nsec, sizeof(time_t), hash); hash = mrb_byte_hash_step((uint8_t*)&tm->timezone, sizeof(tm->timezone), hash); return mrb_int_value(mrb, hash); } -void -mrb_mruby_time_gem_init(mrb_state* mrb) +/* + * Generic function for weekday checks. Used by all weekday methods. + */ +static mrb_value +time_wday_p(mrb_state *mrb, mrb_value self, int target_wday) { - struct RClass *tc; - /* ISO 15.2.19.2 */ - tc = mrb_define_class(mrb, "Time", mrb->object_class); - MRB_SET_INSTANCE_TT(tc, MRB_TT_DATA); - mrb_include_module(mrb, tc, mrb_module_get(mrb, "Comparable")); - mrb_define_class_method(mrb, tc, "at", mrb_time_at_m, MRB_ARGS_ARG(1, 1)); /* 15.2.19.6.1 */ - mrb_define_class_method(mrb, tc, "gm", mrb_time_gm, MRB_ARGS_ARG(1,6)); /* 15.2.19.6.2 */ - mrb_define_class_method(mrb, tc, "local", mrb_time_local, MRB_ARGS_ARG(1,6)); /* 15.2.19.6.3 */ - mrb_define_class_method(mrb, tc, "mktime", mrb_time_local, MRB_ARGS_ARG(1,6));/* 15.2.19.6.4 */ - mrb_define_class_method(mrb, tc, "now", mrb_time_now, MRB_ARGS_NONE()); /* 15.2.19.6.5 */ - mrb_define_class_method(mrb, tc, "utc", mrb_time_gm, MRB_ARGS_ARG(1,6)); /* 15.2.19.6.6 */ - - mrb_define_method(mrb, tc, "hash" , mrb_time_hash , MRB_ARGS_NONE()); - mrb_define_method(mrb, tc, "eql?" , mrb_time_eq , MRB_ARGS_REQ(1)); - mrb_define_method(mrb, tc, "==" , mrb_time_eq , MRB_ARGS_REQ(1)); - mrb_define_method(mrb, tc, "<=>" , mrb_time_cmp , MRB_ARGS_REQ(1)); /* 15.2.19.7.1 */ - mrb_define_method(mrb, tc, "+" , mrb_time_plus , MRB_ARGS_REQ(1)); /* 15.2.19.7.2 */ - mrb_define_method(mrb, tc, "-" , mrb_time_minus , MRB_ARGS_REQ(1)); /* 15.2.19.7.3 */ - mrb_define_method(mrb, tc, "to_s" , mrb_time_to_s , MRB_ARGS_NONE()); - mrb_define_method(mrb, tc, "inspect", mrb_time_to_s , MRB_ARGS_NONE()); - mrb_define_method(mrb, tc, "asctime", mrb_time_asctime, MRB_ARGS_NONE()); /* 15.2.19.7.4 */ - mrb_define_method(mrb, tc, "ctime" , mrb_time_asctime, MRB_ARGS_NONE()); /* 15.2.19.7.5 */ - mrb_define_method(mrb, tc, "day" , mrb_time_day , MRB_ARGS_NONE()); /* 15.2.19.7.6 */ - mrb_define_method(mrb, tc, "dst?" , mrb_time_dst_p , MRB_ARGS_NONE()); /* 15.2.19.7.7 */ - mrb_define_method(mrb, tc, "getgm" , mrb_time_getutc , MRB_ARGS_NONE()); /* 15.2.19.7.8 */ - mrb_define_method(mrb, tc, "getlocal",mrb_time_getlocal,MRB_ARGS_NONE()); /* 15.2.19.7.9 */ - mrb_define_method(mrb, tc, "getutc" , mrb_time_getutc , MRB_ARGS_NONE()); /* 15.2.19.7.10 */ - mrb_define_method(mrb, tc, "gmt?" , mrb_time_utc_p , MRB_ARGS_NONE()); /* 15.2.19.7.11 */ - mrb_define_method(mrb, tc, "gmtime" , mrb_time_utc , MRB_ARGS_NONE()); /* 15.2.19.7.13 */ - mrb_define_method(mrb, tc, "hour" , mrb_time_hour, MRB_ARGS_NONE()); /* 15.2.19.7.15 */ - mrb_define_method(mrb, tc, "localtime", mrb_time_localtime, MRB_ARGS_NONE()); /* 15.2.19.7.18 */ - mrb_define_method(mrb, tc, "mday" , mrb_time_mday, MRB_ARGS_NONE()); /* 15.2.19.7.19 */ - mrb_define_method(mrb, tc, "min" , mrb_time_min, MRB_ARGS_NONE()); /* 15.2.19.7.20 */ - - mrb_define_method(mrb, tc, "mon" , mrb_time_mon, MRB_ARGS_NONE()); /* 15.2.19.7.21 */ - mrb_define_method(mrb, tc, "month", mrb_time_mon, MRB_ARGS_NONE()); /* 15.2.19.7.22 */ - - mrb_define_method(mrb, tc, "sec" , mrb_time_sec, MRB_ARGS_NONE()); /* 15.2.19.7.23 */ - mrb_define_method(mrb, tc, "to_i", mrb_time_to_i, MRB_ARGS_NONE()); /* 15.2.19.7.25 */ + struct mrb_time *tm = time_get_ptr(mrb, self); + return mrb_bool_value(tm->datetime.tm_wday == target_wday); +} + +/* + * call-seq: + * time.sunday? -> true or false + * + * Returns true if the time falls on a Sunday, false otherwise. + * + * Time.local(2023, 12, 24).sunday? #=> true + * Time.local(2023, 12, 25).sunday? #=> false + */ +static mrb_value +time_sunday(mrb_state *mrb, mrb_value self) +{ + return time_wday_p(mrb, self, 0); +} + +/* + * call-seq: + * time.monday? -> true or false + * + * Returns true if the time falls on a Monday, false otherwise. + * + * Time.local(2023, 12, 25).monday? #=> true + * Time.local(2023, 12, 24).monday? #=> false + */ +static mrb_value +time_monday(mrb_state *mrb, mrb_value self) +{ + return time_wday_p(mrb, self, 1); +} + +/* + * call-seq: + * time.tuesday? -> true or false + * + * Returns true if the time falls on a Tuesday, false otherwise. + * + * Time.local(2023, 12, 26).tuesday? #=> true + * Time.local(2023, 12, 25).tuesday? #=> false + */ +static mrb_value +time_tuesday(mrb_state *mrb, mrb_value self) +{ + return time_wday_p(mrb, self, 2); +} + +/* + * call-seq: + * time.wednesday? -> true or false + * + * Returns true if the time falls on a Wednesday, false otherwise. + * + * Time.local(2023, 12, 27).wednesday? #=> true + * Time.local(2023, 12, 25).wednesday? #=> false + */ +static mrb_value +time_wednesday(mrb_state *mrb, mrb_value self) +{ + return time_wday_p(mrb, self, 3); +} + +/* + * call-seq: + * time.thursday? -> true or false + * + * Returns true if the time falls on a Thursday, false otherwise. + * + * Time.local(2023, 12, 28).thursday? #=> true + * Time.local(2023, 12, 25).thursday? #=> false + */ +static mrb_value +time_thursday(mrb_state *mrb, mrb_value self) +{ + return time_wday_p(mrb, self, 4); +} + +/* + * call-seq: + * time.friday? -> true or false + * + * Returns true if the time falls on a Friday, false otherwise. + * + * Time.local(2023, 12, 29).friday? #=> true + * Time.local(2023, 12, 25).friday? #=> false + */ +static mrb_value +time_friday(mrb_state *mrb, mrb_value self) +{ + return time_wday_p(mrb, self, 5); +} + +/* + * call-seq: + * time.saturday? -> true or false + * + * Returns true if the time falls on a Saturday, false otherwise. + * + * Time.local(2023, 12, 30).saturday? #=> true + * Time.local(2023, 12, 25).saturday? #=> false + */ +static mrb_value +time_saturday(mrb_state *mrb, mrb_value self) +{ + return time_wday_p(mrb, self, 6); +} + +/* + * ISO 15.2.19.7.12 + * ISO 15.2.19.7.14 + * ISO 15.2.19.7.29 + */ +/* + * call-seq: + * time.gmt_offset -> integer + * time.utc_offset -> integer + * time.gmtoff -> integer + * + * Returns the offset in seconds between the timezone of time and UTC. + * + * Time.local(2000, 1, 1).gmt_offset #=> 32400 (JST: UTC+9) + * Time.utc(2000, 1, 1).utc_offset #=> 0 (UTC) + * Time.local(2000, 7, 1).gmtoff #=> 32400 (or 28800 if DST) + */ +static mrb_value +time_utc_offset(mrb_state *mrb, mrb_value self) +{ + struct mrb_time *tm = time_get_ptr(mrb, self); + + if (tm->timezone == MRB_TIMEZONE_UTC) { + return mrb_fixnum_value(0); /* UTC is always offset 0 */ + } + + /* For local times, calculate offset = local_time_t - utc_time_t */ + time_t utc_time_t = timegm(&tm->datetime); /* Convert datetime as UTC */ + mrb_int offset_seconds = (mrb_int)(tm->sec - utc_time_t); + + return mrb_fixnum_value(offset_seconds); +} + +/* ---------------------------*/ +static const mrb_mt_entry time_rom_entries[] = { + MRB_MT_ENTRY(time_hash, MRB_SYM(hash), MRB_ARGS_NONE()), + MRB_MT_ENTRY(time_eq, MRB_SYM_Q(eql), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(time_eq, MRB_OPSYM(eq), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(time_cmp, MRB_OPSYM(cmp), MRB_ARGS_REQ(1)), /* 15.2.19.7.1 */ + MRB_MT_ENTRY(time_plus, MRB_OPSYM(add), MRB_ARGS_REQ(1)), /* 15.2.19.7.2 */ + MRB_MT_ENTRY(time_minus, MRB_OPSYM(sub), MRB_ARGS_REQ(1)), /* 15.2.19.7.3 */ + MRB_MT_ENTRY(time_to_s, MRB_SYM(to_s), MRB_ARGS_NONE()), + MRB_MT_ENTRY(time_to_s, MRB_SYM(inspect), MRB_ARGS_NONE()), + MRB_MT_ENTRY(time_asctime, MRB_SYM(asctime), MRB_ARGS_NONE()), /* 15.2.19.7.4 */ + MRB_MT_ENTRY(time_asctime, MRB_SYM(ctime), MRB_ARGS_NONE()), /* 15.2.19.7.5 */ + MRB_MT_ENTRY(time_day, MRB_SYM(day), MRB_ARGS_NONE()), /* 15.2.19.7.6 */ + MRB_MT_ENTRY(time_dst_p, MRB_SYM_Q(dst), MRB_ARGS_NONE()), /* 15.2.19.7.7 */ + MRB_MT_ENTRY(time_getutc, MRB_SYM(getgm), MRB_ARGS_NONE()), /* 15.2.19.7.8 */ + MRB_MT_ENTRY(time_getlocal, MRB_SYM(getlocal), MRB_ARGS_NONE()), /* 15.2.19.7.9 */ + MRB_MT_ENTRY(time_getutc, MRB_SYM(getutc), MRB_ARGS_NONE()), /* 15.2.19.7.10 */ + MRB_MT_ENTRY(time_utc_p, MRB_SYM_Q(gmt), MRB_ARGS_NONE()), /* 15.2.19.7.11 */ + MRB_MT_ENTRY(time_utc, MRB_SYM(gmtime), MRB_ARGS_NONE()), /* 15.2.19.7.13 */ + MRB_MT_ENTRY(time_hour, MRB_SYM(hour), MRB_ARGS_NONE()), /* 15.2.19.7.15 */ + MRB_MT_ENTRY(time_localtime, MRB_SYM(localtime), MRB_ARGS_NONE()), /* 15.2.19.7.18 */ + MRB_MT_ENTRY(time_day, MRB_SYM(mday), MRB_ARGS_NONE()), /* 15.2.19.7.19 */ + MRB_MT_ENTRY(time_min, MRB_SYM(min), MRB_ARGS_NONE()), /* 15.2.19.7.20 */ + MRB_MT_ENTRY(time_mon, MRB_SYM(mon), MRB_ARGS_NONE()), /* 15.2.19.7.21 */ + MRB_MT_ENTRY(time_mon, MRB_SYM(month), MRB_ARGS_NONE()), /* 15.2.19.7.22 */ + MRB_MT_ENTRY(time_sec, MRB_SYM(sec), MRB_ARGS_NONE()), /* 15.2.19.7.23 */ + MRB_MT_ENTRY(time_to_i, MRB_SYM(to_i), MRB_ARGS_NONE()), /* 15.2.19.7.25 */ + MRB_MT_ENTRY(time_usec, MRB_SYM(usec), MRB_ARGS_NONE()), /* 15.2.19.7.26 */ + MRB_MT_ENTRY(time_nsec, MRB_SYM(nsec), MRB_ARGS_NONE()), + MRB_MT_ENTRY(time_nsec, MRB_SYM(tv_nsec), MRB_ARGS_NONE()), + MRB_MT_ENTRY(time_utc, MRB_SYM(utc), MRB_ARGS_NONE()), /* 15.2.19.7.27 */ + MRB_MT_ENTRY(time_utc_p, MRB_SYM_Q(utc), MRB_ARGS_NONE()), /* 15.2.19.7.28 */ + MRB_MT_ENTRY(time_wday, MRB_SYM(wday), MRB_ARGS_NONE()), /* 15.2.19.7.30 */ + MRB_MT_ENTRY(time_yday, MRB_SYM(yday), MRB_ARGS_NONE()), /* 15.2.19.7.31 */ + MRB_MT_ENTRY(time_year, MRB_SYM(year), MRB_ARGS_NONE()), /* 15.2.19.7.32 */ + MRB_MT_ENTRY(time_zone, MRB_SYM(zone), MRB_ARGS_NONE()), /* 15.2.19.7.33 */ + MRB_MT_ENTRY(time_init, MRB_SYM(initialize), MRB_ARGS_OPT(7) | MRB_MT_PRIVATE), /* 15.2.19.7.16 */ + MRB_MT_ENTRY(time_init_copy, MRB_SYM(initialize_copy), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), /* 15.2.19.7.17 */ + MRB_MT_ENTRY(time_sunday, MRB_SYM_Q(sunday), MRB_ARGS_NONE()), + MRB_MT_ENTRY(time_monday, MRB_SYM_Q(monday), MRB_ARGS_NONE()), + MRB_MT_ENTRY(time_tuesday, MRB_SYM_Q(tuesday), MRB_ARGS_NONE()), + MRB_MT_ENTRY(time_wednesday, MRB_SYM_Q(wednesday), MRB_ARGS_NONE()), + MRB_MT_ENTRY(time_thursday, MRB_SYM_Q(thursday), MRB_ARGS_NONE()), + MRB_MT_ENTRY(time_friday, MRB_SYM_Q(friday), MRB_ARGS_NONE()), + MRB_MT_ENTRY(time_saturday, MRB_SYM_Q(saturday), MRB_ARGS_NONE()), + MRB_MT_ENTRY(time_utc_offset, MRB_SYM(gmt_offset), MRB_ARGS_NONE()), /* 15.2.19.7.12 */ + MRB_MT_ENTRY(time_utc_offset, MRB_SYM(gmtoff), MRB_ARGS_NONE()), /* 15.2.19.7.14 */ + MRB_MT_ENTRY(time_utc_offset, MRB_SYM(utc_offset), MRB_ARGS_NONE()), /* 15.2.19.7.29 */ #ifndef MRB_NO_FLOAT - mrb_define_method(mrb, tc, "to_f", mrb_time_to_f, MRB_ARGS_NONE()); /* 15.2.19.7.24 */ + MRB_MT_ENTRY(time_to_f, MRB_SYM(to_f), MRB_ARGS_NONE()), /* 15.2.19.7.24 */ #endif - mrb_define_method(mrb, tc, "usec", mrb_time_usec, MRB_ARGS_NONE()); /* 15.2.19.7.26 */ - mrb_define_method(mrb, tc, "utc" , mrb_time_utc, MRB_ARGS_NONE()); /* 15.2.19.7.27 */ - mrb_define_method(mrb, tc, "utc?", mrb_time_utc_p,MRB_ARGS_NONE()); /* 15.2.19.7.28 */ - mrb_define_method(mrb, tc, "wday", mrb_time_wday, MRB_ARGS_NONE()); /* 15.2.19.7.30 */ - mrb_define_method(mrb, tc, "yday", mrb_time_yday, MRB_ARGS_NONE()); /* 15.2.19.7.31 */ - mrb_define_method(mrb, tc, "year", mrb_time_year, MRB_ARGS_NONE()); /* 15.2.19.7.32 */ - mrb_define_method(mrb, tc, "zone", mrb_time_zone, MRB_ARGS_NONE()); /* 15.2.19.7.33 */ - - mrb_define_method(mrb, tc, "initialize", mrb_time_initialize, MRB_ARGS_REQ(1)); /* 15.2.19.7.16 */ - mrb_define_method(mrb, tc, "initialize_copy", mrb_time_initialize_copy, MRB_ARGS_REQ(1)); /* 15.2.19.7.17 */ +}; - /* - methods not available: - gmt_offset(15.2.19.7.12) - gmtoff(15.2.19.7.14) - utc_offset(15.2.19.7.29) - */ +void +mrb_mruby_time_gem_init(mrb_state* mrb) +{ + /* ISO 15.2.19.2 */ + struct RClass *tc = mrb_define_class_id(mrb, MRB_SYM(Time), mrb->object_class); + MRB_SET_INSTANCE_TT(tc, MRB_TT_CDATA); + mrb_include_module(mrb, tc, mrb_module_get_id(mrb, MRB_SYM(Comparable))); + mrb_define_class_method_id(mrb, tc, MRB_SYM(at), time_at_m, MRB_ARGS_ARG(1, 1)); /* 15.2.19.6.1 */ + mrb_define_class_method_id(mrb, tc, MRB_SYM(gm), time_gm, MRB_ARGS_ARG(1,6)); /* 15.2.19.6.2 */ + mrb_define_class_method_id(mrb, tc, MRB_SYM(local), time_local, MRB_ARGS_ARG(1,6)); /* 15.2.19.6.3 */ + mrb_define_class_method_id(mrb, tc, MRB_SYM(mktime), time_local, MRB_ARGS_ARG(1,6)); /* 15.2.19.6.4 */ + mrb_define_class_method_id(mrb, tc, MRB_SYM(now), time_now, MRB_ARGS_NONE()); /* 15.2.19.6.5 */ + mrb_define_class_method_id(mrb, tc, MRB_SYM(utc), time_gm, MRB_ARGS_ARG(1,6)); /* 15.2.19.6.6 */ + + MRB_MT_INIT_ROM(mrb, tc, time_rom_entries); } void diff --git a/mrbgems/mruby-time/test/time.rb b/mrbgems/mruby-time/test/time.rb index 2c3a354f4c..58e1363b19 100644 --- a/mrbgems/mruby-time/test/time.rb +++ b/mrbgems/mruby-time/test/time.rb @@ -219,8 +219,52 @@ assert_predicate(Time.at(1300000000).utc, :utc?) end -# ATM not implemented -# assert('Time#utc_offset', '15.2.19.7.29') do +assert('Time#utc_offset, #gmt_offset, #gmtoff', '15.2.19.7.12, 15.2.19.7.14, 15.2.19.7.29') do + # UTC times should have zero offset + utc_time = Time.utc(2000, 1, 1) + assert_equal(0, utc_time.utc_offset) + + # Local times should return integer offsets in seconds + local_time = Time.local(2000, 1, 1) + assert_kind_of(Integer, local_time.utc_offset) + + # Test that offset values make sense (should be multiple of 900 seconds = 15 minutes) + assert_equal(0, local_time.utc_offset % 900) + + # All three methods should be aliases returning identical values + assert_equal(utc_time.utc_offset, utc_time.gmt_offset) + assert_equal(utc_time.utc_offset, utc_time.gmtoff) + assert_equal(local_time.utc_offset, local_time.gmt_offset) + assert_equal(local_time.utc_offset, local_time.gmtoff) +end + +assert('Time#nsec, #tv_nsec') do + # Test nanosecond methods exist and return integers + t = Time.now + assert_kind_of(Integer, t.nsec) + assert_kind_of(Integer, t.tv_nsec) + + # nsec and tv_nsec should be aliases + assert_equal(t.nsec, t.tv_nsec) + + # Nanoseconds should be in valid range (0-999999999) + assert_operator(t.nsec, :>=, 0) + assert_operator(t.nsec, :<=, 999999999) + + # Test with Time.at using microseconds + t1 = Time.at(1000000000, 123456) + assert_equal(123456000, t1.nsec) # 123456 usec = 123456000 nsec + assert_equal(123456, t1.usec) # usec should still work + + # Test that usec == nsec/1000 + assert_equal(t1.usec, t1.nsec / 1000) + + # Test nanosecond precision in comparisons + t2 = Time.at(1000000000, 123457) + assert_equal(123457000, t2.nsec) + assert_not_equal(t1, t2) # Different nanoseconds should not be equal + assert_operator(t1, :<, t2) # t1 should be less than t2 +end assert('Time#wday', '15.2.19.7.30') do assert_equal(0, Time.gm(2012, 12, 23).wday) diff --git a/mrbgems/mruby-toplevel-ext/README.md b/mrbgems/mruby-toplevel-ext/README.md new file mode 100644 index 0000000000..181bc28675 --- /dev/null +++ b/mrbgems/mruby-toplevel-ext/README.md @@ -0,0 +1,32 @@ +# mruby-toplevel-ext + +mruby-toplevel-ext is a gem that modifies the toplevel in mruby by making the `include` method private. This means modules must be included directly into `Object` (e.g., `Object.include MyModule`) rather than at the toplevel. + +## Usage + +After installing this gem, the `include` method at the toplevel (main) becomes private. This is the primary change provided by this gem. + +Consider the following example: + +```ruby +module MyModule + def my_method + puts "Hello from MyModule!" + end +end + +# This will result in a NoMethodError because include is private: +# include MyModule +# my_method + +# Instead, you must include the module directly into Object: +Object.include MyModule +my_method +# => Hello from MyModule! +``` + +This encourages a more explicit way of extending the global object space. + +## License + +The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT). diff --git a/mrbgems/mruby-toplevel-ext/mrblib/toplevel.rb b/mrbgems/mruby-toplevel-ext/mrblib/toplevel.rb index 664008d1cd..688d78afe8 100644 --- a/mrbgems/mruby-toplevel-ext/mrblib/toplevel.rb +++ b/mrbgems/mruby-toplevel-ext/mrblib/toplevel.rb @@ -1,11 +1,24 @@ +class < self + # + # Invokes Module.append_features on each parameter in reverse order. + # When called at the toplevel, this includes the module(s) into Object, + # making their methods available to all objects. + # + # module Greeting + # def hello + # "Hello, world!" + # end + # end + # + # include Greeting + # "".hello #=> "Hello, world!" + # + def include(*modules) + Object.include(*modules) + end end diff --git a/mrbgems/stdlib-ext.gembox b/mrbgems/stdlib-ext.gembox index 2177dca36e..29157dc671 100644 --- a/mrbgems/stdlib-ext.gembox +++ b/mrbgems/stdlib-ext.gembox @@ -13,6 +13,9 @@ MRuby::GemBox.new do |conf| # Use standard Struct class conf.gem :core => "mruby-struct" + # Use standard Data class + conf.gem :core => "mruby-data" + # Use Random class conf.gem :core => "mruby-random" end diff --git a/mrbgems/stdlib-io.gembox b/mrbgems/stdlib-io.gembox index cd1c2049c3..3e13d183d4 100644 --- a/mrbgems/stdlib-io.gembox +++ b/mrbgems/stdlib-io.gembox @@ -7,6 +7,12 @@ MRuby::GemBox.new do |conf| # Use standard IO/File class conf.gem :core => "mruby-socket" - # Use standard print/puts/p - conf.gem :core => "mruby-print" + # Use errno extension for a good mruby-io/mruby-socket experience + conf.gem :core => "mruby-errno" + + # Use Dir class + conf.gem :core => "mruby-dir" + + # Use ENV object + conf.gem :core => "mruby-env" end diff --git a/mrbgems/stdlib.gembox b/mrbgems/stdlib.gembox index 2b2f453346..fe168fe244 100644 --- a/mrbgems/stdlib.gembox +++ b/mrbgems/stdlib.gembox @@ -31,9 +31,15 @@ MRuby::GemBox.new do |conf| # Use Object class extension conf.gem :core => "mruby-object-ext" + # Use Regexp class + conf.gem :core => "mruby-regexp" + # Use ObjectSpace class conf.gem :core => "mruby-objectspace" + # Use Set class + conf.gem :core => "mruby-set" + # Use Fiber class conf.gem :core => "mruby-fiber" @@ -43,6 +49,9 @@ MRuby::GemBox.new do |conf| # Use Enumerator::Lazy class (require mruby-enumerator) conf.gem :core => "mruby-enum-lazy" + # Use Enumerator::Chain class (require mruby-enumerator) + conf.gem :core => "mruby-enum-chain" + # Use toplevel object (main) methods extension conf.gem :core => "mruby-toplevel-ext" @@ -52,6 +61,6 @@ MRuby::GemBox.new do |conf| # Use class/module extension conf.gem :core => "mruby-class-ext" - # Use errno extension - conf.gem :core => "mruby-errno" + # Use catch/throw methods + conf.gem :core => "mruby-catch" end diff --git a/mrblib/00class.rb b/mrblib/00class.rb deleted file mode 100644 index 5bef3f126e..0000000000 --- a/mrblib/00class.rb +++ /dev/null @@ -1,41 +0,0 @@ -class BasicObject - def !=(other) - if self == other - false - else - true - end - end -end - -class Module - # 15.2.2.4.12 - def attr_accessor(*names) - attr_reader(*names) - attr_writer(*names) - end - # 15.2.2.4.11 - alias attr attr_reader - #def attr(name) - # attr_reader(name) - #end - - # 15.2.2.4.27 - def include(*args) - args.reverse! - args.each do |m| - m.append_features(self) - m.included(self) - end - self - end - - def prepend(*args) - args.reverse! - args.each do |m| - m.prepend_features(self) - m.prepended(self) - end - self - end -end diff --git a/mrblib/00kernel.rb b/mrblib/00kernel.rb deleted file mode 100644 index 02f3a99810..0000000000 --- a/mrblib/00kernel.rb +++ /dev/null @@ -1,35 +0,0 @@ -module Kernel - ## - # call-seq: - # obj.extend(module, ...) -> obj - # - # Adds to _obj_ the instance methods from each module given as a - # parameter. - # - # module Mod - # def hello - # "Hello from Mod.\n" - # end - # end - # - # class Klass - # def hello - # "Hello from Klass.\n" - # end - # end - # - # k = Klass.new - # k.hello #=> "Hello from Klass.\n" - # k.extend(Mod) #=> # - # k.hello #=> "Hello from Mod.\n" - # - # ISO 15.3.1.3.13 - def extend(*args) - args.reverse! - args.each do |m| - m.extend_object(self) - m.extended(self) - end - self - end -end diff --git a/mrblib/10error.rb b/mrblib/10error.rb index 431f77809d..734e48b12f 100644 --- a/mrblib/10error.rb +++ b/mrblib/10error.rb @@ -1,43 +1,3 @@ -class Exception - ## - # call-seq: - # exception.message -> string - # - # Returns the result of invoking exception.to_s. - # Normally this returns the exception's message or name. - # - def message - to_s - end -end - -# ISO 15.2.24 -class ArgumentError < StandardError -end - -# ISO 15.2.25 -class LocalJumpError < StandardError -end - -# ISO 15.2.26 -class RangeError < StandardError -end - -class FloatDomainError < RangeError -end - -# ISO 15.2.26 -class RegexpError < StandardError -end - -# ISO 15.2.29 -class TypeError < StandardError -end - -# ISO 15.2.30 -class ZeroDivisionError < StandardError -end - # ISO 15.2.31 class NameError < StandardError attr_accessor :name @@ -54,23 +14,10 @@ class NoMethodError < NameError def initialize(message=nil, name=nil, args=nil) @args = args - super message, name + super(message, name) end end -# ISO 15.2.33 -class IndexError < StandardError -end - -class KeyError < IndexError -end - -class NotImplementedError < ScriptError -end - -class FrozenError < RuntimeError -end - class StopIteration < IndexError attr_accessor :result end diff --git a/mrblib/array.rb b/mrblib/array.rb index 657a7a5e4d..a7c0f84ccf 100644 --- a/mrblib/array.rb +++ b/mrblib/array.rb @@ -1,4 +1,3 @@ -# coding: utf-8 ## # Array # @@ -9,16 +8,16 @@ class Array # array.each {|element| ... } -> self # array.each -> Enumerator # - # Calls the given block for each element of +self+ + # Calls the given block for each element of `self` # and pass the respective element. # # ISO 15.2.12.5.10 def each(&block) - return to_enum :each unless block + return to_enum(:each) unless block idx = 0 while idx < length - block.call(self[idx]) + yield self[idx] idx += 1 end self @@ -29,16 +28,16 @@ def each(&block) # array.each_index {|index| ... } -> self # array.each_index -> Enumerator # - # Calls the given block for each element of +self+ + # Calls the given block for each element of `self` # and pass the index of the respective element. # # ISO 15.2.12.5.11 def each_index(&block) - return to_enum :each_index unless block + return to_enum(:each_index) unless block idx = 0 while idx < length - block.call(idx) + yield idx idx += 1 end self @@ -49,18 +48,18 @@ def each_index(&block) # array.collect! {|element| ... } -> self # array.collect! -> new_enumerator # - # Calls the given block for each element of +self+ + # Calls the given block for each element of `self` # and pass the respective element. Each element will # be replaced by the resulting values. # # ISO 15.2.12.5.7 def collect!(&block) - return to_enum :collect! unless block + return to_enum(:collect!) unless block idx = 0 len = size while idx < len - self[idx] = block.call self[idx] + self[idx] = block.call(self[idx]) idx += 1 end self @@ -76,247 +75,25 @@ def collect!(&block) # ISO 15.2.12.5.20 alias map! collect! - ## - # Private method for Array creation. - # - # ISO 15.2.12.5.15 - def initialize(size=0, obj=nil, &block) - if size.is_a?(Array) && obj==nil && block == nil - self.replace(size) - return self - end - size = size.__to_int - raise ArgumentError, "negative array size" if size < 0 - - self.clear - if size > 0 - self[size - 1] = nil # allocate - - idx = 0 - while idx < size - self[idx] = (block)? block.call(idx): obj - idx += 1 - end - end - - self - end - - def _inspect(recur_list) - size = self.size - return "[]" if size == 0 - return "[...]" if recur_list[self.object_id] - recur_list[self.object_id] = true - ary=[] - i=0 - while i true or false - # - # Equality---Two arrays are equal if they contain the same number - # of elements and if each element is equal to (according to - # Object.==) the corresponding element in the other array. - # - # ISO 15.2.12.5.33 (x) - def ==(other) - other = self.__ary_eq(other) - return false if other == false - return true if other == true - len = self.size - i = 0 - while i < len - return false if self[i] != other[i] - i += 1 - end - return true - end - - ## - # call-seq: - # array.eql? other_array -> true or false - # - # Returns true if +self+ and _other_ are the same object, - # or are both arrays with the same content. - # - # ISO 15.2.12.5.34 (x) - def eql?(other) - other = self.__ary_eq(other) - return false if other == false - return true if other == true - len = self.size - i = 0 - while i < len - return false unless self[i].eql?(other[i]) - i += 1 - end - return true - end - - ## - # call-seq: - # array <=> other_array -> -1, 0, or 1 - # - # Comparison---Returns an integer (-1, 0, or +1) - # if this array is less than, equal to, or greater than other_ary. - # Each object in each array is compared (using <=>). If any value isn't - # equal, then that inequality is the return value. If all the - # values found are equal, then the return is based on a - # comparison of the array lengths. Thus, two arrays are - # "equal" according to Array#<=> if and only if they have - # the same length and the value of each element is equal to the - # value of the corresponding element in the other array. - # - # ISO 15.2.12.5.36 (x) - def <=>(other) - other = self.__ary_cmp(other) - return 0 if 0 == other - return nil if nil == other - - len = self.size - n = other.size - len = n if len > n - i = 0 - begin - while i < len - n = (self[i] <=> other[i]) - return n if n.nil? || n != 0 - i += 1 - end - rescue NoMethodError - return nil - end - len = self.size - other.size - if len == 0 - 0 - elsif len > 0 - 1 - else - -1 - end - end - - ## - # call-seq: - # array.delete(obj) -> deleted_object - # array.delete(obj) {|nosuch| ... } -> deleted_object or block_return - # - # Delete element with index +key+ - def delete(key, &block) - while i = self.index(key) - self.delete_at(i) - ret = key - end - return block.call if ret.nil? && block - ret - end - - ## - # call-seq: - # array.sort! -> self - # array.sort! {|a, b| ... } -> self - # - # Sort all elements and replace +self+ with these - # elements. - def sort!(&block) - stack = [ [ 0, self.size - 1 ] ] - until stack.empty? - left, mid, right = stack.pop - if right == nil - right = mid - # sort self[left..right] - if left < right - if left + 1 == right - lval = self[left] - rval = self[right] - cmp = if block then block.call(lval,rval) else lval <=> rval end - if cmp.nil? - raise ArgumentError, "comparison of #{lval.inspect} and #{rval.inspect} failed" - end - if cmp > 0 - self[left] = rval - self[right] = lval - end - else - mid = ((left + right + 1) / 2).floor - stack.push [ left, mid, right ] - stack.push [ mid, right ] - stack.push [ left, (mid - 1) ] if left < mid - 1 - end - end - else - lary = self[left, mid - left] - lsize = lary.size - - # The entity sharing between lary and self may cause a large memory - # copy operation in the merge loop below. This harmless operation - # cancels the sharing and provides a huge performance gain. - lary[0] = lary[0] - - # merge - lidx = 0 - ridx = mid - (left..right).each { |i| - if lidx >= lsize - break - elsif ridx > right - self[i, lsize - lidx] = lary[lidx, lsize - lidx] - break - else - lval = lary[lidx] - rval = self[ridx] - cmp = if block then block.call(lval,rval) else lval <=> rval end - if cmp.nil? - raise ArgumentError, "comparison of #{lval.inspect} and #{rval.inspect} failed" - end - if cmp <= 0 - self[i] = lval - lidx += 1 - else - self[i] = rval - ridx += 1 - end - end - } - end - end - self - end - ## # call-seq: # array.sort -> new_array # array.sort {|a, b| ... } -> new_array # - # Returns a new Array whose elements are those from +self+, sorted. + # Returns a new Array whose elements are those from `self`, sorted. def sort(&block) self.dup.sort!(&block) end ## # call-seq: - # array.to_a -> self + # array.deconstruct -> self + # + # Returns self. Used for array pattern matching in case/in expressions. # - # Returns self, no need to convert. - def to_a + def deconstruct self end - alias entries to_a ## # Array is enumerable diff --git a/mrblib/compar.rb b/mrblib/compar.rb index b39d619f4e..4ca0b6748a 100644 --- a/mrblib/compar.rb +++ b/mrblib/compar.rb @@ -8,8 +8,8 @@ module Comparable # call-seq: # obj < other -> true or false # - # Return true if +self+ is less - # than +other+. Otherwise return + # Return true if `self` is less + # than `other`. Otherwise return # false. # # ISO 15.3.3.2.1 @@ -25,8 +25,8 @@ def < other # call-seq: # obj <= other -> true or false # - # Return true if +self+ is less - # than or equal to +other+. + # Return true if `self` is less + # than or equal to `other`. # Otherwise return false. # # ISO 15.3.3.2.2 @@ -42,22 +42,22 @@ def <= other # call-seq: # obj == other -> true or false # - # Return true if +self+ is equal - # to +other+. Otherwise return + # Return true if `self` is equal + # to `other`. Otherwise return # false. # # ISO 15.3.3.2.3 def == other cmp = self <=> other - cmp == 0 + cmp.equal?(0) end ## # call-seq: # obj > other -> true or false # - # Return true if +self+ is greater - # than +other+. Otherwise return + # Return true if `self` is greater + # than `other`. Otherwise return # false. # # ISO 15.3.3.2.4 @@ -73,8 +73,8 @@ def > other # call-seq: # obj >= other -> true or false # - # Return true if +self+ is greater - # than or equal to +other+. + # Return true if `self` is greater + # than or equal to `other`. # Otherwise return false. # # ISO 15.3.3.2.5 @@ -90,9 +90,9 @@ def >= other # call-seq: # obj.between?(min,max) -> true or false # - # Return true if +self+ is greater - # than or equal to +min+ and - # less than or equal to +max+. + # Return true if `self` is greater + # than or equal to `min` and + # less than or equal to `max`. # Otherwise return false. # # ISO 15.3.3.2.6 diff --git a/mrblib/enum.rb b/mrblib/enum.rb index 23b275ae24..6efa331ce0 100644 --- a/mrblib/enum.rb +++ b/mrblib/enum.rb @@ -1,7 +1,7 @@ ## # Enumerable # -# The Enumerable mixin provides collection classes with +# The `Enumerable` mixin provides collection classes with # several traversal and searching methods, and with the ability to # sort. The class must provide a method `each`, which # yields successive members of the collection. If @@ -17,65 +17,65 @@ module Enumerable ## # Call the given block for each element - # which is yield by +each+. Return false + # which is yield by `each`. Return false # if one block value is false. Otherwise # return true. If no block is given and - # +self+ is false return false. + # `self` is false return false. # # ISO 15.3.2.2.1 def all?(&block) if block - self.each{|*val| return false unless block.call(*val)} + self.each {|*val| return false unless block.call(*val)} else - self.each{|*val| return false unless val.__svalue} + self.each {|*val| return false unless val.__svalue} end true end ## # Call the given block for each element - # which is yield by +each+. Return true + # which is yield by `each`. Return true # if one block value is true. Otherwise # return false. If no block is given and - # +self+ is true object return true. + # `self` is true object return true. # # ISO 15.3.2.2.2 def any?(&block) if block - self.each{|*val| return true if block.call(*val)} + self.each {|*val| return true if block.call(*val)} else - self.each{|*val| return true if val.__svalue} + self.each {|*val| return true if val.__svalue} end false end ## # Call the given block for each element - # which is yield by +each+. Append all + # which is yield by `each`. Append all # values of each block together and # return this value. # # ISO 15.3.2.2.3 def collect(&block) - return to_enum :collect unless block + return to_enum(:collect) unless block ary = [] - self.each{|*val| ary.push(block.call(*val))} + self.each {|*val| ary.push(block.call(*val))} ary end ## # Return the first element for which # value from the block is true. If no - # object matches, calls +ifnone+ and + # object matches, calls `ifnone` and # returns its result. Otherwise returns - # +nil+. + # `nil`. # # ISO 15.3.2.2.4 def detect(ifnone=nil, &block) - return to_enum :detect, ifnone unless block + return to_enum(:detect, ifnone) unless block - self.each{|*val| + self.each {|*val| if block.call(*val) return val.__svalue end @@ -85,16 +85,16 @@ def detect(ifnone=nil, &block) ## # Call the given block for each element - # which is yield by +each+. Pass an + # which is yield by `each`. Pass an # index to the block which starts at 0 # and increase by 1 for each element. # # ISO 15.3.2.2.5 def each_with_index(&block) - return to_enum :each_with_index unless block + return to_enum(:each_with_index) unless block i = 0 - self.each{|*val| + self.each {|*val| block.call(val.__svalue, i) i += 1 } @@ -103,12 +103,12 @@ def each_with_index(&block) ## # Return an array of all elements which - # are yield by +each+. + # are yield by `each`. # # ISO 15.3.2.2.6 def entries ary = [] - self.each{|*val| + self.each {|*val| # __svalue is an internal method ary.push val.__svalue } @@ -123,16 +123,16 @@ def entries ## # Call the given block for each element - # which is yield by +each+. Return an array + # which is yield by `each`. Return an array # which contains all elements whose block # value was true. # # ISO 15.3.2.2.8 def find_all(&block) - return to_enum :find_all unless block + return to_enum(:find_all) unless block ary = [] - self.each{|*val| + self.each {|*val| ary.push(val.__svalue) if block.call(*val) } ary @@ -140,15 +140,15 @@ def find_all(&block) ## # Call the given block for each element - # which is yield by +each+ and which return + # which is yield by `each` and which return # value was true when invoking === with - # +pattern+. Return an array with all + # `pattern`. Return an array with all # elements or the respective block values. # # ISO 15.3.2.2.9 def grep(pattern, &block) ary = [] - self.each{|*val| + self.each {|*val| sv = val.__svalue if pattern === sv ary.push((block)? block.call(*val): sv) @@ -159,13 +159,13 @@ def grep(pattern, &block) ## # Return true if at least one element which - # is yield by +each+ returns a true value - # by invoking == with +obj+. Otherwise return + # is yield by `each` returns a true value + # by invoking == with `obj`. Otherwise return # false. # # ISO 15.3.2.2.10 def include?(obj) - self.each{|*val| + self.each {|*val| return true if val.__svalue == obj } false @@ -173,7 +173,7 @@ def include?(obj) ## # Call the given block for each element - # which is yield by +each+. Return value + # which is yield by `each`. Return value # is the sum of all block values. Pass # to each block the current sum and the # current element. @@ -193,7 +193,7 @@ def inject(*args, &block) flag = false result = args[0] end - self.each{|*val| + self.each {|*val| val = val.__svalue if flag # push first element as initial @@ -215,7 +215,7 @@ def inject(*args, &block) ## # Return the maximum value of all elements - # yield by +each+. If no block is given <=> + # yield by `each`. If no block is given <=> # will be invoked to define this value. If # a block is given it will be used instead. # @@ -223,7 +223,7 @@ def inject(*args, &block) def max(&block) flag = true # 1st element? result = nil - self.each{|*val| + self.each {|*val| val = val.__svalue if flag # 1st element @@ -242,7 +242,7 @@ def max(&block) ## # Return the minimum value of all elements - # yield by +each+. If no block is given <=> + # yield by `each`. If no block is given <=> # will be invoked to define this value. If # a block is given it will be used instead. # @@ -250,7 +250,7 @@ def max(&block) def min(&block) flag = true # 1st element? result = nil - self.each{|*val| + self.each {|*val| val = val.__svalue if flag # 1st element @@ -275,7 +275,7 @@ def min(&block) ## # Call the given block for each element - # which is yield by +each+. Return an + # which is yield by `each`. Return an # array which contains two arrays. The # first array contains all elements # whose block value was true. The second @@ -284,11 +284,11 @@ def min(&block) # # ISO 15.3.2.2.16 def partition(&block) - return to_enum :partition unless block + return to_enum(:partition) unless block ary_T = [] ary_F = [] - self.each{|*val| + self.each {|*val| if block.call(*val) ary_T.push(val.__svalue) else @@ -300,16 +300,16 @@ def partition(&block) ## # Call the given block for each element - # which is yield by +each+. Return an + # which is yield by `each`. Return an # array which contains only the elements # whose block value was false. # # ISO 15.3.2.2.17 def reject(&block) - return to_enum :reject unless block + return to_enum(:reject) unless block ary = [] - self.each{|*val| + self.each {|*val| ary.push(val.__svalue) unless block.call(*val) } ary @@ -323,7 +323,7 @@ def reject(&block) ## # Return a sorted array of all elements - # which are yield by +each+. If no block + # which are yield by `each`. If no block # is given <=> will be invoked on each # element to define the order. Otherwise # the given block will be used for @@ -331,7 +331,7 @@ def reject(&block) # # ISO 15.3.2.2.19 def sort(&block) - self.map{|*val| val.__svalue}.sort(&block) + self.map {|*val| val.__svalue}.sort(&block) end ## @@ -342,6 +342,11 @@ def sort(&block) # redefine #hash 15.3.1.3.15 def hash + if __method_recursive?(:hash) + # Recursion detected, return a fixed value to break the loop + return 0 + end + h = 12347 i = 0 self.each do |e| diff --git a/mrblib/hash.rb b/mrblib/hash.rb index e57c182123..00debf9fb5 100644 --- a/mrblib/hash.rb +++ b/mrblib/hash.rb @@ -9,57 +9,15 @@ class Hash # ISO 15.2.13.3 include Enumerable - ## - # call-seq: - # hash == object -> true or false - # - # Equality---Two hashes are equal if they each contain the same number - # of keys and if each key-value pair is equal to (according to - # Object#==) the corresponding elements in the other - # hash. - # - # ISO 15.2.13.4.1 - def ==(hash) - return true if self.equal?(hash) - unless Hash === hash - return false - end - return false if self.size != hash.size - self.each do |k,v| - return false unless hash.key?(k) - return false unless self[k] == hash[k] - end - return true - end - ## - # call-seq: - # hash.eql? object -> true or false - # - # Returns true if hash and other are - # both hashes with the same content compared by eql?. - # - # ISO 15.2.13.4.32 (x) - def eql?(hash) - return true if self.equal?(hash) - unless Hash === hash - return false - end - return false if self.size != hash.size - self.each do |k,v| - return false unless hash.key?(k) - return false unless self[k].eql?(hash[k]) - end - return true - end ## # call-seq: # hash.delete(key) -> value or nil # hash.delete(key) {|key| ... } -> object # - # Delete the element with the key +key+. - # Return the value of the element if +key+ + # Delete the element with the key `key`. + # Return the value of the element if `key` # was found. Return nil if nothing was # found. If a block is given, call the # block with the value of the element. @@ -79,7 +37,7 @@ def delete(key, &block) # hsh.each -> an_enumerator # hsh.each_pair -> an_enumerator # - # Calls the given block for each element of +self+ + # Calls the given block for each element of `self` # and pass the key and value of each element. # # If no block is given, an enumerator is returned instead. @@ -94,14 +52,14 @@ def delete(key, &block) # # ISO 15.2.13.4.9 def each(&block) - return to_enum :each unless block + return to_enum(:each) unless block keys = self.keys vals = self.values len = self.size i = 0 while i < len - block.call [keys[i], vals[i]] + block.call([keys[i], vals[i]]) i += 1 end self @@ -112,7 +70,7 @@ def each(&block) # hsh.each_key {| key | block } -> hsh # hsh.each_key -> an_enumerator # - # Calls the given block for each element of +self+ + # Calls the given block for each element of `self` # and pass the key of each element. # # If no block is given, an enumerator is returned instead. @@ -127,9 +85,9 @@ def each(&block) # # ISO 15.2.13.4.10 def each_key(&block) - return to_enum :each_key unless block + return to_enum(:each_key) unless block - self.keys.each{|k| block.call(k)} + self.keys.each {|k| block.call(k)} self end @@ -138,7 +96,7 @@ def each_key(&block) # hsh.each_value {| value | block } -> self # hsh.each_value -> an_enumerator # - # Calls the given block with each value; returns +self+: + # Calls the given block with each value; returns `self`: # # If no block is given, an enumerator is returned instead. # @@ -152,9 +110,9 @@ def each_key(&block) # # ISO 15.2.13.4.11 def each_value(&block) - return to_enum :each_value unless block + return to_enum(:each_value) unless block - self.values.each{|v| block.call(v)} + self.values.each {|v| block.call(v)} self end @@ -163,11 +121,11 @@ def each_value(&block) # hsh.merge(other_hash..) -> hsh # hsh.merge(other_hash..){|key, oldval, newval| block} -> hsh # - # Returns the new \Hash formed by merging each of +other_hashes+ - # into a copy of +self+. + # Returns the new \Hash formed by merging each of `other_hashes` + # into a copy of `self`. # - # Each argument in +other_hashes+ must be a \Hash. - # Adds the contents of _other_hash_ to _hsh_. If no block is specified, + # Each argument in `other_hashes` must be a \Hash. + # Adds the contents of _other_hash_ to _hsh_. If no block is specified, # entries with duplicate keys are overwritten with the values from # _other_hash_, otherwise the value of each duplicate key is determined by # calling the block with the key, its value in _hsh_ and its value in @@ -182,67 +140,41 @@ def each_value(&block) # # ISO 15.2.13.4.22 def merge(*others, &block) - i=0; len=others.size h = self.dup return h.__merge(*others) unless block + i=0; len=others.size while i" + vals[i]._inspect(recur_list)) - i+=1 - end - "{"+ary.join(", ")+"}" - end - ## - # Return the contents of this hash as a string. - # - # ISO 15.2.13.4.30 (x) - def inspect - self._inspect({}) - end - # ISO 15.2.13.4.31 (x) - alias to_s inspect - ## # call-seq: # hsh.reject! {| key, value | block } -> hsh or nil # hsh.reject! -> an_enumerator # - # Equivalent to Hash#delete_if, but returns - # nil if no changes were made. + # Equivalent to `Hash#delete_if`, but returns + # `nil` if no changes were made. # # 1.8/1.9 Hash#reject! returns Hash; ISO says nothing. # def reject!(&block) - return to_enum :reject! unless block + return to_enum(:reject!) unless block keys = [] - self.each{|k,v| + self.each {|k,v| if block.call([k, v]) keys.push(k) end } return nil if keys.size == 0 - keys.each{|k| + keys.each {|k| self.delete(k) } self @@ -264,10 +196,10 @@ def reject!(&block) # 1.8/1.9 Hash#reject returns Hash; ISO says nothing. # def reject(&block) - return to_enum :reject unless block + return to_enum(:reject) unless block h = {} - self.each{|k,v| + self.each {|k,v| unless block.call([k, v]) h[k] = v end @@ -280,22 +212,22 @@ def reject(&block) # hsh.select! {| key, value | block } -> hsh or nil # hsh.select! -> an_enumerator # - # Equivalent to Hash#keep_if, but returns - # nil if no changes were made. + # Equivalent to `Hash#keep_if`, but returns + # `nil` if no changes were made. # # 1.9 Hash#select! returns Hash; ISO says nothing. # def select!(&block) - return to_enum :select! unless block + return to_enum(:select!) unless block keys = [] - self.each{|k,v| + self.each {|k,v| unless block.call([k, v]) keys.push(k) end } return nil if keys.size == 0 - keys.each{|k| + keys.each {|k| self.delete(k) } self @@ -317,14 +249,28 @@ def select!(&block) # 1.9 Hash#select returns Hash; ISO says nothing # def select(&block) - return to_enum :select unless block + return to_enum(:select) unless block h = {} - self.each{|k,v| + self.each {|k,v| if block.call([k, v]) h[k] = v end } h end + + ## + # call-seq: + # hash.deconstruct_keys(keys) -> hash + # + # Returns +self+. This method is called by pattern matching to + # deconstruct the hash for matching. + # + # The +keys+ argument is an array of keys the pattern expects, + # or +nil+ for exact matching (when +**nil+ is used). + # + def deconstruct_keys(_keys) + self + end end diff --git a/mrblib/kernel.rb b/mrblib/kernel.rb index 7c3ea94209..52c3e4931e 100644 --- a/mrblib/kernel.rb +++ b/mrblib/kernel.rb @@ -18,14 +18,14 @@ def `(s) ## # ISO 15.3.1.2.8 Kernel.loop - # provided by Kernel#loop + # not provided by mruby ## # Calls the given block repetitively. # # ISO 15.3.1.3.29 - def loop(&block) - return to_enum :loop unless block + private def loop(&block) + return to_enum(:loop) unless block while true yield @@ -39,11 +39,6 @@ def !~(y) !(self =~ y) end - # internal method for inspect - def _inspect(_recur_list) - self.inspect - end - def to_enum(*a) raise NotImplementedError.new("fiber required for enumerator") end diff --git a/mrblib/numeric.rb b/mrblib/numeric.rb index 67e76ece28..b25513ef44 100644 --- a/mrblib/numeric.rb +++ b/mrblib/numeric.rb @@ -41,7 +41,7 @@ def abs class Integer ## # Calls the given block once for each Integer - # from +self+ downto +num+. + # from `self` downto `num`. # # ISO 15.2.8.3.15 def downto(num, &block) @@ -49,7 +49,7 @@ def downto(num, &block) i = self.to_i while i >= num - block.call(i) + yield i i -= 1 end self @@ -66,15 +66,15 @@ def next alias succ next ## - # Calls the given block +self+ times. + # Calls the given block `self` times. # # ISO 15.2.8.3.22 - def times &block - return to_enum :times unless block + def times(&block) + return to_enum(:times) unless block i = 0 while i < self - block.call i + yield i i += 1 end self @@ -82,7 +82,7 @@ def times &block ## # Calls the given block once for each Integer - # from +self+ upto +num+. + # from `self` upto `num`. # # ISO 15.2.8.3.27 def upto(num, &block) @@ -90,21 +90,21 @@ def upto(num, &block) i = self.to_i while i <= num - block.call(i) + yield i i += 1 end self end ## - # Calls the given block from +self+ to +num+ - # incremented by +step+ (default 1). + # Calls the given block from `self` to `num` + # incremented by `step` (default 1). # def step(num=nil, step=1, &block) raise ArgumentError, "step can't be 0" if step == 0 return to_enum(:step, num, step) unless block - i = __coerce_step_counter(num, step) + i = __coerce_step_counter(step) if num == self || step.infinite? block.call(i) if step > 0 && i <= (num||i) || step < 0 && i >= (num||-i) elsif num == nil @@ -129,8 +129,8 @@ def step(num=nil, step=1, &block) class Float ## - # Calls the given block from +self+ to +num+ - # incremented by +step+ (default 1). + # Calls the given block from `self` to `num` + # incremented by `step` (default 1). # def step(num=nil, step=1, &block) raise ArgumentError, "step can't be 0" if step == 0 diff --git a/mrblib/range.rb b/mrblib/range.rb index 4a8e10f300..b9757affd3 100644 --- a/mrblib/range.rb +++ b/mrblib/range.rb @@ -10,12 +10,12 @@ class Range include Enumerable ## - # Calls the given block for each element of +self+ + # Calls the given block for each element of `self` # and pass the respective element. # # ISO 15.2.14.4.4 def each(&block) - return to_enum :each unless block + return to_enum(:each) unless block val = self.begin last = self.end @@ -74,7 +74,8 @@ def each(&block) # redefine #hash 15.3.1.3.15 def hash - h = first.hash ^ last.hash + # Use self.begin/self.end instead of first/last to handle endless/beginless ranges + h = self.begin.hash ^ self.end.hash h += 1 if self.exclude_end? h end diff --git a/mrblib/string.rb b/mrblib/string.rb index 51be9b7dde..dfbbbd2558 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -43,9 +43,9 @@ def each_line(separator = "\n", &block) end ## - # Replace all matches of +pattern+ with +replacement+. + # Replace all matches of `pattern` with `replacement`. # Call block (if given) for each match and replace - # +pattern+ with the value of the block. Return the + # `pattern` with the value of the block. Return the # final value. # # ISO 15.2.10.5.18 @@ -78,10 +78,10 @@ def gsub(*args, &block) end ## - # Replace all matches of +pattern+ with +replacement+. + # Replace all matches of `pattern` with `replacement`. # Call block (if given) for each match and replace - # +pattern+ with the value of the block. Modify - # +self+ with the final value. + # `pattern` with the value of the block. Modify + # `self` with the final value. # # ISO 15.2.10.5.19 def gsub!(*args, &block) @@ -93,9 +93,9 @@ def gsub!(*args, &block) end # ## -# # Calls the given block for each match of +pattern+ +# # Calls the given block for each match of `pattern` # # If no block is given return an array with all -# # matches of +pattern+. +# # matches of `pattern`. # # # # ISO 15.2.10.5.32 # def scan(pattern, &block) @@ -103,9 +103,9 @@ def gsub!(*args, &block) # end ## - # Replace only the first match of +pattern+ with - # +replacement+. Call block (if given) for each - # match and replace +pattern+ with the value of the + # Replace only the first match of `pattern` with + # `replacement`. Call block (if given) for each + # match and replace `pattern` with the value of the # block. Return the final value. # # ISO 15.2.10.5.36 @@ -133,10 +133,10 @@ def sub(*args, &block) end ## - # Replace only the first match of +pattern+ with - # +replacement+. Call block (if given) for each - # match and replace +pattern+ with the value of the - # block. Modify +self+ with the final value. + # Replace only the first match of `pattern` with + # `replacement`. Call block (if given) for each + # match and replace `pattern` with the value of the + # block. Modify `self` with the final value. # # ISO 15.2.10.5.37 def sub!(*args, &block) @@ -147,7 +147,7 @@ def sub!(*args, &block) end ## - # Call the given block for each byte of +self+. + # Call the given block for each byte of `self`. def each_byte(&block) return to_enum(:each_byte, &block) unless block pos = 0 diff --git a/mrblib/symbol.rb b/mrblib/symbol.rb index 827e2c1e0e..17d7529063 100644 --- a/mrblib/symbol.rb +++ b/mrblib/symbol.rb @@ -1,7 +1,8 @@ class Symbol def to_proc + mid = self ->(obj,*args,**opts,&block) do - obj.__send__(self, *args, **opts, &block) + obj.__send__(mid, *args, **opts, &block) end end end diff --git a/mruby-source.gemspec b/mruby-source.gemspec index 9717ad5f2b..ee284474cb 100644 --- a/mruby-source.gemspec +++ b/mruby-source.gemspec @@ -1,4 +1,3 @@ -# coding: utf-8 lib = File.expand_path('../lib', __FILE__) $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) require 'mruby/source' @@ -6,7 +5,7 @@ require 'mruby/source' Gem::Specification.new do |spec| spec.name = "mruby-source" spec.version = MRuby::Source::MRUBY_VERSION - spec.authors = [ MRuby::Source::MRUBY_AUTHOR ] + spec.authors = [ "mruby developers" ] spec.summary = %q{mruby source code wrapper.} spec.description = %q{mruby source code wrapper for use with Ruby libs.} diff --git a/oss-fuzz/config/mruby.dict b/oss-fuzz/config/mruby.dict index a332d3505c..7780247b36 100644 --- a/oss-fuzz/config/mruby.dict +++ b/oss-fuzz/config/mruby.dict @@ -103,3 +103,26 @@ snippet_multi=" 1*1" string_single_q=" 'a'" string_dbl_q=" \"a\"" +regex_any="." +regex_start="^" +regex_end="$" +regex_star="*" +regex_plus="+" +regex_maybe="?" +regex_alt="|" +regex_group_start="(" +regex_group_end=")" +regex_class_start="[" +regex_class_end="]" +regex_digit="\\d" +regex_word="\\w" +regex_space="\\s" +regex_non_digit="\\D" +regex_non_word="\\W" +regex_non_space="\\S" +regex_named="(?)" +regex_atomic="(?>)" +regex_lookahead="(?=)" +regex_neg_lookahead="(?!)" +regex_lookbehind="(?<=)" +regex_neg_lookbehind="(? +#include +#include +#include + +int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t size) { + if (size < 1) { + return 0; + } + mrb_state *mrb = mrb_open(); + if (!mrb) { + return 0; + } + + /* mrb_load_irep_buf returns the result of the last expression */ + mrb_load_irep_buf(mrb, Data, size); + + mrb_close(mrb); + return 0; +} diff --git a/oss-fuzz/mruby_misc_fuzzer.c b/oss-fuzz/mruby_misc_fuzzer.c new file mode 100644 index 0000000000..7a22e57316 --- /dev/null +++ b/oss-fuzz/mruby_misc_fuzzer.c @@ -0,0 +1,67 @@ +#include +#include +#include +#include +#include +#include +#include + +int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t size) { + if (size < 2) { + return 0; + } + mrb_state *mrb = mrb_open(); + if (!mrb) { + return 0; + } + + uint8_t selector = Data[0] % 4; + const uint8_t *D = Data + 1; + size_t S = size - 1; + + if (selector == 0) { + /* Target Set */ + struct RClass *set_class_ptr = mrb_class_get(mrb, "Set"); + if (set_class_ptr) { + mrb_value set_class = mrb_obj_value(set_class_ptr); + mrb_value ary = mrb_ary_new(mrb); + for (size_t i = 0; i < S / 4 && i < 10; i++) { + mrb_ary_push(mrb, ary, mrb_str_new(mrb, (const char *)(D + i*4), 4)); + } + mrb_funcall(mrb, set_class, "new", 1, ary); + } + } else if (selector == 1) { + /* Target Time */ + struct RClass *time_class_ptr = mrb_class_get(mrb, "Time"); + if (time_class_ptr) { + mrb_value time_class = mrb_obj_value(time_class_ptr); + if (S >= 8) { + mrb_int sec = (mrb_int)D[0] | ((mrb_int)D[1] << 8) | ((mrb_int)D[2] << 16) | ((mrb_int)D[3] << 24); + mrb_funcall(mrb, time_class, "at", 1, mrb_fixnum_value(sec)); + } + mrb_funcall(mrb, time_class, "now", 0); + } + } else if (selector == 2) { + /* Target Random */ + struct RClass *random_class_ptr = mrb_class_get(mrb, "Random"); + if (random_class_ptr) { + mrb_value random_class = mrb_obj_value(random_class_ptr); + if (S >= 4) { + mrb_int seed = (mrb_int)D[0] | ((mrb_int)D[1] << 8); + mrb_value rnd = mrb_funcall(mrb, random_class, "new", 1, mrb_fixnum_value(seed)); + mrb_funcall(mrb, rnd, "rand", 1, mrb_fixnum_value(100)); + } + } + } else if (selector == 3) { + /* Target Range and Array-ext */ + if (S >= 4) { + mrb_int start = (mrb_int)D[0]; + mrb_int end = (mrb_int)D[1]; + mrb_value range = mrb_funcall(mrb, mrb_obj_value(mrb->kernel_module), "Range", 2, mrb_fixnum_value(start), mrb_fixnum_value(end)); + mrb_funcall(mrb, range, "to_a", 0); + } + } + + mrb_close(mrb); + return 0; +} diff --git a/oss-fuzz/mruby_numeric_fuzzer.c b/oss-fuzz/mruby_numeric_fuzzer.c new file mode 100644 index 0000000000..15ad4a9c19 --- /dev/null +++ b/oss-fuzz/mruby_numeric_fuzzer.c @@ -0,0 +1,93 @@ +#include +#include +#include +#include +#include +#include + +int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t size) { + if (size < 2) { + return 0; + } + mrb_state *mrb = mrb_open(); + if (!mrb) { + return 0; + } + + uint8_t selector = Data[0] % 4; + const uint8_t *D = Data + 1; + size_t S = size - 1; + + if (selector == 0) { + /* Integer / BigInt */ + if (S < 3) goto done; + uint8_t base = D[0] % 37; + if (base < 2 && base != 0) base = 10; + uint8_t op = D[1] % 5; + uint16_t split = D[2]; // Simplified split + if (S > 3) split = split % (S - 3); + else split = 0; + + mrb_value s1 = mrb_str_new(mrb, (const char *)(D + 3), split); + mrb_value s2 = mrb_str_new(mrb, (const char *)(D + 3 + split), S - 3 - split); + + mrb_value b1 = mrb_funcall(mrb, mrb_obj_value(mrb->kernel_module), "Integer", 2, s1, mrb_fixnum_value(base)); + mrb_value b2 = mrb_funcall(mrb, mrb_obj_value(mrb->kernel_module), "Integer", 2, s2, mrb_fixnum_value(base)); + + if (!mrb_nil_p(b1) && !mrb_nil_p(b2)) { + switch (op) { + case 0: mrb_funcall(mrb, b1, "+", 1, b2); break; + case 1: mrb_funcall(mrb, b1, "-", 1, b2); break; + case 2: mrb_funcall(mrb, b1, "*", 1, b2); break; + case 3: + if (mrb_test(mrb_funcall(mrb, b2, "!=", 1, mrb_fixnum_value(0)))) { + mrb_funcall(mrb, b1, "/", 1, b2); + } + break; + case 4: + if (mrb_test(mrb_funcall(mrb, b2, "<", 1, mrb_fixnum_value(1000)))) { + mrb_funcall(mrb, b1, "**", 1, b2); + } + break; + } + } + } else if (selector == 1) { + /* Rational */ + if (S < 4) goto done; + mrb_int n = (mrb_int)D[0] | ((mrb_int)D[1] << 8); + mrb_int d = (mrb_int)D[2] | ((mrb_int)D[3] << 8); + mrb_value r = mrb_funcall(mrb, mrb_obj_value(mrb->kernel_module), "Rational", 2, mrb_fixnum_value(n), mrb_fixnum_value(d)); + if (S >= 8) { + mrb_int n2 = (mrb_int)D[4] | ((mrb_int)D[5] << 8); + mrb_int d2 = (mrb_int)D[6] | ((mrb_int)D[7] << 8); + mrb_value r2 = mrb_funcall(mrb, mrb_obj_value(mrb->kernel_module), "Rational", 2, mrb_fixnum_value(n2), mrb_fixnum_value(d2)); + mrb_funcall(mrb, r, "+", 1, r2); + } + } else if (selector == 2) { + /* Complex */ + if (S < 4) goto done; + mrb_float re = (mrb_float)D[0]; + mrb_float im = (mrb_float)D[1]; + struct RClass *complex_class = mrb_class_get(mrb, "Complex"); + if (complex_class) { + mrb_value c = mrb_funcall(mrb, mrb_obj_value(mrb->kernel_module), "Complex", 2, mrb_float_value(mrb, re), mrb_float_value(mrb, im)); + mrb_funcall(mrb, c, "abs", 0); + } + } else if (selector == 3) { + /* Math functions */ + if (S < 8) goto done; + double d1; + memcpy(&d1, D, sizeof(double)); + struct RClass *math_module = mrb_module_get(mrb, "Math"); + if (math_module) { + mrb_value m = mrb_obj_value(math_module); + mrb_funcall(mrb, m, "sin", 1, mrb_float_value(mrb, d1)); + mrb_funcall(mrb, m, "log", 1, mrb_float_value(mrb, d1)); + mrb_funcall(mrb, m, "sqrt", 1, mrb_float_value(mrb, d1)); + } + } + +done: + mrb_close(mrb); + return 0; +} diff --git a/oss-fuzz/mruby_pack_fuzzer.c b/oss-fuzz/mruby_pack_fuzzer.c new file mode 100644 index 0000000000..c2f578ab71 --- /dev/null +++ b/oss-fuzz/mruby_pack_fuzzer.c @@ -0,0 +1,37 @@ +#include +#include +#include +#include +#include + +int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t size) { + if (size < 2) { + return 0; + } + mrb_state *mrb = mrb_open(); + if (!mrb) { + return 0; + } + + uint8_t fmt_len = Data[0]; + if (fmt_len > size - 1) { + fmt_len = size - 1; + } + + mrb_value fmt = mrb_str_new(mrb, (const char *)(Data + 1), fmt_len); + mrb_value str = mrb_str_new(mrb, (const char *)(Data + 1 + fmt_len), size - 1 - fmt_len); + + /* Target String#unpack */ + mrb_funcall(mrb, str, "unpack", 1, fmt); + + /* Target Array#pack (using the result of unpack if it's an array) */ + /* Or just pack the original string as an array of bytes */ + mrb_value ary = mrb_ary_new_capa(mrb, size); + for (size_t i = 0; i < size; i++) { + mrb_ary_push(mrb, ary, mrb_fixnum_value(Data[i])); + } + mrb_funcall(mrb, ary, "pack", 1, fmt); + + mrb_close(mrb); + return 0; +} diff --git a/oss-fuzz/mruby_proto_fuzzer.cpp b/oss-fuzz/mruby_proto_fuzzer.cpp index d9925bf9d9..2d56b79c0f 100644 --- a/oss-fuzz/mruby_proto_fuzzer.cpp +++ b/oss-fuzz/mruby_proto_fuzzer.cpp @@ -13,33 +13,33 @@ using namespace ruby_fuzzer; using namespace std; int FuzzRB(const uint8_t *Data, size_t size) { - mrb_value v; - mrb_state *mrb = mrb_open(); - if (!mrb) - return 0; - - char *code = (char *)malloc(size+1); - if (!code) - return 0; - memcpy(code, Data, size); - code[size] = '\0'; - - if (const char *dump_path = getenv("PROTO_FUZZER_DUMP_PATH")) { - // With libFuzzer binary run this to generate an RB file x.rb: - // PROTO_FUZZER_DUMP_PATH=x.rb ./a.out proto-input - std::ofstream of(dump_path); - of.write(code, size); - } - std::cout << "\n\n############\n" << code << "\n############\n\n"; - v = mrb_load_string(mrb, code); - mrb_close(mrb); - - free(code); - return 0; + mrb_value v; + mrb_state *mrb = mrb_open(); + if (!mrb) + return 0; + + char *code = (char*)malloc(size+1); + if (!code) + return 0; + memcpy(code, Data, size); + code[size] = '\0'; + + if (const char *dump_path = getenv("PROTO_FUZZER_DUMP_PATH")) { + // With libFuzzer binary run this to generate an RB file x.rb: + // PROTO_FUZZER_DUMP_PATH=x.rb ./a.out proto-input + std::ofstream of(dump_path); + of.write(code, size); + } + std::cout << "\n\n############\n" << code << "\n############\n\n"; + v = mrb_load_string(mrb, code); + mrb_close(mrb); + + free(code); + return 0; } DEFINE_PROTO_FUZZER(const Function &function) { - protoConverter converter; - auto s = converter.FunctionToString(function); - (void)FuzzRB((const uint8_t*)s.data(), s.size()); + protoConverter converter; + auto s = converter.FunctionToString(function); + (void)FuzzRB((const uint8_t*)s.data(), s.size()); } diff --git a/oss-fuzz/mruby_regexp_fuzzer.c b/oss-fuzz/mruby_regexp_fuzzer.c new file mode 100644 index 0000000000..c7faf8f076 --- /dev/null +++ b/oss-fuzz/mruby_regexp_fuzzer.c @@ -0,0 +1,43 @@ +#include +#include +#include +#include +#include + +int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t size) { + if (size < 3) { + return 0; + } + mrb_state *mrb = mrb_open(); + if (!mrb) { + return 0; + } + + /* Use first byte for flags */ + uint8_t flags_byte = Data[0]; + mrb_value flags = mrb_fixnum_value(flags_byte & 0x07); // i, m, x flags + + /* Split remaining data into pattern and string */ + uint8_t pattern_len = Data[1]; + if (pattern_len > size - 2) { + pattern_len = size - 2; + } + + mrb_value pattern = mrb_str_new(mrb, (const char *)(Data + 2), pattern_len); + mrb_value text = mrb_str_new(mrb, (const char *)(Data + 2 + pattern_len), size - 2 - pattern_len); + + /* Target Regexp.new(pattern, flags) */ + struct RClass *regexp_class_ptr = mrb_class_get(mrb, "Regexp"); + if (regexp_class_ptr) { + mrb_value regexp_class = mrb_obj_value(regexp_class_ptr); + mrb_value regexp = mrb_funcall(mrb, regexp_class, "new", 2, pattern, flags); + + /* Target Regexp#match(text) */ + if (!mrb_nil_p(regexp)) { + mrb_funcall(mrb, regexp, "match", 1, text); + } + } + + mrb_close(mrb); + return 0; +} diff --git a/oss-fuzz/mruby_sprintf_fuzzer.c b/oss-fuzz/mruby_sprintf_fuzzer.c new file mode 100644 index 0000000000..53df2c1a38 --- /dev/null +++ b/oss-fuzz/mruby_sprintf_fuzzer.c @@ -0,0 +1,39 @@ +#include +#include +#include +#include +#include +#include + +int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t size) { + if (size < 1) { + return 0; + } + mrb_state *mrb = mrb_open(); + if (!mrb) { + return 0; + } + + /* Use the first byte as a format string length */ + uint8_t fmt_len = Data[0]; + if (fmt_len > size - 1) { + fmt_len = size - 1; + } + + mrb_value fmt = mrb_str_new(mrb, (const char *)(Data + 1), fmt_len); + + /* provide some arguments of different types to satisfy various format specifiers */ + mrb_value args[5]; + args[0] = mrb_fixnum_value(123); + args[1] = mrb_float_value(mrb, 3.14); + args[2] = mrb_str_new_cstr(mrb, "fuzz"); + args[3] = mrb_symbol_value(mrb_intern_cstr(mrb, "symbol")); + args[4] = mrb_ary_new(mrb); + + /* Call sprintf via mrb_funcall */ + /* We don't use all args every time, but it doesn't hurt to pass them */ + mrb_funcall(mrb, mrb_obj_value(mrb->kernel_module), "sprintf", 6, fmt, args[0], args[1], args[2], args[3], args[4]); + + mrb_close(mrb); + return 0; +} diff --git a/oss-fuzz/proto_to_ruby.cpp b/oss-fuzz/proto_to_ruby.cpp index 92ad039e22..d5cc244f19 100644 --- a/oss-fuzz/proto_to_ruby.cpp +++ b/oss-fuzz/proto_to_ruby.cpp @@ -4,452 +4,452 @@ using namespace ruby_fuzzer; std::string protoConverter::removeSpecial(const std::string &x) { - std::string tmp(x); - if (!tmp.empty()) - tmp.erase(std::remove_if(tmp.begin(), tmp.end(), - [](char c) { return !(std::isalpha(c) || std::isdigit(c)); } ), tmp.end()); - return tmp; + std::string tmp(x); + if (!tmp.empty()) + tmp.erase(std::remove_if(tmp.begin(), tmp.end(), + [](char c) { return !(std::isalpha(c) || std::isdigit(c)); } ), tmp.end()); + return tmp; } void protoConverter::visit(ArrType const& x) { - if (x.elements_size() > 0) { - int i = x.elements_size(); - m_output << "["; - for (auto &e : x.elements()) { - i--; - if (i == 0) { - visit(e); - } else { - visit(e); - m_output << ", "; - } - } - m_output << "]"; - } else { - m_output << "[1]"; - } + if (x.elements_size() > 0) { + int i = x.elements_size(); + m_output << "["; + for (auto &e : x.elements()) { + i--; + if (i == 0) { + visit(e); + } else { + visit(e); + m_output << ", "; + } + } + m_output << "]"; + } else { + m_output << "[1]"; + } } void protoConverter::visit(Array const& x) { - switch (x.arr_func()) { - case Array::FLATTEN: - visit(x.arr_arg()); - m_output << ".flatten"; - break; - case Array::COMPACT: - visit(x.arr_arg()); - m_output << ".compact"; - break; - case Array::FETCH: - visit(x.arr_arg()); - m_output << ".fetch"; - break; - case Array::FILL: - visit(x.arr_arg()); - m_output << ".fill"; - break; - case Array::ROTATE: - visit(x.arr_arg()); - m_output << ".rotate"; - break; - case Array::ROTATE_E: - visit(x.arr_arg()); - m_output << ".rotate!"; - break; - case Array::DELETEIF: - visit(x.arr_arg()); - m_output << ".delete_if"; - break; - case Array::INSERT: - visit(x.arr_arg()); - m_output << ".insert"; - break; - case Array::BSEARCH: - visit(x.arr_arg()); - m_output << ".bsearch"; - break; - case Array::KEEPIF: - visit(x.arr_arg()); - m_output << ".keep_if"; - break; - case Array::SELECT: - visit(x.arr_arg()); - m_output << ".select"; - break; - case Array::VALUES_AT: - visit(x.arr_arg()); - m_output << ".values_at"; - break; - case Array::BLOCK: - visit(x.arr_arg()); - m_output << ".index"; - break; - case Array::DIG: - visit(x.arr_arg()); - m_output << ".dig"; - break; - case Array::SLICE: - visit(x.arr_arg()); - m_output << ".slice"; - break; - case Array::PERM: - visit(x.arr_arg()); - m_output << ".permutation"; - break; - case Array::COMB: - visit(x.arr_arg()); - m_output << ".combination"; - break; - case Array::ASSOC: - visit(x.arr_arg()); - m_output << ".assoc"; - break; - case Array::RASSOC: - visit(x.arr_arg()); - m_output << ".rassoc"; - break; - } - m_output << "("; - visit(x.val_arg()); - m_output << ")"; + switch (x.arr_func()) { + case Array::FLATTEN: + visit(x.arr_arg()); + m_output << ".flatten"; + break; + case Array::COMPACT: + visit(x.arr_arg()); + m_output << ".compact"; + break; + case Array::FETCH: + visit(x.arr_arg()); + m_output << ".fetch"; + break; + case Array::FILL: + visit(x.arr_arg()); + m_output << ".fill"; + break; + case Array::ROTATE: + visit(x.arr_arg()); + m_output << ".rotate"; + break; + case Array::ROTATE_E: + visit(x.arr_arg()); + m_output << ".rotate!"; + break; + case Array::DELETEIF: + visit(x.arr_arg()); + m_output << ".delete_if"; + break; + case Array::INSERT: + visit(x.arr_arg()); + m_output << ".insert"; + break; + case Array::BSEARCH: + visit(x.arr_arg()); + m_output << ".bsearch"; + break; + case Array::KEEPIF: + visit(x.arr_arg()); + m_output << ".keep_if"; + break; + case Array::SELECT: + visit(x.arr_arg()); + m_output << ".select"; + break; + case Array::VALUES_AT: + visit(x.arr_arg()); + m_output << ".values_at"; + break; + case Array::BLOCK: + visit(x.arr_arg()); + m_output << ".index"; + break; + case Array::DIG: + visit(x.arr_arg()); + m_output << ".dig"; + break; + case Array::SLICE: + visit(x.arr_arg()); + m_output << ".slice"; + break; + case Array::PERM: + visit(x.arr_arg()); + m_output << ".permutation"; + break; + case Array::COMB: + visit(x.arr_arg()); + m_output << ".combination"; + break; + case Array::ASSOC: + visit(x.arr_arg()); + m_output << ".assoc"; + break; + case Array::RASSOC: + visit(x.arr_arg()); + m_output << ".rassoc"; + break; + } + m_output << "("; + visit(x.val_arg()); + m_output << ")"; } void protoConverter::visit(AssignmentStatement const& x) { - m_output << "var_" << m_numLiveVars << " = "; - visit(x.rvalue()); - m_numVarsPerScope.top()++; - m_numLiveVars++; - m_output << "\n"; + m_output << "var_" << m_numLiveVars << " = "; + visit(x.rvalue()); + m_numVarsPerScope.top()++; + m_numLiveVars++; + m_output << "\n"; } void protoConverter::visit(BinaryOp const& x) { - m_output << "("; - visit(x.left()); - switch (x.op()) { - case BinaryOp::ADD: m_output << " + "; break; - case BinaryOp::SUB: m_output << " - "; break; - case BinaryOp::MUL: m_output << " * "; break; - case BinaryOp::DIV: m_output << " / "; break; - case BinaryOp::MOD: m_output << " % "; break; - case BinaryOp::XOR: m_output << " ^ "; break; - case BinaryOp::AND: m_output << " and "; break; - case BinaryOp::OR: m_output << " or "; break; - case BinaryOp::EQ: m_output << " == "; break; - case BinaryOp::NE: m_output << " != "; break; - case BinaryOp::LE: m_output << " <= "; break; - case BinaryOp::GE: m_output << " >= "; break; - case BinaryOp::LT: m_output << " < "; break; - case BinaryOp::GT: m_output << " > "; break; - case BinaryOp::RS: m_output << " >> "; break; - } - visit(x.right()); - m_output << ")"; + m_output << "("; + visit(x.left()); + switch (x.op()) { + case BinaryOp::ADD: m_output << " + "; break; + case BinaryOp::SUB: m_output << " - "; break; + case BinaryOp::MUL: m_output << " * "; break; + case BinaryOp::DIV: m_output << " / "; break; + case BinaryOp::MOD: m_output << " % "; break; + case BinaryOp::XOR: m_output << " ^ "; break; + case BinaryOp::AND: m_output << " and "; break; + case BinaryOp::OR: m_output << " or "; break; + case BinaryOp::EQ: m_output << " == "; break; + case BinaryOp::NE: m_output << " != "; break; + case BinaryOp::LE: m_output << " <= "; break; + case BinaryOp::GE: m_output << " >= "; break; + case BinaryOp::LT: m_output << " < "; break; + case BinaryOp::GT: m_output << " > "; break; + case BinaryOp::RS: m_output << " >> "; break; + } + visit(x.right()); + m_output << ")"; } void protoConverter::visit(BuiltinFuncs const& x) { - switch (x.bifunc_oneof_case()) { - case BuiltinFuncs::kOs: - visit(x.os()); - break; - case BuiltinFuncs::kTime: - visit(x.time()); - break; - case BuiltinFuncs::kArr: - visit(x.arr()); - break; - case BuiltinFuncs::kMops: - visit(x.mops()); - break; - case BuiltinFuncs::BIFUNC_ONEOF_NOT_SET: - m_output << "1"; - break; - } - m_output << "\n"; + switch (x.bifunc_oneof_case()) { + case BuiltinFuncs::kOs: + visit(x.os()); + break; + case BuiltinFuncs::kTime: + visit(x.time()); + break; + case BuiltinFuncs::kArr: + visit(x.arr()); + break; + case BuiltinFuncs::kMops: + visit(x.mops()); + break; + case BuiltinFuncs::BIFUNC_ONEOF_NOT_SET: + m_output << "1"; + break; + } + m_output << "\n"; } void protoConverter::visit(Const const& x) { - switch (x.const_oneof_case()) { - case Const::kIntLit: - m_output << "(" << (x.int_lit() % 13) << ")"; - break; - case Const::kBoolVal: - m_output << "(" << x.bool_val() << ")"; - break; - case Const::CONST_ONEOF_NOT_SET: - m_output << "1"; - break; - } + switch (x.const_oneof_case()) { + case Const::kIntLit: + m_output << "(" << (x.int_lit() % 13) << ")"; + break; + case Const::kBoolVal: + m_output << "(" << x.bool_val() << ")"; + break; + case Const::CONST_ONEOF_NOT_SET: + m_output << "1"; + break; + } } void protoConverter::visit(Function const& x) { - m_output << "def foo()\nvar_0 = 1\n"; - visit(x.statements()); - m_output << "end\n"; - m_output << "foo\n"; + m_output << "def foo()\nvar_0 = 1\n"; + visit(x.statements()); + m_output << "end\n"; + m_output << "foo\n"; } void protoConverter::visit(HashType const& x) { - if (x.keyval_size() > 0) { - int i = x.keyval_size(); - m_output << "{"; - for (auto &e : x.keyval()) { - i--; - if (i == 0) { - visit(e); - } - else { - visit(e); - m_output << ", "; - } - } - m_output << "}"; - } + if (x.keyval_size() > 0) { + int i = x.keyval_size(); + m_output << "{"; + for (auto &e : x.keyval()) { + i--; + if (i == 0) { + visit(e); + } + else { + visit(e); + m_output << ", "; + } + } + m_output << "}"; + } } void protoConverter::visit(IfElse const& x) { - m_output << "if "; - visit(x.cond()); - m_output << "\n"; - visit(x.if_body()); - m_output << "\nelse\n"; - visit(x.else_body()); - m_output << "\nend\n"; + m_output << "if "; + visit(x.cond()); + m_output << "\n"; + visit(x.if_body()); + m_output << "\nelse\n"; + visit(x.else_body()); + m_output << "\nend\n"; } void protoConverter::visit(KVPair const& x) { - m_output << "\"" << removeSpecial(x.key()) << "\""; - m_output << " => "; - m_output << "\"" << removeSpecial(x.val()) << "\""; + m_output << "\"" << removeSpecial(x.key()) << "\""; + m_output << " => "; + m_output << "\"" << removeSpecial(x.val()) << "\""; } void protoConverter::visit(MathConst const& x) { - switch (x.math_const()) { - case MathConst::PI: - m_output << "Math::PI"; - break; - case MathConst::E: - m_output << "Math::E"; - break; - } + switch (x.math_const()) { + case MathConst::PI: + m_output << "Math::PI"; + break; + case MathConst::E: + m_output << "Math::E"; + break; + } } void protoConverter::visit(MathOps const& x) { - switch (x.math_op()) { - case MathOps::CBRT: - m_output << "Math.cbrt("; - visit(x.math_arg()); - m_output << ")"; - break; - case MathOps::COS: - m_output << "Math.cos("; - visit(x.math_arg()); - m_output << ")"; - break; - case MathOps::ERF: - m_output << "Math.erf("; - visit(x.math_arg()); - m_output << ")"; - break; - case MathOps::ERFC: - m_output << "Math.erfc("; - visit(x.math_arg()); - m_output << ")"; - break; - case MathOps::LOG: - m_output << "Math.log("; - visit(x.math_arg()); - m_output << ")"; - break; - case MathOps::LOG10: - m_output << "Math.log10("; - visit(x.math_arg()); - m_output << ")"; - break; - case MathOps::LOG2: - m_output << "Math.log2("; - visit(x.math_arg()); - m_output << ")"; - break; - case MathOps::SIN: - m_output << "Math.sin("; - visit(x.math_arg()); - m_output << ")"; - break; - case MathOps::SQRT: - m_output << "Math.sqrt("; - visit(x.math_arg()); - m_output << ")"; - break; - case MathOps::TAN: - m_output << "Math.tan("; - visit(x.math_arg()); - m_output << ")"; - break; - } + switch (x.math_op()) { + case MathOps::CBRT: + m_output << "Math.cbrt("; + visit(x.math_arg()); + m_output << ")"; + break; + case MathOps::COS: + m_output << "Math.cos("; + visit(x.math_arg()); + m_output << ")"; + break; + case MathOps::ERF: + m_output << "Math.erf("; + visit(x.math_arg()); + m_output << ")"; + break; + case MathOps::ERFC: + m_output << "Math.erfc("; + visit(x.math_arg()); + m_output << ")"; + break; + case MathOps::LOG: + m_output << "Math.log("; + visit(x.math_arg()); + m_output << ")"; + break; + case MathOps::LOG10: + m_output << "Math.log10("; + visit(x.math_arg()); + m_output << ")"; + break; + case MathOps::LOG2: + m_output << "Math.log2("; + visit(x.math_arg()); + m_output << ")"; + break; + case MathOps::SIN: + m_output << "Math.sin("; + visit(x.math_arg()); + m_output << ")"; + break; + case MathOps::SQRT: + m_output << "Math.sqrt("; + visit(x.math_arg()); + m_output << ")"; + break; + case MathOps::TAN: + m_output << "Math.tan("; + visit(x.math_arg()); + m_output << ")"; + break; + } } void protoConverter::visit(MathType const& x) { - switch (x.math_arg_oneof_case()) { - case MathType::kMathRval: - visit(x.math_rval()); - break; - case MathType::kMathConst: - visit(x.math_const()); - break; - case MathType::MATH_ARG_ONEOF_NOT_SET: - m_output << "1"; - break; - } + switch (x.math_arg_oneof_case()) { + case MathType::kMathRval: + visit(x.math_rval()); + break; + case MathType::kMathConst: + visit(x.math_const()); + break; + case MathType::MATH_ARG_ONEOF_NOT_SET: + m_output << "1"; + break; + } } void protoConverter::visit(ObjectSpace const& x) { - switch (x.os_func()) { - case ObjectSpace::COUNT: - m_output << "ObjectSpace.count_objects"; - break; - } - m_output << "("; - visit(x.os_arg()); - m_output << ")" << "\n"; + switch (x.os_func()) { + case ObjectSpace::COUNT: + m_output << "ObjectSpace.count_objects"; + break; + } + m_output << "("; + visit(x.os_arg()); + m_output << ")" << "\n"; } void protoConverter::visit(Rvalue const& x) { - switch (x.rvalue_oneof_case()) { - case Rvalue::kVarref: - visit(x.varref()); - break; - case Rvalue::kCons: - visit(x.cons()); - break; - case Rvalue::kBinop: - visit(x.binop()); - break; - case Rvalue::RVALUE_ONEOF_NOT_SET: - m_output << "1"; - break; - } + switch (x.rvalue_oneof_case()) { + case Rvalue::kVarref: + visit(x.varref()); + break; + case Rvalue::kCons: + visit(x.cons()); + break; + case Rvalue::kBinop: + visit(x.binop()); + break; + case Rvalue::RVALUE_ONEOF_NOT_SET: + m_output << "1"; + break; + } } void protoConverter::visit(Statement const& x) { - switch (x.stmt_oneof_case()) { - case Statement::kAssignment: - visit(x.assignment()); - break; - case Statement::kIfelse: - visit(x.ifelse()); - break; - case Statement::kTernaryStmt: - visit(x.ternary_stmt()); - break; - case Statement::kBuiltins: - visit(x.builtins()); - break; - case Statement::kBlockstmt: - visit(x.blockstmt()); - break; - case Statement::STMT_ONEOF_NOT_SET: - break; - } - m_output << "\n"; + switch (x.stmt_oneof_case()) { + case Statement::kAssignment: + visit(x.assignment()); + break; + case Statement::kIfelse: + visit(x.ifelse()); + break; + case Statement::kTernaryStmt: + visit(x.ternary_stmt()); + break; + case Statement::kBuiltins: + visit(x.builtins()); + break; + case Statement::kBlockstmt: + visit(x.blockstmt()); + break; + case Statement::STMT_ONEOF_NOT_SET: + break; + } + m_output << "\n"; } void protoConverter::visit(StatementSeq const& x) { - if (x.statements_size() > 0) { - m_numVarsPerScope.push(0); - m_output << "@scope ||= begin\n"; - for (auto &st : x.statements()) - visit(st); - m_output << "end\n"; - m_numLiveVars -= m_numVarsPerScope.top(); - m_numVarsPerScope.pop(); - } + if (x.statements_size() > 0) { + m_numVarsPerScope.push(0); + m_output << "@scope ||= begin\n"; + for (auto &st : x.statements()) + visit(st); + m_output << "end\n"; + m_numLiveVars -= m_numVarsPerScope.top(); + m_numVarsPerScope.pop(); + } } void protoConverter::visit(StringExtNoArg const& x) { - m_output << "\"" << removeSpecial(x.str_arg()) << "\""; - switch (x.str_op()) { - case StringExtNoArg::DUMP: - m_output << ".dump"; - break; - case StringExtNoArg::STRIP: - m_output << ".strip"; - break; - case StringExtNoArg::LSTRIP: - m_output << ".lstrip"; - break; - case StringExtNoArg::RSTRIP: - m_output << ".rstrip"; - break; - case StringExtNoArg::STRIPE: - m_output << ".strip!"; - break; - case StringExtNoArg::LSTRIPE: - m_output << ".lstrip!"; - break; - case StringExtNoArg::RSTRIPE: - m_output << ".rstrip!"; - break; - case StringExtNoArg::SWAPCASE: - m_output << ".swapcase"; - break; - case StringExtNoArg::SWAPCASEE: - m_output << ".swapcase!"; - break; - case StringExtNoArg::SQUEEZE: - m_output << ".squeeze"; - break; - } + m_output << "\"" << removeSpecial(x.str_arg()) << "\""; + switch (x.str_op()) { + case StringExtNoArg::DUMP: + m_output << ".dump"; + break; + case StringExtNoArg::STRIP: + m_output << ".strip"; + break; + case StringExtNoArg::LSTRIP: + m_output << ".lstrip"; + break; + case StringExtNoArg::RSTRIP: + m_output << ".rstrip"; + break; + case StringExtNoArg::STRIPE: + m_output << ".strip!"; + break; + case StringExtNoArg::LSTRIPE: + m_output << ".lstrip!"; + break; + case StringExtNoArg::RSTRIPE: + m_output << ".rstrip!"; + break; + case StringExtNoArg::SWAPCASE: + m_output << ".swapcase"; + break; + case StringExtNoArg::SWAPCASEE: + m_output << ".swapcase!"; + break; + case StringExtNoArg::SQUEEZE: + m_output << ".squeeze"; + break; + } } void protoConverter::visit(Ternary const& x) { - m_output << "("; - visit(x.tern_cond()); - m_output << " ? "; - visit(x.t_branch()); - m_output << " : "; - visit(x.f_branch()); - m_output << ")\n"; + m_output << "("; + visit(x.tern_cond()); + m_output << " ? "; + visit(x.t_branch()); + m_output << " : "; + visit(x.f_branch()); + m_output << ")\n"; } void protoConverter::visit(Time const& x) { - switch (x.t_func()) { - case Time::AT: - m_output << "Time.at"; - break; - case Time::GM: - m_output << "Time.gm"; - break; - } - m_output << "(" << (x.t_arg()% 13) << ")" << "\n"; + switch (x.t_func()) { + case Time::AT: + m_output << "Time.at"; + break; + case Time::GM: + m_output << "Time.gm"; + break; + } + m_output << "(" << (x.t_arg()% 13) << ")" << "\n"; } void protoConverter::visit(VarRef const& x) { - m_output << "var_" << (static_cast(x.varnum()) % m_numLiveVars); + m_output << "var_" << (static_cast(x.varnum()) % m_numLiveVars); } std::string protoConverter::FunctionToString(Function const& input) { - visit(input); - return m_output.str(); + visit(input); + return m_output.str(); } diff --git a/oss-fuzz/proto_to_ruby.h b/oss-fuzz/proto_to_ruby.h index 355c42839c..e91c78f02d 100644 --- a/oss-fuzz/proto_to_ruby.h +++ b/oss-fuzz/proto_to_ruby.h @@ -7,49 +7,49 @@ #include namespace ruby_fuzzer { - class protoConverter - { - public: - protoConverter() { - m_numLiveVars = 1; - m_numVarsPerScope.push(m_numLiveVars); - } - protoConverter(protoConverter const& x) { - m_numLiveVars = x.m_numLiveVars; - m_numVarsPerScope = x.m_numVarsPerScope; - } - ~protoConverter() {} - std::string FunctionToString(Function const& _input); + class protoConverter + { + public: + protoConverter() { + m_numLiveVars = 1; + m_numVarsPerScope.push(m_numLiveVars); + } + protoConverter(protoConverter const& x) { + m_numLiveVars = x.m_numLiveVars; + m_numVarsPerScope = x.m_numVarsPerScope; + } + ~protoConverter() {} + std::string FunctionToString(Function const& _input); - private: - void visit(ArrType const&); - void visit(Array const&); - void visit(AssignmentStatement const&); - void visit(BinaryOp const&); - void visit(BuiltinFuncs const&); - void visit(Const const&); - void visit(Function const&); - void visit(HashType const&); - void visit(IfElse const&); - void visit(KVPair const&); - void visit(MathConst const&); - void visit(MathOps const&); - void visit(MathType const&); - void visit(ObjectSpace const&); - void visit(Rvalue const&); - void visit(Statement const&); - void visit(StatementSeq const&); - void visit(StringExtNoArg const&); - void visit(Ternary const&); - void visit(Time const&); - void visit(VarRef const&); - template - void visit(google::protobuf::RepeatedPtrField const& _repeated_field); + private: + void visit(ArrType const&); + void visit(Array const&); + void visit(AssignmentStatement const&); + void visit(BinaryOp const&); + void visit(BuiltinFuncs const&); + void visit(Const const&); + void visit(Function const&); + void visit(HashType const&); + void visit(IfElse const&); + void visit(KVPair const&); + void visit(MathConst const&); + void visit(MathOps const&); + void visit(MathType const&); + void visit(ObjectSpace const&); + void visit(Rvalue const&); + void visit(Statement const&); + void visit(StatementSeq const&); + void visit(StringExtNoArg const&); + void visit(Ternary const&); + void visit(Time const&); + void visit(VarRef const&); + template + void visit(google::protobuf::RepeatedPtrField const& _repeated_field); - std::string removeSpecial(const std::string &x); + std::string removeSpecial(const std::string &x); - std::ostringstream m_output; - std::stack m_numVarsPerScope; - int32_t m_numLiveVars; - }; + std::ostringstream m_output; + std::stack m_numVarsPerScope; + int32_t m_numLiveVars; + }; } diff --git a/scripts/check_makefiles_for_tabs.sh b/scripts/check_makefiles_for_tabs.sh new file mode 100755 index 0000000000..5d93e0428e --- /dev/null +++ b/scripts/check_makefiles_for_tabs.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# Iterate over all files passed as arguments by pre-commit +for makefile in "$@"; do + # Check if the file exists and is a regular file + if [[ -f "$makefile" ]]; then + if grep -P '^\s' "$makefile" | grep -vP '^\t' > /dev/null; then + echo "Error: File '$makefile' contains spaces at the beginning of lines instead of tabs." + exit 1 + fi + fi +done +exit 0 diff --git a/src/allocf.c b/src/allocf.c new file mode 100644 index 0000000000..18555d9372 --- /dev/null +++ b/src/allocf.c @@ -0,0 +1,37 @@ +/* +** allocf.c - default memory allocation function +** +** See Copyright Notice in mruby.h +*/ +#include +#include + +/* This function serves as the default memory allocation function and accepts two arguments: + * + * - `p`: The previous pointer to the memory region. For memory allocation, this parameter is NULL. + * - `size`: The new size of the memory region to be returned. If size is 0, the memory region will be freed. + * + * All memory allocation from the inside of mruby uses this function. + * + * If you want to use your own memory allocator, you have two options: + * + * - provide your own version of malloc() / realloc() / free() + * + * - redefine mrb_basic_alloc_func() in your application. + * + * See doc/guides/memory.md for detail. + */ + +void* +mrb_basic_alloc_func(void *p, size_t size) +{ + if (size == 0) { + /* `free(NULL)` should be no-op */ + free(p); + return NULL; + } + else { + /* `realloc(NULL, size)` should work as `malloc(size)` */ + return realloc(p, size); + } +} diff --git a/src/array.c b/src/array.c index a3460c5165..166960f2f3 100644 --- a/src/array.c +++ b/src/array.c @@ -11,7 +11,6 @@ #include #include #include -#include #include "value_array.h" #define ARY_DEFAULT_LEN 4 @@ -22,12 +21,14 @@ #endif #define ARY_MAX_SIZE ((mrb_int)((ARY_C_MAX_SIZE < (size_t)MRB_INT_MAX) ? ARY_C_MAX_SIZE : MRB_INT_MAX-1)) +/* Raises an ArgumentError when array size exceeds limits */ static void ary_too_big(mrb_state *mrb) { mrb_raise(mrb, E_ARGUMENT_ERROR, "array size too big"); } +/* Checks if array size would exceed limits and raises error if so */ static inline void ary_check_too_big(mrb_state *mrb, mrb_int a, mrb_int b) { @@ -39,16 +40,15 @@ ary_check_too_big(mrb_state *mrb, mrb_int a, mrb_int b) #endif } +/* Creates a new RArray with specified capacity */ static struct RArray* ary_new_capa(mrb_state *mrb, mrb_int capa) { - struct RArray *a; - size_t blen; - ary_check_too_big(mrb, capa, 0); - blen = capa * sizeof(mrb_value); - a = MRB_OBJ_ALLOC(mrb, MRB_TT_ARRAY, mrb->array_class); + size_t blen = capa * sizeof(mrb_value); + struct RArray *a = MRB_OBJ_ALLOC(mrb, MRB_TT_ARRAY, mrb->array_class); + if (capa <= MRB_ARY_EMBED_LEN_MAX) { ARY_SET_EMBED_LEN(a, 0); } @@ -61,6 +61,17 @@ ary_new_capa(mrb_state *mrb, mrb_int capa) return a; } +/** + * Creates a new array with a specified initial capacity. + * + * This function allocates an array that can hold at least `capa` elements + * without needing to immediately reallocate memory. If `capa` is 0, + * it may still allocate a small default capacity. + * + * @param mrb The mruby state. + * @param capa The initial capacity desired for the array. + * @return A new mrb_value representing the created array. + */ MRB_API mrb_value mrb_ary_new_capa(mrb_state *mrb, mrb_int capa) { @@ -68,6 +79,15 @@ mrb_ary_new_capa(mrb_state *mrb, mrb_int capa) return mrb_obj_value(a); } +/** + * Creates a new, empty array. + * + * This function is equivalent to calling `mrb_ary_new_capa` with a capacity of 0. + * The array will dynamically resize as elements are added. + * + * @param mrb The mruby state. + * @return A new mrb_value representing the created empty array. + */ MRB_API mrb_value mrb_ary_new(mrb_state *mrb) { @@ -87,16 +107,16 @@ mrb_ary_new(mrb_state *mrb) * * See also https://togetter.com/li/462898 (Japanese) */ +/* Portable array copy function to avoid memcpy issues on some platforms */ static inline void array_copy(mrb_value *dst, const mrb_value *src, mrb_int size) { - mrb_int i; - - for (i = 0; i < size; i++) { + for (mrb_int i = 0; i < size; i++) { dst[i] = src[i]; } } +/* Creates a new RArray initialized with values from an array */ static struct RArray* ary_new_from_values(mrb_state *mrb, mrb_int size, const mrb_value *vals) { @@ -108,6 +128,17 @@ ary_new_from_values(mrb_state *mrb, mrb_int size, const mrb_value *vals) return a; } +/** + * Creates a new array initialized with a given sequence of values. + * + * This function allocates an array and copies `size` elements from the `vals` + * pointer into the new array. + * + * @param mrb The mruby state. + * @param size The number of values to initialize the array with. + * @param vals A pointer to an array of `mrb_value`s to copy into the new array. + * @return A new mrb_value representing the created array. + */ MRB_API mrb_value mrb_ary_new_from_values(mrb_state *mrb, mrb_int size, const mrb_value *vals) { @@ -115,20 +146,30 @@ mrb_ary_new_from_values(mrb_state *mrb, mrb_int size, const mrb_value *vals) return mrb_obj_value(a); } +/** + * Creates a new array of size 2, typically used to represent an association (key-value pair). + * + * The first element of the array is `car` (often the key), and the second element + * is `cdr` (often the value). + * + * @param mrb The mruby state. + * @param car The first value to be placed in the array. + * @param cdr The second value to be placed in the array. + * @return A new mrb_value representing the created 2-element array. + */ MRB_API mrb_value mrb_assoc_new(mrb_state *mrb, mrb_value car, mrb_value cdr) { - struct RArray *a; - mrb_value *p; + struct RArray *a = ary_new_capa(mrb, 2); + mrb_value *p = ARY_PTR(a); - a = ary_new_capa(mrb, 2); - p = ARY_PTR(a); p[0] = car; p[1] = cdr; ARY_SET_LEN(a, 2); return mrb_obj_value(a); } +/* Fills array elements with nil values */ static void ary_fill_with_nil(mrb_value *ptr, mrb_int size) { @@ -139,12 +180,9 @@ ary_fill_with_nil(mrb_value *ptr, mrb_int size) } } -static void -ary_modify_check(mrb_state *mrb, struct RArray *a) -{ - mrb_check_frozen(mrb, a); -} +#define ary_modify_check(mrb, a) mrb_check_frozen((mrb), (a)) +/* Prepares array for modification, handling shared arrays and frozen check */ static void ary_modify(mrb_state *mrb, struct RArray *a) { @@ -159,12 +197,9 @@ ary_modify(mrb_state *mrb, struct RArray *a) mrb_free(mrb, shared); } else { - mrb_value *ptr, *p; - mrb_int len; + mrb_value *p = a->as.heap.ptr; + mrb_value *ptr = (mrb_value*)mrb_malloc(mrb, a->as.heap.len * sizeof(mrb_value)); - p = a->as.heap.ptr; - len = a->as.heap.len * sizeof(mrb_value); - ptr = (mrb_value *)mrb_malloc(mrb, len); if (p) { array_copy(ptr, p, a->as.heap.len); } @@ -176,6 +211,17 @@ ary_modify(mrb_state *mrb, struct RArray *a) } } +/** + * Prepares an array for modification. + * + * This function ensures that the array is not frozen and is not shared. + * If the array is shared and has multiple references, this function will + * duplicate the array data to ensure that modifications do not affect + * other references. It also triggers a write barrier for the garbage collector. + * + * @param mrb The mruby state. + * @param a A pointer to the RArray structure to modify. + */ MRB_API void mrb_ary_modify(mrb_state *mrb, struct RArray* a) { @@ -183,17 +229,18 @@ mrb_ary_modify(mrb_state *mrb, struct RArray* a) ary_modify(mrb, a); } +/* Converts array to shared representation for copy-on-write semantics */ static void ary_make_shared(mrb_state *mrb, struct RArray *a) { if (!ARY_SHARED_P(a) && !ARY_EMBED_P(a)) { - mrb_shared_array *shared = (mrb_shared_array *)mrb_malloc(mrb, sizeof(mrb_shared_array)); + mrb_shared_array *shared = (mrb_shared_array*)mrb_malloc(mrb, sizeof(mrb_shared_array)); mrb_value *ptr = a->as.heap.ptr; mrb_int len = a->as.heap.len; shared->refcnt = 1; if (a->as.heap.aux.capa > len) { - a->as.heap.ptr = shared->ptr = (mrb_value *)mrb_realloc(mrb, ptr, sizeof(mrb_value)*len+1); + a->as.heap.ptr = shared->ptr = (mrb_value*)mrb_realloc(mrb, ptr, sizeof(mrb_value)*len+1); } else { shared->ptr = ptr; @@ -204,6 +251,45 @@ ary_make_shared(mrb_state *mrb, struct RArray *a) } } +/* Creates a shared copy of array for temporary GC protection. + * Frozen arrays are returned as-is (cannot be modified). + * Embedded arrays get full copy (cannot be shared). + * Heap arrays get zero-copy shared reference. + */ +MRB_API mrb_value +mrb_ary_make_shared_copy(mrb_state *mrb, mrb_value ary) +{ + struct RArray *orig = mrb_ary_ptr(ary); + + // Frozen arrays don't need protection + if (mrb_frozen_p(orig)) { + return ary; + } + + // Embedded arrays can't be shared - make full copy + if (ARY_EMBED_P(orig)) { + return mrb_ary_dup(mrb, ary); + } + + // Make original array shared if not already + if (!ARY_SHARED_P(orig)) { + ary_make_shared(mrb, orig); + } + + // Create new array that shares the buffer + struct RArray *shared = (struct RArray*)mrb_obj_alloc(mrb, MRB_TT_ARRAY, mrb->array_class); + + shared->as.heap.ptr = orig->as.heap.ptr; + shared->as.heap.len = orig->as.heap.len; + shared->as.heap.aux.shared = orig->as.heap.aux.shared; + shared->as.heap.aux.shared->refcnt++; + ARY_SET_SHARED_FLAG(shared); + mrb_write_barrier(mrb, (struct RBasic*)shared); + + return mrb_obj_value(shared); +} + +/* Expands array capacity to accommodate at least len elements */ static void ary_expand_capa(mrb_state *mrb, struct RArray *a, mrb_int len) { @@ -221,38 +307,37 @@ ary_expand_capa(mrb_state *mrb, struct RArray *a, mrb_int len) capa = len; } } - if (capa < len || capa > ARY_MAX_SIZE) { + if (capa > ARY_MAX_SIZE) { ary_too_big(mrb); } if (ARY_EMBED_P(a)) { mrb_value *ptr = ARY_EMBED_PTR(a); - mrb_int len = ARY_EMBED_LEN(a); - mrb_value *expanded_ptr = (mrb_value *)mrb_malloc(mrb, sizeof(mrb_value)*capa); + mrb_int slen = ARY_EMBED_LEN(a); + mrb_value *expanded_ptr = (mrb_value*)mrb_malloc(mrb, sizeof(mrb_value)*capa); ARY_UNSET_EMBED_FLAG(a); - array_copy(expanded_ptr, ptr, len); - a->as.heap.len = len; + array_copy(expanded_ptr, ptr, slen); + a->as.heap.len = slen; a->as.heap.aux.capa = capa; a->as.heap.ptr = expanded_ptr; } else if (capa > a->as.heap.aux.capa) { - mrb_value *expanded_ptr = (mrb_value *)mrb_realloc(mrb, a->as.heap.ptr, sizeof(mrb_value)*capa); + mrb_value *expanded_ptr = (mrb_value*)mrb_realloc(mrb, a->as.heap.ptr, sizeof(mrb_value)*capa); a->as.heap.aux.capa = capa; a->as.heap.ptr = expanded_ptr; } } +/* Shrinks array capacity to save memory when array becomes much smaller */ static void ary_shrink_capa(mrb_state *mrb, struct RArray *a) { - - mrb_int capa; - if (ARY_EMBED_P(a)) return; - capa = a->as.heap.aux.capa; + mrb_int capa = a->as.heap.aux.capa; + if (capa < ARY_DEFAULT_LEN * 2) return; if (capa <= a->as.heap.len * ARY_SHRINK_RATIO) return; @@ -266,18 +351,30 @@ ary_shrink_capa(mrb_state *mrb, struct RArray *a) if (capa > a->as.heap.len && capa < a->as.heap.aux.capa) { a->as.heap.aux.capa = capa; - a->as.heap.ptr = (mrb_value *)mrb_realloc(mrb, a->as.heap.ptr, sizeof(mrb_value)*capa); + a->as.heap.ptr = (mrb_value*)mrb_realloc(mrb, a->as.heap.ptr, sizeof(mrb_value)*capa); } } +/** + * Resizes an array to a new length. + * + * If `new_len` is smaller than the current length, the array is truncated. + * If `new_len` is larger than the current length, the array is expanded, + * and new elements are filled with `nil`. + * This function modifies the array in place. + * + * @param mrb The mruby state. + * @param ary The array (mrb_value) to resize. + * @param new_len The desired new length of the array. + * @return The resized array (the same mrb_value as `ary`). + */ MRB_API mrb_value mrb_ary_resize(mrb_state *mrb, mrb_value ary, mrb_int new_len) { - mrb_int old_len; struct RArray *a = mrb_ary_ptr(ary); ary_modify(mrb, a); - old_len = RARRAY_LEN(ary); + mrb_int old_len = RARRAY_LEN(ary); if (old_len != new_len) { if (new_len < old_len) { ary_shrink_capa(mrb, a); @@ -292,17 +389,25 @@ mrb_ary_resize(mrb_state *mrb, mrb_value ary, mrb_int new_len) return ary; } +/* + * call-seq: + * Array[obj, ...] -> new_array + * + * Creates a new Array containing the given objects: + * + * Array[1, 'a', /^A/] # => [1, "a", /^A/] + * Array[1, 2, 3] # => [1, 2, 3] + * Array[] # => [] + */ static mrb_value mrb_ary_s_create(mrb_state *mrb, mrb_value klass) { - mrb_value ary; const mrb_value *vals; mrb_int len; - struct RArray *a; mrb_get_args(mrb, "*!", &vals, &len); - ary = mrb_ary_new_from_values(mrb, len, vals); - a = mrb_ary_ptr(ary); + mrb_value ary = mrb_ary_new_from_values(mrb, len, vals); + struct RArray *a = mrb_ary_ptr(ary); a->c = mrb_class_ptr(klass); return ary; @@ -310,27 +415,112 @@ mrb_ary_s_create(mrb_state *mrb, mrb_value klass) static void ary_replace(mrb_state*, struct RArray*, struct RArray*); +/* + * call-seq: + * Array.new(size=0, default=nil) -> new_array + * Array.new(array) -> new_array + * Array.new(size) {|index| ... } -> new_array + * + * Returns a new Array. + * + * With no block and no arguments, returns a new empty Array object. + * + * With no block and a single `size` argument, returns a new Array object + * of the given size whose elements are all `nil`: + * + * a = Array.new(3) + * a # => [nil, nil, nil] + * a.size # => 3 + * + * With no block and arguments `size` and `default`, returns an Array object + * of the given size; each element is the same `default` object: + * + * a = Array.new(3, 'x') + * a # => ['x', 'x', 'x'] + * + * With a block and argument `size`, returns an Array object of the given size; + * the block is called with each successive integer `index`; + * the element for that `index` is the return value from the block: + * + * a = Array.new(3) {|index| "Element #{index}" } + * a # => ["Element 0", "Element 1", "Element 2"] + * + * With a single Array argument `array`, returns a new Array formed from `array`: + * + * a = Array.new([:foo, 'bar', 2]) + * a.class # => Array + * a # => [:foo, "bar", 2] + */ +static mrb_value +mrb_ary_init(mrb_state *mrb, mrb_value ary) +{ + mrb_value ss = mrb_fixnum_value(0); + mrb_value obj = mrb_nil_value(); + mrb_value blk = mrb_nil_value(); + + mrb_get_args(mrb, "|oo&", &ss, &obj, &blk); + + if (mrb_array_p(ss) && mrb_nil_p(obj) && mrb_nil_p(blk)) { + ary_replace(mrb, mrb_ary_ptr(ary), mrb_ary_ptr(ss)); + return ary; + } + + mrb_int size = mrb_as_int(mrb, ss); + struct RArray *a = mrb_ary_ptr(ary); + + if (ARY_CAPA(a) < size) { + ary_expand_capa(mrb, a, size); + } + + int ai = mrb_gc_arena_save(mrb); + for (mrb_int i=0; i self * - * Adds to +array+ all elements from each \Array in +other_arrays+; returns +self+: + * Adds to `array` all elements from each \Array in `other_arrays`; returns `self`: * * a = [0, 1] * a.concat([2, 3], [4, 5]) # => [0, 1, 2, 3, 4, 5] @@ -365,18 +555,27 @@ mrb_ary_concat_m(mrb_state *mrb, mrb_value self) return self; } +/* + * call-seq: + * array + other_array -> new_array + * + * Returns a new Array containing all elements of `array` + * followed by all elements of `other_array`: + * + * a = [0, 1] + [2, 3] + * a # => [0, 1, 2, 3] + */ static mrb_value mrb_ary_plus(mrb_state *mrb, mrb_value self) { struct RArray *a1 = mrb_ary_ptr(self); - struct RArray *a2; const mrb_value *ptr; - mrb_int blen, len1; + mrb_int blen; mrb_get_args(mrb, "a", &ptr, &blen); ary_check_too_big(mrb, ARY_LEN(a1), blen); - len1 = ARY_LEN(a1); - a2 = ary_new_capa(mrb, len1 + blen); + mrb_int len1 = ARY_LEN(a1); + struct RArray *a2 = ary_new_capa(mrb, len1 + blen); array_copy(ARY_PTR(a2), ARY_PTR(a1), len1); array_copy(ARY_PTR(a2) + len1, ptr, blen); ARY_SET_LEN(a2, len1+blen); @@ -386,6 +585,7 @@ mrb_ary_plus(mrb_state *mrb, mrb_value self) #define ARY_REPLACE_SHARED_MIN 20 +/* Internal helper to replace array contents with another array */ static void ary_replace(mrb_state *mrb, struct RArray *a, struct RArray *b) { @@ -416,7 +616,7 @@ ary_replace(mrb_state *mrb, struct RArray *a, struct RArray *b) mrb_write_barrier(mrb, (struct RBasic*)a); return; } - if (!mrb_frozen_p(b) && len > ARY_REPLACE_SHARED_MIN) { + if (len > ARY_REPLACE_SHARED_MIN) { ary_make_shared(mrb, b); goto shared_b; } @@ -427,6 +627,16 @@ ary_replace(mrb_state *mrb, struct RArray *a, struct RArray *b) ARY_SET_LEN(a, len); } +/** + * Replaces the contents of an array with the contents of another array. + * + * After this operation, the `self` array will contain the same elements + * as the `other` array. This function modifies the `self` array in place. + * + * @param mrb The mruby state. + * @param self The array (mrb_value) whose contents will be replaced. + * @param other The array (mrb_value) from which to copy the elements. + */ MRB_API void mrb_ary_replace(mrb_state *mrb, mrb_value self, mrb_value other) { @@ -438,6 +648,18 @@ mrb_ary_replace(mrb_state *mrb, mrb_value self, mrb_value other) } } +/* + * call-seq: + * array.replace(other_array) -> self + * array.initialize_copy(other_array) -> self + * + * Replaces the contents of `self` with the contents of `other_array`; + * returns `self`: + * + * a = [0, 1, 2] + * a.replace(['foo', 'bar']) # => ["foo", "bar"] + * a # => ["foo", "bar"] + */ static mrb_value mrb_ary_replace_m(mrb_state *mrb, mrb_value self) { @@ -449,21 +671,34 @@ mrb_ary_replace_m(mrb_state *mrb, mrb_value self) return self; } +/* + * call-seq: + * array * int -> new_array + * array * str -> new_string + * + * When the argument is an Integer `n`, + * returns a new Array built by concatenating `n` copies of `self`: + * + * a = ['x', 'y'] + * a * 3 # => ["x", "y", "x", "y", "x", "y"] + * + * When the argument is a String `separator`, + * equivalent to `array.join(separator)`: + * + * [1, 2, 3] * '|' # => "1|2|3" + */ static mrb_value mrb_ary_times(mrb_state *mrb, mrb_value self) { struct RArray *a1 = mrb_ary_ptr(self); - struct RArray *a2; - mrb_value *ptr, sep, tmp; - mrb_int times, len1; - mrb_get_args(mrb, "o", &sep); - tmp = mrb_check_string_type(mrb, sep); + mrb_value arg = mrb_get_arg1(mrb); + mrb_value tmp = mrb_check_string_type(mrb, arg); if (!mrb_nil_p(tmp)) { return mrb_ary_join(mrb, self, tmp); } - mrb_get_args(mrb, "i", ×); + mrb_int times = mrb_as_int(mrb, arg); if (times < 0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument"); } @@ -471,10 +706,12 @@ mrb_ary_times(mrb_state *mrb, mrb_value self) if (ARY_MAX_SIZE / times < ARY_LEN(a1)) { ary_too_big(mrb); } - len1 = ARY_LEN(a1); - a2 = ary_new_capa(mrb, len1 * times); + + mrb_int len1 = ARY_LEN(a1); + struct RArray *a2 = ary_new_capa(mrb, len1 * times); ARY_SET_LEN(a2, len1 * times); - ptr = ARY_PTR(a2); + + mrb_value *ptr = ARY_PTR(a2); while (times--) { array_copy(ptr, ARY_PTR(a1), len1); ptr += len1; @@ -483,6 +720,16 @@ mrb_ary_times(mrb_state *mrb, mrb_value self) return mrb_obj_value(a2); } +/* + * call-seq: + * array.reverse! -> self + * + * Reverses `self` in place: + * + * a = ['foo', 'bar', 'two'] + * a.reverse! # => ["two", "bar", "foo"] + * a # => ["two", "bar", "foo"] + */ static mrb_value mrb_ary_reverse_bang(mrb_state *mrb, mrb_value self) { @@ -490,11 +737,10 @@ mrb_ary_reverse_bang(mrb_state *mrb, mrb_value self) mrb_int len = ARY_LEN(a); if (len > 1) { - mrb_value *p1, *p2; - ary_modify(mrb, a); - p1 = ARY_PTR(a); - p2 = p1 + len - 1; + + mrb_value *p1 = ARY_PTR(a); + mrb_value *p2 = p1 + len - 1; while (p1 < p2) { mrb_value tmp = *p1; @@ -505,6 +751,17 @@ mrb_ary_reverse_bang(mrb_state *mrb, mrb_value self) return self; } +/* + * call-seq: + * array.reverse -> new_array + * + * Returns a new Array with the elements of `self` in reverse order: + * + * a = ['foo', 'bar', 'two'] + * a1 = a.reverse + * a1 # => ["two", "bar", "foo"] + * a # => ["foo", "bar", "two"] + */ static mrb_value mrb_ary_reverse(mrb_state *mrb, mrb_value self) { @@ -512,11 +769,9 @@ mrb_ary_reverse(mrb_state *mrb, mrb_value self) mrb_int len = ARY_LEN(a); if (len > 0) { - mrb_value *p1, *p2, *e; - - p1 = ARY_PTR(a); - e = p1 + len; - p2 = ARY_PTR(b) + len - 1; + mrb_value *p1 = ARY_PTR(a); + mrb_value *e = p1 + len; + mrb_value *p2 = ARY_PTR(b) + len - 1; while (p1 < e) { *p2-- = *p1++; } @@ -525,6 +780,17 @@ mrb_ary_reverse(mrb_state *mrb, mrb_value self) return mrb_obj_value(b); } +/** + * Pushes an element onto the end of an array. + * + * This function appends `elem` to the `ary` array, increasing its length by one. + * The array capacity may be expanded if necessary. + * This function modifies the array in place. + * + * @param mrb The mruby state. + * @param ary The array (mrb_value) to push the element onto. + * @param elem The mrb_value to append to the array. + */ MRB_API void mrb_ary_push(mrb_state *mrb, mrb_value ary, mrb_value elem) { @@ -539,23 +805,39 @@ mrb_ary_push(mrb_state *mrb, mrb_value ary, mrb_value elem) mrb_field_write_barrier_value(mrb, (struct RBasic*)a, elem); } +/* + * call-seq: + * array.push(*objects) -> self + * array << object -> self + * + * Appends trailing elements. + * + * Appends each argument in `objects` to `self`; returns `self`: + * + * a = [:foo, 'bar', 2] + * a.push(:baz, :bat) # => [:foo, "bar", 2, :baz, :bat] + * + * Appends `object` to `self`; returns `self`: + * + * a = [:foo, 'bar', 2] + * a << :baz # => [:foo, "bar", 2, :baz] + */ static mrb_value mrb_ary_push_m(mrb_state *mrb, mrb_value self) { - mrb_int argc; - const mrb_value *argv; - mrb_int len, len2; - struct RArray *a; - - argc = mrb_get_argc(mrb); - argv = mrb_get_argv(mrb); - a = mrb_ary_ptr(self); + mrb_int argc = mrb_get_argc(mrb); + if (argc == 1) { + mrb_ary_push(mrb, self, mrb_get_argv(mrb)[0]); + return self; + } + struct RArray *a = mrb_ary_ptr(self); + mrb_int len = ARY_LEN(a); + mrb_int len2 = len + argc; ary_modify(mrb, a); - len = ARY_LEN(a); - len2 = len + argc; if (ARY_CAPA(a) < len2) { ary_expand_capa(mrb, a, len2); } + const mrb_value *argv = mrb_get_argv(mrb); array_copy(ARY_PTR(a)+len, argv, argc); ARY_SET_LEN(a, len2); while (argc--) { @@ -565,6 +847,16 @@ mrb_ary_push_m(mrb_state *mrb, mrb_value self) return self; } +/** + * Removes and returns the last element from an array. + * + * If the array is empty, returns `nil`. + * This function modifies the array in place. + * + * @param mrb The mruby state. + * @param ary The array (mrb_value) from which to pop the element. + * @return The last element of the array, or `nil` if the array is empty. + */ MRB_API mrb_value mrb_ary_pop(mrb_state *mrb, mrb_value ary) { @@ -579,58 +871,87 @@ mrb_ary_pop(mrb_state *mrb, mrb_value ary) #define ARY_SHIFT_SHARED_MIN 10 +/** + * Removes and returns the first element from an array. + * + * If the array is empty, returns `nil`. + * All other elements are shifted down by one index. + * This function modifies the array in place. + * + * @param mrb The mruby state. + * @param self The array (mrb_value) from which to shift the element. + * @return The first element of the array, or `nil` if the array is empty. + */ MRB_API mrb_value mrb_ary_shift(mrb_state *mrb, mrb_value self) { struct RArray *a = mrb_ary_ptr(self); mrb_int len = ARY_LEN(a); - mrb_value val; ary_modify_check(mrb, a); if (len == 0) return mrb_nil_value(); if (ARY_SHARED_P(a)) { L_SHIFT: - val = a->as.heap.ptr[0]; a->as.heap.ptr++; a->as.heap.len--; - return val; + return a->as.heap.ptr[-1]; } - if (len > ARY_SHIFT_SHARED_MIN) { + else if (len > ARY_SHIFT_SHARED_MIN) { ary_make_shared(mrb, a); goto L_SHIFT; } else { mrb_value *ptr = ARY_PTR(a); mrb_int size = len; + mrb_value val = *ptr; - val = *ptr; while (--size) { *ptr = *(ptr+1); - ++ptr; + ptr++; } ARY_SET_LEN(a, len-1); + return val; } - return val; } +/* + * call-seq: + * array.shift -> object or nil + * array.shift(n) -> new_array + * + * Removes and returns leading elements. + * + * When no argument is given, removes and returns the first element: + * + * a = [:foo, 'bar', 2] + * a.shift # => :foo + * a # => ["bar", 2] + * + * Returns `nil` if `self` is empty. + * + * When argument `n` is given, removes and returns the first `n` elements in a new Array: + * + * a = [:foo, 'bar', 2] + * a.shift(2) # => [:foo, "bar"] + * a # => [2] + */ static mrb_value mrb_ary_shift_m(mrb_state *mrb, mrb_value self) { - mrb_int n; - if (mrb_get_args(mrb, "|i", &n) == 0) { + if (mrb_get_argc(mrb) == 0) { return mrb_ary_shift(mrb, self); } + mrb_int n = mrb_as_int(mrb, mrb_get_arg1(mrb)); struct RArray *a = mrb_ary_ptr(self); mrb_int len = ARY_LEN(a); - mrb_value val; ary_modify_check(mrb, a); if (len == 0 || n == 0) return mrb_ary_new(mrb); if (n < 0) mrb_raise(mrb, E_ARGUMENT_ERROR, "negative array shift"); if (n > len) n = len; - val = mrb_ary_new_from_values(mrb, n, ARY_PTR(a)); + mrb_value val = mrb_ary_new_from_values(mrb, n, ARY_PTR(a)); if (ARY_SHARED_P(a)) { L_SHIFT: a->as.heap.ptr+=n; @@ -650,7 +971,7 @@ mrb_ary_shift_m(mrb_state *mrb, mrb_value self) while (size--) { *ptr = *(ptr+n); - ++ptr; + ptr++; } ARY_SET_LEN(a, len-n); } @@ -661,6 +982,19 @@ mrb_ary_shift_m(mrb_state *mrb, mrb_value self) item = 0 self.unshift item p self #=> [0, 1, 2, 3] */ +/** + * Prepends an element to the beginning of an array. + * + * This function adds `item` to the front of the `self` array, + * shifting all existing elements up by one index. + * The array capacity may be expanded if necessary. + * This function modifies the array in place. + * + * @param mrb The mruby state. + * @param self The array (mrb_value) to unshift the element onto. + * @param item The mrb_value to prepend to the array. + * @return The modified array (the same mrb_value as `self`). + */ MRB_API mrb_value mrb_ary_unshift(mrb_state *mrb, mrb_value self, mrb_value item) { @@ -693,7 +1027,7 @@ mrb_ary_unshift(mrb_state *mrb, mrb_value self, mrb_value item) * call-seq: * array.unshift(*objects) -> self * - * Prepends the given +objects+ to +self+: + * Prepends the given `objects` to `self`: * * a = [:foo, 'bar', 2] * a.unshift(:bam, :bat) # => [:bam, :bat, :foo, "bar", 2] @@ -707,18 +1041,18 @@ static mrb_value mrb_ary_unshift_m(mrb_state *mrb, mrb_value self) { struct RArray *a = mrb_ary_ptr(self); - const mrb_value *vals; mrb_value *ptr; - mrb_int alen, len; - mrb_get_args(mrb, "*!", &vals, &alen); + mrb_int alen = mrb_get_argc(mrb); + if (alen == 0) { ary_modify_check(mrb, a); return self; } - len = ARY_LEN(a); + const mrb_value *vals = mrb_get_argv(mrb); + mrb_int len = ARY_LEN(a); if (alen > ARY_MAX_SIZE - len) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "array size too big"); + ary_too_big(mrb); } if (ARY_SHARED_P(a) && a->as.heap.aux.shared->refcnt == 1 /* shared only referenced from this array */ @@ -745,6 +1079,22 @@ mrb_ary_unshift_m(mrb_state *mrb, mrb_value self) return self; } +/** + * Sets the element at a given index in an array. + * + * If `n` is within the current bounds of the array, the element at that index + * is replaced with `val`. + * If `n` is beyond the current bounds, the array is expanded to accommodate + * the new element, and any intermediate elements are filled with `nil`. + * If `n` is negative, it counts from the end of the array. + * An IndexError is raised if a negative index points past the beginning of the array. + * This function modifies the array in place. + * + * @param mrb The mruby state. + * @param ary The array (mrb_value) to modify. + * @param n The index at which to set the element. + * @param val The mrb_value to set at the specified index. + */ MRB_API void mrb_ary_set(mrb_state *mrb, mrb_value ary, mrb_int n, mrb_value val) { @@ -773,12 +1123,39 @@ mrb_ary_set(mrb_state *mrb, mrb_value ary, mrb_int n, mrb_value val) mrb_field_write_barrier_value(mrb, (struct RBasic*)a, val); } +/* Creates a duplicate of an array */ static struct RArray* ary_dup(mrb_state *mrb, struct RArray *a) { - return ary_new_from_values(mrb, ARY_LEN(a), ARY_PTR(a)); + struct RArray *dup = ary_new_capa(mrb, 0); + ary_replace(mrb, dup, a); + return dup; } +MRB_API mrb_value +mrb_ary_dup(mrb_state *mrb, mrb_value ary) +{ + return mrb_obj_value(ary_dup(mrb, mrb_ary_ptr(ary))); +} + +/** + * Replaces a portion of an array with elements from another array or a single value. + * + * Removes `len` elements from `ary` starting at `head` index, and inserts + * the elements from `rpl` (if `rpl` is an array) or `rpl` itself (if it's not an array) + * at that position. + * If `head` is negative, it counts from the end of the array. + * If `len` is negative, an IndexError is raised. + * If `rpl` is `mrb_undef_p()`, then the elements are removed without replacement. + * This function modifies the `ary` array in place. + * + * @param mrb The mruby state. + * @param ary The array (mrb_value) to modify. + * @param head The starting index for the splice operation. + * @param len The number of elements to remove. + * @param rpl The mrb_value to insert (can be an array or a single value, or mrb_undef_p()). + * @return The modified array (the same mrb_value as `ary`). + */ MRB_API mrb_value mrb_ary_splice(mrb_state *mrb, mrb_value ary, mrb_int head, mrb_int len, mrb_value rpl) { @@ -786,7 +1163,6 @@ mrb_ary_splice(mrb_state *mrb, mrb_value ary, mrb_int head, mrb_int len, mrb_val mrb_int alen = ARY_LEN(a); const mrb_value *argv; mrb_int argc; - mrb_int tail; ary_modify(mrb, a); @@ -802,7 +1178,8 @@ mrb_ary_splice(mrb_state *mrb, mrb_value ary, mrb_int head, mrb_int len, mrb_val out_of_range: mrb_raisef(mrb, E_INDEX_ERROR, "index %i is out of array", head); } - tail = head + len; + + mrb_int tail = head + len; if (alen < len || alen < tail) { len = alen - head; tail = head + len; @@ -820,6 +1197,14 @@ mrb_ary_splice(mrb_state *mrb, mrb_value ary, mrb_int head, mrb_int len, mrb_val } r = ary_dup(mrb, a); argv = ARY_PTR(r); + /* ary_dup -> ary_replace converts `a` to shared as a + copy-on-write optimization when len > ARY_REPLACE_SHARED_MIN. + Subsequent ARY_CAPA(a) reads would land on aux.shared's + pointer bits instead of the actual capacity, so the + expand-capa check below silently mis-sizes and value_move + walks past the buffer. Re-modify here to unshare before + mutating `a` in place. */ + ary_modify(mrb, a); } } else if (mrb_undef_p(rpl)) { @@ -843,13 +1228,11 @@ mrb_ary_splice(mrb_state *mrb, mrb_value ary, mrb_int head, mrb_int len, mrb_val ARY_SET_LEN(a, len); } else { - mrb_int newlen; - if (alen - len > ARY_MAX_SIZE - argc) { head = alen + argc - len; goto out_of_range; } - newlen = alen + argc - len; + mrb_int newlen = alen + argc - len; if (newlen > ARY_CAPA(a)) { ary_expand_capa(mrb, a, newlen); } @@ -877,6 +1260,7 @@ mrb_ary_decref(mrb_state *mrb, mrb_shared_array *shared) } } +/* Creates a subsequence array, using shared storage when appropriate */ static mrb_value ary_subseq(mrb_state *mrb, struct RArray *a, mrb_int beg, mrb_int len) { @@ -896,6 +1280,19 @@ ary_subseq(mrb_state *mrb, struct RArray *a, mrb_int beg, mrb_int len) return mrb_obj_value(b); } +/** + * Creates a new array that is a subsequence of an existing array. + * + * The new array contains `len` elements, starting from index `beg` of the + * original `ary`. + * This function attempts to create a shared array if appropriate for efficiency. + * + * @param mrb The mruby state. + * @param ary The original array (mrb_value). + * @param beg The starting index of the subsequence. + * @param len The length of the subsequence. + * @return A new mrb_value representing the subsequence array. + */ mrb_value mrb_ary_subseq(mrb_state *mrb, mrb_value ary, mrb_int beg, mrb_int len) { @@ -903,6 +1300,7 @@ mrb_ary_subseq(mrb_state *mrb, mrb_value ary, mrb_int beg, mrb_int len) return ary_subseq(mrb, a, beg, len); } +/* Converts various types to array index integer */ static mrb_int aget_index(mrb_state *mrb, mrb_value index) { @@ -932,16 +1330,16 @@ aget_index(mrb_state *mrb, mrb_value index) * ary.slice(start, length) -> new_ary or nil * ary.slice(range) -> new_ary or nil * - * Element Reference --- Returns the element at +index+, or returns a - * subarray starting at the +start+ index and continuing for +length+ - * elements, or returns a subarray specified by +range+ of indices. + * Element Reference --- Returns the element at `index`, or returns a + * subarray starting at the `start` index and continuing for `length` + * elements, or returns a subarray specified by `range` of indices. * * Negative indices count backward from the end of the array (-1 is the last - * element). For +start+ and +range+ cases the starting index is just before + * element). For `start` and `range` cases the starting index is just before * an element. Additionally, an empty array is returned when the starting * index for an element range is at the end of the array. * - * Returns +nil+ if the index (or starting index) are out of range. + * Returns `nil` if the index (or starting index) are out of range. * * a = [ "a", "b", "c", "d", "e" ] * a[1] => "b" @@ -954,8 +1352,7 @@ static mrb_value mrb_ary_aget(mrb_state *mrb, mrb_value self) { struct RArray *a = mrb_ary_ptr(self); - mrb_int i; - mrb_int len, alen; + mrb_int i, len; mrb_value index; if (mrb_get_argc(mrb) == 1) { @@ -978,7 +1375,7 @@ mrb_ary_aget(mrb_state *mrb, mrb_value self) mrb_get_args(mrb, "oi", &index, &len); i = aget_index(mrb, index); - alen = ARY_LEN(a); + mrb_int alen = ARY_LEN(a); if (i < 0) i += alen; if (i < 0 || alen < i) return mrb_nil_value(); if (len < 0) return mrb_nil_value(); @@ -994,16 +1391,16 @@ mrb_ary_aget(mrb_state *mrb, mrb_value self) * ary[start, length] = obj or other_ary or nil -> obj or other_ary or nil * ary[range] = obj or other_ary or nil -> obj or other_ary or nil * - * Element Assignment --- Sets the element at +index+, or replaces a subarray - * from the +start+ index for +length+ elements, or replaces a subarray - * specified by the +range+ of indices. + * Element Assignment --- Sets the element at `index`, or replaces a subarray + * from the `start` index for `length` elements, or replaces a subarray + * specified by the `range` of indices. * * If indices are greater than the current capacity of the array, the array - * grows automatically. Elements are inserted into the array at +start+ if - * +length+ is zero. + * grows automatically. Elements are inserted into the array at `start` if + * `length` is zero. * * Negative indices will count backward from the end of the array. For - * +start+ and +range+ cases the starting index is just before an element. + * `start` and `range` cases the starting index is just before an element. * * An IndexError is raised if a negative index points past the beginning of * the array. @@ -1027,10 +1424,9 @@ static mrb_value mrb_ary_aset(mrb_state *mrb, mrb_value self) { mrb_value v1, v2, v3; - mrb_int i, len; - ary_modify(mrb, mrb_ary_ptr(self)); if (mrb_get_argc(mrb) == 2) { + mrb_int i, len; const mrb_value *vs = mrb_get_argv(mrb); v1 = vs[0]; v2 = vs[1]; @@ -1059,25 +1455,21 @@ mrb_value mrb_ary_delete_at(mrb_state *mrb, mrb_value self) { struct RArray *a = mrb_ary_ptr(self); - mrb_int index; - mrb_value val; - mrb_value *ptr; - mrb_int len, alen; - mrb_get_args(mrb, "i", &index); - alen = ARY_LEN(a); + mrb_int index = mrb_as_int(mrb, mrb_get_arg1(mrb)); + mrb_int alen = ARY_LEN(a); if (index < 0) index += alen; if (index < 0 || alen <= index) return mrb_nil_value(); ary_modify(mrb, a); - ptr = ARY_PTR(a); - val = ptr[index]; + mrb_value *ptr = ARY_PTR(a); + mrb_value val = ptr[index]; ptr += index; - len = alen - index; + mrb_int len = alen - index; while (--len) { *ptr = *(ptr+1); - ++ptr; + ptr++; } ARY_SET_LEN(a, alen-1); @@ -1086,21 +1478,43 @@ mrb_ary_delete_at(mrb_state *mrb, mrb_value self) return val; } +/* + * call-seq: + * array.first -> object or nil + * array.first(n) -> new_array + * + * Returns elements from the beginning of `self`. + * + * When no argument is given, returns the first element: + * + * a = [:foo, 'bar', 2] + * a.first # => :foo + * a # => [:foo, "bar", 2] + * + * If `self` is empty, returns `nil`. + * + * When non-negative Integer argument `n` is given, + * returns the first `n` elements in a new Array: + * + * a = [:foo, 'bar', 2] + * a.first(2) # => [:foo, "bar"] + */ static mrb_value mrb_ary_first(mrb_state *mrb, mrb_value self) { struct RArray *a = mrb_ary_ptr(self); - mrb_int size, alen; + mrb_int size; if (mrb_get_argc(mrb) == 0) { - return (ARY_LEN(a) > 0)? ARY_PTR(a)[0]: mrb_nil_value(); + if (ARY_LEN(a) > 0) return ARY_PTR(a)[0]; + return mrb_nil_value(); } mrb_get_args(mrb, "|i", &size); if (size < 0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "negative array size"); } - alen = ARY_LEN(a); + mrb_int alen = ARY_LEN(a); if (size > alen) size = alen; if (ARY_SHARED_P(a)) { return ary_subseq(mrb, a, 0, size); @@ -1108,18 +1522,39 @@ mrb_ary_first(mrb_state *mrb, mrb_value self) return mrb_ary_new_from_values(mrb, size, ARY_PTR(a)); } +/* + * call-seq: + * array.last -> object or nil + * array.last(n) -> new_array + * + * Returns elements from the end of `self`. + * + * When no argument is given, returns the last element: + * + * a = [:foo, 'bar', 2] + * a.last # => 2 + * a # => [:foo, "bar", 2] + * + * If `self` is empty, returns `nil`. + * + * When non-negative Integer argument `n` is given, + * returns the last `n` elements in a new Array: + * + * a = [:foo, 'bar', 2] + * a.last(2) # => ["bar", 2] + */ static mrb_value mrb_ary_last(mrb_state *mrb, mrb_value self) { struct RArray *a = mrb_ary_ptr(self); - mrb_int n, size, alen; + mrb_int alen = ARY_LEN(a); - n = mrb_get_args(mrb, "|i", &size); - alen = ARY_LEN(a); - if (n == 0) { - return (alen > 0) ? ARY_PTR(a)[alen - 1]: mrb_nil_value(); + if (mrb_get_argc(mrb) == 0) { + if (alen > 0) return ARY_PTR(a)[alen - 1]; + return mrb_nil_value(); } + mrb_int size = mrb_integer(mrb_get_arg1(mrb)); if (size < 0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "negative array size"); } @@ -1130,41 +1565,107 @@ mrb_ary_last(mrb_state *mrb, mrb_value self) return mrb_ary_new_from_values(mrb, size, ARY_PTR(a) + alen - size); } +/* + * call-seq: + * ary.index(val) -> int or nil + * ary.index {|item| block } -> int or nil + * array.index -> enumerator + * + * Returns the _index_ of the first object in `ary` such that the object is + * `==` to `obj`. + * + * If a block is given instead of an argument, returns the _index_ of the + * first object for which the block returns `true`. Returns `nil` if no + * match is found. + * + * ISO 15.2.12.5.14 + */ static mrb_value mrb_ary_index_m(mrb_state *mrb, mrb_value self) { - mrb_value obj = mrb_get_arg1(mrb); - mrb_int i; + mrb_value obj, blk; - for (i = 0; i < RARRAY_LEN(self); i++) { - if (mrb_equal(mrb, RARRAY_PTR(self)[i], obj)) { - return mrb_int_value(mrb, i); + if (mrb_get_args(mrb, "|o&", &obj, &blk) == 0 && mrb_nil_p(blk)) { + return mrb_funcall_argv1(mrb, self, MRB_SYM(to_enum), mrb_symbol_value(MRB_SYM(index))); + } + + if (mrb_nil_p(blk)) { + for (mrb_int i = 0; i < RARRAY_LEN(self); i++) { + if (mrb_equal(mrb, RARRAY_PTR(self)[i], obj)) { + return mrb_int_value(mrb, i); + } + } + } + else { + for (mrb_int i = 0; i < RARRAY_LEN(self); i++) { + mrb_value eq = mrb_yield(mrb, blk, RARRAY_PTR(self)[i]); + if (mrb_test(eq)) { + return mrb_int_value(mrb, i); + } } } return mrb_nil_value(); } +/* + * call-seq: + * ary.rindex(val) -> int or nil + * ary.rindex {|item| block } -> int or nil + * array.rindex -> enumerator + * + * Returns the _index_ of the first object in `ary` such that the object is + * `==` to `obj`. + * + * If a block is given instead of an argument, returns the _index_ of the + * first object for which the block returns `true`. Returns `nil` if no + * match is found. + * + * ISO 15.2.12.5.26 + */ static mrb_value mrb_ary_rindex_m(mrb_state *mrb, mrb_value self) { - mrb_value obj = mrb_get_arg1(mrb); - mrb_int i, len; + mrb_value obj, blk; + + if (mrb_get_args(mrb, "|o&", &obj, &blk) == 0 && mrb_nil_p(blk)) { + return mrb_funcall_argv1(mrb, self, MRB_SYM(to_enum), mrb_symbol_value(MRB_SYM(rindex))); + } - for (i = RARRAY_LEN(self) - 1; i >= 0; i--) { - if (mrb_equal(mrb, RARRAY_PTR(self)[i], obj)) { + for (mrb_int i = RARRAY_LEN(self) - 1; i >= 0; i--) { + if (mrb_nil_p(blk)) { + if (mrb_equal(mrb, RARRAY_PTR(self)[i], obj)) { return mrb_int_value(mrb, i); + } + } + else { + mrb_value eq = mrb_yield(mrb, blk, RARRAY_PTR(self)[i]); + if (mrb_test(eq)) return mrb_int_value(mrb, i); } - if (i > (len = RARRAY_LEN(self))) { + mrb_int len = RARRAY_LEN(self); + if (i > len) { i = len; } } return mrb_nil_value(); } +/** + * Creates a new array from a given value, performing a "splat" operation. + * + * If `v` is already an array, a duplicate of `v` is returned. + * If `v` responds to `to_a`, it is called, and if the result is an array, + * a duplicate of that result is returned. If `to_a` returns `nil` or something + * other than an array, `v` itself is wrapped in a new, single-element array. + * Otherwise (if `v` is not an array and does not respond to `to_a`), + * `v` itself is wrapped in a new, single-element array. + * + * @param mrb The mruby state. + * @param v The mrb_value to convert into an array. + * @return A new mrb_value representing the "splatted" array. + */ MRB_API mrb_value mrb_ary_splat(mrb_state *mrb, mrb_value v) { - mrb_value ary; struct RArray *a; if (mrb_array_p(v)) { @@ -1176,7 +1677,7 @@ mrb_ary_splat(mrb_state *mrb, mrb_value v) return mrb_ary_new_from_values(mrb, 1, &v); } - ary = mrb_funcall_id(mrb, v, MRB_SYM(to_a), 0); + mrb_value ary = mrb_funcall_argv(mrb, v, MRB_SYM(to_a), 0, NULL); if (mrb_nil_p(ary)) { return mrb_ary_new_from_values(mrb, 1, &v); } @@ -1186,6 +1687,16 @@ mrb_ary_splat(mrb_state *mrb, mrb_value v) return mrb_obj_value(a); } +/* + * call-seq: + * array.size -> integer + * array.length -> integer + * + * Returns the count of elements in `self`: + * + * [0, 1, 2].size # => 3 + * [].size # => 0 + */ static mrb_value mrb_ary_size(mrb_state *mrb, mrb_value self) { @@ -1194,6 +1705,15 @@ mrb_ary_size(mrb_state *mrb, mrb_value self) return mrb_int_value(mrb, ARY_LEN(a)); } +/** + * Removes all elements from an array, making it empty. + * + * This function modifies the array in place. + * + * @param mrb The mruby state. + * @param self The array (mrb_value) to clear. + * @return The cleared (now empty) array (the same mrb_value as `self`). + */ MRB_API mrb_value mrb_ary_clear(mrb_state *mrb, mrb_value self) { @@ -1218,12 +1738,16 @@ mrb_ary_clear(mrb_state *mrb, mrb_value self) return self; } -static mrb_value -mrb_ary_clear_m(mrb_state *mrb, mrb_value self) -{ - return mrb_ary_clear(mrb, self); -} - +/* + * call-seq: + * array.empty? -> true or false + * + * Returns `true` if the count of elements in `self` is zero, + * `false` otherwise: + * + * [].empty? # => true + * [0].empty? # => false + */ static mrb_value mrb_ary_empty_p(mrb_state *mrb, mrb_value self) { @@ -1232,6 +1756,19 @@ mrb_ary_empty_p(mrb_state *mrb, mrb_value self) return mrb_bool_value(ARY_LEN(a) == 0); } +/** + * Retrieves an element from an array at a specific index. + * This is a direct (unsafe) equivalent of `RARRAY_PTR(ary)[n]`. + * + * If `n` is negative, it counts from the end of the array. + * Returns `nil` if the index is out of bounds. + * This function does not perform a bounds check before accessing the element if the index is positive. + * Prefer using `mrb_ary_ref` for safe access or ensure `n` is within bounds. + * + * @param ary The array (mrb_value) from which to retrieve the element. + * @param n The index of the element to retrieve. + * @return The mrb_value at the specified index, or `nil` if out of bounds. + */ MRB_API mrb_value mrb_ary_entry(mrb_value ary, mrb_int n) { @@ -1248,11 +1785,8 @@ mrb_ary_entry(mrb_value ary, mrb_int n) static mrb_value join_ary(mrb_state *mrb, mrb_value ary, mrb_value sep, mrb_value list) { - mrb_int i; - mrb_value result, val, tmp; - /* check recursive */ - for (i=0; i 0 && !mrb_nil_p(sep)) { mrb_str_cat_str(mrb, result, sep); } - val = RARRAY_PTR(ary)[i]; + mrb_value val = RARRAY_PTR(ary)[i]; + switch (mrb_type(val)) { case MRB_TT_ARRAY: ary_join: @@ -1281,7 +1816,7 @@ join_ary(mrb_state *mrb, mrb_value ary, mrb_value sep, mrb_value list) default: if (!mrb_immediate_p(val)) { - tmp = mrb_check_string_type(mrb, val); + mrb_value tmp = mrb_check_string_type(mrb, val); if (!mrb_nil_p(tmp)) { val = tmp; goto str_join; @@ -1302,6 +1837,20 @@ join_ary(mrb_state *mrb, mrb_value ary, mrb_value sep, mrb_value list) return result; } +/** + * Joins the elements of an array into a string, separated by a given separator. + * + * Each element of `ary` is converted to a string. These strings are then + * concatenated, with the string representation of `sep` inserted between + * adjacent elements. + * If `sep` is `nil`, no separator is used. + * This function handles recursive array joins by raising an E_ARGUMENT_ERROR. + * + * @param mrb The mruby state. + * @param ary The array (mrb_value) whose elements are to be joined. + * @param sep The separator (mrb_value) to use between elements. Can be `nil`. + * @return A new mrb_value string representing the joined array elements. + */ MRB_API mrb_value mrb_ary_join(mrb_state *mrb, mrb_value ary, mrb_value sep) { @@ -1316,7 +1865,7 @@ mrb_ary_join(mrb_state *mrb, mrb_value ary, mrb_value sep) * ary.join(sep="") -> str * * Returns a string created by converting each element of the array to - * a string, separated by sep. + * a string, separated by *sep*. * * [ "a", "b", "c" ].join #=> "abc" * [ "a", "b", "c" ].join("-") #=> "a-b-c" @@ -1331,33 +1880,138 @@ mrb_ary_join_m(mrb_state *mrb, mrb_value ary) return mrb_ary_join(mrb, ary, sep); } +/* + * call-seq: + * ary.to_s -> string + * ary.inspect -> string + * + * Return the contents of this array as a string. + */ +static mrb_value +mrb_ary_to_s(mrb_state *mrb, mrb_value self) +{ + mrb->c->ci->mid = MRB_SYM(inspect); + mrb_value ret = mrb_str_new_lit(mrb, "["); + int ai = mrb_gc_arena_save(mrb); + if (MRB_RECURSIVE_UNARY_P(mrb, MRB_SYM(inspect), self)) { + mrb_str_cat_lit(mrb, ret, "...]"); + return ret; + } + for (mrb_int i=0; i0) mrb_str_cat_lit(mrb, ret, ", "); + mrb_str_cat_str(mrb, ret, mrb_inspect(mrb, RARRAY_PTR(self)[i])); + mrb_gc_arena_restore(mrb, ai); + } + mrb_str_cat_lit(mrb, ret, "]"); + + return ret; +} + +/* check array equality: 1=equal,0=not_equal,-1=need_elements_check */ +static mrb_int +ary_eq(mrb_state *mrb, mrb_value ary1, mrb_value ary2) +{ + if (mrb_obj_equal(mrb, ary1, ary2)) return 1; + if (!mrb_array_p(ary2)) return 0; + if (RARRAY_LEN(ary1) != RARRAY_LEN(ary2)) return 0; + + return -1; +} + +/* + * call-seq: + * array == other -> true or false + * + * Equality---Two arrays are equal if they contain the same number + * of elements and if each element is equal to (according to + * Object.==) the corresponding element in the other array. + * + */ static mrb_value mrb_ary_eq(mrb_state *mrb, mrb_value ary1) { mrb_value ary2 = mrb_get_arg1(mrb); + mrb_int n = ary_eq(mrb, ary1, ary2); + + if (n == 1) return mrb_true_value(); + if (n == 0) return mrb_false_value(); - mrb->c->ci->mid = 0; - if (mrb_obj_equal(mrb, ary1, ary2)) return mrb_true_value(); - if (!mrb_array_p(ary2)) { + /* Check for recursion */ + if (MRB_RECURSIVE_BINARY_FUNC_P(mrb, MRB_OPSYM(eq), ary1, ary2)) { return mrb_false_value(); } - if (RARRAY_LEN(ary1) != RARRAY_LEN(ary2)) return mrb_false_value(); - return ary2; + int ai = mrb_gc_arena_save(mrb); + for (mrb_int i=0; i true or false + * + * Returns `true` if `self` and _other_ are the same object, + * or are both arrays with the same content. + * + */ +static mrb_value +mrb_ary_eql(mrb_state *mrb, mrb_value ary1) +{ + mrb_value ary2 = mrb_get_arg1(mrb); + mrb_int n = ary_eq(mrb, ary1, ary2); + + if (n == 1) return mrb_true_value(); + if (n == 0) return mrb_false_value(); + + /* Check for recursion */ + if (MRB_RECURSIVE_BINARY_FUNC_P(mrb, MRB_SYM_Q(eql), ary1, ary2)) { + return mrb_false_value(); + } + + int ai = mrb_gc_arena_save(mrb); + for (mrb_int i=0; i other_array -> -1, 0, or 1 + * + * Comparison---Returns an integer (-1, 0, or +1) + * if this array is less than, equal to, or greater than *other_ary*. + * Each object in each array is compared (using <=>). If any value isn't + * equal, then that inequality is the return value. If all the + * values found are equal, then the return is based on a + * comparison of the array lengths. Thus, two arrays are + * "equal" according to `Array#<=>` if and only if they have + * the same length and the value of each element is equal to the + * value of the corresponding element in the other array. + */ static mrb_value mrb_ary_cmp(mrb_state *mrb, mrb_value ary1) { mrb_value ary2 = mrb_get_arg1(mrb); - mrb->c->ci->mid = 0; if (mrb_obj_equal(mrb, ary1, ary2)) return mrb_fixnum_value(0); - if (!mrb_array_p(ary2)) { - return mrb_nil_value(); - } + if (!mrb_array_p(ary2)) return mrb_nil_value(); - return ary2; + for (mrb_int i=0; i 0) return mrb_fixnum_value(1); + else return mrb_fixnum_value(-1); } /* internal method to convert multi-value to single value */ @@ -1374,44 +2028,509 @@ mrb_ary_svalue(mrb_state *mrb, mrb_value ary) } } +/* + * call-seq: + * array.delete(obj) -> deleted_object + * array.delete(obj) {|nosuch| ... } -> deleted_object or block_return + * + * Removes zero or more elements from self; returns self. + * + * When no block is given, removes from self each element e such + * that e == obj; returns the last deleted element + * + * Returns nil if no elements removed. + * + * When a block is given, removes from self each element e such + * that e == obj. If any such elements are found, ignores the block and + * returns the last. Otherwise, returns the block's return value. + */ +static mrb_value +mrb_ary_delete(mrb_state *mrb, mrb_value self) +{ + mrb_value obj, blk; + + mrb_get_args(mrb, "o&", &obj, &blk); + + struct RArray *ary = RARRAY(self); + mrb_value ret = obj; + int ai = mrb_gc_arena_save(mrb); + mrb_int i = 0; + mrb_int j = 0; + for (; i < ARY_LEN(ary); i++) { + mrb_value elem = ARY_PTR(ary)[i]; + + if (mrb_equal(mrb, elem, obj)) { + mrb_gc_arena_restore(mrb, ai); + mrb_gc_protect(mrb, elem); + ret = elem; + continue; + } + + if (i != j) { + if (j >= ARY_LEN(ary)) { + // Since breaking here will further change the array length, + // there is no choice but to raise an exception or return. + mrb_raise(mrb, E_RUNTIME_ERROR, "array modified during delete"); + } + ary_modify(mrb, ary); + ARY_PTR(ary)[j] = elem; + } + + j++; + } + + if (i == j) { + if (mrb_nil_p(blk)) return mrb_nil_value(); + return mrb_yield(mrb, blk, obj); + } + + ARY_SET_LEN(ary, j); + return ret; +} + + +#define SMALL_ARRAY_SORT_THRESHOLD 16 + +/* Check if all elements in the array are integers (fast path candidate) */ +static mrb_bool +ary_all_fixnum_p(const mrb_value *a, mrb_int n) +{ + for (mrb_int i = 0; i < n; i++) { + if (!mrb_integer_p(a[i])) return FALSE; + } + return TRUE; +} + +/* Integer-specialized heapify: no sort_cmp overhead, direct comparison */ +static void +heapify_fixnum(mrb_value *a, mrb_int index, mrb_int size) +{ + mrb_int val = mrb_integer(a[index]); + + while (1) { + mrb_int child = 2 * index + 1; + if (child >= size) break; + if (child + 1 < size && mrb_integer(a[child + 1]) > mrb_integer(a[child])) { + child++; + } + if (mrb_integer(a[child]) <= val) break; + a[index] = a[child]; + index = child; + } + SET_FIXNUM_VALUE(a[index], val); +} + +/* Integer-specialized Floyd's bottom-up heap deletion */ +static void +heap_delete_root_fixnum(mrb_value *a, mrb_int size) +{ + mrb_int last = mrb_integer(a[0]); + + mrb_int hole = 0; + mrb_int child = 1; + while (child + 1 < size) { + if (mrb_integer(a[child + 1]) > mrb_integer(a[child])) { + child++; + } + a[hole] = a[child]; + hole = child; + child = 2 * hole + 1; + } + if (child < size) { + a[hole] = a[child]; + hole = child; + } + + while (hole > 0) { + mrb_int parent = (hole - 1) / 2; + if (mrb_integer(a[parent]) >= last) break; + a[hole] = a[parent]; + hole = parent; + } + SET_FIXNUM_VALUE(a[hole], last); +} + +/* Integer-specialized insertion sort */ +static void +insertion_sort_fixnum(mrb_value *a, mrb_int size) +{ + for (mrb_int i = 1; i < size; i++) { + mrb_int key = mrb_integer(a[i]); + mrb_int j = i - 1; + while (j >= 0 && mrb_integer(a[j]) > key) { + a[j + 1] = a[j]; + j--; + } + SET_FIXNUM_VALUE(a[j + 1], key); + } +} + +/* Check if all elements are plain String (not subclass) */ +static mrb_bool +ary_all_string_p(mrb_state *mrb, const mrb_value *a, mrb_int n) +{ + for (mrb_int i = 0; i < n; i++) { + if (!mrb_string_p(a[i])) return FALSE; + if (mrb_obj_ptr(a[i])->c != mrb->string_class) return FALSE; + } + return TRUE; +} + +/* String-specialized heapify using mrb_str_cmp directly */ +static void +heapify_str(mrb_state *mrb, mrb_value *a, mrb_int index, mrb_int size) +{ + mrb_value val = a[index]; + + while (1) { + mrb_int child = 2 * index + 1; + if (child >= size) break; + if (child + 1 < size && mrb_str_cmp(mrb, a[child + 1], a[child]) > 0) { + child++; + } + if (mrb_str_cmp(mrb, a[child], val) <= 0) break; + a[index] = a[child]; + index = child; + } + a[index] = val; +} + +/* String-specialized Floyd's bottom-up heap deletion */ +static void +heap_delete_root_str(mrb_state *mrb, mrb_value *a, mrb_int size) +{ + mrb_value last = a[0]; + + mrb_int hole = 0; + mrb_int child = 1; + while (child + 1 < size) { + if (mrb_str_cmp(mrb, a[child + 1], a[child]) > 0) { + child++; + } + a[hole] = a[child]; + hole = child; + child = 2 * hole + 1; + } + if (child < size) { + a[hole] = a[child]; + hole = child; + } + + while (hole > 0) { + mrb_int parent = (hole - 1) / 2; + if (mrb_str_cmp(mrb, a[parent], last) >= 0) break; + a[hole] = a[parent]; + hole = parent; + } + a[hole] = last; +} + +/* String-specialized insertion sort */ +static void +insertion_sort_str(mrb_state *mrb, mrb_value *a, mrb_int size) +{ + for (mrb_int i = 1; i < size; i++) { + mrb_value key = a[i]; + mrb_int j = i - 1; + while (j >= 0 && mrb_str_cmp(mrb, a[j], key) > 0) { + a[j + 1] = a[j]; + j--; + } + a[j + 1] = key; + } +} + +static mrb_bool +sort_cmp(mrb_state *mrb, mrb_value ary, mrb_value a_val, mrb_value b_val, mrb_value blk) +{ + mrb_value *p = RARRAY_PTR(ary); + mrb_int n = RARRAY_LEN(ary); + + mrb_int cmp; + int ai = mrb_gc_arena_save(mrb); + + if (mrb_nil_p(blk)) { + enum mrb_vtype type_a = mrb_type(a_val); + enum mrb_vtype type_b = mrb_type(b_val); + + if (type_a == type_b) { + switch (type_a) { + case MRB_TT_FIXNUM: + cmp = (mrb_fixnum(a_val) > mrb_fixnum(b_val)) ? 1 : (mrb_fixnum(a_val) < mrb_fixnum(b_val)) ? -1 : 0; + break; +#ifndef MRB_NO_FLOAT + case MRB_TT_FLOAT: + cmp = (mrb_float(a_val) > mrb_float(b_val)) ? 1 : (mrb_float(a_val) < mrb_float(b_val)) ? -1 : 0; + break; +#endif + case MRB_TT_STRING: + cmp = mrb_str_cmp(mrb, a_val, b_val); + break; + default: + cmp = mrb_cmp(mrb, a_val, b_val); + break; + } + } + else { + cmp = mrb_cmp(mrb, a_val, b_val); + } + } + else { + mrb_value args[2] = {a_val, b_val}; + mrb_value c = mrb_yield_argv(mrb, blk, 2, args); + if (mrb_nil_p(c) || !mrb_fixnum_p(c)) { + cmp = -2; + } + else { + cmp = mrb_fixnum(c); + } + } + mrb_gc_arena_restore(mrb, ai); + if (cmp == -2) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "comparison failed"); + } + if (RARRAY_PTR(ary) != p || RARRAY_LEN(ary) != n) { + mrb_raise(mrb, E_RUNTIME_ERROR, "array modified during sort"); + } + return cmp > 0; +} + +/* Hole-style sift-down: save root, move larger children up, write once at end. + Reduces assignments from 3 per level (swap) to 1 per level (move). */ +static void +heapify(mrb_state *mrb, mrb_value ary, mrb_value *a, mrb_int index, mrb_int size, mrb_value blk) +{ + int ai = mrb_gc_arena_save(mrb); + mrb_value val = a[index]; /* save root to hole */ + mrb_gc_protect(mrb, val); + + while (1) { + mrb_int child = 2 * index + 1; + if (child >= size) break; + + /* pick the larger child */ + if (child + 1 < size && sort_cmp(mrb, ary, a[child + 1], a[child], blk)) { + child++; + } + /* if hole value >= larger child, done */ + if (!sort_cmp(mrb, ary, a[child], val, blk)) break; + + a[index] = a[child]; /* move child up */ + index = child; + } + a[index] = val; /* place saved value */ + mrb_gc_arena_restore(mrb, ai); +} + +/* Floyd's bottom-up heap deletion: sift the hole down to a leaf without + comparing against the removed root, then sift up from the leaf position. + This reduces comparisons from ~2 log n to ~log n per extraction, + because most elements end up near the bottom of the heap anyway. */ +static void +heap_delete_root(mrb_state *mrb, mrb_value ary, mrb_value *a, mrb_int size, mrb_value blk) +{ + int ai = mrb_gc_arena_save(mrb); + /* a[0] already holds the value to be re-inserted (set by caller) */ + mrb_value last = a[0]; + mrb_gc_protect(mrb, last); + + /* Phase 1: sift the hole down to a leaf (only child-child comparisons) */ + mrb_int hole = 0; + mrb_int child = 1; + while (child + 1 < size) { + /* pick the larger child - 1 comparison per level */ + if (sort_cmp(mrb, ary, a[child + 1], a[child], blk)) { + child++; + } + a[hole] = a[child]; + hole = child; + child = 2 * hole + 1; + } + /* handle single child at bottom */ + if (child < size) { + a[hole] = a[child]; + hole = child; + } + + /* Phase 2: sift up from hole to find correct position for last */ + while (hole > 0) { + mrb_int parent = (hole - 1) / 2; + if (!sort_cmp(mrb, ary, last, a[parent], blk)) break; + a[hole] = a[parent]; + hole = parent; + } + a[hole] = last; + mrb_gc_arena_restore(mrb, ai); +} + +static void +insertion_sort(mrb_state *mrb, mrb_value ary, mrb_value *a, mrb_int size, mrb_value blk) +{ + int ai = mrb_gc_arena_save(mrb); + for (mrb_int i = 1; i < size; i++) { + mrb_value key = a[i]; + mrb_int j = i - 1; + + /* Protect key from GC - it's temporarily out of the array during sort */ + mrb_gc_protect(mrb, key); + + /* Move elements that are greater than key to one position ahead */ + while (j >= 0 && sort_cmp(mrb, ary, a[j], key, blk)) { + a[j + 1] = a[j]; + j--; + } + a[j + 1] = key; + mrb_gc_arena_restore(mrb, ai); + } +} + +/* + * call-seq: + * array.sort! -> self + * array.sort! {|a, b| ... } -> self + * + * Sort all elements and replace `self` with these + * elements. + */ +static mrb_value +mrb_ary_sort_bang(mrb_state *mrb, mrb_value ary) +{ + mrb_value blk; + + mrb_int n = RARRAY_LEN(ary); + if (n < 2) return ary; + + ary_modify(mrb, mrb_ary_ptr(ary)); + mrb_get_args(mrb, "&", &blk); + + mrb_value *a = RARRAY_PTR(ary); + + /* Integer fast path: no block and all elements are integers */ + if (mrb_nil_p(blk) && ary_all_fixnum_p(a, n)) { + if (n <= SMALL_ARRAY_SORT_THRESHOLD) { + insertion_sort_fixnum(a, n); + } + else { + for (mrb_int i = n / 2 - 1; i >= 0; i--) { + heapify_fixnum(a, i, n); + } + for (mrb_int i = n - 1; i > 0; i--) { + mrb_value tmp = a[0]; + a[0] = a[i]; + a[i] = tmp; + heap_delete_root_fixnum(a, i); + } + } + return ary; + } + + /* String fast path: no block and all elements are plain String */ + if (mrb_nil_p(blk) && ary_all_string_p(mrb, a, n)) { + if (n <= SMALL_ARRAY_SORT_THRESHOLD) { + insertion_sort_str(mrb, a, n); + } + else { + for (mrb_int i = n / 2 - 1; i >= 0; i--) { + heapify_str(mrb, a, i, n); + } + for (mrb_int i = n - 1; i > 0; i--) { + mrb_value tmp = a[0]; + a[0] = a[i]; + a[i] = tmp; + heap_delete_root_str(mrb, a, i); + } + } + return ary; + } + + /* General path */ + if (n <= SMALL_ARRAY_SORT_THRESHOLD) { + /* Use insertion sort for small arrays */ + insertion_sort(mrb, ary, a, n, blk); + } + else { + /* Heap sort with Floyd's bottom-up deletion */ + /* Phase 1: build max-heap (standard sift-down, hole style) */ + for (mrb_int i = n / 2 - 1; i >= 0; i--) { + heapify(mrb, ary, a, i, n, blk); + } + /* Phase 2: extract max elements using Floyd's method */ + for (mrb_int i = n - 1; i > 0; i--) { + mrb_value max = a[0]; + a[0] = a[i]; /* temporary for GC safety */ + a[i] = max; /* max goes to final position */ + heap_delete_root(mrb, ary, a, i, blk); + } + } + return ary; +} + +/* + * call-seq: + * array.to_a -> self + * + * Returns self. If called on a subclass of Array, converts + * the receiver to an Array object. + */ +static mrb_value +mrb_ary_to_a(mrb_state *mrb, mrb_value self) +{ + if (mrb_obj_class(mrb, self) != mrb->array_class) { + /* Convert subclass to Array */ + return mrb_ary_dup(mrb, self); + } + return self; +} + +/* ---------------------------*/ +static const mrb_mt_entry array_rom_entries[] = { + MRB_MT_ENTRY(mrb_ary_plus, MRB_OPSYM(add), MRB_ARGS_REQ(1)), /* 15.2.12.5.1 */ + MRB_MT_ENTRY(mrb_ary_times, MRB_OPSYM(mul), MRB_ARGS_REQ(1)), /* 15.2.12.5.2 */ + MRB_MT_ENTRY(mrb_ary_push_m, MRB_OPSYM(lshift), MRB_ARGS_REQ(1)), /* 15.2.12.5.3 */ + MRB_MT_ENTRY(mrb_ary_aget, MRB_OPSYM(aref), MRB_ARGS_ARG(1,1)), /* 15.2.12.5.4 */ + MRB_MT_ENTRY(mrb_ary_aset, MRB_OPSYM(aset), MRB_ARGS_ARG(2,1)), /* 15.2.12.5.5 */ + MRB_MT_ENTRY(mrb_ary_clear, MRB_SYM(clear), MRB_ARGS_NONE()), /* 15.2.12.5.6 */ + MRB_MT_ENTRY(mrb_ary_cmp, MRB_OPSYM(cmp), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(mrb_ary_concat_m, MRB_SYM(concat), MRB_ARGS_REQ(1)), /* 15.2.12.5.8 */ + MRB_MT_ENTRY(mrb_ary_delete, MRB_SYM(delete), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(mrb_ary_delete_at, MRB_SYM(delete_at), MRB_ARGS_REQ(1)), /* 15.2.12.5.9 */ + MRB_MT_ENTRY(mrb_ary_empty_p, MRB_SYM_Q(empty), MRB_ARGS_NONE()), /* 15.2.12.5.12 */ + MRB_MT_ENTRY(mrb_ary_eq, MRB_OPSYM(eq), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(mrb_ary_eql, MRB_SYM_Q(eql), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(mrb_ary_first, MRB_SYM(first), MRB_ARGS_OPT(1)), /* 15.2.12.5.13 */ + MRB_MT_ENTRY(mrb_ary_index_m, MRB_SYM(index), MRB_ARGS_OPT(1)), /* 15.2.12.5.14 */ + MRB_MT_ENTRY(mrb_ary_init, MRB_SYM(initialize), MRB_ARGS_OPT(2) | MRB_MT_PRIVATE), /* 15.2.12.5.15 */ + MRB_MT_ENTRY(mrb_ary_replace_m, MRB_SYM(initialize_copy), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), /* 15.2.12.5.16 */ + MRB_MT_ENTRY(mrb_ary_join_m, MRB_SYM(join), MRB_ARGS_OPT(1)), /* 15.2.12.5.17 */ + MRB_MT_ENTRY(mrb_ary_last, MRB_SYM(last), MRB_ARGS_OPT(1)), /* 15.2.12.5.18 */ + MRB_MT_ENTRY(mrb_ary_size, MRB_SYM(length), MRB_ARGS_NONE()), /* 15.2.12.5.19 */ + MRB_MT_ENTRY(mrb_ary_pop, MRB_SYM(pop), MRB_ARGS_NONE()), /* 15.2.12.5.21 */ + MRB_MT_ENTRY(mrb_ary_push_m, MRB_SYM(push), MRB_ARGS_ANY()), /* 15.2.12.5.22 */ + MRB_MT_ENTRY(mrb_ary_replace_m, MRB_SYM(replace), MRB_ARGS_REQ(1)), /* 15.2.12.5.23 */ + MRB_MT_ENTRY(mrb_ary_reverse, MRB_SYM(reverse), MRB_ARGS_NONE()), /* 15.2.12.5.24 */ + MRB_MT_ENTRY(mrb_ary_reverse_bang, MRB_SYM_B(reverse), MRB_ARGS_NONE()), /* 15.2.12.5.25 */ + MRB_MT_ENTRY(mrb_ary_rindex_m, MRB_SYM(rindex), MRB_ARGS_OPT(1)), /* 15.2.12.5.26 */ + MRB_MT_ENTRY(mrb_ary_shift_m, MRB_SYM(shift), MRB_ARGS_OPT(1)), /* 15.2.12.5.27 */ + MRB_MT_ENTRY(mrb_ary_size, MRB_SYM(size), MRB_ARGS_NONE()), /* 15.2.12.5.28 */ + MRB_MT_ENTRY(mrb_ary_aget, MRB_SYM(slice), MRB_ARGS_ARG(1,1)), /* 15.2.12.5.29 */ + MRB_MT_ENTRY(mrb_ary_unshift_m, MRB_SYM(unshift), MRB_ARGS_ANY()), /* 15.2.12.5.30 */ + MRB_MT_ENTRY(mrb_ary_to_a, MRB_SYM(to_a), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_ary_to_a, MRB_SYM(entries), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_ary_to_s, MRB_SYM(to_s), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_ary_to_s, MRB_SYM(inspect), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_ary_sort_bang, MRB_SYM_B(sort), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_ary_svalue, MRB_SYM(__svalue), MRB_ARGS_NONE()), +}; + void mrb_init_array(mrb_state *mrb) { struct RClass *a; - mrb->array_class = a = mrb_define_class(mrb, "Array", mrb->object_class); /* 15.2.12 */ + mrb->array_class = a = mrb_define_class_id(mrb, MRB_SYM(Array), mrb->object_class); /* 15.2.12 */ MRB_SET_INSTANCE_TT(a, MRB_TT_ARRAY); - mrb_define_class_method(mrb, a, "[]", mrb_ary_s_create, MRB_ARGS_ANY()); /* 15.2.12.4.1 */ - - mrb_define_method(mrb, a, "+", mrb_ary_plus, MRB_ARGS_REQ(1)); /* 15.2.12.5.1 */ - mrb_define_method(mrb, a, "*", mrb_ary_times, MRB_ARGS_REQ(1)); /* 15.2.12.5.2 */ - mrb_define_method(mrb, a, "<<", mrb_ary_push_m, MRB_ARGS_REQ(1)); /* 15.2.12.5.3 */ - mrb_define_method(mrb, a, "[]", mrb_ary_aget, MRB_ARGS_ARG(1,1)); /* 15.2.12.5.4 */ - mrb_define_method(mrb, a, "[]=", mrb_ary_aset, MRB_ARGS_ARG(2,1)); /* 15.2.12.5.5 */ - mrb_define_method(mrb, a, "clear", mrb_ary_clear_m, MRB_ARGS_NONE()); /* 15.2.12.5.6 */ - mrb_define_method(mrb, a, "concat", mrb_ary_concat_m, MRB_ARGS_REQ(1)); /* 15.2.12.5.8 */ - mrb_define_method(mrb, a, "delete_at", mrb_ary_delete_at, MRB_ARGS_REQ(1)); /* 15.2.12.5.9 */ - mrb_define_method(mrb, a, "empty?", mrb_ary_empty_p, MRB_ARGS_NONE()); /* 15.2.12.5.12 */ - mrb_define_method(mrb, a, "first", mrb_ary_first, MRB_ARGS_OPT(1)); /* 15.2.12.5.13 */ - mrb_define_method(mrb, a, "index", mrb_ary_index_m, MRB_ARGS_REQ(1)); /* 15.2.12.5.14 */ - mrb_define_method(mrb, a, "initialize_copy", mrb_ary_replace_m, MRB_ARGS_REQ(1)); /* 15.2.12.5.16 */ - mrb_define_method(mrb, a, "join", mrb_ary_join_m, MRB_ARGS_OPT(1)); /* 15.2.12.5.17 */ - mrb_define_method(mrb, a, "last", mrb_ary_last, MRB_ARGS_OPT(1)); /* 15.2.12.5.18 */ - mrb_define_method(mrb, a, "length", mrb_ary_size, MRB_ARGS_NONE()); /* 15.2.12.5.19 */ - mrb_define_method(mrb, a, "pop", mrb_ary_pop, MRB_ARGS_NONE()); /* 15.2.12.5.21 */ - mrb_define_method(mrb, a, "push", mrb_ary_push_m, MRB_ARGS_ANY()); /* 15.2.12.5.22 */ - mrb_define_method(mrb, a, "replace", mrb_ary_replace_m, MRB_ARGS_REQ(1)); /* 15.2.12.5.23 */ - mrb_define_method(mrb, a, "reverse", mrb_ary_reverse, MRB_ARGS_NONE()); /* 15.2.12.5.24 */ - mrb_define_method(mrb, a, "reverse!", mrb_ary_reverse_bang, MRB_ARGS_NONE()); /* 15.2.12.5.25 */ - mrb_define_method(mrb, a, "rindex", mrb_ary_rindex_m, MRB_ARGS_REQ(1)); /* 15.2.12.5.26 */ - mrb_define_method(mrb, a, "shift", mrb_ary_shift_m, MRB_ARGS_OPT(1)); /* 15.2.12.5.27 */ - mrb_define_method(mrb, a, "size", mrb_ary_size, MRB_ARGS_NONE()); /* 15.2.12.5.28 */ - mrb_define_method(mrb, a, "slice", mrb_ary_aget, MRB_ARGS_ARG(1,1)); /* 15.2.12.5.29 */ - mrb_define_method(mrb, a, "unshift", mrb_ary_unshift_m, MRB_ARGS_ANY()); /* 15.2.12.5.30 */ - - mrb_define_method(mrb, a, "__ary_eq", mrb_ary_eq, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, a, "__ary_cmp", mrb_ary_cmp, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, a, "__ary_index", mrb_ary_index_m, MRB_ARGS_REQ(1)); /* kept for mruby-array-ext */ - mrb_define_method(mrb, a, "__svalue", mrb_ary_svalue, MRB_ARGS_NONE()); + mrb_define_class_method_id(mrb, a, MRB_OPSYM(aref), mrb_ary_s_create, MRB_ARGS_ANY()); /* 15.2.12.4.1 */ + + MRB_MT_INIT_ROM(mrb, a, array_rom_entries); } diff --git a/src/backtrace.c b/src/backtrace.c index 30b4969273..ce4d9b1400 100644 --- a/src/backtrace.c +++ b/src/backtrace.c @@ -15,186 +15,111 @@ #include #include #include -#include -struct backtrace_location { - int32_t lineno; - mrb_sym method_id; - const char *filename; -}; - -typedef void (*each_backtrace_func)(mrb_state*, const struct backtrace_location*, void*); - -static const mrb_data_type bt_type = { "Backtrace", mrb_free }; - -struct RObject *mrb_unpack_backtrace(mrb_state *mrb, struct RObject *backtrace); +#define MAX_IREP_REFCNT UINT16_MAX +#define UNKNOWN_LINENO -1 +#define UNKNOWN_LOCATION "(unknown):0" static void -each_backtrace(mrb_state *mrb, ptrdiff_t ciidx, each_backtrace_func func, void *data) +copy_backtrace(mrb_state *mrb, + const struct mrb_backtrace_location *loc, + struct mrb_backtrace_location *ptr, + size_t n) { - if (ciidx >= mrb->c->ciend - mrb->c->cibase) - ciidx = 10; /* ciidx is broken... */ + ptr[n] = *loc; + if (loc->irep) { + if (loc->irep->refcnt == MAX_IREP_REFCNT) { + ptr[n].irep = NULL; + } + else { + mrb_irep_incref(mrb, (mrb_irep*)loc->irep); + } + } +} + +/** + * Creates a packed backtrace from the current call stack + * + * @param mrb The mruby state + * @param ciidx The current callinfo index + * @param ptr Pointer to store the backtrace locations + * @return Number of backtrace entries + * @note This function handles both proc and non-proc cases, managing IREP references + * and building location entries for each stack frame. + */ +static size_t +pack_backtrace(mrb_state *mrb, ptrdiff_t ciidx, struct mrb_backtrace_location *ptr) +{ + size_t n = 0; for (ptrdiff_t i=ciidx; i >= 0; i--) { - struct backtrace_location loc; + struct mrb_backtrace_location loc; mrb_callinfo *ci; - const mrb_irep *irep = 0; const mrb_code *pc; - uint32_t idx; ci = &mrb->c->cibase[i]; + loc.method_id = ci->mid; - if (!ci->proc || MRB_PROC_CFUNC_P(ci->proc)) { - if (!ci->mid) continue; - loc.lineno = -1; - idx = 0; + if (ci->proc && !MRB_PROC_CFUNC_P(ci->proc)) { + mrb_assert(!MRB_PROC_ALIAS_P(ci->proc)); + loc.irep = ci->proc->body.irep; + if (!loc.irep) continue; + if (!loc.irep->debug_info) continue; + if (!ci->pc) continue; + pc = &ci->pc[-1]; + loc.idx = (uint32_t)(pc - loc.irep->iseq); } else { - irep = ci->proc->body.irep; - if (!irep) continue; - if (mrb->c->cibase[i].pc) { - pc = &mrb->c->cibase[i].pc[-1]; - } - else { - continue; - } - idx = (uint32_t)(pc - irep->iseq); - loc.lineno = mrb_debug_get_line(mrb, irep, idx); - } - loc.method_id = ci->mid; - if (loc.lineno == -1) { + if (!loc.method_id) continue; + loc.irep = NULL; for (ptrdiff_t j=i-1; j >= 0; j--) { ci = &mrb->c->cibase[j]; if (!ci->proc) continue; if (MRB_PROC_CFUNC_P(ci->proc)) continue; + mrb_assert(!MRB_PROC_ALIAS_P(ci->proc)); - irep = ci->proc->body.irep; + const mrb_irep *irep = ci->proc->body.irep; if (!irep) continue; - - if (mrb->c->cibase[j].pc) { - pc = &mrb->c->cibase[j].pc[-1]; - } - else { - continue; - } - - idx = (uint32_t)(pc - irep->iseq); - loc.lineno = mrb_debug_get_line(mrb, irep, idx); - if (loc.lineno > 0) break; + if (!irep->debug_info) continue; + if (!ci->pc) continue; + pc = &ci->pc[-1]; + loc.irep = irep; + loc.idx = (uint32_t)(pc - irep->iseq); + break; } } - - loc.filename = mrb_debug_get_filename(mrb, irep, idx); - if (!loc.filename) { - loc.filename = "(unknown)"; - } - - func(mrb, &loc, data); + copy_backtrace(mrb, &loc, ptr, n); + n++; } + return n; } -#ifndef MRB_NO_STDIO - -static void -print_backtrace(mrb_state *mrb, struct RObject *exc, struct RArray *backtrace) -{ - mrb_int i; - mrb_int n = ARY_LEN(backtrace); - mrb_value *loc, mesg; - - if (n != 0) { - if (n > 1) { - fprintf(stderr, "trace (most recent call last):\n"); - } - for (i=n-1,loc=&ARY_PTR(backtrace)[i]; i>0; i--,loc--) { - if (mrb_string_p(*loc)) { - fprintf(stderr, "\t[%d] %.*s\n", - (int)i, (int)RSTRING_LEN(*loc), RSTRING_PTR(*loc)); - } - } - if (mrb_string_p(*loc)) { - fprintf(stderr, "%.*s: ", (int)RSTRING_LEN(*loc), RSTRING_PTR(*loc)); - } - } - mesg = mrb_exc_inspect(mrb, mrb_obj_value(exc)); - fwrite(RSTRING_PTR(mesg), RSTRING_LEN(mesg), 1, stderr); - fputc('\n', stderr); -} - -/* mrb_print_backtrace - - function to retrieve backtrace information from the last exception. -*/ - -MRB_API void -mrb_print_backtrace(mrb_state *mrb) -{ - if (!mrb->exc) { - return; - } - - struct RObject *backtrace = ((struct RException*)mrb->exc)->backtrace; - if (!backtrace) return; - if (backtrace->tt != MRB_TT_ARRAY) backtrace = mrb_unpack_backtrace(mrb, backtrace); - print_backtrace(mrb, mrb->exc, (struct RArray*)backtrace); -} -#else - -MRB_API void -mrb_print_backtrace(mrb_state *mrb) +static struct RBasic* +packed_backtrace(mrb_state *mrb) { -} - -#endif + ptrdiff_t ciidx = mrb->c->ci - mrb->c->cibase; -static void -count_backtrace_i(mrb_state *mrb, - const struct backtrace_location *loc, - void *data) -{ - int *lenp = (int*)data; + if (ciidx >= mrb->c->ciend - mrb->c->cibase) + ciidx = mrb->c->ciend - mrb->c->cibase; /* ciidx is broken... */ - (*lenp)++; -} + ptrdiff_t len = ciidx + 1; -static void -pack_backtrace_i(mrb_state *mrb, - const struct backtrace_location *loc, - void *data) -{ - struct backtrace_location **pptr = (struct backtrace_location**)data; - struct backtrace_location *ptr = *pptr; + struct RBacktrace *backtrace = MRB_OBJ_ALLOC(mrb, MRB_TT_BACKTRACE, NULL); - *ptr = *loc; - *pptr = ptr+1; -} + void *ptr = mrb_malloc(mrb, len * sizeof(struct mrb_backtrace_location)); + backtrace->locations = (struct mrb_backtrace_location*)ptr; + backtrace->len = pack_backtrace(mrb, ciidx, backtrace->locations); -static struct RObject* -packed_backtrace(mrb_state *mrb) -{ - struct RData *backtrace; - ptrdiff_t ciidx = mrb->c->ci - mrb->c->cibase; - int len = 0; - int size; - void *ptr; - - each_backtrace(mrb, ciidx, count_backtrace_i, &len); - size = len * sizeof(struct backtrace_location); - backtrace = mrb_data_object_alloc(mrb, NULL, NULL, &bt_type); - ptr = mrb_malloc(mrb, size); - backtrace->data = ptr; - backtrace->flags = (uint32_t)len; - each_backtrace(mrb, ciidx, pack_backtrace_i, &ptr); - return (struct RObject*)backtrace; + return (struct RBasic*)backtrace; } static void -store_backtrace(mrb_state *mrb, mrb_value exc, struct RObject *backtrace) +store_backtrace(mrb_state *mrb, mrb_value exc, struct RBasic *backtrace) { struct RException *e = mrb_exc_ptr(exc); e->backtrace = backtrace; - mrb_field_write_barrier(mrb, (struct RBasic*)e, (struct RBasic*)backtrace); + mrb_field_write_barrier(mrb, (struct RBasic*)e, backtrace); } void @@ -202,45 +127,66 @@ mrb_keep_backtrace(mrb_state *mrb, mrb_value exc) { int ai; + if (mrb->c->ci == NULL) return; if (mrb_exc_ptr(exc)->backtrace) return; ai = mrb_gc_arena_save(mrb); - struct RObject *backtrace = packed_backtrace(mrb); + struct RBasic *backtrace = packed_backtrace(mrb); store_backtrace(mrb, exc, backtrace); mrb_gc_arena_restore(mrb, ai); } -struct RObject* -mrb_unpack_backtrace(mrb_state *mrb, struct RObject *backtrace) +static mrb_value +decode_location(mrb_state *mrb, const struct mrb_backtrace_location *entry) { - const struct backtrace_location *bt; - mrb_int n, i; - int ai; + mrb_value btline; + int32_t lineno; + const char *filename; + + // Case 1: No IREP or debug info available + if (!entry->irep) { + return mrb_str_new_lit(mrb, UNKNOWN_LOCATION); + } + + // Case 2: Debug info lookup failed + if (!mrb_debug_get_position(mrb, entry->irep, entry->idx, &lineno, &filename)) { + return mrb_str_new_lit(mrb, UNKNOWN_LOCATION); + } + // Case 3: Valid debug info + if (lineno != UNKNOWN_LINENO) { + btline = mrb_format(mrb, "%s:%d", filename, (int)lineno); + } + else { //all that was left was the stack frame + btline = mrb_format(mrb, "%s:0", filename); + } + + // Add method name if available + if (entry->method_id != 0) { + mrb_str_cat_lit(mrb, btline, ":in "); + mrb_str_cat_cstr(mrb, btline, mrb_sym_name(mrb, entry->method_id)); + } + + return btline; +} + +static struct RBasic* +mrb_unpack_backtrace(mrb_state *mrb, struct RBasic *backtrace) +{ if (backtrace == NULL) { - empty_backtrace: - return mrb_obj_ptr(mrb_ary_new_capa(mrb, 0)); + return mrb_basic_ptr(mrb_ary_new_capa(mrb, 0)); } if (backtrace->tt == MRB_TT_ARRAY) return backtrace; - bt = (struct backtrace_location*)mrb_data_check_get_ptr(mrb, mrb_obj_value(backtrace), &bt_type); - if (bt == NULL) goto empty_backtrace; - n = (mrb_int)backtrace->flags; - if (n == 0) goto empty_backtrace; - backtrace = mrb_obj_ptr(mrb_ary_new_capa(mrb, n)); - ai = mrb_gc_arena_save(mrb); - for (i = 0; i < n; i++) { - const struct backtrace_location *entry = &bt[i]; - mrb_value btline; - if (entry->lineno != -1) {//debug info was available - btline = mrb_format(mrb, "%s:%d", entry->filename, (int)entry->lineno); - } - else { //all that was left was the stack frame - btline = mrb_format(mrb, "%s:0", entry->filename); - } - if (entry->method_id != 0) { - mrb_str_cat_lit(mrb, btline, ":in "); - mrb_str_cat_cstr(mrb, btline, mrb_sym_name(mrb, entry->method_id)); - } + mrb_assert(backtrace->tt == MRB_TT_BACKTRACE); + + struct RBacktrace *bt = (struct RBacktrace*)backtrace; + mrb_int n = (mrb_int)bt->len; + const struct mrb_backtrace_location *loc = bt->locations; + + backtrace = mrb_basic_ptr(mrb_ary_new_capa(mrb, n)); + int ai = mrb_gc_arena_save(mrb); + for (mrb_int i = 0; i < n; i++) { + mrb_value btline = decode_location(mrb, &loc[i]); mrb_ary_push(mrb, mrb_obj_value(backtrace), btline); mrb_gc_arena_restore(mrb, ai); } @@ -251,7 +197,7 @@ mrb_unpack_backtrace(mrb_state *mrb, struct RObject *backtrace) mrb_value mrb_exc_backtrace(mrb_state *mrb, mrb_value exc) { - struct RObject *backtrace = mrb_exc_ptr(exc)->backtrace; + struct RBasic *backtrace = mrb_exc_ptr(exc)->backtrace; if (backtrace == NULL) { return mrb_nil_value(); } @@ -269,3 +215,79 @@ mrb_get_backtrace(mrb_state *mrb) { return mrb_obj_value(mrb_unpack_backtrace(mrb, packed_backtrace(mrb))); } + +#ifndef MRB_NO_STDIO + +static void +print_backtrace(mrb_state *mrb, struct RObject *exc, struct RBasic *ptr) +{ + struct RArray *ary = NULL; + struct RBacktrace *bt = NULL; + mrb_int n = 0; + + if (ptr) { + if (ptr->tt == MRB_TT_ARRAY) { + ary = (struct RArray*)ptr; + n = ARY_LEN(ary); + } + else { + bt = (struct RBacktrace*)ptr; + n = (mrb_int)bt->len; + } + } + + if (n != 0) { + mrb_value btline; + + fputs("trace (most recent call last):\n", stderr); + for (mrb_int i=n-1; i>0; i--) { + if (ary) btline = ARY_PTR(ary)[i]; + else btline = decode_location(mrb, &bt->locations[i]); + if (mrb_string_p(btline)) { + fprintf(stderr, "\t[%d] ", (int)i); + fwrite(RSTRING_PTR(btline), (int)RSTRING_LEN(btline), 1, stderr); + fputc('\n', stderr); + } + } + if (ary) btline = ARY_PTR(ary)[0]; + else btline = decode_location(mrb, &bt->locations[0]); + if (mrb_string_p(btline)) { + fwrite(RSTRING_PTR(btline), (int)RSTRING_LEN(btline), 1, stderr); + fputs(": ", stderr); + } + } + else { + fputs(UNKNOWN_LOCATION ": ", stderr); + } + + if (exc == mrb->nomem_err) { + static const char nomem[] = "Out of memory (NoMemoryError)\n"; + fwrite(nomem, sizeof(nomem)-1, 1, stderr); + } + else { + mrb_value output = mrb_exc_get_output(mrb, exc); + fwrite(RSTRING_PTR(output), RSTRING_LEN(output), 1, stderr); + fputc('\n', stderr); + } +} + +/* mrb_print_backtrace + + function to retrieve backtrace information from the last exception. +*/ + +MRB_API void +mrb_print_backtrace(mrb_state *mrb) +{ + if (!mrb->exc || mrb->exc->tt != MRB_TT_EXCEPTION) { + return; + } + + print_backtrace(mrb, mrb->exc, ((struct RException*)mrb->exc)->backtrace); +} +#else +MRB_API void +mrb_print_backtrace(mrb_state *mrb) +{ +} +#endif diff --git a/src/cdump.c b/src/cdump.c index 79216eab07..35809a6807 100644 --- a/src/cdump.c +++ b/src/cdump.c @@ -17,11 +17,15 @@ #ifndef MRB_NO_FLOAT #include +/* + * MRB_FLOAT_FMT - String format for dumping float values. + * Used when MRB_NO_FLOAT is not defined. + */ #define MRB_FLOAT_FMT "%.17g" #endif static int -cdump_pool(mrb_state *mrb, const mrb_pool_value *p, FILE *fp) +cdump_pool(mrb_state *mrb, const mrb_irep_pool *p, FILE *fp) { if (p->tt & IREP_TT_NFLAG) { /* number */ switch (p->tt) { @@ -40,12 +44,7 @@ cdump_pool(mrb_state *mrb, const mrb_pool_value *p, FILE *fp) break; case IREP_TT_FLOAT: #ifndef MRB_NO_FLOAT - if (p->u.f == 0) { - fprintf(fp, "{IREP_TT_FLOAT, {.f=%#.1f}},\n", p->u.f); - } - else { - fprintf(fp, "{IREP_TT_FLOAT, {.f=" MRB_FLOAT_FMT "}},\n", p->u.f); - } + fprintf(fp, "{IREP_TT_FLOAT, {.f=" MRB_FLOAT_FMT "}},\n", p->u.f); #endif break; case IREP_TT_BIGINT: @@ -64,7 +63,7 @@ cdump_pool(mrb_state *mrb, const mrb_pool_value *p, FILE *fp) else { /* string */ int i, len = p->tt>>2; const char *s = p->u.str; - fprintf(fp, "{IREP_TT_STR|(%d<<2), {\"", len); + fprintf(fp, "{IREP_TT_SSTR|(%d<<2), {\"", len); for (i=0; i= 2 && name[len-1] == '=' && sym_name_word_p(name, len-1); -} - -static mrb_bool -sym_name_with_question_mark_p(const char *name, mrb_int len) +sym_name_with_suffix_p(const char *name, mrb_int len, char suffix) { - return len >= 2 && name[len-1] == '?' && sym_name_word_p(name, len-1); -} - -static mrb_bool -sym_name_with_bang_p(const char *name, mrb_int len) -{ - return len >= 2 && name[len-1] == '!' && sym_name_word_p(name, len-1); + return len >= 2 && name[len-1] == suffix && sym_name_word_p(name, len-1); } static mrb_bool @@ -159,20 +146,17 @@ sym_operator_name(const char *sym_name, mrb_int len) mrb_sym table_size = sizeof(operator_table)/sizeof(struct operator_symbol); if (operator_table[table_size-1].sym_name_len < len) return NULL; - mrb_sym start, idx; - int cmp; - const struct operator_symbol *op_sym; - for (start = 0; table_size != 0; table_size/=2) { - idx = start+table_size/2; - op_sym = &operator_table[idx]; - cmp = (int)len-(int)op_sym->sym_name_len; + for (mrb_sym start = 0; table_size != 0; table_size/=2) { + mrb_sym idx = start+table_size/2; + const struct operator_symbol *op_sym = &operator_table[idx]; + int cmp = (int)len-(int)op_sym->sym_name_len; if (cmp == 0) { cmp = memcmp(sym_name, op_sym->sym_name, len); if (cmp == 0) return op_sym->name; } if (0 < cmp) { start = ++idx; - --table_size; + table_size--; } } return NULL; @@ -200,18 +184,19 @@ cdump_sym(mrb_state *mrb, mrb_sym sym, const char *var_name, int idx, mrb_value } mrb_int len; + const char *name = mrb_sym_name_len(mrb, sym, &len), *op_name; if (!name) return MRB_DUMP_INVALID_ARGUMENT; if (sym_name_word_p(name, len)) { fprintf(fp, "MRB_SYM(%s)", name); } - else if (sym_name_with_equal_p(name, len)) { + else if (sym_name_with_suffix_p(name, len, '=')) { fprintf(fp, "MRB_SYM_E(%.*s)", (int)(len-1), name); } - else if (sym_name_with_question_mark_p(name, len)) { + else if (sym_name_with_suffix_p(name, len, '?')) { fprintf(fp, "MRB_SYM_Q(%.*s)", (int)(len-1), name); } - else if (sym_name_with_bang_p(name, len)) { + else if (sym_name_with_suffix_p(name, len, '!')) { fprintf(fp, "MRB_SYM_B(%.*s)", (int)(len-1), name); } else if (sym_name_ivar_p(name, len)) { @@ -245,6 +230,7 @@ cdump_syms(mrb_state *mrb, const char *name, const char *key, int n, int syms_le int ai = mrb_gc_arena_save(mrb); mrb_int code_len = RSTRING_LEN(init_syms_code); const char *var_name = sym_var_name(mrb, name, key, n); + fprintf(fp, "mrb_DEFINE_SYMS_VAR(%s, %d, (", var_name, syms_len); for (int i=0; ifiles[0]->line_entry_count; + int len = info->files[0]->line_entry_count; - filename = mrb_sym_name_len(mrb, info->files[0]->filename_sym, &file_len); - snprintf(buffer, sizeof(buffer), " %s_debug_file_%d.filename_sym = mrb_intern_lit(mrb,\"", - name, n); + const char *filename = mrb_sym_name_len(mrb, info->files[0]->filename_sym, NULL); + snprintf(buffer, sizeof(buffer), " %s_debug_file_%d.filename_sym = mrb_intern_lit(mrb,", name, n); mrb_str_cat_cstr(mrb, init_syms_code, buffer); - mrb_str_cat_cstr(mrb, init_syms_code, filename); - mrb_str_cat_cstr(mrb, init_syms_code, "\");\n"); + mrb_str_cat_str(mrb, init_syms_code, mrb_str_dump(mrb, mrb_str_new_cstr(mrb, filename))); + mrb_str_cat_cstr(mrb, init_syms_code, ");\n"); switch (info->files[0]->line_type) { case mrb_debug_line_ary: fprintf(fp, "static uint16_t %s_debug_lines_%d[%d] = {", name, n, len); - for (i=0; ifiles[0]->lines.ary[i]); } @@ -305,7 +288,7 @@ cdump_debug(mrb_state *mrb, const char *name, int n, mrb_irep_debug_info *info, case mrb_debug_line_flat_map: line_type = "mrb_debug_line_flat_map"; fprintf(fp, "static struct mrb_irep_debug_info_line %s_debug_lines_%d[%d] = {", name, n, len); - for (i=0; ifiles[0]->lines.flat_map[i]; fprintf(fp, "\t{.start_pos=0x%04x,.line=%d},\n", fmap->start_pos, fmap->line); } @@ -316,7 +299,7 @@ cdump_debug(mrb_state *mrb, const char *name, int n, mrb_irep_debug_info *info, line_type = "mrb_debug_line_packed_map"; fprintf(fp, "static const char %s_debug_lines_%d[] = \"", name, n); const uint8_t *pmap = info->files[0]->lines.packed_map; - for (i=0; ifiles[0]->filename_sym, info->files[0]->line_entry_count, line_type, - name,n); + name, n); fprintf(fp, "static mrb_irep_debug_info_file *%s_debug_file_%d_ = &%s_debug_file_%d;\n", name, n, name, n); fprintf(fp, "static mrb_irep_debug_info %s_debug_%d = {\n", name, n); fprintf(fp, "%d, %d, &%s_debug_file_%d_};\n", info->pc_count, info->flen, name, n); + mrb_gc_arena_restore(mrb, ai); return MRB_DUMP_OK; } @@ -360,7 +344,7 @@ cdump_irep_struct(mrb_state *mrb, const mrb_irep *irep, uint8_t flags, FILE *fp, /* dump pool */ if (irep->pool) { len=irep->plen; - fprintf(fp, "static const mrb_pool_value %s_pool_%d[%d] = {\n", name, n, len); + fprintf(fp, "static const mrb_irep_pool %s_pool_%d[%d] = {\n", name, n, len); for (i=0; ipool[i], fp) != MRB_DUMP_OK) return MRB_DUMP_INVALID_ARGUMENT; @@ -385,8 +369,7 @@ cdump_irep_struct(mrb_state *mrb, const mrb_irep *irep, uint8_t flags, FILE *fp, } /* dump debug */ if (flags & MRB_DUMP_DEBUG_INFO) { - if(cdump_debug(mrb, name, n, irep->debug_info, - init_syms_code, fp) == MRB_DUMP_OK) { + if (cdump_debug(mrb, name, n, irep->debug_info, init_syms_code, fp) == MRB_DUMP_OK) { debug_available = 1; } } @@ -420,7 +403,7 @@ cdump_irep_struct(mrb_state *mrb, const mrb_irep *irep, uint8_t flags, FILE *fp, else { fputs( " NULL,\t\t\t\t\t/* lv */\n", fp); } - if(debug_available) { + if (debug_available) { fprintf(fp, " &%s_debug_%d,\n", name, n); } else { @@ -431,6 +414,26 @@ cdump_irep_struct(mrb_state *mrb, const mrb_irep *irep, uint8_t flags, FILE *fp, return MRB_DUMP_OK; } +/** + * Dumps an mruby irep into a C structure representation. + * + * This function takes an mruby internal representation (irep) and generates + * a C source code representation of it, writing it to the provided file pointer. + * This is useful for embedding mruby bytecode directly into C code. + * This function is only available when MRB_NO_STDIO is not defined. + * + * @param mrb The current mruby state. + * @param irep Pointer to the mruby irep to dump. + * @param flags Flags to control the dumping process. Possible values include + * MRB_DUMP_DEBUG_INFO (to include debug information) and + * MRB_DUMP_STATIC (to declare generated structures as static). + * @param fp File pointer where the C structure will be written. + * @param initname Base name used for generated C variables and functions. + * @return MRB_DUMP_OK on success. + * @return MRB_DUMP_INVALID_ARGUMENT if `fp` or `initname` is NULL or `initname` is empty. + * @return MRB_DUMP_WRITE_FAULT if a file writing error occurs. + * @return Other non-zero values for other errors during the dump. + */ int mrb_dump_irep_cstruct(mrb_state *mrb, const mrb_irep *irep, uint8_t flags, FILE *fp, const char *initname) { @@ -441,11 +444,21 @@ mrb_dump_irep_cstruct(mrb_state *mrb, const mrb_irep *irep, uint8_t flags, FILE "#include \n" "#include \n" "#include \n" - "#include \n" "\n") < 0) { return MRB_DUMP_WRITE_FAULT; } + /* + * Macro to wrap variadic arguments in curly braces. + * Useful for array or struct initializations within other macros. + */ fputs("#define mrb_BRACED(...) {__VA_ARGS__}\n", fp); + /* + * Macro to define a symbol array variable. + * name: Variable name for the symbol array. + * len: Number of symbols in the array. + * syms: Initializer list for the symbols, e.g., (MRB_SYM(foo), MRB_SYM(bar)). + * qualifier: Storage class qualifier like 'static' or 'const'. + */ fputs("#define mrb_DEFINE_SYMS_VAR(name, len, syms, qualifier) \\\n", fp); fputs(" static qualifier mrb_sym name[len] = mrb_BRACED syms\n", fp); fputs("\n", fp); @@ -461,7 +474,7 @@ mrb_dump_irep_cstruct(mrb_state *mrb, const mrb_irep *irep, uint8_t flags, FILE "extern\n" "#endif", initname); - fprintf(fp, "NULL,NULL,MRB_TT_PROC,MRB_GC_RED,0,{&%s_irep_0},NULL,{NULL},\n}};\n", initname); + fprintf(fp, "NULL,MRB_TT_PROC,MRB_GC_RED,MRB_OBJ_IS_FROZEN,0,{&%s_irep_0},NULL,{NULL},\n}};\n", initname); fputs("static void\n", fp); fprintf(fp, "%s_init_syms(mrb_state *mrb)\n", initname); fputs("{\n", fp); diff --git a/src/class.c b/src/class.c index 14be8ca9d9..74d021ce74 100644 --- a/src/class.c +++ b/src/class.c @@ -17,299 +17,291 @@ #include #include #include -#include -union mt_ptr { - struct RProc *proc; - mrb_func_t func; -}; +/* mrb_mt_tbl, union mrb_mt_ptr, mrb_mt_entry defined in internal.h */ +#define MT_PROTECTED MRB_METHOD_PROTECTED_FL +#define MT_VDEFAULT MRB_METHOD_VDEFAULT_FL +#define MT_VMASK MRB_METHOD_VISIBILITY_MASK -#define MT_KEY_P(k) (((k)>>2) != 0) -#define MT_FUNC_P 1 -#define MT_NOARG_P 2 -#define MT_EMPTY 0 -#define MT_DELETED 1 - -#define MT_KEY(sym, flags) ((sym)<<2|(flags)) -#define MT_FLAGS(func_p, noarg_p) ((func_p)?MT_FUNC_P:0)|((noarg_p)?MT_NOARG_P:0) -#define MT_KEY_SYM(k) ((k)>>2) -#define MT_KEY_FLG(k) ((k)&3) - -/* method table structure */ -typedef struct mt_tbl { - int size; - int alloc; - union mt_ptr *ptr; -} mt_tbl; - -#ifdef MRB_USE_INLINE_METHOD_CACHE -#define MT_CACHE_SIZE 256 -static uint8_t mt_cache[MT_CACHE_SIZE]; -#endif +#define MRB_MT_FLAG_BITS (MRB_MT_READONLY_BIT | MRB_MT_FROZEN_BIT) +#define MT_ALLOC(t) ((t)->alloc & ~MRB_MT_FLAG_BITS) +#define mt_readonly_p(t) ((t)->alloc & MRB_MT_READONLY_BIT) +#define mt_frozen_p(t) ((t)->alloc & MRB_MT_FROZEN_BIT) -/* Creates the method table. */ -static mt_tbl* +/* Allocates or grows the method table to exactly new_alloc entries */ +static void +mt_grow(mrb_state *mrb, mrb_mt_tbl *t, int new_alloc) +{ + t->ptr = (mrb_mt_entry*)mrb_realloc(mrb, t->ptr, + new_alloc * sizeof(mrb_mt_entry)); + t->alloc = (t->alloc & MRB_MT_FLAG_BITS) | new_alloc; +} + +/* Creates a new empty method table */ +static mrb_mt_tbl* mt_new(mrb_state *mrb) { - mt_tbl *t; + mrb_mt_tbl *t; - t = (mt_tbl*)mrb_malloc(mrb, sizeof(mt_tbl)); + t = (mrb_mt_tbl*)mrb_malloc(mrb, sizeof(mrb_mt_tbl)); t->size = 0; t->alloc = 0; t->ptr = NULL; + t->next = NULL; return t; } -static void mt_put(mrb_state *mrb, mt_tbl *t, mrb_sym sym, mrb_sym flags, union mt_ptr ptr); - +/* Inserts or updates an entry in the method table (linear scan) */ static void -mt_rehash(mrb_state *mrb, mt_tbl *t) +mt_put(mrb_state *mrb, mrb_mt_tbl *t, mrb_sym sym, uint32_t flags, union mrb_mt_ptr ptrval) { - int old_alloc = t->alloc; - int new_alloc = old_alloc+8; - union mt_ptr *old_ptr = t->ptr; + mrb_mt_entry *entries = t->ptr; - khash_power2(new_alloc); - if (old_alloc == new_alloc) return; - - t->ptr = (union mt_ptr*)mrb_calloc(mrb, sizeof(union mt_ptr)+sizeof(mrb_sym), new_alloc); - t->alloc = new_alloc; - t->size = 0; - if (old_alloc == 0) return; - - mrb_sym *keys = (mrb_sym*)&old_ptr[old_alloc]; - union mt_ptr *vals = old_ptr; - for (int i = 0; i < old_alloc; i++) { - mrb_sym key = keys[i]; - if (MT_KEY_P(key)) { - mt_put(mrb, t, MT_KEY_SYM(key), MT_KEY_FLG(key), vals[i]); + /* Linear scan for existing key */ + for (int i = 0; i < t->size; i++) { + if (entries[i].key == sym) { + entries[i].flags = flags; + entries[i].val = ptrval; + return; } } - mrb_free(mrb, old_ptr); -} - -#define slot_empty_p(slot) ((slot)->key == 0 && (slot)->func_p == 0) - -/* Set the value for the symbol in the method table. */ -static void -mt_put(mrb_state *mrb, mt_tbl *t, mrb_sym sym, mrb_sym flags, union mt_ptr ptr) -{ - int hash, pos, start, dpos = -1; - if (t->alloc == 0) { - mt_rehash(mrb, t); + /* Not found — append to end */ + if (MT_ALLOC(t) == 0) { + mt_grow(mrb, t, 8); } + else if (t->size == MT_ALLOC(t)) { + mt_grow(mrb, t, MT_ALLOC(t) * 2); + } + entries = t->ptr; + entries[t->size].key = sym; + entries[t->size].flags = flags; + entries[t->size].val = ptrval; + t->size++; +} - mrb_sym *keys = (mrb_sym*)&t->ptr[t->alloc]; - union mt_ptr *vals = t->ptr; - hash = kh_int_hash_func(mrb, sym); - start = pos = hash & (t->alloc-1); - for (;;) { - mrb_sym key = keys[pos]; - if (MT_KEY_SYM(key) == sym) { - value_set: - keys[pos] = MT_KEY(sym, flags); - vals[pos] = ptr; - return; - } - else if (key == MT_EMPTY) { - t->size++; - goto value_set; - } - else if (key == MT_DELETED && dpos < 0) { - dpos = pos; - } - pos = (pos+1) & (t->alloc-1); - if (pos == start) { /* not found */ - if (dpos > 0) { - t->size++; - pos = dpos; - goto value_set; +/* Retrieves a value from the method table (walks chain, linear scan). + Returns TRUE if found, FALSE if not found. + On success, *pp and *fp are set. */ +static mrb_bool +mt_get(mrb_state *mrb, mrb_mt_tbl *t, mrb_sym sym, union mrb_mt_ptr *pp, uint32_t *fp) +{ + while (t) { + mrb_mt_entry *entries = t->ptr; + for (int i = 0; i < t->size; i++) { + if (entries[i].key == sym) { + if (MRB_MT_REMOVED_P(entries[i])) return FALSE; + *pp = entries[i].val; + *fp = entries[i].flags; + return TRUE; } - /* no room */ - mt_rehash(mrb, t); - start = pos = hash & (t->alloc-1); - keys = (mrb_sym*)&t->ptr[t->alloc]; - vals = t->ptr; } + t = t->next; } + return FALSE; } -/* Get a value for a symbol from the method table. */ -static mrb_sym -mt_get(mrb_state *mrb, mt_tbl *t, mrb_sym sym, union mt_ptr *pp) +/* Deletes an entry from the method table (swap with last) */ +static mrb_bool +mt_del(mrb_state *mrb, mrb_mt_tbl *t, mrb_sym sym) { - int hash, pos, start; - - if (t == NULL) return 0; - if (t->alloc == 0) return 0; - if (t->size == 0) return 0; + if (!t || t->size == 0) return FALSE; - mrb_sym *keys = (mrb_sym*)&t->ptr[t->alloc]; - union mt_ptr *vals = t->ptr; - hash = kh_int_hash_func(mrb, sym); -#ifdef MRB_USE_INLINE_METHOD_CACHE - int cpos = (hash^(uintptr_t)t) % MT_CACHE_SIZE; - mrb_sym key; - pos = mt_cache[cpos]; - if (cpos < t->alloc && key = keys[pos] && MT_KEY_SYM(key) == sym) { - *pp = vals[pos]; - return key; - } -#endif - start = pos = hash & (t->alloc-1); - for (;;) { - mrb_sym key = keys[pos]; - if (MT_KEY_SYM(key) == sym) { - *pp = vals[pos]; -#ifdef MRB_USE_INLINE_METHOD_CACHE - if (pos < 0x100) { - mt_cache[cpos] = pos; + mrb_mt_entry *entries = t->ptr; + for (int i = 0; i < t->size; i++) { + if (entries[i].key == sym) { + t->size--; + if (i < t->size) { + entries[i] = entries[t->size]; } -#endif - return key; - } - else if (key == MT_EMPTY) { - return 0; - } - pos = (pos+1) & (t->alloc-1); - if (pos == start) { /* not found */ - return 0; + return TRUE; } } + return FALSE; } -/* Deletes the value for the symbol from the method table. */ +/* Checks if any layer in the chain contains the given symbol */ static mrb_bool -mt_del(mrb_state *mrb, mt_tbl *t, mrb_sym sym) +mt_chain_has(mrb_mt_tbl *t, mrb_sym sym) { - int hash, pos, start; - - if (t == NULL) return FALSE; - if (t->alloc == 0) return FALSE; - if (t->size == 0) return FALSE; - - mrb_sym *keys = (mrb_sym*)&t->ptr[t->alloc]; - hash = kh_int_hash_func(mrb, sym); - start = pos = hash & (t->alloc-1); - for (;;) { - mrb_sym key = keys[pos]; - if (MT_KEY_SYM(key) == sym) { - t->size--; - keys[pos] = MT_DELETED; - return TRUE; - } - else if (key == MT_EMPTY) { - return FALSE; - } - pos = (pos+1) & (t->alloc-1); - if (pos == start) { /* not found */ - return FALSE; + while (t) { + mrb_mt_entry *entries = t->ptr; + for (int i = 0; i < t->size; i++) { + if (entries[i].key == sym) return TRUE; } + t = t->next; } + return FALSE; } -/* Copy the method table. */ -static struct mt_tbl* -mt_copy(mrb_state *mrb, mt_tbl *t) +/* Creates a copy of the method table */ +static mrb_mt_tbl* +mt_copy(mrb_state *mrb, mrb_mt_tbl *t) { - mt_tbl *t2; - int i; - - if (t == NULL) return NULL; - if (t->alloc == 0) return NULL; - if (t->size == 0) return NULL; - - t2 = mt_new(mrb); - mrb_sym *keys = (mrb_sym*)&t->ptr[t->alloc]; - union mt_ptr *vals = t->ptr; - for (i=0; ialloc; i++) { - if (MT_KEY_P(keys[i])) { - mt_put(mrb, t2, MT_KEY_SYM(keys[i]), MT_KEY_FLG(keys[i]), vals[i]); - } + if (!t) return NULL; + if (mt_readonly_p(t)) { + /* source is ROM — new class gets empty mutable top + shared ROM chain */ + if (t->size == 0 && !t->next) return NULL; + mrb_mt_tbl *t2 = mt_new(mrb); + t2->next = t; + return t2; + } + if (t->size == 0 && !t->next) return NULL; + mrb_mt_tbl *t2 = mt_new(mrb); + if (t->size > 0) { + mt_grow(mrb, t2, t->size); + memcpy(t2->ptr, t->ptr, t->size * sizeof(mrb_mt_entry)); + t2->size = t->size; } + t2->next = t->next; /* share ROM chain */ return t2; } -/* Free memory of the method table. */ +/* Frees memory of the method table (mutable layers only). + Stops at the first readonly (ROM) layer; ROM wrappers are + shared (by iclasses, dup, etc.) and freed via mrb->rom_mt + at state close. */ static void -mt_free(mrb_state *mrb, mt_tbl *t) +mt_free(mrb_state *mrb, mrb_mt_tbl *t) +{ + while (t && !mt_readonly_p(t)) { + mrb_mt_tbl *next = t->next; + mrb_free(mrb, t->ptr); + mrb_free(mrb, t); + t = next; + } +} + +/* Allocates a per-state ROM wrapper for the const entries array + and pushes it onto the class's method table chain. + The wrapper is also registered in mrb->rom_mt for cleanup + at mrb_close, since ROM layers are shared and must not be + freed by mt_free during normal GC. */ +void +mrb_mt_init_rom(mrb_state *mrb, struct RClass *c, + const mrb_mt_entry *entries, int size) +{ + mrb_mt_tbl *rom = (mrb_mt_tbl*)mrb_malloc(mrb, sizeof(mrb_mt_tbl)); + rom->size = size; + rom->alloc = size | MRB_MT_READONLY_BIT; + rom->ptr = (mrb_mt_entry*)entries; + + /* register for cleanup at mrb_close */ + struct mrb_mt_rom_list *node = + (struct mrb_mt_rom_list*)mrb_malloc(mrb, sizeof(struct mrb_mt_rom_list)); + node->tbl = rom; + node->next = mrb->rom_mt; + mrb->rom_mt = node; + + /* push ROM layer */ + mrb_mt_tbl *t = c->mt; + if (!t || mt_readonly_p(t)) { + rom->next = t; + c->mt = rom; + } + else { + /* freeze mutable top, insert ROM behind it; + * c->mt must not change because iclasses (module inclusion) + * hold a copy of the mt pointer */ + t->alloc |= MRB_MT_FROZEN_BIT; + rom->next = t->next; + t->next = rom; + } +} + +/* Creates a method value structure from flags and pointer */ +static inline mrb_method_t +create_method_value(mrb_state *mrb, uint32_t flags, union mrb_mt_ptr val) { - mrb_free(mrb, t->ptr); - mrb_free(mrb, t); + mrb_method_t m = { flags, { val.proc } }; + return m; } +/* Iterates over methods in a class's method table with callback function */ MRB_API void mrb_mt_foreach(mrb_state *mrb, struct RClass *c, mrb_mt_foreach_func *fn, void *p) { - mt_tbl *t = c->mt; - int i; + mrb_mt_tbl *t = c->mt; + if (!t) return; - if (t == NULL) return; - if (t->alloc == 0) return; - if (t->size == 0) return; - - mrb_sym *keys = (mrb_sym*)&t->ptr[t->alloc]; - union mt_ptr *vals = t->ptr; - for (i=0; ialloc; i++) { - mrb_sym key = keys[i]; - if (MT_KEY_SYM(key)) { - mrb_method_t m; + /* fast path: single layer */ + if (!t->next) { + mrb_mt_entry *entries = t->ptr; + for (int i = 0; i < t->size; i++) { + if (MRB_MT_REMOVED_P(entries[i])) continue; + if (fn(mrb, entries[i].key, + create_method_value(mrb, entries[i].flags, entries[i].val), p) != 0) + return; + } + return; + } - if (key & MT_FUNC_P) { - MRB_METHOD_FROM_FUNC(m, vals[i].func); - } - else { - MRB_METHOD_FROM_PROC(m, vals[i].proc); - } - if (key & MT_NOARG_P) { - MRB_METHOD_NOARG_SET(m); + /* multi-layer: iterate each layer, skip if shadowed by a higher one */ + for (mrb_mt_tbl *layer = t; layer; layer = layer->next) { + mrb_mt_entry *entries = layer->ptr; + for (int i = 0; i < layer->size; i++) { + if (MRB_MT_REMOVED_P(entries[i])) continue; + mrb_sym sym = entries[i].key; + /* check if shadowed by a higher layer */ + if (layer != t) { + mrb_bool shadowed = FALSE; + for (mrb_mt_tbl *upper = t; upper != layer; upper = upper->next) { + mrb_mt_entry *up = upper->ptr; + for (int j = 0; j < upper->size; j++) { + if (up[j].key == sym) { + shadowed = TRUE; + break; + } + } + if (shadowed) break; + } + if (shadowed) continue; } - - if (fn(mrb, MT_KEY_SYM(key), m, p) != 0) + if (fn(mrb, sym, create_method_value(mrb, entries[i].flags, entries[i].val), p) != 0) return; } } - return; } -void +/* Marks method table entries for garbage collection */ +size_t mrb_gc_mark_mt(mrb_state *mrb, struct RClass *c) { - mt_tbl *t = c->mt; - int i; - - if (t == NULL) return; - if (t->alloc == 0) return; - if (t->size == 0) return; - - mrb_sym *keys = (mrb_sym*)&t->ptr[t->alloc]; - union mt_ptr *vals = t->ptr; - for (i=0; ialloc; i++) { - if (MT_KEY_P(keys[i]) && (keys[i] & MT_FUNC_P) == 0) { /* Proc pointer */ - struct RProc *p = vals[i].proc; - mrb_gc_mark(mrb, (struct RBasic*)p); + size_t children = 0; + for (mrb_mt_tbl *t = c->mt; t; t = t->next) { + if (mt_readonly_p(t)) continue; /* ROM layers need no GC marking */ + if (t->size == 0) continue; + mrb_mt_entry *entries = t->ptr; + for (int i = 0; i < t->size; i++) { + if (entries[i].key != 0 && (entries[i].flags & MRB_MT_FUNC) == 0) { + mrb_gc_mark(mrb, (struct RBasic*)entries[i].val.proc); + } } + children += (size_t)t->size; } - return; + return children; } +/* Returns memory size of class method table (mutable layers only) */ size_t -mrb_gc_mark_mt_size(mrb_state *mrb, struct RClass *c) +mrb_class_mt_memsize(mrb_state *mrb, struct RClass *c) { - struct mt_tbl *h = c->mt; - - if (!h) return 0; - return (size_t)h->size; + size_t total = 0; + for (mrb_mt_tbl *h = c->mt; h && !mt_readonly_p(h); h = h->next) + total += sizeof(mrb_mt_tbl) + (size_t)MT_ALLOC(h) * sizeof(mrb_mt_entry); + return total; } +/* Frees mutable layers of the class method table for GC. + ROM layers are left intact (freed via mrb->rom_mt at close). */ void mrb_gc_free_mt(mrb_state *mrb, struct RClass *c) { if (c->mt) mt_free(mrb, c->mt); } +/* Sets the name of a class within an outer namespace */ void mrb_class_name_class(mrb_state *mrb, struct RClass *outer, struct RClass *c, mrb_sym id) { @@ -340,32 +332,34 @@ mrb_class_name_class(mrb_state *mrb, struct RClass *outer, struct RClass *c, mrb mrb_obj_iv_set_force(mrb, (struct RObject*)c, nsym, name); } +/* Checks if a name is a valid constant name */ mrb_bool mrb_const_name_p(mrb_state *mrb, const char *name, mrb_int len) { return len > 0 && ISUPPER(name[0]) && mrb_ident_p(name+1, len-1); } +/* Sets up a class by defining it as a constant in the outer namespace */ static void setup_class(mrb_state *mrb, struct RClass *outer, struct RClass *c, mrb_sym id) { - mrb_class_name_class(mrb, outer, c, id); - mrb_obj_iv_set(mrb, (struct RObject*)outer, id, mrb_obj_value(c)); + mrb_const_set(mrb, mrb_obj_value(outer), id, mrb_obj_value(c)); } #define make_metaclass(mrb, c) prepare_singleton_class((mrb), (struct RBasic*)(c)) +/* Prepares and creates a singleton class for an object */ static void prepare_singleton_class(mrb_state *mrb, struct RBasic *o) { - struct RClass *sc, *c; + struct RClass *c; mrb_assert(o->c); if (o->c->tt == MRB_TT_SCLASS) return; - sc = MRB_OBJ_ALLOC(mrb, MRB_TT_SCLASS, mrb->class_class); + struct RClass *sc = MRB_OBJ_ALLOC(mrb, MRB_TT_SCLASS, mrb->class_class); sc->flags |= MRB_FL_CLASS_IS_INHERITED; - sc->mt = mt_new(mrb); - sc->iv = 0; + sc->mt = NULL; + sc->iv = NULL; if (o->tt == MRB_TT_CLASS) { c = (struct RClass*)o; if (!c->super) { @@ -388,11 +382,11 @@ prepare_singleton_class(mrb_state *mrb, struct RBasic *o) } o->c = sc; mrb_field_write_barrier(mrb, (struct RBasic*)o, (struct RBasic*)sc); - mrb_field_write_barrier(mrb, (struct RBasic*)sc, (struct RBasic*)o); mrb_obj_iv_set(mrb, (struct RObject*)sc, MRB_SYM(__attached__), mrb_obj_value(o)); - sc->flags |= o->flags & MRB_FL_OBJ_IS_FROZEN; + sc->frozen = o->frozen; } +/* Returns a string representation of a class name */ static mrb_value class_name_str(mrb_state *mrb, struct RClass* c) { @@ -406,6 +400,7 @@ class_name_str(mrb_state *mrb, struct RClass* c) return path; } +/* Gets a class from a constant symbol, ensuring it's a class */ static struct RClass* class_from_sym(mrb_state *mrb, struct RClass *klass, mrb_sym id) { @@ -415,6 +410,7 @@ class_from_sym(mrb_state *mrb, struct RClass *klass, mrb_sym id) return mrb_class_ptr(c); } +/* Gets a module from a constant symbol, ensuring it's a module */ static struct RClass* module_from_sym(mrb_state *mrb, struct RClass *klass, mrb_sym id) { @@ -424,6 +420,7 @@ module_from_sym(mrb_state *mrb, struct RClass *klass, mrb_sym id) return mrb_class_ptr(c); } +/* Checks if an object is a class or module */ static mrb_bool class_ptr_p(mrb_value obj) { @@ -437,6 +434,7 @@ class_ptr_p(mrb_value obj) } } +/* Checks if object is class/module and raises TypeError if not */ static void check_if_class_or_module(mrb_state *mrb, mrb_value obj) { @@ -445,26 +443,43 @@ check_if_class_or_module(mrb_state *mrb, mrb_value obj) } } +/* Defines a new module or returns existing one */ static struct RClass* define_module(mrb_state *mrb, mrb_sym name, struct RClass *outer) { - struct RClass *m; - if (mrb_const_defined_at(mrb, mrb_obj_value(outer), name)) { return module_from_sym(mrb, outer, name); } - m = mrb_module_new(mrb); + struct RClass *m = mrb_module_new(mrb); setup_class(mrb, outer, m, name); return m; } +/* + * Defines a new module in the top-level scope (Object) using a symbol for the name. + * + * @param mrb The mruby state. + * @param name The symbol representing the name of the module to define. + * @return A pointer to the newly defined or existing RClass structure for the module. + * @sideeffect Creates a new module or returns an existing one if already defined. + * The module is set as a constant in Object. + */ MRB_API struct RClass* mrb_define_module_id(mrb_state *mrb, mrb_sym name) { return define_module(mrb, name, mrb->object_class); } +/* + * Defines a new module in the top-level scope (Object). + * + * @param mrb The mruby state. + * @param name The name of the module to define. + * @return A pointer to the newly defined or existing RClass structure for the module. + * @sideeffect Creates a new module or returns an existing one if already defined. + * The module is set as a constant in Object. + */ MRB_API struct RClass* mrb_define_module(mrb_state *mrb, const char *name) { @@ -475,7 +490,7 @@ struct RClass* mrb_vm_define_module(mrb_state *mrb, mrb_value outer, mrb_sym id) { check_if_class_or_module(mrb, outer); - if (mrb_const_defined_at(mrb, outer, id)) { + if (mrb_obj_iv_defined(mrb, mrb_obj_ptr(outer), id)) { mrb_value old = mrb_const_get(mrb, outer, id); if (!mrb_module_p(old)) { @@ -483,9 +498,21 @@ mrb_vm_define_module(mrb_state *mrb, mrb_value outer, mrb_sym id) } return mrb_class_ptr(old); } - return define_module(mrb, id, mrb_class_ptr(outer)); + struct RClass *m = mrb_module_new(mrb); + setup_class(mrb, mrb_class_ptr(outer), m, id); + return m; } +/* + * Defines a new module under the given outer module/class using a symbol for the name. + * + * @param mrb The mruby state. + * @param outer A pointer to the RClass structure of the outer module/class. + * @param name The symbol representing the name of the module to define. + * @return A pointer to the newly defined or existing RClass structure for the module. + * @sideeffect Creates a new module or returns an existing one if already defined under `outer`. + * The module is set as a constant in `outer`. + */ MRB_API struct RClass* mrb_define_module_under_id(mrb_state *mrb, struct RClass *outer, mrb_sym name) { @@ -495,6 +522,16 @@ mrb_define_module_under_id(mrb_state *mrb, struct RClass *outer, mrb_sym name) return c; } +/* + * Defines a new module under the given outer module/class using a C string for the name. + * + * @param mrb The mruby state. + * @param outer A pointer to the RClass structure of the outer module/class. + * @param name The C string representing the name of the module to define. + * @return A pointer to the newly defined or existing RClass structure for the module. + * @sideeffect Creates a new module or returns an existing one if already defined under `outer`. + * The module is set as a constant in `outer`. + */ MRB_API struct RClass* mrb_define_module_under(mrb_state *mrb, struct RClass *outer, const char *name) { @@ -533,6 +570,18 @@ define_class(mrb_state *mrb, mrb_sym name, struct RClass *super, struct RClass * return c; } +/* + * Defines a new class in the top-level scope (Object) using a symbol for the name. + * + * @param mrb The mruby state. + * @param name The symbol representing the name of the class to define. + * @param super A pointer to the RClass structure of the superclass. + * If NULL, Object is assumed as the superclass, and a warning is issued. + * @return A pointer to the newly defined or existing RClass structure for the class. + * @sideeffect Creates a new class or returns an existing one if already defined. + * The class is set as a constant in Object. + * Issues a warning if `super` is NULL. + */ MRB_API struct RClass* mrb_define_class_id(mrb_state *mrb, mrb_sym name, struct RClass *super) { @@ -542,6 +591,17 @@ mrb_define_class_id(mrb_state *mrb, mrb_sym name, struct RClass *super) return define_class(mrb, name, super, mrb->object_class); } +/* + * Defines a new class in the top-level scope (Object). + * + * @param mrb The mruby state. + * @param name The name of the class to define. + * @param super A pointer to the RClass structure of the superclass. + * If NULL, Object is assumed as the superclass. + * @return A pointer to the newly defined or existing RClass structure for the class. + * @sideeffect Creates a new class or returns an existing one if already defined. + * The class is set as a constant in Object. + */ MRB_API struct RClass* mrb_define_class(mrb_state *mrb, const char *name, struct RClass *super) { @@ -550,7 +610,6 @@ mrb_define_class(mrb_state *mrb, const char *name, struct RClass *super) static mrb_value mrb_do_nothing(mrb_state *mrb, mrb_value); #ifndef MRB_NO_METHOD_CACHE -static void mc_clear(mrb_state *mrb); static void mc_clear_by_id(mrb_state *mrb, mrb_sym mid); #else #define mc_clear(mrb) @@ -560,14 +619,14 @@ static void mc_clear_by_id(mrb_state *mrb, mrb_sym mid); static void mrb_class_inherited(mrb_state *mrb, struct RClass *super, struct RClass *klass) { - mrb_value s; - mrb_sym mid; if (!super) super = mrb->object_class; super->flags |= MRB_FL_CLASS_IS_INHERITED; - s = mrb_obj_value(super); - mid = MRB_SYM(inherited); + + mrb_value s = mrb_obj_value(super); + mrb_sym mid = MRB_SYM(inherited); + if (!mrb_func_basic_p(mrb, s, mid, mrb_do_nothing)) { mrb_value c = mrb_obj_value(klass); mrb_funcall_argv(mrb, s, mid, 1, &c); @@ -587,10 +646,10 @@ mrb_vm_define_class(mrb_state *mrb, mrb_value outer, mrb_value super, mrb_sym id s = mrb_class_ptr(super); } else { - s = 0; + s = NULL; } check_if_class_or_module(mrb, outer); - if (mrb_const_defined_at(mrb, outer, id)) { + if (mrb_obj_iv_defined(mrb, mrb_obj_ptr(outer), id)) { mrb_value old = mrb_const_get(mrb, outer, id); if (!mrb_class_p(old)) { @@ -600,17 +659,26 @@ mrb_vm_define_class(mrb_state *mrb, mrb_value outer, mrb_value super, mrb_sym id if (s) { /* check super class */ if (mrb_class_real(c->super) != s) { - mrb_raisef(mrb, E_TYPE_ERROR, "superclass mismatch for class %v", old); + mrb_raisef(mrb, E_TYPE_ERROR, "superclass mismatch for %v", old); } } return c; } - c = define_class(mrb, id, s, mrb_class_ptr(outer)); + c = mrb_class_new(mrb, s); + setup_class(mrb, mrb_class_ptr(outer), c, id); mrb_class_inherited(mrb, mrb_class_real(c->super), c); return c; } +/* + * Checks if a class is defined in the top-level scope (Object). + * + * @param mrb The mruby state. + * @param name The name of the class to check. + * @return TRUE if the class is defined, FALSE otherwise. + * Returns FALSE if the name is not a valid symbol. + */ MRB_API mrb_bool mrb_class_defined(mrb_state *mrb, const char *name) { @@ -619,12 +687,28 @@ mrb_class_defined(mrb_state *mrb, const char *name) return mrb_const_defined(mrb, mrb_obj_value(mrb->object_class), sym); } +/* + * Checks if a class is defined in the top-level scope (Object) using a symbol for the name. + * + * @param mrb The mruby state. + * @param name The symbol representing the name of the class to check. + * @return TRUE if the class is defined, FALSE otherwise. + */ MRB_API mrb_bool mrb_class_defined_id(mrb_state *mrb, mrb_sym name) { return mrb_const_defined(mrb, mrb_obj_value(mrb->object_class), name); } +/* + * Checks if a class is defined under the given outer module/class. + * + * @param mrb The mruby state. + * @param outer A pointer to the RClass structure of the outer module/class. + * @param name The name of the class to check. + * @return TRUE if the class is defined under `outer`, FALSE otherwise. + * Returns FALSE if the name is not a valid symbol. + */ MRB_API mrb_bool mrb_class_defined_under(mrb_state *mrb, struct RClass *outer, const char *name) { @@ -633,132 +717,316 @@ mrb_class_defined_under(mrb_state *mrb, struct RClass *outer, const char *name) return mrb_const_defined_at(mrb, mrb_obj_value(outer), sym); } +/* + * Checks if a class is defined under the given outer module/class using a symbol for the name. + * + * @param mrb The mruby state. + * @param outer A pointer to the RClass structure of the outer module/class. + * @param name The symbol representing the name of the class to check. + * @return TRUE if the class is defined under `outer`, FALSE otherwise. + */ MRB_API mrb_bool mrb_class_defined_under_id(mrb_state *mrb, struct RClass *outer, mrb_sym name) { return mrb_const_defined_at(mrb, mrb_obj_value(outer), name); } +/* + * Retrieves a class defined under an outer module/class. + * + * @param mrb The mruby state. + * @param outer A pointer to the RClass structure of the outer module/class. + * If NULL, Object is assumed. + * @param name The name of the class to retrieve. + * @return A pointer to the RClass structure of the found class. + * @raise TypeError if the constant found is not a class. + * @raise NameError if the constant is not found. + */ MRB_API struct RClass* mrb_class_get_under(mrb_state *mrb, struct RClass *outer, const char *name) { return class_from_sym(mrb, outer, mrb_intern_cstr(mrb, name)); } +/* + * Retrieves a class defined under an outer module/class using a symbol for the name. + * + * @param mrb The mruby state. + * @param outer A pointer to the RClass structure of the outer module/class. + * If NULL, Object is assumed. + * @param name The symbol representing the name of the class to retrieve. + * @return A pointer to the RClass structure of the found class. + * @raise TypeError if the constant found is not a class. + * @raise NameError if the constant is not found. + */ MRB_API struct RClass* mrb_class_get_under_id(mrb_state *mrb, struct RClass *outer, mrb_sym name) { return class_from_sym(mrb, outer, name); } +/* + * Retrieves a class defined in the top-level scope (Object). + * + * @param mrb The mruby state. + * @param name The name of the class to retrieve. + * @return A pointer to the RClass structure of the found class. + * @raise TypeError if the constant found is not a class. + * @raise NameError if the constant is not found. + */ MRB_API struct RClass* mrb_class_get(mrb_state *mrb, const char *name) { return mrb_class_get_under(mrb, mrb->object_class, name); } +/* + * Retrieves a class defined in the top-level scope (Object) using a symbol for the name. + * + * @param mrb The mruby state. + * @param name The symbol representing the name of the class to retrieve. + * @return A pointer to the RClass structure of the found class. + * @raise TypeError if the constant found is not a class. + * @raise NameError if the constant is not found. + */ MRB_API struct RClass* mrb_class_get_id(mrb_state *mrb, mrb_sym name) { return mrb_class_get_under_id(mrb, mrb->object_class, name); } +/* + * Retrieves an exception class by its symbol name. + * This function specifically searches for exception classes. + * + * @param mrb The mruby state. + * @param name The symbol representing the name of the exception class. + * @return A pointer to the RClass structure of the found exception class. + * @raise TypeError if the constant found is not a class. + * @raise NameError if the constant is not found. + * @raise Exception if the found class is not an exception (does not inherit from E_EXCEPTION). + * @raise Exception if the exception system is corrupted. + */ MRB_API struct RClass* mrb_exc_get_id(mrb_state *mrb, mrb_sym name) { - struct RClass *exc, *e; - mrb_value c = mrb_const_get(mrb, mrb_obj_value(mrb->object_class), name); + mrb_value c = mrb_exc_const_get(mrb, name); if (!mrb_class_p(c)) { - mrb_raise(mrb, mrb->eException_class, "exception corrupted"); + mrb_raise(mrb, E_EXCEPTION, "exception corrupted"); } - exc = e = mrb_class_ptr(c); - while (e) { - if (e == mrb->eException_class) + struct RClass *exc = mrb_class_ptr(c); + for (struct RClass *e = exc; e; e = e->super) { + if (e == E_EXCEPTION) return exc; - e = e->super; } - return mrb->eException_class; + mrb_raise(mrb, E_EXCEPTION, "non-exception raised"); + /* not reached */ + return NULL; } +/* + * Retrieves a module defined under an outer module/class. + * + * @param mrb The mruby state. + * @param outer A pointer to the RClass structure of the outer module/class. + * @param name The name of the module to retrieve. + * @return A pointer to the RClass structure of the found module. + * @raise TypeError if the constant found is not a module. + * @raise NameError if the constant is not found. + */ MRB_API struct RClass* mrb_module_get_under(mrb_state *mrb, struct RClass *outer, const char *name) { return module_from_sym(mrb, outer, mrb_intern_cstr(mrb, name)); } +/* + * Retrieves a module defined under an outer module/class using a symbol for the name. + * + * @param mrb The mruby state. + * @param outer A pointer to the RClass structure of the outer module/class. + * @param name The symbol representing the name of the module to retrieve. + * @return A pointer to the RClass structure of the found module. + * @raise TypeError if the constant found is not a module. + * @raise NameError if the constant is not found. + */ MRB_API struct RClass* mrb_module_get_under_id(mrb_state *mrb, struct RClass *outer, mrb_sym name) { return module_from_sym(mrb, outer, name); } +/* + * Retrieves a module defined in the top-level scope (Object). + * + * @param mrb The mruby state. + * @param name The name of the module to retrieve. + * @return A pointer to the RClass structure of the found module. + * @raise TypeError if the constant found is not a module. + * @raise NameError if the constant is not found. + */ MRB_API struct RClass* mrb_module_get(mrb_state *mrb, const char *name) { return mrb_module_get_under(mrb, mrb->object_class, name); } +/* + * Retrieves a module defined in the top-level scope (Object) using a symbol for the name. + * + * @param mrb The mruby state. + * @param name The symbol representing the name of the module to retrieve. + * @return A pointer to the RClass structure of the found module. + * @raise TypeError if the constant found is not a module. + * @raise NameError if the constant is not found. + */ MRB_API struct RClass* mrb_module_get_id(mrb_state *mrb, mrb_sym name) { return mrb_module_get_under_id(mrb, mrb->object_class, name); } -/*! - * Defines a class under the namespace of \a outer. - * \param outer a class which contains the new class. - * \param name name of the new class - * \param super a class from which the new class will derive. - * NULL means \c Object class. - * \return the created class - * \throw TypeError if the constant name \a name is already taken but - * the constant is not a \c Class. - * \throw NameError if the class is already defined but the class can not - * be reopened because its superclass is not \a super. - * \post top-level constant named \a name refers the returned class. +/* + * Defines a class under the namespace of outer. * - * \note if a class named \a name is already defined and its superclass is - * \a super, the function just returns the defined class. + * @param mrb The mruby state. + * @param outer A pointer to the RClass structure of the outer module/class. + * @param name The symbol representing the name of the class to define. + * @param super A pointer to the RClass structure of the superclass. + * If NULL, Object is assumed as the superclass. + * @return A pointer to the newly defined or existing RClass structure for the class. + * @raise TypeError if a constant with the same name exists but is not a class. + * @raise NameError if the class is already defined but with a different superclass. + * @sideeffect Creates a new class or returns an existing one if compatible. + * The class is set as a constant in `outer`. + * If a class with the same name is already defined and its superclass + * matches `super`, the existing class is returned. */ MRB_API struct RClass* mrb_define_class_under_id(mrb_state *mrb, struct RClass *outer, mrb_sym name, struct RClass *super) { struct RClass * c; -#if 0 +#if 0 /* Warning is disabled by default, but can be enabled for debugging. */ if (!super) { - mrb_warn(mrb, "no super class for '%C::%n', Object assumed", outer, id); + /* Emits a warning if no superclass is provided, assuming Object. */ + mrb_warn(mrb, "no super class for '%C::%n', Object assumed", outer, name); } #endif c = define_class(mrb, name, super, outer); - setup_class(mrb, outer, c, name); + setup_class(mrb, outer, c, name); /* This sets the constant in outer */ return c; } +/* + * Defines a class under the namespace of outer using a C string for the name. + * + * @param mrb The mruby state. + * @param outer A pointer to the RClass structure of the outer module/class. + * @param name The C string representing the name of the class to define. + * @param super A pointer to the RClass structure of the superclass. + * If NULL, Object is assumed as the superclass. + * @return A pointer to the newly defined or existing RClass structure for the class. + * @raise TypeError if a constant with the same name exists but is not a class. + * @raise NameError if the class is already defined but with a different superclass. + * @sideeffect Creates a new class or returns an existing one if compatible. + * The class is set as a constant in `outer`. + */ MRB_API struct RClass* mrb_define_class_under(mrb_state *mrb, struct RClass *outer, const char *name, struct RClass *super) { return mrb_define_class_under_id(mrb, outer, mrb_intern_cstr(mrb, name), super); } +static mrb_bool +check_visibility_break(const struct RProc *p, const struct RClass *c, mrb_callinfo *ci, struct REnv *env) +{ + if (!p || p->upper == NULL || MRB_PROC_SCOPE_P(p) || p->e.env == NULL || !MRB_PROC_ENV_P(p)) { + return TRUE; + } + if (env) { + return p->e.env->c != c || MRB_ENV_VISIBILITY_BREAK_P(env); + } + return mrb_vm_ci_target_class(ci) != c || MRB_CI_VISIBILITY_BREAK_P(ci); +} + +static void +find_visibility_scope(mrb_state *mrb, const struct RClass *c, int n, mrb_callinfo **cp, struct REnv **ep) +{ + const struct mrb_context *ec = mrb->c; + mrb_callinfo *ci = ec->ci - n; + const struct RProc *p = ci->proc; + + if (c == NULL) c = mrb_vm_ci_target_class(ci); + + if (check_visibility_break(p, c, ci, NULL)) { + *ep = (ci->u.env && ci->u.env->tt == MRB_TT_ENV) ? ci->u.env : NULL; + *cp = ci; + return; + } + + for (;;) { + struct REnv *env = p->e.env; + p = p->upper; + if (check_visibility_break(p, c, ci, env)) { + *ep = env; + *cp = NULL; + return; + } + } +} + +/* + * Defines a method with raw mrb_method_t structure. + * This is a low-level function for method definition. + * + * @param mrb The mruby state. + * @param c The class/module in which to define the method. + * @param mid The symbol ID of the method name. + * @param m The mrb_method_t structure representing the method. + * @sideeffect Modifies the method table of the class/module `c`. + * Clears the method cache for `mid`. + * If `mid` is `initialize`, the method is automatically set to private. + * If the method visibility is default, it's determined by the current scope. + * @raise TypeError if the class/module or its attached object (for singleton classes) is frozen. + */ MRB_API void mrb_define_method_raw(mrb_state *mrb, struct RClass *c, mrb_sym mid, mrb_method_t m) { - mt_tbl *h; - union mt_ptr ptr; + union mrb_mt_ptr ptr; MRB_CLASS_ORIGIN(c); - h = c->mt; - mrb_check_frozen(mrb, c); - if (!h) h = c->mt = mt_new(mrb); + + mrb_mt_tbl *h = c->mt; + if (c->tt == MRB_TT_SCLASS && mrb_frozen_p(c)) { + mrb_value v = mrb_iv_get(mrb, mrb_obj_value(c), MRB_SYM(__attached__)); + mrb_check_frozen_value(mrb, v); + } + else { + mrb_check_frozen(mrb, c); + } + if (!h) { + h = c->mt = mt_new(mrb); + } + else if (mt_frozen_p(h)) { + /* unfreeze heap-allocated frozen layer to preserve c->mt pointer + * (iclasses hold a copy of the mt pointer for included modules) */ + h->alloc &= ~MRB_MT_FROZEN_BIT; + } + else if (mt_readonly_p(h)) { + /* COW: create mutable top layer, chain to ROM */ + mrb_mt_tbl *top = mt_new(mrb); + top->next = h; + h = c->mt = top; + } if (MRB_METHOD_PROC_P(m)) { - struct RProc *p = MRB_METHOD_PROC(m); + struct RProc *p = (struct RProc*)MRB_METHOD_PROC(m); ptr.proc = p; if (p) { - if (p->color != MRB_GC_RED) { + if (p->gc_color != MRB_GC_RED) { p->flags |= MRB_PROC_SCOPE; p->c = NULL; mrb_field_write_barrier(mrb, (struct RBasic*)c, (struct RBasic*)p); @@ -767,7 +1035,7 @@ mrb_define_method_raw(mrb_state *mrb, struct RClass *c, mrb_sym mid, mrb_method_ } } else { - mrb_assert(MRB_FROZEN_P(p) && MRB_PROC_SCOPE_P(p)); + mrb_assert(mrb_frozen_p(p) && MRB_PROC_SCOPE_P(p)); mrb_assert(p->c == NULL && p->upper == NULL && p->e.target_class == NULL); } } @@ -775,34 +1043,123 @@ mrb_define_method_raw(mrb_state *mrb, struct RClass *c, mrb_sym mid, mrb_method_ else { ptr.func = MRB_METHOD_FUNC(m); } - mt_put(mrb, h, mid, MT_FLAGS(MRB_METHOD_FUNC_P(m), MRB_METHOD_NOARG_P(m)), ptr); - mc_clear_by_id(mrb, mid); + + int flags = m.flags; + if (mid == MRB_SYM(initialize) || + mid == MRB_SYM(initialize_copy) || + mid == MRB_SYM_Q(respond_to_missing)) { + MRB_SET_VISIBILITY_FLAGS(flags, MRB_METHOD_PRIVATE_FL); + } + else if ((flags & MT_VMASK) == MT_VDEFAULT) { + /* singleton methods are always public */ + if (c->tt == MRB_TT_SCLASS) { + MRB_SET_VISIBILITY_FLAGS(flags, MRB_METHOD_PUBLIC_FL); + } + else { + mrb_callinfo *ci; + struct REnv *e; + find_visibility_scope(mrb, c, 0, &ci, &e); + mrb_assert(ci || e); + MRB_SET_VISIBILITY_FLAGS(flags, (uint32_t)(e ? MRB_ENV_VISIBILITY(e) : MRB_CI_VISIBILITY(ci)) << 25); + } + } + mt_put(mrb, h, mid, flags, ptr); + if (!mrb->bootstrapping) mc_clear_by_id(mrb, mid); } -MRB_API void -mrb_define_method_id(mrb_state *mrb, struct RClass *c, mrb_sym mid, mrb_func_t func, mrb_aspec aspec) +static void +define_method_id(mrb_state *mrb, struct RClass *c, mrb_sym mid, mrb_func_t func, mrb_aspec aspec, int vis) { mrb_method_t m; int ai = mrb_gc_arena_save(mrb); MRB_METHOD_FROM_FUNC(m, func); -#ifndef MRB_USE_METHOD_T_STRUCT - mrb_assert(MRB_METHOD_FUNC(m) == func); -#endif - if (aspec == MRB_ARGS_NONE()) { - MRB_METHOD_NOARG_SET(m); - } + m.flags |= aspec; + MRB_METHOD_SET_VISIBILITY(m, vis); mrb_define_method_raw(mrb, c, mid, m); mrb_gc_arena_restore(mrb, ai); } +/* + * Defines a public C function as a method for a class/module using a symbol for the name. + * + * @param mrb The mruby state. + * @param c The class/module in which to define the method. + * @param mid The symbol ID of the method name. + * @param func The C function pointer (mrb_func_t) for the method body. + * @param aspec The argument specification for the method (e.g., MRB_ARGS_REQ(1)). + * @sideeffect Modifies the method table of the class/module `c`. + * Clears the method cache for `mid`. + */ +MRB_API void +mrb_define_method_id(mrb_state *mrb, struct RClass *c, mrb_sym mid, mrb_func_t func, mrb_aspec aspec) +{ + define_method_id(mrb, c, mid, func, aspec, MRB_METHOD_PUBLIC_FL); +} + +/* + * Defines a public C function as a method for a class/module. + * + * @param mrb The mruby state. + * @param c The class/module in which to define the method. + * @param name The C string name of the method. + * @param func The C function pointer (mrb_func_t) for the method body. + * @param aspec The argument specification for the method (e.g., MRB_ARGS_REQ(1)). + * @sideeffect Modifies the method table of the class/module `c`. + * Interns the method name string. + * Clears the method cache for the interned method name. + */ MRB_API void mrb_define_method(mrb_state *mrb, struct RClass *c, const char *name, mrb_func_t func, mrb_aspec aspec) { mrb_define_method_id(mrb, c, mrb_intern_cstr(mrb, name), func, aspec); } -/* a function to raise NotImplementedError with current method name */ +/* + * Defines a private C function as a method for a class/module using a symbol for the name. + * + * @param mrb The mruby state. + * @param c The class/module in which to define the method. + * @param mid The symbol ID of the method name. + * @param func The C function pointer (mrb_func_t) for the method body. + * @param aspec The argument specification for the method (e.g., MRB_ARGS_REQ(1)). + * @sideeffect Modifies the method table of the class/module `c`. + * Clears the method cache for `mid`. + */ +MRB_API void +mrb_define_private_method_id(mrb_state *mrb, struct RClass *c, mrb_sym mid, mrb_func_t func, mrb_aspec aspec) +{ + define_method_id(mrb, c, mid, func, aspec, MRB_METHOD_PRIVATE_FL); +} + +/* + * Defines a private C function as a method for a class/module. + * + * @param mrb The mruby state. + * @param c The class/module in which to define the method. + * @param name The C string name of the method. + * @param func The C function pointer (mrb_func_t) for the method body. + * @param aspec The argument specification for the method (e.g., MRB_ARGS_REQ(1)). + * @sideeffect Modifies the method table of the class/module `c`. + * Interns the method name string. + * Clears the method cache for the interned method name. + */ +MRB_API void +mrb_define_private_method(mrb_state *mrb, struct RClass *c, const char *name, mrb_func_t func, mrb_aspec aspec) +{ + mrb_define_private_method_id(mrb, c, mrb_intern_cstr(mrb, name), func, aspec); +} + +/* + * Raises a NotImplementedError, typically indicating that the C function + * called by Ruby is not implemented for the current platform or build. + * The error message will include the name of the Ruby method that called this C function. + * + * @param mrb The mruby state. + * @sideeffect Raises a NotImplementedError exception. This function does not return. + * If a method name is available from the callinfo, it's included + * in the error message (e.g., "foo() function is unimplemented on this machine"). + */ MRB_API void mrb_notimplement(mrb_state *mrb) { @@ -813,7 +1170,15 @@ mrb_notimplement(mrb_state *mrb) } } -/* a function to be replacement of unimplemented method */ +/* + * A C function suitable for use as a method body (mrb_func_t) + * that raises a NotImplementedError. + * + * @param mrb The mruby state. + * @param self The receiver of the method call (unused). + * @return This function does not return, as it raises an exception. + * @sideeffect Raises a NotImplementedError exception via `mrb_notimplement`. + */ MRB_API mrb_value mrb_notimplement_m(mrb_state *mrb, mrb_value self) { @@ -832,6 +1197,17 @@ ensure_class_type(mrb_state *mrb, mrb_value val) #define to_sym(mrb, ss) mrb_obj_to_sym(mrb, ss) +/* + * Gets the number of arguments passed to the current C function call. + * + * This function retrieves the argument count from the current callinfo (`ci`) + * in the mruby state. It correctly handles the case where arguments might be + * packed into an array by the caller (indicated by `ci->n == 15`), in which + * case it gets the length of that array. + * + * @param mrb The mruby state. + * @return The number of arguments passed to the C function. + */ MRB_API mrb_int mrb_get_argc(mrb_state *mrb) { @@ -840,11 +1216,26 @@ mrb_get_argc(mrb_state *mrb) if (argc == 15) { struct RArray *a = mrb_ary_ptr(mrb->c->ci->stack[1]); + a->c = NULL; /* hide from ObjectSpace.each_object */ argc = ARY_LEN(a); } return argc; } +/* + * Gets a pointer to the array of arguments passed to the current C function call. + * + * This function retrieves the arguments from the current callinfo stack. + * It handles the case where arguments might be packed into an array + * (when `ci->n == 15`), returning a pointer to the elements of that array. + * Otherwise, it returns a pointer to the arguments on the stack. + * + * @param mrb The mruby state. + * @return A const pointer to the array of mrb_value arguments. + * The caller should not modify the contents of this array. + * @note If arguments were packed, the RArray object on stack has its class pointer + * temporarily set to NULL to hide it from `ObjectSpace.each_object`. + */ MRB_API const mrb_value* mrb_get_argv(mrb_state *mrb) { @@ -853,11 +1244,25 @@ mrb_get_argv(mrb_state *mrb) if (argc == 15) { struct RArray *a = mrb_ary_ptr(*array_argv); + a->c = NULL; /* hide from ObjectSpace.each_object */ array_argv = ARY_PTR(a); } return array_argv; } +/* + * Gets the first argument passed to the current C function call. + * + * This is a convenience function for directly accessing the first argument. + * It handles cases where arguments might be packed into an array or + * if the first argument is a keyword hash. + * + * @param mrb The mruby state. + * @return The first mrb_value argument. + * @raise ArgumentError if the number of positional arguments is not 1, + * unless there are no positional arguments but a keyword hash is present, + * in which case the keyword hash is returned. + */ MRB_API mrb_value mrb_get_arg1(mrb_state *mrb) { @@ -881,6 +1286,14 @@ mrb_get_arg1(mrb_state *mrb) return array_argv[0]; } +/* + * Checks if a block was passed to the current C function call. + * + * It inspects the current callinfo stack for a block argument. + * + * @param mrb The mruby state. + * @return TRUE if a block is present (i.e., not nil), FALSE otherwise. + */ MRB_API mrb_bool mrb_block_given_p(mrb_state *mrb) { @@ -890,11 +1303,171 @@ mrb_block_given_p(mrb_state *mrb) return !mrb_nil_p(b); } -#define GET_ARG(_type) (ptr ? ((_type)(*ptr++)) : va_arg(ap, _type)) +#define GET_ARG(_type) (ptr ? ((_type)(*ptr++)) : va_arg((*ap), _type)) + +/* + * Per-character validation for the mrb_get_args fast path. + * Written as a switch rather than an array-designator lookup table so + * that the file can be compiled as C++ (array-index designators are a + * C99-only feature). Modern compilers typically lower this to a jump + * table, giving the same effective O(1) behavior as the original table. + * Returns 1 for a valid arg specifier, 2 for the separator, 0 otherwise. + */ +static inline uint8_t +fast_fmt_ok(char c) +{ + switch (c) { + case 'o': case 'S': case 'A': case 'H': case 'i': case 'b': + case 'f': case 'n': case 'z': case 'c': case 's': case 'a': + return 1; + case '|': + return 2; + default: + return 0; + } +} + +/* + * Fast path for simple format strings (no *, :, !, +, &, ?). + * Handles the most common patterns directly in one pass, + * skipping the two-pass format scanning of the general path. + * + * Returns -1 if the format is not eligible for fast path. + */ +static mrb_int +get_args_fast(mrb_state *mrb, const char *format, void** ptr, va_list *ap) +{ + mrb_callinfo *ci = mrb->c->ci; + mrb_int argc = ci->n; + const mrb_value *argv; + mrb_int i = 0; + + /* fast path only for non-packed, non-keyword args */ + if (argc >= 15 || ci->nk > 0) return -1; + argv = ci->stack + 1; + + /* validate format and count args in one scan (table lookup, no switch) */ + const char *p = format; + int req = 0, opt = 0; + mrb_bool in_opt = FALSE; + while (*p) { + uint8_t v = fast_fmt_ok(*p); + if (v == 0) return -1; /* unsupported specifier */ + if (v == 2) { in_opt = TRUE; p++; continue; } + if (in_opt) opt++; else req++; + p++; + } + if (argc < req || argc > req + opt) { + mrb_argnum_error(mrb, argc, req, req + opt); + } + + /* all specifiers validated — safe to consume GET_ARG now */ + p = format; + while (*p) { + char c = *p++; + if (c == '|') continue; + if (i >= argc) { + /* skip remaining optional args (just consume GET_ARG pointers) */ + switch (c) { + case 's': case 'a': + (void)GET_ARG(void*); + (void)GET_ARG(void*); + break; + default: + (void)GET_ARG(void*); + break; + } + continue; + } + switch (c) { + case 'o': { + mrb_value *vp = GET_ARG(mrb_value*); + *vp = argv[i++]; + break; + } + case 'S': { + mrb_value *vp = GET_ARG(mrb_value*); + mrb_ensure_string_type(mrb, argv[i]); + *vp = argv[i++]; + break; + } + case 'A': { + mrb_value *vp = GET_ARG(mrb_value*); + mrb_ensure_array_type(mrb, argv[i]); + *vp = argv[i++]; + break; + } + case 'H': { + mrb_value *vp = GET_ARG(mrb_value*); + mrb_ensure_hash_type(mrb, argv[i]); + *vp = argv[i++]; + break; + } + case 'i': { + mrb_int *ip = GET_ARG(mrb_int*); + *ip = mrb_as_int(mrb, argv[i++]); + break; + } + case 'b': { + mrb_bool *bp = GET_ARG(mrb_bool*); + *bp = mrb_test(argv[i++]); + break; + } + case 'f': { + mrb_float *fp = GET_ARG(mrb_float*); + *fp = mrb_as_float(mrb, argv[i++]); + break; + } + case 'n': { + mrb_sym *np = GET_ARG(mrb_sym*); + *np = to_sym(mrb, argv[i++]); + break; + } + case 'z': { + const char **zp = GET_ARG(const char**); + mrb_ensure_string_type(mrb, argv[i]); + *zp = RSTRING_CSTR(mrb, argv[i++]); + break; + } + case 's': { + const char **sp = GET_ARG(const char**); + mrb_int *lp = GET_ARG(mrb_int*); + mrb_ensure_string_type(mrb, argv[i]); + *sp = RSTRING_PTR(argv[i]); + *lp = RSTRING_LEN(argv[i]); + i++; + break; + } + case 'a': { + const mrb_value **pb = GET_ARG(const mrb_value**); + mrb_int *pl = GET_ARG(mrb_int*); + mrb_ensure_array_type(mrb, argv[i]); + struct RArray *a = mrb_ary_ptr(argv[i]); + *pb = ARY_PTR(a); + *pl = ARY_LEN(a); + i++; + break; + } + case 'c': { + struct RClass **cp = GET_ARG(struct RClass**); + ensure_class_type(mrb, argv[i]); + *cp = mrb_class_ptr(argv[i++]); + break; + } + default: + return -1; /* unknown specifier, fall back to slow path */ + } + } + return i; +} static mrb_int -get_args_v(mrb_state *mrb, mrb_args_format format, void** ptr, va_list ap) +get_args_v(mrb_state *mrb, mrb_args_format format, void** ptr, va_list *ap) { + /* try fast path first */ + mrb_int fast = get_args_fast(mrb, format, ptr, ap); + if (fast >= 0) return fast; + const char *fmt = format; char c; mrb_int i = 0; @@ -971,6 +1544,7 @@ get_args_v(mrb_state *mrb, mrb_args_format format, void** ptr, va_list ap) struct RArray *a = mrb_ary_ptr(*argv); argv = ARY_PTR(a); argc = ARY_LEN(a); + a->c = NULL; /* hide from ObjectSpace.each_object */ } opt = FALSE; @@ -1006,10 +1580,7 @@ get_args_v(mrb_state *mrb, mrb_args_format format, void** ptr, va_list ap) if (i < argc) { pickarg = &argv[i++]; if (needmodify && !mrb_nil_p(*pickarg)) { - if (mrb_immediate_p(*pickarg)) { - mrb_raisef(mrb, E_FROZEN_ERROR, "can't modify frozen %t", *pickarg); - } - mrb_check_frozen(mrb, mrb_obj_ptr(*pickarg)); + mrb_check_frozen_value(mrb, *pickarg); } } else { @@ -1064,8 +1635,8 @@ get_args_v(mrb_state *mrb, mrb_args_format format, void** ptr, va_list ap) break; case 's': { - const char **ps = 0; - mrb_int *pl = 0; + const char **ps = NULL; + mrb_int *pl = NULL; ps = GET_ARG(const char**); pl = GET_ARG(mrb_int*); @@ -1111,7 +1682,7 @@ get_args_v(mrb_state *mrb, mrb_args_format format, void** ptr, va_list ap) if (needmodify) goto bad_needmodify; if (pickarg) { if (altmode && mrb_nil_p(*pickarg)) { - *pb = 0; + *pb = NULL; *pl = 0; } else { @@ -1173,7 +1744,7 @@ get_args_v(mrb_state *mrb, mrb_args_format format, void** ptr, va_list ap) type = GET_ARG(struct mrb_data_type const*); if (pickarg) { if (altmode && mrb_nil_p(*pickarg)) { - *datap = 0; + *datap = NULL; } else { *datap = mrb_data_get_ptr(mrb, *pickarg, type); @@ -1238,7 +1809,7 @@ get_args_v(mrb_state *mrb, mrb_args_format format, void** ptr, va_list ap) case ':': { - mrb_value ksrc = mrb_hash_p(kdict) ? mrb_hash_dup(mrb, kdict) : mrb_hash_new(mrb); + mrb_value ksrc = mrb_hash_p(kdict) ? kdict : mrb_hash_new(mrb); const mrb_kwargs *kwargs = GET_ARG(const mrb_kwargs*); mrb_value *rest; @@ -1246,18 +1817,19 @@ get_args_v(mrb_state *mrb, mrb_args_format format, void** ptr, va_list ap) rest = NULL; } else { - uint32_t kwnum = kwargs->num; - uint32_t required = kwargs->required; + mrb_int kwnum = kwargs->num; + mrb_int required = kwargs->required; const mrb_sym *kname = kwargs->table; mrb_value *values = kwargs->values; - uint32_t j; - const uint32_t keyword_max = 40; + const mrb_int keyword_max = 40; + mrb_assert(kwnum >= 0); + mrb_assert(required >= 0); if (kwnum > keyword_max || required > kwnum) { mrb_raise(mrb, E_ARGUMENT_ERROR, "keyword number is too large"); } - for (j = required; j > 0; j--, kname++, values++) { + for (mrb_int j = required; j > 0; j--, kname++, values++) { mrb_value k = mrb_symbol_value(*kname); if (!mrb_hash_key_p(mrb, ksrc, k)) { mrb_raisef(mrb, E_ARGUMENT_ERROR, "missing keyword: %n", *kname); @@ -1266,7 +1838,7 @@ get_args_v(mrb_state *mrb, mrb_args_format format, void** ptr, va_list ap) mrb_gc_protect(mrb, *values); } - for (j = kwnum - required; j > 0; j--, kname++, values++) { + for (mrb_int j = kwnum - required; j > 0; j--, kname++, values++) { mrb_value k = mrb_symbol_value(*kname); if (mrb_hash_key_p(mrb, ksrc, k)) { *values = mrb_hash_delete_key(mrb, ksrc, k); @@ -1284,8 +1856,7 @@ get_args_v(mrb_state *mrb, mrb_args_format format, void** ptr, va_list ap) *rest = ksrc; } else if (!mrb_hash_empty_p(mrb, ksrc)) { - ksrc = mrb_hash_keys(mrb, ksrc); - ksrc = RARRAY_PTR(ksrc)[0]; + ksrc = mrb_hash_first_key(mrb, ksrc); mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown keyword: %v", ksrc); } } @@ -1324,7 +1895,7 @@ get_args_v(mrb_state *mrb, mrb_args_format format, void** ptr, va_list ap) s: String [const char*,mrb_int] Receive two arguments; s! gives (NULL,0) for nil z: String [const char*] NUL terminated string; z! gives NULL for nil a: Array [const mrb_value*,mrb_int] Receive two arguments; a! gives (NULL,0) for nil - c: Class/Module [strcut RClass*] c! gives NULL for nil + c: Class/Module [struct RClass*] c! gives NULL for nil f: Integer/Float [mrb_float] i: Integer/Float [mrb_int] b: boolean [mrb_bool] @@ -1343,36 +1914,108 @@ get_args_v(mrb_state *mrb, mrb_args_format format, void** ptr, va_list ap) !: Switch to the alternate mode; The behaviour changes depending on the specifier +: Request a not frozen object; However, except nil value */ +/* + * Retrieves and parses arguments passed to a C function based on a given format string. + * This is the primary and most flexible way for C extensions to handle arguments + * passed from Ruby method calls. + * + * @param mrb The mruby state. + * @param format A C string that specifies the expected arguments and their types. + * See below for detailed format specifiers and modifiers. + * @param ... A variable number of pointer arguments, corresponding to the types + * specified in the format string, where the parsed values will be stored. + * @return The number of arguments successfully parsed and assigned from the Ruby stack + * to the C variables. + * @raise ArgumentError if the passed arguments do not match the format string, + * if there are type mismatches, or if required arguments are missing. + * @sideeffect Arguments from the mruby stack are converted and stored in the C variables + * provided via `...`. The mruby garbage collector arena might be saved + * and restored during this process. Keyword argument processing might + * involve hash duplication or key deletion. + * + * Format Specifiers (within the `format` string): + * 'o': Object (expects mrb_value*) + * 'C': Class/Module (expects mrb_value*). Use 'c' for `struct RClass*`. + * 'S': String (expects mrb_value*) + * 'A': Array (expects mrb_value*) + * 'H': Hash (expects mrb_value*) + * 's': String (expects const char**, mrb_int* for pointer and length) + * 'z': String (expects const char** for a NUL-terminated string) + * 'a': Array (expects const mrb_value**, mrb_int* for pointer and length) + * 'c': Class/Module (expects struct RClass**) + * 'f': Float (expects mrb_float*) - available if MRB_NO_FLOAT is not defined. + * 'i': Integer (expects mrb_int*) + * 'b': Boolean (expects mrb_bool*) + * 'n': Symbol (expects mrb_sym*) - converts from String or Symbol argument. + * 'd': Data (expects void**, const struct mrb_data_type*). The second argument is used + * for type checking and is not modified. + * '&': Block (expects mrb_value*) - retrieves the block passed to the method. + * '*': Rest arguments (expects const mrb_value**, mrb_int*) - captures all remaining + * positional arguments into an array. + * '|': Optional arguments separator. Arguments following this are optional. + * '?': Optional given (expects mrb_bool*) - sets to TRUE if the preceding optional + * argument was provided, FALSE otherwise. + * ':': Keyword arguments (expects const mrb_kwargs*). Used to retrieve keyword arguments. + * See mrb_kwargs structure for details. + * + * Format Modifiers (prefix the specifier, e.g., "!s" or "c!"): + * '!': Alternate mode. Behavior changes depending on the specifier. + * For example, 's!' gives (NULL, 0) for a nil string. 'c!' gives NULL for nil. + * '&!' raises an ArgumentError if no block is given. + * '*!' avoids copying the rest arguments from the stack if possible. + * '+': Request a modifiable (not frozen) object. Raises a FrozenError if the + * retrieved object is frozen (this check does not apply to nil values). + */ MRB_API mrb_int mrb_get_args(mrb_state *mrb, mrb_args_format format, ...) { va_list ap; va_start(ap, format); - mrb_int rc = get_args_v(mrb, format, NULL, ap); + mrb_int rc = get_args_v(mrb, format, NULL, &ap); va_end(ap); return rc; } +/* + * Retrieves and parses arguments passed to a C function according to a format string, + * taking a `void**` array for the output variables instead of `va_list`. + * This version is useful when the argument parsing needs to be done in a more + * programmatic way, or when wrapping `mrb_get_args`. + * + * @param mrb The mruby state. + * @param format A C string specifying the expected arguments. See `mrb_get_args` + * documentation for format specifiers and modifiers. + * @param args An array of `void*` pointers to variables where the parsed arguments + * will be stored. The types of these variables must correspond to the + * specifiers in the `format` string. + * @return The number of arguments successfully parsed and assigned. + * @raise ArgumentError if arguments do not match the format string, or if there are + * type mismatches. + * @sideeffect Arguments from the mruby stack are converted and stored in the C variables + * pointed to by the elements of the `args` array. + * (See `mrb_get_args` for more details on side effects like GC arena handling + * and keyword argument processing). + */ MRB_API mrb_int -mrb_get_args_a(mrb_state *mrb, mrb_args_format format, void** args) +mrb_get_args_a(mrb_state *mrb, mrb_args_format format, void **args) { - va_list ap; - return get_args_v(mrb, format, args, ap); + return get_args_v(mrb, format, args, NULL); } static struct RClass* -boot_defclass(mrb_state *mrb, struct RClass *super) +boot_defclass(mrb_state *mrb, struct RClass *super, enum mrb_vtype tt) { - struct RClass *c; + struct RClass *c = MRB_OBJ_ALLOC(mrb, MRB_TT_CLASS, mrb->class_class); + MRB_SET_INSTANCE_TT(c, tt); - c = MRB_OBJ_ALLOC(mrb, MRB_TT_CLASS, mrb->class_class); if (super) { c->super = super; mrb_field_write_barrier(mrb, (struct RBasic*)c, (struct RBasic*)super); c->flags |= MRB_FL_CLASS_IS_INHERITED; } else { - c->super = mrb->object_class; + // limited to cases where BasicObject class is defined during mruby initialization + mrb_assert(mrb->object_class == NULL); } c->mt = mt_new(mrb); return c; @@ -1394,7 +2037,6 @@ include_class_new(mrb_state *mrb, struct RClass *m, struct RClass *super) m = m->c; } MRB_CLASS_ORIGIN(m); - ic->iv = m->iv; ic->mt = m->mt; ic->super = super; if (m->tt == MRB_TT_ICLASS) { @@ -1409,21 +2051,20 @@ include_class_new(mrb_state *mrb, struct RClass *m, struct RClass *super) static int include_module_at(mrb_state *mrb, struct RClass *c, struct RClass *ins_pos, struct RClass *m, int search_super) { - struct RClass *p, *ic; + struct RClass *ic; void *klass_mt = find_origin(c)->mt; while (m) { + struct RClass *p = c->super; int original_seen = FALSE; int superclass_seen = FALSE; if (c == ins_pos) original_seen = TRUE; if (m->flags & MRB_FL_CLASS_IS_PREPENDED) goto skip; - if (klass_mt && klass_mt == m->mt) return -1; - p = c->super; while (p) { if (c == p) original_seen = TRUE; if (p->tt == MRB_TT_ICLASS) { @@ -1433,7 +2074,8 @@ include_module_at(mrb_state *mrb, struct RClass *c, struct RClass *ins_pos, stru } goto skip; } - } else if (p->tt == MRB_TT_CLASS) { + } + else if (p->tt == MRB_TT_CLASS) { if (!search_super) break; superclass_seen = TRUE; } @@ -1448,7 +2090,7 @@ include_module_at(mrb_state *mrb, struct RClass *c, struct RClass *ins_pos, stru skip: m = m->super; } - mc_clear(mrb); + if (!mrb->bootstrapping) mrb_method_cache_clear(mrb); return 0; } @@ -1464,6 +2106,23 @@ fix_include_module(mrb_state *mrb, struct RBasic *obj, void *data) return MRB_EACH_OBJ_OK; } +/* + * Includes a module into a class or another module. + * This adds the methods and constants of module `m` to class `c` (or module `c`). + * The included module's instance methods become instance methods of `c`. + * + * @param mrb The mruby state. + * @param c The target class or module into which module `m` will be included. + * @param m The module to include. Must be a module (MRB_TT_MODULE). + * @raise ArgumentError if `m` is not a module or if a cyclic include is detected. + * @raise FrozenError if class/module `c` is frozen. + * @sideeffect Modifies the ancestor chain of `c` by inserting an ICLASS (inclusion class) + * that references `m`'s method table. + * Clears the method cache. + * If `m` defines an `included` hook, it will be called with `c` as an argument. + * If `c` is a module that has itself been included in other classes/modules, + * this operation will also propagate the inclusion of `m` to those descendants. + */ MRB_API void mrb_include_module(mrb_state *mrb, struct RClass *c, struct RClass *m) { @@ -1471,6 +2130,7 @@ mrb_include_module(mrb_state *mrb, struct RClass *c, struct RClass *m) if (include_module_at(mrb, c, find_origin(c), m, 1) < 0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "cyclic include detected"); } + mrb_const_cache_clear(mrb); if (c->tt == MRB_TT_MODULE && (c->flags & MRB_FL_CLASS_IS_INHERITED)) { struct RClass *data[2]; data[0] = c; @@ -1505,34 +2165,42 @@ fix_prepend_module(mrb_state *mrb, struct RBasic *obj, void *data) return MRB_EACH_OBJ_OK; } +/* + * Prepends a module to a class or another module. + * Methods in the prepended module `m` will override methods of the same name in `c`. + * In the ancestor chain, the prepended module appears before the class/module itself. + * + * @param mrb The mruby state. + * @param c The target class or module to which module `m` will be prepended. + * @param m The module to prepend. Must be a module (MRB_TT_MODULE). + * @raise ArgumentError if `m` is not a module or if a cyclic prepend is detected. + * @raise FrozenError if class/module `c` is frozen. + * @sideeffect Modifies the ancestor chain of `c`. If `c` hasn't been prepended before, + * an "origin" ICLASS is created to hold `c`'s original methods, and `c`'s + * method table is cleared. Then, an ICLASS for `m` is inserted above `c`. + * Clears the method cache. + * If `m` defines a `prepended` hook, it will be called with `c` as an argument. + * If `c` is a module that has been included/prepended elsewhere, this + * operation propagates the prepending of `m` to those descendants. + */ MRB_API void mrb_prepend_module(mrb_state *mrb, struct RClass *c, struct RClass *m) { - struct RClass *origin; - mrb_check_frozen(mrb, c); if (!(c->flags & MRB_FL_CLASS_IS_PREPENDED)) { - struct RClass *c0; - - if (c->tt == MRB_TT_ICLASS) { - c0 = c->c; - } - else { - c0 = c; - } - origin = MRB_OBJ_ALLOC(mrb, MRB_TT_ICLASS, c0); + struct RClass *origin = MRB_OBJ_ALLOC(mrb, MRB_TT_ICLASS, c); origin->flags |= MRB_FL_CLASS_IS_ORIGIN | MRB_FL_CLASS_IS_INHERITED; origin->super = c->super; c->super = origin; origin->mt = c->mt; c->mt = NULL; - origin->iv = c->iv; mrb_field_write_barrier(mrb, (struct RBasic*)c, (struct RBasic*)origin); c->flags |= MRB_FL_CLASS_IS_PREPENDED; } if (include_module_at(mrb, c, c, m, 0) < 0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "cyclic prepend detected"); } + mrb_const_cache_clear(mrb); if (c->tt == MRB_TT_MODULE && (c->flags & (MRB_FL_CLASS_IS_INHERITED|MRB_FL_CLASS_IS_PREPENDED))) { struct RClass *data[2]; @@ -1542,35 +2210,150 @@ mrb_prepend_module(mrb_state *mrb, struct RClass *c, struct RClass *m) } } +/* + * call-seq: + * mod.prepend(module, ...) -> self + * + * Invokes Module.prepend_features on each parameter in reverse order. + * + * module Mod + * def hello + * "Hello from Mod.\n" + * end + * end + * + * class Klass + * def hello + * "Hello from Klass.\n" + * end + * prepend Mod + * end + * Klass.new.hello #=> "Hello from Mod.\n" + */ static mrb_value -mrb_mod_prepend_features(mrb_state *mrb, mrb_value mod) +mrb_mod_prepend(mrb_state *mrb, mrb_value mod) { - struct RClass *c; + struct RClass *c = mrb_class_ptr(mod); + mrb_int argc; + mrb_value *argv; + mrb_sym prepended = MRB_SYM(prepended); - mrb_check_type(mrb, mod, MRB_TT_MODULE); - mrb_get_args(mrb, "c", &c); - mrb_prepend_module(mrb, c, mrb_class_ptr(mod)); + mrb_get_args(mrb, "*", &argv, &argc); + while (argc--) { + mrb_value m = argv[argc]; + mrb_check_type(mrb, m, MRB_TT_MODULE); + mrb_prepend_module(mrb, c, mrb_class_ptr(m)); + if (!mrb_func_basic_p(mrb, m, prepended, mrb_do_nothing)) { + mrb_funcall_argv(mrb, m, prepended, 1, &mod); + } + } return mod; } +/* + * call-seq: + * mod.include(module, ...) -> self + * + * Invokes Module.append_features on each parameter in reverse order. + * + * module Mod + * def hello + * "Hello from Mod.\n" + * end + * end + * + * class Klass + * include Mod + * end + * Klass.new.hello #=> "Hello from Mod.\n" + */ static mrb_value -mrb_mod_append_features(mrb_state *mrb, mrb_value mod) +mrb_mod_include(mrb_state *mrb, mrb_value mod) { - struct RClass *c; + struct RClass *c = mrb_class_ptr(mod); + mrb_int argc; + mrb_value *argv; + mrb_sym included = MRB_SYM(included); - mrb_check_type(mrb, mod, MRB_TT_MODULE); - mrb_get_args(mrb, "c", &c); - mrb_include_module(mrb, c, mrb_class_ptr(mod)); + mrb_get_args(mrb, "*", &argv, &argc); + while (argc--) { + mrb_value m = argv[argc]; + mrb_check_type(mrb, m, MRB_TT_MODULE); + mrb_include_module(mrb, c, mrb_class_ptr(m)); + if (!mrb_func_basic_p(mrb, m, included, mrb_do_nothing)) { + mrb_funcall_argv(mrb, m, included, 1, &mod); + } + } return mod; } +/* 15.3.1.3.13 */ +/* + * call-seq: + * obj.extend(module, ...) -> obj + * + * Adds to _obj_ the instance methods from each module given as a + * parameter. + * + * module Mod + * def hello + * "Hello from Mod.\n" + * end + * end + * + * class Klass + * def hello + * "Hello from Klass.\n" + * end + * end + * + * k = Klass.new + * k.hello #=> "Hello from Klass.\n" + * k.extend(Mod) #=> # + * k.hello #=> "Hello from Mod.\n" + * + */ +/* + * Adds the instance methods from one or more modules to the given object `obj`. + * This is achieved by including the specified modules into `obj`'s singleton class. + * + * @param mrb The mruby state. + * @param obj The object to extend. + * @return The extended object `obj`. + * @raise TypeError if any of the arguments passed for extension are not modules. + * @sideeffect Modifies the singleton class of `obj`. If the singleton class doesn't exist, + * it is created. The given modules are included into this singleton class. + * If any of the included modules define an `extended` hook, it is called + * with `obj` as an argument. + */ +mrb_value +mrb_obj_extend(mrb_state *mrb, mrb_value obj) +{ + mrb_int argc; + mrb_value *argv; + mrb_sym extended = MRB_SYM(extended); + + mrb_get_args(mrb, "*", &argv, &argc); + + mrb_value cc = mrb_singleton_class(mrb, obj); + while (argc--) { + mrb_value mod = argv[argc]; + mrb_check_type(mrb, mod, MRB_TT_MODULE); + mrb_include_module(mrb, mrb_class_ptr(cc), mrb_class_ptr(mod)); + if (!mrb_func_basic_p(mrb, mod, extended, mrb_do_nothing)) { + mrb_funcall_argv(mrb, mod, extended, 1, &obj); + } + } + return obj; +} + /* 15.2.2.4.28 */ /* * call-seq: * mod.include?(module) -> true or false * - * Returns true if module is included in - * mod or one of mod's ancestors. + * Returns `true` if *module* is included in + * *mod* or one of *mod*'s ancestors. * * module A * end @@ -1601,12 +2384,28 @@ mrb_mod_include_p(mrb_state *mrb, mrb_value mod) return mrb_false_value(); } +/* + * call-seq: + * mod.ancestors -> array + * + * Returns a list of modules included/prepended in mod (including mod itself). + * + * module Mod + * include Math + * include Comparable + * prepend Enumerable + * end + * + * Mod.ancestors #=> [Enumerable, Mod, Comparable, Math] + * Math.ancestors #=> [Math] + * Numeric.ancestors #=> [Numeric, Comparable] + */ static mrb_value mrb_mod_ancestors(mrb_state *mrb, mrb_value self) { - mrb_value result; struct RClass *c = mrb_class_ptr(self); - result = mrb_ary_new(mrb); + mrb_value result = mrb_ary_new(mrb); + while (c) { if (c->tt == MRB_TT_ICLASS) { mrb_ary_push(mrb, result, mrb_obj_value(c->c)); @@ -1620,16 +2419,6 @@ mrb_mod_ancestors(mrb_state *mrb, mrb_value self) return result; } -static mrb_value -mrb_mod_extend_object(mrb_state *mrb, mrb_value mod) -{ - mrb_value obj = mrb_get_arg1(mrb); - - mrb_check_type(mrb, mod, MRB_TT_MODULE); - mrb_include_module(mrb, mrb_class_ptr(mrb_singleton_class(mrb, obj)), mrb_class_ptr(mod)); - return mod; -} - static mrb_value mrb_mod_initialize(mrb_state *mrb, mrb_value mod) { @@ -1643,14 +2432,117 @@ mrb_mod_initialize(mrb_state *mrb, mrb_value mod) return mod; } +static void +mrb_mod_visibility(mrb_state *mrb, mrb_value mod, int vis) +{ + mrb_assert((vis&MT_VMASK)==vis); + mrb_int argc; + mrb_value *argv; + struct RClass *c = mrb_class_ptr(mod); + + mrb_get_args(mrb, "*!", &argv, &argc); + if (argc == 0) { + mrb_callinfo *ci; + struct REnv *e; + find_visibility_scope(mrb, NULL, 1, &ci, &e); + if (e) { + MRB_ENV_SET_VISIBILITY(e, vis >> 25); + } + else { + MRB_CI_SET_VISIBILITY(ci, vis >> 25); + } + } + else { + mrb_mt_tbl *h = c->mt; + for (int i=0; iobject_class); + mrb_mod_visibility(mrb, self, MRB_METHOD_PUBLIC_FL); + return self; +} + +static mrb_value +top_private(mrb_state *mrb, mrb_value self) +{ + self = mrb_obj_value(mrb->object_class); + mrb_mod_visibility(mrb, self, MRB_METHOD_PRIVATE_FL); + return self; +} + static mrb_value -mrb_mod_dummy_visibility(mrb_state *mrb, mrb_value mod) +top_protected(mrb_state *mrb, mrb_value self) { - return mod; + self = mrb_obj_value(mrb->object_class); + mrb_mod_visibility(mrb, self, MT_PROTECTED); + return self; } -/* returns mrb_class_ptr(mrb_singleton_class()) */ -/* except that it return NULL for immediate values */ +/* + * Retrieves a pointer to the singleton class (also known as metaclass or eigenclass) + * of a given object `v`. If the singleton class does not yet exist, it is created. + * + * Singleton classes are anonymous classes associated with a specific object, + * allowing that object to have its own unique methods. + * + * @param mrb The mruby state. + * @param v The `mrb_value` for which to get the singleton class. + * @return A pointer to the `RClass` structure of the singleton class. + * Returns `NULL` for immediate values (e.g., Symbols, Integers, + * Floats if not word-boxed, C pointers) as they cannot have singleton classes. + * For `nil`, `true`, and `false`, it returns their respective predefined + * classes (`mrb->nil_class`, `mrb->true_class`, `mrb->false_class`), + * which effectively act as their singleton classes. + * @sideeffect If the singleton class doesn't exist for `v` (and `v` can have one, + * i.e., it's not an immediate value or one of the special singletons), + * this function will: + * 1. Allocate a new `RClass` of type `MRB_TT_SCLASS`. + * 2. Set its superclass appropriately (e.g., the object's original class, + * or the class of the superclass for class singletons). + * 3. Link this new singleton class to the object `v`. + * 4. Set an internal `__attached__` instance variable on the singleton + * class to point back to `v`. + * 5. The `MRB_FL_CLASS_IS_INHERITED` flag is set on the new singleton class. + */ MRB_API struct RClass* mrb_singleton_class_ptr(mrb_state *mrb, mrb_value v) { @@ -1679,6 +2571,24 @@ mrb_singleton_class_ptr(mrb_state *mrb, mrb_value v) return obj->c; } +/* + * Retrieves the singleton class (also known as metaclass or eigenclass) of a given object + * as an mrb_value. If the singleton class does not exist, it is created. + * + * This function is a wrapper around `mrb_singleton_class_ptr` that returns the + * singleton class as an `mrb_value`. + * + * @param mrb The mruby state. + * @param v The `mrb_value` for which to get the singleton class. + * @return An `mrb_value` representing the singleton class. + * @raise TypeError if `v` is an object that cannot have a singleton class + * (e.g., immediate values like Symbols or Integers under certain configurations, + * or C pointers). This exception is raised by the underlying + * `mrb_singleton_class_ptr` if it returns NULL. + * @sideeffect If the singleton class doesn't exist for `v` (and `v` can have one), + * it will be created via `mrb_singleton_class_ptr`, which involves + * memory allocation and modification of the object's class pointer. + */ MRB_API mrb_value mrb_singleton_class(mrb_state *mrb, mrb_value v) { @@ -1690,6 +2600,25 @@ mrb_singleton_class(mrb_state *mrb, mrb_value v) return mrb_obj_value(c); } +/* + * Defines a singleton method for a specific object `o`. + * A singleton method is a method that belongs only to a single object, + * not to all instances of its class. It's defined in the object's singleton class. + * + * @param mrb The mruby state. + * @param o A pointer to the RObject for which the singleton method is being defined. + * @param name The C string name of the method. + * @param func The C function (mrb_func_t) that implements the method. + * @param aspec The argument specification for the method (e.g., MRB_ARGS_REQ(1)). + * @sideeffect + * 1. Ensures that the singleton class for object `o` exists, creating it if necessary. + * This might involve memory allocation. + * 2. Defines the method specified by `name`, `func`, and `aspec` into this + * singleton class. + * 3. The method name `name` is interned into a symbol. + * 4. The method cache for the newly defined method is cleared. + * @raise TypeError if `o` is an object that cannot have a singleton class (e.g., immediate values). + */ MRB_API void mrb_define_singleton_method(mrb_state *mrb, struct RObject *o, const char *name, mrb_func_t func, mrb_aspec aspec) { @@ -1697,6 +2626,24 @@ mrb_define_singleton_method(mrb_state *mrb, struct RObject *o, const char *name, mrb_define_method_id(mrb, o->c, mrb_intern_cstr(mrb, name), func, aspec); } +/* + * Defines a singleton method for a specific object `o` using a symbol for the method name. + * A singleton method is a method that belongs only to a single object, + * not to all instances of its class. It's defined in the object's singleton class. + * + * @param mrb The mruby state. + * @param o A pointer to the RObject for which the singleton method is being defined. + * @param name The symbol ID (`mrb_sym`) of the method name. + * @param func The C function (mrb_func_t) that implements the method. + * @param aspec The argument specification for the method (e.g., MRB_ARGS_REQ(1)). + * @sideeffect + * 1. Ensures that the singleton class for object `o` exists, creating it if necessary. + * This might involve memory allocation. + * 2. Defines the method specified by `name`, `func`, and `aspec` into this + * singleton class. + * 3. The method cache for the method `name` is cleared. + * @raise TypeError if `o` is an object that cannot have a singleton class (e.g., immediate values). + */ MRB_API void mrb_define_singleton_method_id(mrb_state *mrb, struct RObject *o, mrb_sym name, mrb_func_t func, mrb_aspec aspec) { @@ -1704,25 +2651,95 @@ mrb_define_singleton_method_id(mrb_state *mrb, struct RObject *o, mrb_sym name, mrb_define_method_id(mrb, o->c, name, func, aspec); } +/* + * Defines a class method for a class/module `c`. + * Class methods are effectively singleton methods defined on the class object itself. + * + * @param mrb The mruby state. + * @param c The class/module (`RClass*`) for which to define the class method. + * @param name The C string name of the class method. + * @param func The C function (mrb_func_t) that implements the method. + * @param aspec The argument specification for the method. + * @sideeffect This function internally calls `mrb_define_singleton_method` on the + * class object `c`. This involves: + * 1. Ensuring `c`'s singleton class exists (creating it if needed). + * 2. Defining the method in `c`'s singleton class. + * 3. Interning the `name` string. + * 4. Clearing the relevant method cache. + * @raise TypeError if `c` itself is an object that cannot have a singleton class (highly unlikely for RClass). + */ MRB_API void mrb_define_class_method(mrb_state *mrb, struct RClass *c, const char *name, mrb_func_t func, mrb_aspec aspec) { mrb_define_singleton_method(mrb, (struct RObject*)c, name, func, aspec); } +/* + * Defines a class method for a class/module `c` using a symbol for the method name. + * Class methods are effectively singleton methods defined on the class object itself. + * + * @param mrb The mruby state. + * @param c The class/module (`RClass*`) for which to define the class method. + * @param name The symbol ID (`mrb_sym`) of the class method name. + * @param func The C function (mrb_func_t) that implements the method. + * @param aspec The argument specification for the method. + * @sideeffect This function internally calls `mrb_define_singleton_method_id` on the + * class object `c`. This involves: + * 1. Ensuring `c`'s singleton class exists (creating it if needed). + * 2. Defining the method in `c`'s singleton class. + * 3. Clearing the relevant method cache. + * @raise TypeError if `c` itself is an object that cannot have a singleton class (highly unlikely for RClass). + */ MRB_API void mrb_define_class_method_id(mrb_state *mrb, struct RClass *c, mrb_sym name, mrb_func_t func, mrb_aspec aspec) { mrb_define_singleton_method_id(mrb, (struct RObject*)c, name, func, aspec); } +/* + * Defines a module function for a module `c` using a symbol for the name. + * A module function is a shorthand for defining a method that is both a + * public class method (callable on the module itself) and a private + * instance method (callable within the context of classes that include the module). + * + * @param mrb The mruby state. + * @param c The module (`RClass*` where `c->tt` should be `MRB_TT_MODULE`) + * for which to define the module function. + * @param name The symbol ID (`mrb_sym`) of the module function name. + * @param func The C function (mrb_func_t) that implements the function. + * @param aspec The argument specification for the function. + * @sideeffect + * 1. Defines a public class method on `c` with the given `name`, `func`, and `aspec`. + * This involves creating/accessing `c`'s singleton class. + * 2. Defines a private instance method on `c` with the same `name`, `func`, and `aspec`. + * 3. Clears the method cache for `name` in both contexts. + */ MRB_API void mrb_define_module_function_id(mrb_state *mrb, struct RClass *c, mrb_sym name, mrb_func_t func, mrb_aspec aspec) { mrb_define_class_method_id(mrb, c, name, func, aspec); - mrb_define_method_id(mrb, c, name, func, aspec); + mrb_define_private_method_id(mrb, c, name, func, aspec); } +/* + * Defines a module function for a module `c` using a C string for the name. + * A module function is a shorthand for defining a method that is both a + * public class method (callable on the module itself) and a private + * instance method (callable within the context of classes that include the module). + * + * @param mrb The mruby state. + * @param c The module (`RClass*` where `c->tt` should be `MRB_TT_MODULE`) + * for which to define the module function. + * @param name The C string name of the module function. This name will be interned. + * @param func The C function (mrb_func_t) that implements the function. + * @param aspec The argument specification for the function. + * @sideeffect + * 1. Interns the `name` string to a symbol. + * 2. Calls `mrb_define_module_function_id` with the interned symbol, which in turn: + * a. Defines a public class method on `c`. + * b. Defines a private instance method on `c`. + * 3. Clears the method cache for the method name in both contexts. + */ MRB_API void mrb_define_module_function(mrb_state *mrb, struct RClass *c, const char *name, mrb_func_t func, mrb_aspec aspec) { @@ -1731,8 +2748,8 @@ mrb_define_module_function(mrb_state *mrb, struct RClass *c, const char *name, m #ifndef MRB_NO_METHOD_CACHE /* clear whole method cache table */ -static void -mc_clear(mrb_state *mrb) +MRB_API void +mrb_method_cache_clear(mrb_state *mrb) { static const struct mrb_cache_entry ce_zero ={0}; @@ -1748,7 +2765,7 @@ mrb_mc_clear_by_class(mrb_state *mrb, struct RClass *c) struct mrb_cache_entry *mc = mrb->cache; for (int i=0; ic == c || mc->c0 == c) mc->c = 0; + if (mc->c == c || mc->c0 == c) mc->c = NULL; } } @@ -1758,21 +2775,27 @@ mc_clear_by_id(mrb_state *mrb, mrb_sym id) struct mrb_cache_entry *mc = mrb->cache; for (int i=0; imid == id) mc->c = 0; + if (mc->mid == id) mc->c = NULL; } } -#endif +#endif // MRB_NO_METHOD_CACHE -MRB_API mrb_method_t -mrb_method_search_vm(mrb_state *mrb, struct RClass **cp, mrb_sym mid) +mrb_method_t +mrb_vm_find_method(mrb_state *mrb, struct RClass *c, struct RClass **cp, mrb_sym mid) { mrb_method_t m; - struct RClass *c = *cp; #ifndef MRB_NO_METHOD_CACHE struct RClass *oc = c; - int h = kh_int_hash_func(mrb, ((intptr_t)oc) ^ mid) & (MRB_METHOD_CACHE_SIZE-1); - struct mrb_cache_entry *mc = &mrb->cache[h]; + int h = mrb_int_hash_func(mrb, ((intptr_t)oc >> 4) ^ mid) & (MRB_METHOD_CACHE_SIZE/2-1); + struct mrb_cache_entry *mc = &mrb->cache[h * 2]; + /* check way 0 */ + if (mc->c == c && mc->mid == mid) { + *cp = mc->c0; + return mc->m; + } + /* check way 1 */ + mc++; if (mc->c == c && mc->mid == mid) { *cp = mc->c0; return mc->m; @@ -1780,24 +2803,19 @@ mrb_method_search_vm(mrb_state *mrb, struct RClass **cp, mrb_sym mid) #endif while (c) { - mt_tbl *h = c->mt; + mrb_mt_tbl *h = c->mt; if (h) { - union mt_ptr ptr; - mrb_sym ret = mt_get(mrb, h, mid, &ptr); - if (ret) { + union mrb_mt_ptr ptr; + uint32_t flags; + if (mt_get(mrb, h, mid, &ptr, &flags)) { if (ptr.proc == 0) break; *cp = c; - if (ret & MT_FUNC_P) { - MRB_METHOD_FROM_FUNC(m, ptr.func); - } - else { - MRB_METHOD_FROM_PROC(m, ptr.proc); - } - if (ret & MT_NOARG_P) { - MRB_METHOD_NOARG_SET(m); - } + m = create_method_value(mrb, flags, ptr); #ifndef MRB_NO_METHOD_CACHE + mc--; /* back to way 0 */ + if (mc->c != NULL && mc[1].c == NULL) + mc++; /* way 1 is empty, use it */ mc->c = oc; mc->c0 = c; mc->mid = mid; @@ -1812,8 +2830,45 @@ mrb_method_search_vm(mrb_state *mrb, struct RClass **cp, mrb_sym mid) return m; /* no method */ } +/* + * Searches for a method in the method table of a class and its ancestors + * within the context of the current virtual machine. + * This function is primarily used internally by the VM and dispatch mechanism. + * + * @param mrb The mruby state. + * @param cp A pointer to a pointer to the class (`RClass*`) from which to start + * the method search. On successful find, the `RClass*` pointed to by `cp` + * is updated to the class where the method was actually found. + * @param mid The symbol ID of the method name to search for. + * @return An `mrb_method_t` structure representing the found method. + * If the method is not found or is undefined, the returned `mrb_method_t` + * will have its `proc` field set to NULL (use `MRB_METHOD_UNDEF_P` to check). + * @sideeffect If the method is found and method caching is enabled (i.e., + * `MRB_NO_METHOD_CACHE` is not defined), this function will update + * the VM's method cache with the found method for the original class + * in `*cp` and the method ID `mid`. + */ +MRB_API mrb_method_t +mrb_method_search_vm(mrb_state *mrb, struct RClass **cp, mrb_sym mid) +{ + return mrb_vm_find_method(mrb, *cp, cp, mid); +} + +/* + * Searches for a method in a class `c` and its ancestors. + * This is a higher-level wrapper around `mrb_method_search_vm`. + * + * @param mrb The mruby state. + * @param c The class (`RClass*`) in which to start the search. + * @param mid The symbol ID (`mrb_sym`) of the method name. + * @return An `mrb_method_t` structure for the found method. + * @raise NameError if the method specified by `mid` is not found or is undefined + * in class `c` or its ancestors. + * @sideeffect May update the method cache if the method is found (via the + * internal call to `mrb_method_search_vm`). + */ MRB_API mrb_method_t -mrb_method_search(mrb_state *mrb, struct RClass* c, mrb_sym mid) +mrb_method_search(mrb_state *mrb, struct RClass *c, mrb_sym mid) { mrb_method_t m; @@ -1835,7 +2890,7 @@ prepare_name_common(mrb_state *mrb, mrb_sym sym, const char *prefix, const char size_t prefix_len = prefix ? strlen(prefix) : 0; size_t suffix_len = suffix ? strlen(suffix) : 0; size_t name_len = sym_len + prefix_len + suffix_len; - char *buf = name_len > sizeof(onstack) ? (char *)mrb_alloca(mrb, name_len) : onstack; + char *buf = name_len > sizeof(onstack) ? (char*)mrb_alloca(mrb, name_len) : onstack; char *p = buf; if (prefix_len > 0) { @@ -1848,7 +2903,6 @@ prepare_name_common(mrb_state *mrb, mrb_sym sym, const char *prefix, const char if (suffix_len > 0) { memcpy(p, suffix, suffix_len); - p += suffix_len; } return mrb_intern(mrb, buf, name_len); @@ -1869,28 +2923,25 @@ prepare_writer_name(mrb_state *mrb, mrb_sym sym) } static mrb_value -mod_attr_define(mrb_state *mrb, mrb_value mod, mrb_value (*accessor)(mrb_state *, mrb_value), mrb_sym (*access_name)(mrb_state *, mrb_sym)) +mod_attr_define(mrb_state *mrb, mrb_value mod, mrb_int aargc, mrb_value (*accessor)(mrb_state*, mrb_value), mrb_sym (*access_name)(mrb_state*, mrb_sym)) { struct RClass *c = mrb_class_ptr(mod); const mrb_value *argv; - mrb_int argc, i; - int ai; + mrb_int argc; mrb_get_args(mrb, "*", &argv, &argc); - ai = mrb_gc_arena_save(mrb); - for (i=0; iflags |= aargc == 0 ? MRB_PROC_NOARG : 0; + mrb_method_t m; MRB_METHOD_FROM_PROC(m, p); mrb_define_method_raw(mrb, c, method, m); mrb_gc_arena_restore(mrb, ai); @@ -1908,7 +2959,7 @@ attr_reader(mrb_state *mrb, mrb_value obj) static mrb_value mrb_mod_attr_reader(mrb_state *mrb, mrb_value mod) { - return mod_attr_define(mrb, mod, attr_reader, NULL); + return mod_attr_define(mrb, mod, 0, attr_reader, NULL); } static mrb_value @@ -1924,24 +2975,39 @@ attr_writer(mrb_state *mrb, mrb_value obj) static mrb_value mrb_mod_attr_writer(mrb_state *mrb, mrb_value mod) { - return mod_attr_define(mrb, mod, attr_writer, prepare_writer_name); + return mod_attr_define(mrb, mod, 1, attr_writer, prepare_writer_name); +} + +static mrb_value +mrb_mod_attr_accessor(mrb_state *mrb, mrb_value mod) +{ + mrb_mod_attr_reader(mrb, mod); + return mrb_mod_attr_writer(mrb, mod); } static mrb_value mrb_instance_alloc(mrb_state *mrb, mrb_value cv) { struct RClass *c = mrb_class_ptr(cv); - struct RObject *o; enum mrb_vtype ttype = MRB_INSTANCE_TT(c); if (c->tt == MRB_TT_SCLASS) mrb_raise(mrb, E_TYPE_ERROR, "can't create instance of singleton class"); - if (ttype == 0) ttype = MRB_TT_OBJECT; + if (c == mrb->nil_class || c == mrb->false_class) { + mrb_assert(ttype == 0); + } + else if (ttype == 0) { + ttype = MRB_TT_OBJECT; + } + if (MRB_UNDEF_ALLOCATOR_P(c)) { + mrb_raisef(mrb, E_TYPE_ERROR, "allocator undefined for %v", cv); + } if (ttype <= MRB_TT_CPTR) { mrb_raisef(mrb, E_TYPE_ERROR, "can't create instance of %v", cv); } - o = (struct RObject*)mrb_obj_alloc(mrb, ttype, c); + + struct RObject *o = (struct RObject*)mrb_obj_alloc(mrb, ttype, c); return mrb_obj_value(o); } @@ -1949,9 +3015,9 @@ mrb_instance_alloc(mrb_state *mrb, mrb_value cv) * call-seq: * class.new(args, ...) -> obj * - * Creates a new object of class's class, then - * invokes that object's initialize method, - * passing it args. This is the method that ends + * Creates a new object of *class*'s class, then + * invokes that object's `initialize` method, + * passing it *args*. This is the method that ends * up getting called whenever an object is constructed using * `.new`. * @@ -1960,28 +3026,46 @@ mrb_instance_alloc(mrb_state *mrb, mrb_value cv) mrb_value mrb_instance_new(mrb_state *mrb, mrb_value cv) { - mrb_value obj, blk; const mrb_value *argv; mrb_int argc; - mrb_sym init; + mrb_value blk; mrb_get_args(mrb, "*!&", &argv, &argc, &blk); - obj = mrb_instance_alloc(mrb, cv); - init = MRB_SYM(initialize); + mrb_value obj = mrb_instance_alloc(mrb, cv); + mrb_sym init = MRB_SYM(initialize); if (!mrb_func_basic_p(mrb, obj, init, mrb_do_nothing)) { mrb_funcall_with_block(mrb, obj, init, argc, argv, blk); } return obj; } +/* + * Creates a new instance of class `c` and initializes it by calling its `initialize` method. + * + * This function first allocates a new object of the given class `c`. + * Then, it calls the `initialize` method on this new object, passing + * `argc` and `argv` as arguments. If the `initialize` method is not + * explicitly defined or is the default (empty) one, it is not called. + * + * @param mrb The mruby state. + * @param c A pointer to the `RClass` structure of the class to instantiate. + * @param argc The number of arguments to pass to the `initialize` method. + * @param argv A pointer to an array of `mrb_value` arguments for `initialize`. + * @return The newly created and initialized `mrb_value` object. + * @raise TypeError if `c` is a singleton class, or if its allocator is undefined, + * or if it's a built-in type that cannot be instantiated this way + * (e.g., `MRB_TT_CPTR`). This check occurs in `mrb_instance_alloc`. + * @sideeffect + * 1. Allocates a new object on the mruby heap. + * 2. Calls the `initialize` method of the new object if it's a user-defined one. + * This `initialize` call can have arbitrary side effects. + */ MRB_API mrb_value mrb_obj_new(mrb_state *mrb, struct RClass *c, mrb_int argc, const mrb_value *argv) { - mrb_value obj; - mrb_sym mid; + mrb_value obj = mrb_instance_alloc(mrb, mrb_obj_value(c)); + mrb_sym mid = MRB_SYM(initialize); - obj = mrb_instance_alloc(mrb, mrb_obj_value(c)); - mid = MRB_SYM(initialize); if (!mrb_func_basic_p(mrb, obj, mid, mrb_do_nothing)) { mrb_funcall_argv(mrb, obj, mid, argc, argv); } @@ -1989,47 +3073,44 @@ mrb_obj_new(mrb_state *mrb, struct RClass *c, mrb_int argc, const mrb_value *arg } static mrb_value -mrb_class_initialize(mrb_state *mrb, mrb_value c) +mrb_class_initialize(mrb_state *mrb, mrb_value obj) { - mrb_value a, b; + struct RClass *c = mrb_class_ptr(obj); + mrb_value a, b; mrb_get_args(mrb, "|C&", &a, &b); if (!mrb_nil_p(b)) { - mrb_yield_with_class(mrb, b, 1, &c, c, mrb_class_ptr(c)); + mrb_yield_with_class(mrb, b, 1, &obj, obj, c); } - return c; + return obj; } static mrb_value mrb_class_new_class(mrb_state *mrb, mrb_value cv) { - mrb_int n; mrb_value super, blk; - mrb_value new_class; - mrb_sym mid; + mrb_int n = mrb_get_args(mrb, "|C&", &super, &blk); - n = mrb_get_args(mrb, "|C&", &super, &blk); if (n == 0) { super = mrb_obj_value(mrb->object_class); } - new_class = mrb_obj_value(mrb_class_new(mrb, mrb_class_ptr(super))); - mid = MRB_SYM(initialize); + mrb_value new_class = mrb_obj_value(mrb_class_new(mrb, mrb_class_ptr(super))); + mrb_class_inherited(mrb, mrb_class_ptr(super), mrb_class_ptr(new_class)); + mrb_sym mid = MRB_SYM(initialize); if (mrb_func_basic_p(mrb, new_class, mid, mrb_class_initialize)) { mrb_class_initialize(mrb, new_class); } else { mrb_funcall_with_block(mrb, new_class, mid, n, &super, blk); } - mrb_class_inherited(mrb, mrb_class_ptr(super), mrb_class_ptr(new_class)); return new_class; } static mrb_value mrb_class_superclass(mrb_state *mrb, mrb_value klass) { - struct RClass *c; + struct RClass *c = mrb_class_ptr(klass); - c = mrb_class_ptr(klass); c = find_origin(c)->super; while (c && c->tt == MRB_TT_ICLASS) { c = find_origin(c)->super; @@ -2059,24 +3140,24 @@ mrb_bob_not(mrb_state *mrb, mrb_value cv) * obj.equal?(other) -> true or false * obj.eql?(other) -> true or false * - * Equality---At the Object level, == returns - * true only if obj and other are the + * Equality---At the `Object` level, `==` returns + * `true` only if *obj* and *other* are the * same object. Typically, this method is overridden in descendant * classes to provide class-specific meaning. * - * Unlike ==, the equal? method should never be + * Unlike `==`, the `equal?` method should never be * overridden by subclasses: it is used to determine object identity - * (that is, a.equal?(b) iff a is the same - * object as b). - * - * The eql? method returns true if - * obj and anObject have the same value. Used by - * Hash to test members for equality. For objects of - * class Object, eql? is synonymous with - * ==. Subclasses normally continue this tradition, but - * there are exceptions. Numeric types, for example, - * perform type conversion across ==, but not across - * eql?, so: + * (that is, `a.equal?(b)` iff `a` is the same + * object as `b`). + * + * The `eql?` method returns `true` if + * *obj* and *anObject* have the same value. Used by + * `Hash` to test members for equality. For objects of + * class `Object`, `eql?` is synonymous with + * `==`. Subclasses normally continue this tradition, but + * there are exceptions. `Numeric` types, for example, + * perform type conversion across `==`, but not across + * `eql?`, so: * * 1 == 1.0 #=> true * 1.eql? 1.0 #=> false @@ -2089,31 +3170,80 @@ mrb_obj_equal_m(mrb_state *mrb, mrb_value self) return mrb_bool_value(mrb_obj_equal(mrb, self, arg)); } +/* + * Checks if instances of a class `c` (or its ancestors) respond to a given method. + * + * This function searches for the method `mid` in the method table of class `c` + * and its ancestor classes and included modules. + * + * @param mrb The mruby state. + * @param c The `RClass*` representing the class of the object. + * @param mid The symbol ID (`mrb_sym`) of the method name. + * @return `TRUE` if an object of class `c` would respond to the method `mid` + * (i.e., the method is found and not undefined). + * `FALSE` otherwise. + * @sideeffect May update the method cache if the method is found (due to the + * internal call to `mrb_method_search_vm`). + */ MRB_API mrb_bool mrb_obj_respond_to(mrb_state *mrb, struct RClass* c, mrb_sym mid) { - mrb_method_t m; + mrb_method_t m = mrb_method_search_vm(mrb, &c, mid); - m = mrb_method_search_vm(mrb, &c, mid); if (MRB_METHOD_UNDEF_P(m)) { return FALSE; } return TRUE; } +/* + * Checks if a given mruby object `obj` responds to a method specified by `mid`. + * + * This function first determines the class of `obj` and then calls + * `mrb_obj_respond_to` to perform the method lookup. + * + * @param mrb The mruby state. + * @param obj The `mrb_value` object to check. + * @param mid The symbol ID (`mrb_sym`) of the method name. + * @return `TRUE` if the object `obj` responds to the method `mid`, `FALSE` otherwise. + * @sideeffect This function calls `mrb_class(mrb, obj)` which might have side effects + * if `obj` is a proxy object or has unusual class resolution. + * It also has the side effects of `mrb_obj_respond_to` (e.g., method + * cache updates). + */ MRB_API mrb_bool mrb_respond_to(mrb_state *mrb, mrb_value obj, mrb_sym mid) { return mrb_obj_respond_to(mrb, mrb_class(mrb, obj), mid); } +/* + * Returns the name (path) of a class or module `c`. + * + * If the class/module has a cached name (typically set when it's assigned to a + * constant), that name is returned. + * For top-level classes/modules, this is their direct name. + * For nested classes/modules, it's the fully qualified name (e.g., `Outer::Inner`). + * If no name is cached (e.g., for anonymous classes/modules), this function + * attempts to find or construct a path representation (e.g., `#`). + * + * @param mrb The mruby state. + * @param c The `RClass*` structure of the class or module. + * @return An `mrb_value` (String) representing the path of the class/module. + * - If a name is cached as a symbol (toplevel), it returns the symbol's string representation. + * - If a name is cached as a string (nested), it returns a duplicate of that string. + * - If no name is cached, it calls `mrb_class_find_path` to get or construct one. + * The returned string is suitable for modification by the caller as it's either + * a new string or a duplicate of an internal one. + * @sideeffect May allocate a new string on the mruby heap if duplication or construction + * of the path string is necessary. + */ MRB_API mrb_value mrb_class_path(mrb_state *mrb, struct RClass *c) { - mrb_value path; mrb_sym nsym = MRB_SYM(__classname__); + mrb_value path = mrb_obj_iv_get(mrb, (struct RObject*)c, nsym); - path = mrb_obj_iv_get(mrb, (struct RObject*)c, nsym); if (mrb_nil_p(path)) { /* no name (yet) */ return mrb_class_find_path(mrb, c); @@ -2125,6 +3255,21 @@ mrb_class_path(mrb_state *mrb, struct RClass *c) return mrb_str_dup(mrb, path); } +/* + * Returns the "real" class of a given class pointer `cl`. + * + * The "real" class is the underlying, non-singleton, non-iclass `RClass`. + * This function traverses up the superclass chain, skipping any `MRB_TT_SCLASS` + * (singleton class) or `MRB_TT_ICLASS` (module inclusion class / i-class) + * encountered, until it finds an `RClass` that is a `MRB_TT_CLASS` or + * `MRB_TT_MODULE`. + * + * @param cl A pointer to an `RClass` structure. + * @return A pointer to the "real" `RClass` structure. + * Returns `NULL` if the input `cl` is `NULL` or if the superclass + * chain leads to `NULL` before a real class is found (which typically + * should not happen for valid class structures). + */ MRB_API struct RClass* mrb_class_real(struct RClass* cl) { @@ -2136,23 +3281,60 @@ mrb_class_real(struct RClass* cl) return cl; } +/* + * Returns the name of a class/module `c` as a C string. + * + * This function provides a C string representation of the class/module name. + * It typically calls `mrb_class_path` internally and then returns a pointer + * to the string data of the resulting `mrb_value`. + * + * @param mrb The mruby state. + * @param c The `RClass*` structure of the class or module. + * @return A `const char*` pointing to the name of the class/module. + * This could be the class name, a fully qualified name for nested + * modules/classes, or a representation like "#" for + * anonymous ones. Returns `NULL` if `c` is `NULL`. + * @sideeffect This function may allocate memory on the mruby heap if `mrb_class_path` + * needs to construct the name string (e.g., for anonymous classes or + * if the name is not cached). The returned pointer is to the internal + * buffer of an `mrb_value` string; its validity is tied to the lifetime + * of that string value, which is subject to garbage collection unless + * explicitly protected. + */ MRB_API const char* mrb_class_name(mrb_state *mrb, struct RClass* c) { - mrb_value name; - if (c == NULL) return NULL; - name = class_name_str(mrb, c); + + mrb_value name = class_name_str(mrb, c); return RSTRING_PTR(name); } +/* + * Returns the class name of a given mruby object `obj` as a C string. + * + * This function first retrieves the class of the object using `mrb_obj_class` + * (which gets the "real" class, traversing SCLASS/ICLASS), and then + * gets the name of that class using `mrb_class_name`. + * + * @param mrb The mruby state. + * @param obj The `mrb_value` object whose class name is to be retrieved. + * @return A `const char*` pointing to the name of the object's class. + * See `mrb_class_name` for details on the format of the name. + * @sideeffect This function has the combined side effects of `mrb_obj_class` + * and `mrb_class_name`. This may include memory allocation on the + * mruby heap for constructing the class name string or for class + * structure creation if the object's class or metaclass components + * are not yet fully initialized. The lifetime of the returned pointer + * is tied to the underlying string `mrb_value`. + */ MRB_API const char* mrb_obj_classname(mrb_state *mrb, mrb_value obj) { return mrb_class_name(mrb, mrb_obj_class(mrb, obj)); } -/*! +/* * Ensures a class can be derived from super. * * \param super a reference to an object. @@ -2172,31 +3354,60 @@ mrb_check_inheritable(mrb_state *mrb, struct RClass *super) } } -/*! - * Creates a new class. - * \param super a class from which the new class derives. - * \exception TypeError \a super is not inheritable. - * \exception TypeError \a super is the Class class. +/* + * Creates a new, unnamed class. + * + * This function is the core mechanism for creating new classes in mruby. + * The created class will not have a name (i.e., it's anonymous) until it is + * assigned to a constant. + * + * @param mrb The mruby state. + * @param super A pointer to the `RClass` structure of the superclass. + * If `super` is `NULL`, `Object` (mrb->object_class) will be used + * as the superclass by default (though `boot_defclass` handles this). + * @return A pointer to the `RClass` structure of the newly created class. + * @raise TypeError if `super` is not a valid class to inherit from (e.g., it's a + * singleton class, or it's the `Class` class itself). + * This check is performed by `mrb_check_inheritable`. + * @sideeffect + * 1. Allocates a new `RClass` object on the mruby heap. + * 2. Initializes its method table (`mt`). + * 3. Sets its superclass to the provided `super` (or `Object` if `super` is `NULL`). + * 4. Copies instance type information (`MRB_INSTANCE_TT`) and the + * `MRB_FL_UNDEF_ALLOCATE` flag from the superclass if `super` is provided. + * 5. Creates and attaches a metaclass (singleton class) to the new class. */ MRB_API struct RClass* mrb_class_new(mrb_state *mrb, struct RClass *super) { - struct RClass *c; - if (super) { mrb_check_inheritable(mrb, super); } - c = boot_defclass(mrb, super); - if (super) { - MRB_SET_INSTANCE_TT(c, MRB_INSTANCE_TT(super)); + else { + super = mrb->object_class; } + + struct RClass *c = boot_defclass(mrb, super, MRB_INSTANCE_TT(super)); + c->flags |= super->flags & MRB_FL_UNDEF_ALLOCATE; make_metaclass(mrb, c); return c; } -/*! - * Creates a new module. +/* + * Creates a new, unnamed module. + * + * This function is the core mechanism for creating new modules in mruby. + * The created module will not have a name (i.e., it's anonymous) until it is + * assigned to a constant. + * + * @param mrb The mruby state. + * @return A pointer to the `RClass` structure of the newly created module. + * The `tt` field of this `RClass` will be `MRB_TT_MODULE`. + * @sideeffect + * 1. Allocates a new `RClass` object on the mruby heap. + * 2. Sets its class to `mrb->module_class`. + * 3. Initializes its method table (`mt`). */ MRB_API struct RClass* mrb_module_new(mrb_state *mrb) @@ -2210,22 +3421,65 @@ mrb_module_new(mrb_state *mrb) * call-seq: * obj.class => class * - * Returns the class of obj, now preferred over - * Object#type, as an object's type in Ruby is only + * Returns the class of *obj*, now preferred over + * `Object#type`, as an object's type in Ruby is only * loosely tied to that object's class. This method must always be - * called with an explicit receiver, as class is also a + * called with an explicit receiver, as `class` is also a * reserved word in Ruby. * * 1.class #=> Integer * self.class #=> Object */ +/* + * Returns the "real" class of an object. + * This is the preferred way to get the class of an object in C extension code. + * It correctly handles various mruby internal object structures by first calling + * `mrb_class` (which gets the direct class, potentially a singleton or i-class) + * and then `mrb_class_real` to resolve it to the actual user-facing class. + * + * @param mrb The mruby state. + * @param obj The `mrb_value` object whose class is to be retrieved. + * @return A pointer to the `RClass` structure of the object's "real" class. + * For example, for an instance of a regular class, it returns the class itself. + * For an instance of a class that includes modules, it still returns the class itself, + * not the i-classes. For a class object, it returns `Class`. + * @sideeffect This function itself has minimal side effects, but the underlying + * `mrb_class` and `mrb_class_real` might perform lookups or traversals. + */ MRB_API struct RClass* mrb_obj_class(mrb_state *mrb, mrb_value obj) { return mrb_class_real(mrb_class(mrb, obj)); } +/* + * Defines an alias for an existing method within a class or module `c`. + * The new method `a` will be an alias of the old method `b`. + * + * @param mrb The mruby state. + * @param c The class or module (`RClass*`) in which to define the alias. + * @param a The symbol ID (`mrb_sym`) for the new method name (the alias). + * @param b The symbol ID (`mrb_sym`) for the original method name to be aliased. + * @return This function does not return a value. + * @raise NameError if the original method `b` is not found in class `c` or its ancestors. + * @sideeffect + * 1. Searches for the original method `b` in class `c` and its ancestors. + * 2. If `b` is found: + * a. If `b` is a C function (`MRB_METHOD_CFUNC_P` is true), or if `b` is already + * an alias proc (`MRB_PROC_ALIAS_P` is true for the proc body), the method `m` + * (representing `b`) is directly used for the new alias `a`. + * b. If `b` is a Ruby-defined method (a non-CFUNC, non-alias `RProc`), a new `RProc` + * of type `MRB_PROC_ALIAS` is created. This new proc stores the original + * method's symbol `b` and its `RProc` as its `upper`. This ensures that + * the alias `a` continues to point to the definition of `b` at the time of + * aliasing, even if `b` is later redefined. The visibility of `b` is copied + * to this new alias proc. + * c. The method (either the original `m` or the new alias proc) is then defined + * in class `c` under the new name `a` using `mrb_define_method_raw`. + * 3. The method cache for the new alias name `a` is cleared. + * 4. If `a` and `b` are the same, the function does nothing and returns early. + */ MRB_API void mrb_alias_method(mrb_state *mrb, struct RClass *c, mrb_sym a, mrb_sym b) { @@ -2233,34 +3487,37 @@ mrb_alias_method(mrb_state *mrb, struct RClass *c, mrb_sym a, mrb_sym b) mrb_method_t m = mrb_method_search(mrb, c, b); if (!MRB_METHOD_CFUNC_P(m)) { - struct RProc *p = MRB_METHOD_PROC(m); - - if (MRB_PROC_ENV_P(p)) { - MRB_PROC_ENV(p)->mid = b; - } - else if (p->color != MRB_GC_RED) { - struct RClass *tc = MRB_PROC_TARGET_CLASS(p); - struct REnv *e = MRB_OBJ_ALLOC(mrb, MRB_TT_ENV, NULL); - - e->mid = b; - if (tc) { - e->c = tc; - mrb_field_write_barrier(mrb, (struct RBasic*)e, (struct RBasic*)tc); - } - p->e.env = e; - p->flags |= MRB_PROC_ENVSET; - mrb_field_write_barrier(mrb, (struct RBasic*)p, (struct RBasic*)e); + const struct RProc *p = MRB_METHOD_PROC(m); + if (!MRB_PROC_CFUNC_P(p) && !MRB_PROC_ALIAS_P(p)) { + struct RProc *pnew = MRB_OBJ_ALLOC(mrb, MRB_TT_PROC, mrb->proc_class); + int vis = MRB_METHOD_VISIBILITY(m); + + pnew->body.mid = b; + pnew->upper = p; + pnew->e.env = NULL; + pnew->flags |= MRB_PROC_ALIAS; + MRB_METHOD_FROM_PROC(m, pnew); + MRB_METHOD_SET_VISIBILITY(m, vis); } } mrb_define_method_raw(mrb, c, a, m); } -/*! - * Defines an alias of a method. - * \param mrb the mruby state - * \param klass the class which the original method belongs to - * \param name1 a new name for the method - * \param name2 the original name of the method +/* + * Defines an alias for an existing method within a class or module `klass`. + * This version takes C string names for both the new alias and the original method. + * + * @param mrb The mruby state. + * @param klass The class or module (`RClass*`) in which to define the alias. + * @param name1 The C string for the new method name (the alias). + * @param name2 The C string for the original method name to be aliased. + * @return This function does not return a value. + * @raise NameError if the original method `name2` (after being interned) is not found. + * @sideeffect + * 1. Interns both `name1` and `name2` to get their `mrb_sym` IDs. + * 2. Calls `mrb_alias_method` with the class `klass` and the obtained symbols. + * (See `mrb_alias_method` for further side effects like method table modification + * and cache clearing). */ MRB_API void mrb_define_alias(mrb_state *mrb, struct RClass *klass, const char *name1, const char *name2) @@ -2268,6 +3525,22 @@ mrb_define_alias(mrb_state *mrb, struct RClass *klass, const char *name1, const mrb_alias_method(mrb, klass, mrb_intern_cstr(mrb, name1), mrb_intern_cstr(mrb, name2)); } +/* + * Defines an alias for an existing method within a class or module `klass`, + * using symbol IDs for both names. + * + * This function is a direct call to `mrb_alias_method`. + * + * @param mrb The mruby state. + * @param klass The class or module (`RClass*`) in which to define the alias. + * @param a The symbol ID (`mrb_sym`) for the new method name (the alias). + * @param b The symbol ID (`mrb_sym`) for the original method name to be aliased. + * @return This function does not return a value. + * @raise NameError if the original method `b` is not found in `klass` or its ancestors. + * @sideeffect Calls `mrb_alias_method`, which modifies the method table of `klass` + * and clears the method cache for the new alias `a`. + * (See `mrb_alias_method` for more detailed side effects). + */ MRB_API void mrb_define_alias_id(mrb_state *mrb, struct RClass *klass, mrb_sym a, mrb_sym b) { @@ -2318,12 +3591,47 @@ mrb_mod_alias(mrb_state *mrb, mrb_value mod) static void undef_method(mrb_state *mrb, struct RClass *c, mrb_sym a) { + mrb_sym undefined; + mrb_value recv; mrb_method_t m; MRB_METHOD_FROM_PROC(m, NULL); mrb_define_method_raw(mrb, c, a, m); + if (c->tt == MRB_TT_SCLASS) { + undefined = MRB_SYM(singleton_method_undefined); + recv = mrb_iv_get(mrb, mrb_obj_value(c), MRB_SYM(__attached__)); + } + else { + undefined = MRB_SYM(method_undefined); + recv = mrb_obj_value(c); + } + if (!mrb_func_basic_p(mrb, recv, undefined, mrb_do_nothing)) { + mrb_value sym = mrb_symbol_value(a); + mrb_funcall_argv(mrb, recv, undefined, 1, &sym); + } } +/* + * Undefines a method specified by symbol `a` in class/module `c`. + * + * This action prevents objects of class `c` (or classes including module `c`) + * from responding to the method `a`. If the method was inherited, the version + * in the superclass will no longer be accessible through `c`. + * A special "undefined" entry is added to `c`'s method table for `a`. + * + * @param mrb The mruby state. + * @param c The class or module (`RClass*`) in which to undefine the method. + * @param a The symbol ID (`mrb_sym`) of the method to undefine. + * @return This function does not return a value. + * @raise NameError if the method `a` is not defined in `c` or its ancestors + * (i.e., if `c` does not respond to `a` before undefinition). + * @sideeffect + * 1. Modifies the method table of `c` by adding an entry that marks `a` as undefined. + * 2. Triggers `method_undefined` (for regular classes/modules) or + * `singleton_method_undefined` (for singleton classes) callbacks on `c` + * or its attached object, if these hooks are defined. + * 3. Clears the method cache for the method symbol `a`. + */ MRB_API void mrb_undef_method_id(mrb_state *mrb, struct RClass *c, mrb_sym a) { @@ -2333,37 +3641,143 @@ mrb_undef_method_id(mrb_state *mrb, struct RClass *c, mrb_sym a) undef_method(mrb, c, a); } +/* + * Undefines a method specified by a C string `name` in class/module `c`. + * + * This function interns the C string `name` to a symbol and then calls + * `mrb_undef_method_id` to perform the undefinition. + * + * @param mrb The mruby state. + * @param c The class or module (`RClass*`) in which to undefine the method. + * @param name The C string name of the method to undefine. + * @return This function does not return a value. + * @raise NameError if the method `name` (after interned to a symbol) is not + * defined in `c` or its ancestors. + * @sideeffect + * 1. Interns the `name` string. + * 2. All side effects of `mrb_undef_method_id` apply (method table modification, + * callback triggering, cache clearing). + */ MRB_API void mrb_undef_method(mrb_state *mrb, struct RClass *c, const char *name) { undef_method(mrb, c, mrb_intern_cstr(mrb, name)); } +/* + * Undefines a class method specified by symbol `name` for class/module `c`. + * + * Class methods are singleton methods of the class object. This function + * retrieves the singleton class of `c` and then undefines the method there. + * + * @param mrb The mruby state. + * @param c The class or module (`RClass*`) whose class method is to be undefined. + * @param name The symbol ID (`mrb_sym`) of the class method to undefine. + * @return This function does not return a value. + * @raise TypeError if `c` cannot have a singleton class (e.g., if it's an + * immediate value, though highly unlikely for an `RClass*`). + * @raise NameError if the class method `name` is not defined on `c`. + * @sideeffect + * 1. Retrieves or creates the singleton class of `c`. + * 2. All side effects of `mrb_undef_method_id` apply to this singleton class + * (method table modification, callback triggering, cache clearing). + */ MRB_API void mrb_undef_class_method_id(mrb_state *mrb, struct RClass *c, mrb_sym name) { mrb_undef_method_id(mrb, mrb_class_ptr(mrb_singleton_class(mrb, mrb_obj_value(c))), name); } +/* + * Undefines a class method specified by a C string `name` for class/module `c`. + * + * This function interns the C string `name` to a symbol and then calls + * `mrb_undef_class_method_id` (which undefines the method on `c`'s singleton class). + * + * @param mrb The mruby state. + * @param c The class or module (`RClass*`) whose class method is to be undefined. + * @param name The C string name of the class method to undefine. + * @return This function does not return a value. + * @raise TypeError if `c` cannot have a singleton class. + * @raise NameError if the class method `name` (after interned) is not defined on `c`. + * @sideeffect + * 1. Interns the `name` string. + * 2. Retrieves or creates the singleton class of `c`. + * 3. All side effects of `mrb_undef_method_id` apply to this singleton class. + */ MRB_API void mrb_undef_class_method(mrb_state *mrb, struct RClass *c, const char *name) { mrb_undef_method(mrb, mrb_class_ptr(mrb_singleton_class(mrb, mrb_obj_value(c))), name); } +/* + * Removes a method specified by symbol `mid` directly from class/module `c0`. + * + * Unlike `mrb_undef_method_id`, this function only removes the method definition + * from the specified class/module `c0`. If the method is defined in an ancestor, + * that inherited method will become active after the removal from `c0`. + * + * @param mrb The mruby state. + * @param c0 The class or module (`RClass*`) from which to remove the method. + * The method is removed from the "origin" of this class if it's an ICLASS/SCLASS. + * @param mid The symbol ID (`mrb_sym`) of the method to remove. + * @return This function does not return a value. + * @raise NameError if the method `mid` is not defined directly in the method + * table of `c0` (or its origin). + * @sideeffect + * 1. Removes the method entry for `mid` from `c0`'s (or its origin's) method table. + * 2. Triggers `method_removed` (for regular classes/modules) or + * `singleton_method_removed` (for singleton classes) callbacks on `c0` + * or its attached object, if these hooks are defined. + * 3. Clears the method cache for the method symbol `mid`. + */ MRB_API void -mrb_remove_method(mrb_state *mrb, struct RClass *c, mrb_sym mid) +mrb_remove_method(mrb_state *mrb, struct RClass *c0, mrb_sym mid) { - mt_tbl *h; + struct RClass *c = c0; + mrb_bool found = FALSE; MRB_CLASS_ORIGIN(c); - h = c->mt; - - if (h && mt_del(mrb, h, mid)) { - mc_clear_by_id(mrb, mid); - return; + mrb_mt_tbl *h = c->mt; + if (h) { + found = mt_del(mrb, h, mid); + /* insert removed tombstone to block ROM chain lookup */ + if (h->next && mt_chain_has(h->next, mid)) { + union mrb_mt_ptr tombstone; + tombstone.func = NULL; + found = TRUE; + if (mt_frozen_p(h)) { + h->alloc &= ~MRB_MT_FROZEN_BIT; + } + else if (mt_readonly_p(h)) { + mrb_mt_tbl *top = mt_new(mrb); + top->next = h; + h = c->mt = top; + } + mt_put(mrb, h, mid, MRB_MT_FUNC, tombstone); + } + } + if (!found) { + mrb_name_error(mrb, mid, "method '%n' not defined in %C", mid, c); + } + mc_clear_by_id(mrb, mid); + if (c0->tt == MRB_TT_SCLASS) { + mrb_sym cb = MRB_SYM(singleton_method_removed); + mrb_value recv = mrb_iv_get(mrb, mrb_obj_value(c0), MRB_SYM(__attached__)); + if (!mrb_func_basic_p(mrb, recv, cb, mrb_do_nothing)) { + mrb_value sym = mrb_symbol_value(mid); + mrb_funcall_argv(mrb, recv, cb, 1, &sym); + } + } + else { + mrb_sym cb = MRB_SYM(method_removed); + mrb_value recv = mrb_obj_value(c0); + if (!mrb_func_basic_p(mrb, recv, cb, mrb_do_nothing)) { + mrb_value sym = mrb_symbol_value(mid); + mrb_funcall_argv(mrb, recv, cb, 1, &sym); + } } - mrb_name_error(mrb, mid, "method '%n' not defined in %C", mid, c); } static mrb_value @@ -2416,26 +3830,23 @@ static mrb_value mrb_mod_const_get(mrb_state *mrb, mrb_value mod) { mrb_value path = mrb_get_arg1(mrb); - mrb_sym id; - char *ptr; - mrb_int off, end, len; if (mrb_symbol_p(path)) { /* const get with symbol */ - id = mrb_symbol(path); - return mrb_const_get_sym(mrb, mod, id); + return mrb_const_get_sym(mrb, mod, mrb_symbol(path)); } /* const get with class path string */ mrb_ensure_string_type(mrb, path); - ptr = RSTRING_PTR(path); - len = RSTRING_LEN(path); - off = 0; + + char *ptr = RSTRING_PTR(path); + mrb_int len = RSTRING_LEN(path); + mrb_int off = 0; while (off < len) { - end = mrb_str_index_lit(mrb, path, "::", off); - end = (end == -1) ? len : end; - id = mrb_intern(mrb, ptr+off, end-off); + mrb_int end = mrb_str_index_lit(mrb, path, "::", off); + if (end == -1) end = len; + mrb_sym id = mrb_intern(mrb, ptr+off, end-off); mod = mrb_const_get_sym(mrb, mod, id); if (end == len) off = end; @@ -2466,25 +3877,20 @@ static mrb_value mrb_mod_remove_const(mrb_state *mrb, mrb_value mod) { mrb_sym id; - mrb_value val; mrb_get_args(mrb, "n", &id); check_const_name_sym(mrb, id); - val = mrb_iv_remove(mrb, mod, id); + + mrb_value val = mrb_iv_remove(mrb, mod, id); if (mrb_undef_p(val)) { mrb_name_error(mrb, id, "constant %n not defined", id); } return val; } -static mrb_value -mrb_mod_const_missing(mrb_state *mrb, mrb_value mod) +mrb_value +mrb_const_missing(mrb_state *mrb, mrb_value mod, mrb_sym sym) { - mrb_sym sym; - - mrb_get_args(mrb, "n", &sym); - mrb->c->ci->mid = 0; - if (mrb_class_real(mrb_class_ptr(mod)) != mrb->object_class) { mrb_name_error(mrb, sym, "uninitialized constant %v::%n", mod, sym); } @@ -2495,12 +3901,22 @@ mrb_mod_const_missing(mrb_state *mrb, mrb_value mod) return mrb_nil_value(); } +mrb_value +mrb_mod_const_missing(mrb_state *mrb, mrb_value mod) +{ + mrb_sym sym; + + mrb_get_args(mrb, "n", &sym); + mrb->c->ci->mid = 0; + return mrb_const_missing(mrb, mod, sym); +} + /* 15.2.2.4.34 */ /* * call-seq: * mod.method_defined?(symbol) -> true or false * - * Returns +true+ if the named method is defined by + * Returns `true` if the named method is defined by * _mod_ (or its included modules and, if _mod_ is a class, * its ancestors). Public and protected methods are matched. * @@ -2544,14 +3960,15 @@ mrb_method_added(mrb_state *mrb, struct RClass *c, mrb_sym mid) else { added = MRB_SYM(method_added); } - mrb_funcall_id(mrb, recv, added, 1, mrb_symbol_value(mid)); + if (!mrb_func_basic_p(mrb, recv, added, mrb_do_nothing)) { + mrb_value sym = mrb_symbol_value(mid); + mrb_funcall_argv(mrb, recv, added, 1, &sym); + } } -mrb_value -mrb_mod_define_method_m(mrb_state *mrb, struct RClass *c) +static mrb_value +define_method_m(mrb_state *mrb, struct RClass *c, int vis) { - struct RProc *p; - mrb_method_t m; mrb_sym mid; mrb_value proc = mrb_undef_value(); mrb_value blk; @@ -2571,15 +3988,24 @@ mrb_mod_define_method_m(mrb_state *mrb, struct RClass *c) if (mrb_nil_p(blk)) { mrb_raise(mrb, E_ARGUMENT_ERROR, "no block given"); } - p = MRB_OBJ_ALLOC(mrb, MRB_TT_PROC, mrb->proc_class); + struct RProc *p = MRB_OBJ_ALLOC(mrb, MRB_TT_PROC, mrb->proc_class); mrb_proc_copy(mrb, p, mrb_proc_ptr(blk)); p->flags |= MRB_PROC_STRICT; + + mrb_method_t m; MRB_METHOD_FROM_PROC(m, p); + MRB_METHOD_SET_VISIBILITY(m, vis); mrb_define_method_raw(mrb, c, mid, m); mrb_method_added(mrb, c, mid); return mrb_symbol_value(mid); } +mrb_value +mrb_mod_define_method_m(mrb_state *mrb, struct RClass *c) +{ + return define_method_m(mrb, c, MRB_METHOD_PUBLIC_FL); +} + static mrb_value mod_define_method(mrb_state *mrb, mrb_value self) { @@ -2589,16 +4015,14 @@ mod_define_method(mrb_state *mrb, mrb_value self) static mrb_value top_define_method(mrb_state *mrb, mrb_value self) { - return mrb_mod_define_method_m(mrb, mrb->object_class); + return define_method_m(mrb, mrb->object_class, MRB_METHOD_PRIVATE_FL); } static mrb_value mrb_mod_eqq(mrb_state *mrb, mrb_value mod) { mrb_value obj = mrb_get_arg1(mrb); - mrb_bool eqq; - - eqq = mrb_obj_is_kind_of(mrb, obj, mrb_class_ptr(mod)); + mrb_bool eqq = mrb_obj_is_kind_of(mrb, obj, mrb_class_ptr(mod)); return mrb_bool_value(eqq); } @@ -2607,7 +4031,7 @@ static mrb_value mrb_mod_dup(mrb_state *mrb, mrb_value self) { mrb_value mod = mrb_obj_clone(mrb, self); - MRB_UNSET_FROZEN_FLAG(mrb_obj_ptr(mod)); + mrb_obj_ptr(mod)->frozen = 0; return mod; } @@ -2615,11 +4039,7 @@ static mrb_value mrb_mod_module_function(mrb_state *mrb, mrb_value mod) { const mrb_value *argv; - mrb_int argc, i; - mrb_sym mid; - mrb_method_t m; - struct RClass *rclass; - int ai; + mrb_int argc; mrb_check_type(mrb, mod, MRB_TT_MODULE); @@ -2632,15 +4052,16 @@ mrb_mod_module_function(mrb_state *mrb, mrb_value mod) /* set PRIVATE method visibility if implemented */ /* mrb_mod_dummy_visibility(mrb, mod); */ - for (i=0; ic, mid, m); mrb_gc_arena_restore(mrb, ai); } @@ -2704,32 +4125,26 @@ copy_class(mrb_state *mrb, mrb_value dst, mrb_value src) c1->super->flags |= MRB_FL_CLASS_IS_ORIGIN; } if (sc->mt) { - dc->mt = mt_copy(mrb, sc->mt); - } - else { - dc->mt = mt_new(mrb); + if (sc->tt == MRB_TT_ICLASS && !(sc->flags & MRB_FL_CLASS_IS_ORIGIN)) { + dc->mt = sc->mt; + } + else { + dc->mt = mt_copy(mrb, sc->mt); + } } dc->super = sc->super; - MRB_SET_INSTANCE_TT(dc, MRB_INSTANCE_TT(sc)); + dc->flags = sc->flags; + dc->frozen = 0; } /* 15.3.1.3.16 */ -static mrb_value -mrb_obj_init_copy(mrb_state *mrb, mrb_value self) -{ - mrb_value orig = mrb_get_arg1(mrb); - - if (mrb_obj_equal(mrb, self, orig)) return self; - if ((mrb_type(self) != mrb_type(orig)) || (mrb_obj_class(mrb, self) != mrb_obj_class(mrb, orig))) { - mrb_raise(mrb, E_TYPE_ERROR, "initialize_copy should take same class object"); - } - return self; -} +mrb_value mrb_obj_init_copy(mrb_state *mrb, mrb_value self); static void init_copy(mrb_state *mrb, mrb_value dest, mrb_value obj) { - switch (mrb_type(obj)) { + mrb_assert((mrb_type(dest) == mrb_type(obj))); + switch (mrb_unboxed_type(obj)) { case MRB_TT_ICLASS: copy_class(mrb, dest, obj); return; @@ -2742,13 +4157,21 @@ init_copy(mrb_state *mrb, mrb_value dest, mrb_value obj) case MRB_TT_OBJECT: case MRB_TT_SCLASS: case MRB_TT_HASH: - case MRB_TT_DATA: + case MRB_TT_CDATA: case MRB_TT_EXCEPTION: mrb_iv_copy(mrb, dest, obj); break; case MRB_TT_ISTRUCT: mrb_istruct_copy(dest, obj); break; +#if !defined(MRB_NO_FLOAT) && defined(MRB_WORD_BOXING) + case MRB_TT_FLOAT: + { + struct RFloat *f = (struct RFloat*)mrb_obj_ptr(dest); + mrb_rfloat_set(f, mrb_float(obj)); + } + break; +#endif #ifdef MRB_USE_BIGINT case MRB_TT_BIGINT: mrb_bint_copy(mrb, dest, obj); @@ -2769,7 +4192,7 @@ init_copy(mrb_state *mrb, mrb_value dest, mrb_value obj) break; } if (!mrb_func_basic_p(mrb, dest, MRB_SYM(initialize_copy), mrb_obj_init_copy)) { - mrb_funcall_id(mrb, dest, MRB_SYM(initialize_copy), 1, obj); + mrb_funcall_argv(mrb, dest, MRB_SYM(initialize_copy), 1, &obj); } } @@ -2778,10 +4201,10 @@ init_copy(mrb_state *mrb, mrb_value dest, mrb_value obj) * call-seq: * obj.clone -> an_object * - * Produces a shallow copy of obj---the instance variables of - * obj are copied, but not the objects they reference. Copies - * the frozen state of obj. See also the discussion - * under Object#dup. + * Produces a shallow copy of *obj*---the instance variables of + * *obj* are copied, but not the objects they reference. Copies + * the frozen state of *obj*. See also the discussion + * under `Object#dup`. * * class Klass * attr_accessor :str @@ -2794,29 +4217,53 @@ init_copy(mrb_state *mrb, mrb_value dest, mrb_value obj) * s2.inspect #=> "#" * * This method may have class-specific behavior. If so, that - * behavior will be documented under the #+initialize_copy+ method of + * behavior will be documented under the #`initialize_copy` method of * the class. * * Some Class(True False Nil Symbol Integer Float) Object cannot clone. */ +/* + * Creates a shallow copy of the given object `self`. + * + * This function performs a shallow copy, meaning instance variables are copied, + * but the objects they refer to are not duplicated. The frozen state of the + * original object is also copied to the clone. If the object has a singleton + * class, that singleton class is also cloned and associated with the new object. + * After the new object is created and its basic state is copied, its + * `initialize_copy` method is called with the original object as an argument, + * allowing for class-specific adjustments to the cloning process. + * + * @param mrb The mruby state. + * @param self The `mrb_value` object to clone. + * @return A new `mrb_value` which is a clone of `self`. + * @raise TypeError if `self` is an immediate value (e.g., Fixnum, Symbol in some + * configurations) or if `self` is a singleton class itself, as these + * cannot be cloned. + * @sideeffect + * 1. Allocates a new object on the mruby heap. + * 2. Copies instance variables from `self` to the new object. + * 3. If `self` has a singleton class, it is cloned and assigned to the new object. + * This involves further allocations and setup for the new singleton class. + * 4. The `frozen` state of `self` is propagated to the clone. + * 5. Calls the `initialize_copy` method on the newly created clone, passing `self` + * as an argument. This method can have arbitrary side effects. + */ MRB_API mrb_value mrb_obj_clone(mrb_state *mrb, mrb_value self) { - struct RObject *p; - mrb_value clone; - if (mrb_immediate_p(self)) { return self; } if (mrb_sclass_p(self)) { mrb_raise(mrb, E_TYPE_ERROR, "can't clone singleton class"); } - p = (struct RObject*)mrb_obj_alloc(mrb, mrb_type(self), mrb_obj_class(mrb, self)); + struct RObject *p = (struct RObject*)mrb_obj_alloc(mrb, mrb_unboxed_type(self), mrb_obj_class(mrb, self)); p->c = mrb_singleton_class_clone(mrb, self); mrb_field_write_barrier(mrb, (struct RBasic*)p, (struct RBasic*)p->c); - clone = mrb_obj_value(p); + + mrb_value clone = mrb_obj_value(p); init_copy(mrb, clone, self); - p->flags |= mrb_obj_ptr(self)->flags & MRB_FL_OBJ_IS_FROZEN; + p->frozen = mrb_obj_ptr(self)->frozen; return clone; } @@ -2826,34 +4273,56 @@ mrb_obj_clone(mrb_state *mrb, mrb_value self) * call-seq: * obj.dup -> an_object * - * Produces a shallow copy of obj---the instance variables of - * obj are copied, but not the objects they reference. - * dup copies the frozen state of obj. See also - * the discussion under Object#clone. In general, - * clone and dup may have different semantics - * in descendant classes. While clone is used to duplicate - * an object, including its internal state, dup typically + * Produces a shallow copy of *obj*---the instance variables of + * *obj* are copied, but not the objects they reference. + * `dup` copies the frozen state of *obj*. See also + * the discussion under `Object#clone`. In general, + * `clone` and `dup` may have different semantics + * in descendant classes. While `clone` is used to duplicate + * an object, including its internal state, `dup` typically * uses the class of the descendant object to create the new instance. * * This method may have class-specific behavior. If so, that - * behavior will be documented under the #+initialize_copy+ method of + * behavior will be documented under the #`initialize_copy` method of * the class. */ +/* + * Creates a shallow copy of the given object `obj`. + * + * This function performs a shallow copy, meaning instance variables are copied, + * but the objects they refer to are not duplicated. Unlike `mrb_obj_clone`, + * `mrb_obj_dup` does *not* copy the frozen state of the original object; the + * duplicated object is always unfrozen. Also, it does not copy the singleton class. + * After the new object is created and its basic state is copied, its + * `initialize_copy` method is called with the original object as an argument. + * + * @param mrb The mruby state. + * @param obj The `mrb_value` object to duplicate. + * @return A new `mrb_value` which is a duplicate of `obj`. + * @raise TypeError if `obj` is an immediate value (e.g., Fixnum, Symbol in some + * configurations) or if `obj` is a singleton class itself, as these + * cannot be duplicated. + * @sideeffect + * 1. Allocates a new object on the mruby heap with the same class as `obj`. + * 2. Copies instance variables from `obj` to the new object. + * 3. The new object is *not* frozen, regardless of `obj`'s frozen state. + * 4. The singleton class of `obj` (if any) is *not* copied. + * 5. Calls the `initialize_copy` method on the newly created duplicate, passing `obj` + * as an argument. This method can have arbitrary side effects. + */ MRB_API mrb_value mrb_obj_dup(mrb_state *mrb, mrb_value obj) { - struct RBasic *p; - mrb_value dup; - if (mrb_immediate_p(obj)) { return obj; } if (mrb_sclass_p(obj)) { mrb_raise(mrb, E_TYPE_ERROR, "can't dup singleton class"); } - p = mrb_obj_alloc(mrb, mrb_type(obj), mrb_obj_class(mrb, obj)); - dup = mrb_obj_value(p); + + struct RBasic *p = mrb_obj_alloc(mrb, mrb_type(obj), mrb_obj_class(mrb, obj)); + mrb_value dup = mrb_obj_value(p); init_copy(mrb, dup, obj); return dup; @@ -2865,7 +4334,7 @@ mrb_value mrb_obj_id_m(mrb_state *mrb, mrb_value self); mrb_noreturn void mrb_method_missing(mrb_state *mrb, mrb_sym name, mrb_value self, mrb_value args) { - mrb_no_method_error(mrb, name, args, "undefined method '%n'", name); + mrb_no_method_error(mrb, name, args, "undefined method '%n' for %T", name, self); } /* 15.3.1.3.30 */ @@ -2873,16 +4342,16 @@ mrb_method_missing(mrb_state *mrb, mrb_sym name, mrb_value self, mrb_value args) * call-seq: * obj.method_missing(symbol [, *args] ) -> result * - * Invoked by Ruby when obj is sent a message it cannot handle. - * symbol is the symbol for the method called, and args + * Invoked by Ruby when *obj* is sent a message it cannot handle. + * *symbol* is the symbol for the method called, and *args* * are any arguments that were passed to it. By default, the interpreter * raises an error when this method is called. However, it is possible * to override the method to provide more dynamic behavior. * If it is decided that a particular method should not be handled, then - * super should be called, so that ancestors can pick up the + * *super* should be called, so that ancestors can pick up the * missing method. * The example below creates - * a class Roman, which responds to methods with names + * a class `Roman`, which responds to methods with names * consisting of roman numerals, returning the corresponding integer * values. * @@ -2890,8 +4359,8 @@ mrb_method_missing(mrb_state *mrb, mrb_sym name, mrb_value self, mrb_value args) * def romanToInt(str) * # ... * end - * def method_missing(methId) - * str = methId.to_s + * def method_missing(sym) + * str = sym.to_s * romanToInt(str) * end * end @@ -2922,15 +4391,14 @@ inspect_main(mrb_state *mrb, mrb_value mod) } static const mrb_code new_iseq[] = { - OP_ENTER, 0x0, 0x10, 0x3, // OP_ENTER 0:0:1:0:0:1:1 - OP_LOADSELF, 4, // OP_LOADSELF R4 - OP_SEND, 4, 0, 0, // OP_SEND R4 :allocate n=0 - OP_MOVE, 0, 4, // OP_MOVE R0 R4 - OP_MOVE, 4, 3, // OP_MOVE R4 R3 (&) - OP_MOVE, 3, 2, // OP_MOVE R3 R2 (**) - OP_MOVE, 2, 1, // OP_MOVE R2 R1 (*) - OP_SSENDB, 1, 1, 255, // OP_SSENDB R1 :initialize n=*|nk=* - OP_RETURN, 0 // OP_RETURN R0 + OP_ENTER, 0x0, 0x10, 0x3, // 000 OP_ENTER 0:0:1:0:0:1:1 + OP_SSEND, 4, 0, 0, // 004 OP_SSEND R4 :allocate n=0 + OP_MOVE, 0, 4, // 008 OP_MOVE R0 R4 + OP_MOVE, 4, 3, // 011 OP_MOVE R4 R3 ; & + OP_MOVE, 3, 2, // 014 OP_MOVE R3 R2 ; ** + OP_MOVE, 2, 1, // 017 OP_MOVE R2 R1 ; * + OP_SSENDB, 1, 1, 255, // 020 OP_SSENDB R1 :initialize n=*|nk=* + OP_RETURN, 0 // 024 OP_RETURN R0 }; MRB_PRESYM_DEFINE_VAR_AND_INITER(new_syms, 2, MRB_SYM(allocate), MRB_SYM(initialize)) @@ -2941,8 +4409,9 @@ static const mrb_irep new_irep = { sizeof(new_iseq), 0, 2, 0, 0, }; +mrb_alignas(8) static const struct RProc new_proc = { - NULL, NULL, MRB_TT_PROC, MRB_GC_RED, MRB_FL_OBJ_IS_FROZEN | MRB_PROC_SCOPE | MRB_PROC_STRICT, + NULL, MRB_TT_PROC, MRB_GC_RED, MRB_OBJ_IS_FROZEN, MRB_PROC_SCOPE | MRB_PROC_STRICT, { &new_irep }, NULL, { NULL } }; @@ -2956,8 +4425,86 @@ init_class_new(mrb_state *mrb, struct RClass *cls) mrb_define_method_raw(mrb, cls, MRB_SYM(new), m); } -/* implementation of #send method */ -mrb_value mrb_f_send(mrb_state *mrb, mrb_value self); +static const mrb_code neq_iseq[] = { + OP_ENTER, 0x4, 0, 0, // 000 OP_ENTER 1:0:0:0:0:0:0 + OP_EQ, 0, // 004 OP_EQ R0 (R1) + OP_JMPNOT, 0, 0, 5, // 006 OP_JMPNOT R0 015 + OP_LOADFALSE, 0, // 010 OP_LOADFALSE R0 (false) + OP_JMP, 0, 2, // 012 OP_JMP 017 + OP_LOADTRUE, 0, // 015 OP_LOADTRUE R0 (true) + OP_RETURN, 0 // 017 OP_RETURN R0 +}; + +static const mrb_irep neq_irep = { + 4, 6, 0, MRB_IREP_STATIC, + neq_iseq, NULL, NULL, NULL, NULL, NULL, + sizeof(neq_iseq), 0, 2, 0, 0, +}; + +mrb_alignas(8) +static const struct RProc neq_proc = { + NULL, MRB_TT_PROC, MRB_GC_RED, MRB_OBJ_IS_FROZEN, MRB_PROC_SCOPE | MRB_PROC_STRICT, + { &neq_irep }, NULL, { NULL } +}; + +/* ---------------------------*/ +static const mrb_mt_entry bob_rom_entries[] = { + MRB_MT_ENTRY(mrb_obj_equal_m, MRB_OPSYM(eq), MRB_ARGS_REQ(1)), /* 15.3.1.3.1 */ + MRB_MT_ENTRY(mrb_bob_not, MRB_OPSYM(not), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_obj_id_m, MRB_SYM(__id__), MRB_ARGS_NONE()), /* 15.3.1.3.4 */ + MRB_MT_ENTRY(mrb_f_send, MRB_SYM(__send__), MRB_ARGS_REQ(1)|MRB_ARGS_REST()|MRB_ARGS_BLOCK()), /* 15.3.1.3.5 */ + MRB_MT_ENTRY(mrb_obj_equal_m, MRB_SYM_Q(equal), MRB_ARGS_REQ(1)), /* 15.3.1.3.11 */ + MRB_MT_ENTRY(mrb_do_nothing, MRB_SYM(initialize), MRB_ARGS_NONE() | MRB_MT_PRIVATE), + MRB_MT_ENTRY(mrb_obj_instance_eval, MRB_SYM(instance_eval), MRB_ARGS_OPT(1)|MRB_ARGS_BLOCK()), /* 15.3.1.3.18 */ + MRB_MT_ENTRY(mrb_obj_missing, MRB_SYM(method_missing), MRB_ARGS_ANY() | MRB_MT_PRIVATE), /* 15.3.1.3.30 */ + MRB_MT_ENTRY(mrb_do_nothing, MRB_SYM(singleton_method_added), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), + MRB_MT_ENTRY(mrb_do_nothing, MRB_SYM(singleton_method_removed), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), + MRB_MT_ENTRY(mrb_do_nothing, MRB_SYM(singleton_method_undefined), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), +}; + +static const mrb_mt_entry cls_rom_entries[] = { + MRB_MT_ENTRY(mrb_instance_alloc, MRB_SYM(allocate), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_do_nothing, MRB_SYM(inherited), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), + MRB_MT_ENTRY(mrb_class_initialize, MRB_SYM(initialize), MRB_ARGS_OPT(1) | MRB_MT_PRIVATE), /* 15.2.3.3.1 */ + MRB_MT_ENTRY(mrb_class_superclass, MRB_SYM(superclass), MRB_ARGS_NONE()), /* 15.2.3.3.4 */ +}; + +static const mrb_mt_entry mod_rom_entries[] = { + MRB_MT_ENTRY(mrb_mod_eqq, MRB_OPSYM(eqq), MRB_ARGS_REQ(1)), /* 15.2.2.4.7 */ + MRB_MT_ENTRY(mrb_mod_alias, MRB_SYM(alias_method), MRB_ARGS_ANY()), /* 15.2.2.4.8 */ + MRB_MT_ENTRY(mrb_mod_ancestors, MRB_SYM(ancestors), MRB_ARGS_NONE()), /* 15.2.2.4.9 */ + MRB_MT_ENTRY(mrb_mod_attr_accessor, MRB_SYM(attr_accessor), MRB_ARGS_ANY()), /* 15.2.2.4.12 */ + MRB_MT_ENTRY(mrb_mod_attr_reader, MRB_SYM(attr_reader), MRB_ARGS_ANY()), /* 15.2.2.4.13 */ + MRB_MT_ENTRY(mrb_mod_attr_writer, MRB_SYM(attr_writer), MRB_ARGS_ANY()), /* 15.2.2.4.14 */ + MRB_MT_ENTRY(mrb_mod_module_eval, MRB_SYM(class_eval), MRB_ARGS_ANY()), /* 15.2.2.4.15 */ + MRB_MT_ENTRY(mrb_do_nothing, MRB_SYM(const_added), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), + MRB_MT_ENTRY(mrb_mod_const_defined, MRB_SYM_Q(const_defined), MRB_ARGS_ARG(1,1)), /* 15.2.2.4.20 */ + MRB_MT_ENTRY(mrb_mod_const_get, MRB_SYM(const_get), MRB_ARGS_REQ(1)), /* 15.2.2.4.21 */ + MRB_MT_ENTRY(mrb_mod_const_missing, MRB_SYM(const_missing), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(mrb_mod_const_set, MRB_SYM(const_set), MRB_ARGS_REQ(2)), /* 15.2.2.4.23 */ + MRB_MT_ENTRY(mod_define_method, MRB_SYM(define_method), MRB_ARGS_ARG(1,1)), + MRB_MT_ENTRY(mrb_mod_dup, MRB_SYM(dup), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_do_nothing, MRB_SYM(extended), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), /* 15.2.2.4.26 */ + MRB_MT_ENTRY(mrb_mod_include, MRB_SYM(include), MRB_ARGS_ANY()), /* 15.2.2.4.27 */ + MRB_MT_ENTRY(mrb_mod_include_p, MRB_SYM_Q(include), MRB_ARGS_REQ(1)), /* 15.2.2.4.28 */ + MRB_MT_ENTRY(mrb_do_nothing, MRB_SYM(included), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), /* 15.2.2.4.29 */ + MRB_MT_ENTRY(mrb_mod_initialize, MRB_SYM(initialize), MRB_ARGS_NONE() | MRB_MT_PRIVATE), /* 15.2.2.4.31 */ + MRB_MT_ENTRY(mrb_mod_to_s, MRB_SYM(inspect), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_do_nothing, MRB_SYM(method_added), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), + MRB_MT_ENTRY(mrb_do_nothing, MRB_SYM(method_removed), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), + MRB_MT_ENTRY(mrb_mod_method_defined, MRB_SYM_Q(method_defined), MRB_ARGS_REQ(1)), /* 15.2.2.4.34 */ + MRB_MT_ENTRY(mrb_do_nothing, MRB_SYM(method_undefined), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), + MRB_MT_ENTRY(mrb_mod_module_eval, MRB_SYM(module_eval), MRB_ARGS_ANY()), /* 15.2.2.4.35 */ + MRB_MT_ENTRY(mrb_mod_module_function, MRB_SYM(module_function), MRB_ARGS_ANY() | MRB_MT_PRIVATE), + MRB_MT_ENTRY(mrb_mod_prepend, MRB_SYM(prepend), MRB_ARGS_ANY()), + MRB_MT_ENTRY(mrb_do_nothing, MRB_SYM(prepended), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), + MRB_MT_ENTRY(mrb_mod_private, MRB_SYM(private), MRB_ARGS_ANY() | MRB_MT_PRIVATE), /* 15.2.2.4.36 */ + MRB_MT_ENTRY(mrb_mod_protected, MRB_SYM(protected), MRB_ARGS_ANY() | MRB_MT_PRIVATE), /* 15.2.2.4.37 */ + MRB_MT_ENTRY(mrb_mod_public, MRB_SYM(public), MRB_ARGS_ANY() | MRB_MT_PRIVATE), /* 15.2.2.4.38 */ + MRB_MT_ENTRY(mrb_mod_remove_const, MRB_SYM(remove_const), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), /* 15.2.2.4.40 */ + MRB_MT_ENTRY(mrb_mod_to_s, MRB_SYM(to_s), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_mod_undef, MRB_SYM(undef_method), MRB_ARGS_ANY()), /* 15.2.2.4.41 */ +}; void mrb_init_class(mrb_state *mrb) @@ -2968,10 +4515,11 @@ mrb_init_class(mrb_state *mrb) struct RClass *cls; /* Class */ /* boot class hierarchy */ - bob = boot_defclass(mrb, 0); - obj = boot_defclass(mrb, bob); mrb->object_class = obj; - mod = boot_defclass(mrb, obj); mrb->module_class = mod;/* obj -> mod */ - cls = boot_defclass(mrb, mod); mrb->class_class = cls; /* obj -> cls */ + bob = boot_defclass(mrb, 0, MRB_TT_OBJECT); + obj = boot_defclass(mrb, bob, MRB_TT_OBJECT); mrb->object_class = obj; + mod = boot_defclass(mrb, obj, MRB_TT_MODULE); mrb->module_class = mod;/* obj -> mod */ + cls = boot_defclass(mrb, mod, MRB_TT_CLASS); mrb->class_class = cls; /* obj -> cls */ + /* fix-up loose ends */ bob->c = obj->c = mod->c = cls->c = cls; make_metaclass(mrb, bob); @@ -2991,68 +4539,27 @@ mrb_init_class(mrb_state *mrb) mrb_class_name_class(mrb, NULL, mod, MRB_SYM(Module)); /* 15.2.2 */ mrb_class_name_class(mrb, NULL, cls, MRB_SYM(Class)); /* 15.2.3 */ - mrb->proc_class = mrb_define_class(mrb, "Proc", mrb->object_class); /* 15.2.17 */ - MRB_SET_INSTANCE_TT(mrb->proc_class, MRB_TT_PROC); - - MRB_SET_INSTANCE_TT(cls, MRB_TT_CLASS); - mrb_define_method(mrb, bob, "initialize", mrb_do_nothing, MRB_ARGS_NONE()); - mrb_define_method(mrb, bob, "!", mrb_bob_not, MRB_ARGS_NONE()); - mrb_define_method(mrb, bob, "==", mrb_obj_equal_m, MRB_ARGS_REQ(1)); /* 15.3.1.3.1 */ - mrb_define_method(mrb, bob, "__id__", mrb_obj_id_m, MRB_ARGS_NONE()); /* 15.3.1.3.4 */ - mrb_define_method(mrb, bob, "__send__", mrb_f_send, MRB_ARGS_REQ(1)|MRB_ARGS_REST()|MRB_ARGS_BLOCK()); /* 15.3.1.3.5 */ - mrb_define_method(mrb, bob, "equal?", mrb_obj_equal_m, MRB_ARGS_REQ(1)); /* 15.3.1.3.11 */ - mrb_define_method(mrb, bob, "instance_eval", mrb_obj_instance_eval, MRB_ARGS_OPT(1)|MRB_ARGS_BLOCK()); /* 15.3.1.3.18 */ - mrb_define_method(mrb, bob, "singleton_method_added", mrb_do_nothing, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, bob, "method_missing", mrb_obj_missing, MRB_ARGS_ANY()); /* 15.3.1.3.30 */ - - mrb_define_class_method(mrb, cls, "new", mrb_class_new_class, MRB_ARGS_OPT(1)|MRB_ARGS_BLOCK()); - mrb_define_method(mrb, cls, "allocate", mrb_instance_alloc, MRB_ARGS_NONE()); - mrb_define_method(mrb, cls, "superclass", mrb_class_superclass, MRB_ARGS_NONE()); /* 15.2.3.3.4 */ - mrb_define_method(mrb, cls, "initialize", mrb_class_initialize, MRB_ARGS_OPT(1)); /* 15.2.3.3.1 */ - mrb_define_method(mrb, cls, "inherited", mrb_do_nothing, MRB_ARGS_REQ(1)); + MRB_MT_INIT_ROM(mrb, bob, bob_rom_entries); + + mrb_method_t m; + MRB_METHOD_FROM_PROC(m, &neq_proc); + mrb_define_method_raw(mrb, bob, MRB_OPSYM(neq), m); + + mrb_define_class_method_id(mrb, cls, MRB_SYM(new), mrb_class_new_class, MRB_ARGS_OPT(1)|MRB_ARGS_BLOCK()); + MRB_MT_INIT_ROM(mrb, cls, cls_rom_entries); init_class_new(mrb, cls); - MRB_SET_INSTANCE_TT(mod, MRB_TT_MODULE); - mrb_define_method(mrb, mod, "extend_object", mrb_mod_extend_object, MRB_ARGS_REQ(1)); /* 15.2.2.4.25 */ - mrb_define_method(mrb, mod, "extended", mrb_do_nothing, MRB_ARGS_REQ(1)); /* 15.2.2.4.26 */ - mrb_define_method(mrb, mod, "prepended", mrb_do_nothing, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, mod, "prepend_features", mrb_mod_prepend_features, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, mod, "include?", mrb_mod_include_p, MRB_ARGS_REQ(1)); /* 15.2.2.4.28 */ - mrb_define_method(mrb, mod, "append_features", mrb_mod_append_features, MRB_ARGS_REQ(1)); /* 15.2.2.4.10 */ - mrb_define_method(mrb, mod, "class_eval", mrb_mod_module_eval, MRB_ARGS_ANY()); /* 15.2.2.4.15 */ - mrb_define_method(mrb, mod, "included", mrb_do_nothing, MRB_ARGS_REQ(1)); /* 15.2.2.4.29 */ - mrb_define_method(mrb, mod, "initialize", mrb_mod_initialize, MRB_ARGS_NONE()); /* 15.2.2.4.31 */ - mrb_define_method(mrb, mod, "module_eval", mrb_mod_module_eval, MRB_ARGS_ANY()); /* 15.2.2.4.35 */ - mrb_define_method(mrb, mod, "module_function", mrb_mod_module_function, MRB_ARGS_ANY()); - mrb_define_method(mrb, mod, "private", mrb_mod_dummy_visibility, MRB_ARGS_ANY()); /* 15.2.2.4.36 */ - mrb_define_method(mrb, mod, "protected", mrb_mod_dummy_visibility, MRB_ARGS_ANY()); /* 15.2.2.4.37 */ - mrb_define_method(mrb, mod, "public", mrb_mod_dummy_visibility, MRB_ARGS_ANY()); /* 15.2.2.4.38 */ - mrb_define_method(mrb, mod, "attr_reader", mrb_mod_attr_reader, MRB_ARGS_ANY()); /* 15.2.2.4.13 */ - mrb_define_method(mrb, mod, "attr_writer", mrb_mod_attr_writer, MRB_ARGS_ANY()); /* 15.2.2.4.14 */ - mrb_define_method(mrb, mod, "to_s", mrb_mod_to_s, MRB_ARGS_NONE()); - mrb_define_method(mrb, mod, "inspect", mrb_mod_to_s, MRB_ARGS_NONE()); - mrb_define_method(mrb, mod, "alias_method", mrb_mod_alias, MRB_ARGS_ANY()); /* 15.2.2.4.8 */ - mrb_define_method(mrb, mod, "ancestors", mrb_mod_ancestors, MRB_ARGS_NONE()); /* 15.2.2.4.9 */ - mrb_define_method(mrb, mod, "undef_method", mrb_mod_undef, MRB_ARGS_ANY()); /* 15.2.2.4.41 */ - mrb_define_method(mrb, mod, "const_defined?", mrb_mod_const_defined, MRB_ARGS_ARG(1,1)); /* 15.2.2.4.20 */ - mrb_define_method(mrb, mod, "const_get", mrb_mod_const_get, MRB_ARGS_REQ(1)); /* 15.2.2.4.21 */ - mrb_define_method(mrb, mod, "const_set", mrb_mod_const_set, MRB_ARGS_REQ(2)); /* 15.2.2.4.23 */ - mrb_define_method(mrb, mod, "remove_const", mrb_mod_remove_const, MRB_ARGS_REQ(1)); /* 15.2.2.4.40 */ - mrb_define_method(mrb, mod, "const_missing", mrb_mod_const_missing, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, mod, "method_defined?", mrb_mod_method_defined, MRB_ARGS_REQ(1)); /* 15.2.2.4.34 */ - mrb_define_method(mrb, mod, "define_method", mod_define_method, MRB_ARGS_ARG(1,1)); - mrb_define_method(mrb, mod, "===", mrb_mod_eqq, MRB_ARGS_REQ(1)); /* 15.2.2.4.7 */ - mrb_define_method(mrb, mod, "dup", mrb_mod_dup, MRB_ARGS_NONE()); - mrb_define_method(mrb, mod, "method_added", mrb_do_nothing, MRB_ARGS_REQ(1)); - - mrb_undef_method(mrb, cls, "append_features"); - mrb_undef_method(mrb, cls, "prepend_features"); - mrb_undef_method(mrb, cls, "extend_object"); - mrb_undef_method(mrb, cls, "module_function"); + MRB_MT_INIT_ROM(mrb, mod, mod_rom_entries); + mrb_define_alias_id(mrb, mod, MRB_SYM(attr), MRB_SYM(attr_reader)); /* 15.2.2.4.11 */ + + mrb_undef_method_id(mrb, cls, MRB_SYM(module_function)); mrb->top_self = MRB_OBJ_ALLOC(mrb, MRB_TT_OBJECT, mrb->object_class); - mrb_define_singleton_method(mrb, mrb->top_self, "inspect", inspect_main, MRB_ARGS_NONE()); - mrb_define_singleton_method(mrb, mrb->top_self, "to_s", inspect_main, MRB_ARGS_NONE()); - mrb_define_singleton_method(mrb, mrb->top_self, "define_method", top_define_method, MRB_ARGS_ARG(1,1)); + mrb_define_singleton_method_id(mrb, mrb->top_self, MRB_SYM(inspect), inspect_main, MRB_ARGS_NONE()); + mrb_define_singleton_method_id(mrb, mrb->top_self, MRB_SYM(to_s), inspect_main, MRB_ARGS_NONE()); + mrb_define_singleton_method_id(mrb, mrb->top_self, MRB_SYM(define_method), top_define_method, MRB_ARGS_ARG(1,1)); + mrb_define_singleton_method_id(mrb, mrb->top_self, MRB_SYM(public), top_public, MRB_ARGS_ANY()); + mrb_define_singleton_method_id(mrb, mrb->top_self, MRB_SYM(private), top_private, MRB_ARGS_ANY()); + mrb_define_singleton_method_id(mrb, mrb->top_self, MRB_SYM(protected), top_protected, MRB_ARGS_ANY()); } diff --git a/src/codedump.c b/src/codedump.c index 6abb953fd0..4b0f294ac7 100644 --- a/src/codedump.c +++ b/src/codedump.c @@ -8,117 +8,118 @@ #include #ifndef MRB_NO_STDIO +static mrb_bool +print_r_p(mrb_state *mrb, const mrb_irep *irep, size_t n) +{ + if (n == 0) return FALSE; + if (!irep->lv) return FALSE; + if (n >= irep->nlocals) return FALSE; + if (!irep->lv[n-1]) return FALSE; + return TRUE; +} + static void -print_r(mrb_state *mrb, const mrb_irep *irep, size_t n) +print_r(mrb_state *mrb, const mrb_irep *irep, size_t n, FILE *out) { if (n == 0) return; if (n >= irep->nlocals) return; if (!irep->lv[n-1]) return; - printf(" R%d:%s", (int)n, mrb_sym_dump(mrb, irep->lv[n-1])); + fprintf(out, " R%d:%s", (int)n, mrb_sym_dump(mrb, irep->lv[n-1])); } static void -print_lv_a(mrb_state *mrb, const mrb_irep *irep, uint16_t a) +print_lv_a(mrb_state *mrb, const mrb_irep *irep, uint16_t a, FILE *out) { - if (!irep->lv || a >= irep->nlocals || a == 0) { - printf("\n"); - return; + if (print_r_p(mrb, irep, a)) { + fprintf(out, "\t;"); + print_r(mrb, irep, a, out); } - printf("\t;"); - print_r(mrb, irep, a); - printf("\n"); + fprintf(out, "\n"); } static void -print_lv_ab(mrb_state *mrb, const mrb_irep *irep, uint16_t a, uint16_t b) +print_lv_ab(mrb_state *mrb, const mrb_irep *irep, uint16_t a, uint16_t b, FILE *out) { - if (!irep->lv || (a >= irep->nlocals && b >= irep->nlocals) || a+b == 0) { - printf("\n"); - return; + if (print_r_p(mrb, irep, a) || print_r_p(mrb, irep, b)) { + fprintf(out, "\t;"); + print_r(mrb, irep, a, out); + print_r(mrb, irep, b, out); } - printf("\t;"); - if (a > 0) print_r(mrb, irep, a); - if (b > 0) print_r(mrb, irep, b); - printf("\n"); + fprintf(out, "\n"); } static void -print_header(mrb_state *mrb, const mrb_irep *irep, ptrdiff_t i) +print_header(mrb_state *mrb, const mrb_irep *irep, ptrdiff_t i, FILE *out) { int32_t line; mrb_assert(i <= UINT32_MAX); line = mrb_debug_get_line(mrb, irep, (uint32_t)i); if (line < 0) { - printf(" "); + fprintf(out, " "); } else { - printf("%5d ", line); + fprintf(out, "%5d ", line); } - printf("%03d ", (int)i); + fprintf(out, "%03d ", (int)i); } static void -print_args(uint16_t i) +print_args(uint16_t i, FILE *out) { mrb_assert(i <= 255); uint8_t n = i&0xf; uint8_t nk = (i>>4)&0xf; if (n == 15) { - printf("n=*"); + fprintf(out, "n=*"); } else { - printf("n=%d", n); + fprintf(out, "n=%d", n); } if (nk > 0) { - printf("|"); + fprintf(out, "|"); if (nk == 15) { - printf("nk=*"); + fprintf(out, "nk=*"); } else { - printf("nk=%d", nk); + fprintf(out, "nk=%d", nk); } } - printf("\n"); + fprintf(out, "\n"); } #define CASE(insn,ops) case insn: FETCH_ ## ops (); L_ ## insn static void -codedump(mrb_state *mrb, const mrb_irep *irep) +codedump(mrb_state *mrb, const mrb_irep *irep, FILE *out) { - int ai; - const mrb_code *pc, *pcend; - mrb_code ins; - const char *file = NULL, *next_file; + const char *file = NULL; if (!irep) return; - printf("irep %p nregs=%d nlocals=%d pools=%d syms=%d reps=%d ilen=%d\n", (void*)irep, - irep->nregs, irep->nlocals, (int)irep->plen, (int)irep->slen, (int)irep->rlen, (int)irep->ilen); + fprintf(out, "irep %p nregs=%d nlocals=%d pools=%d syms=%d reps=%d ilen=%d\n", (void*)irep, + irep->nregs, irep->nlocals, (int)irep->plen, (int)irep->slen, (int)irep->rlen, (int)irep->ilen); if (irep->lv) { - int i; int head = FALSE; - for (i = 1; i < irep->nlocals; ++i) { + for (int i = 1; i < irep->nlocals; i++) { char const *s = mrb_sym_dump(mrb, irep->lv[i - 1]); if (s) { if (!head) { head = TRUE; - printf("local variable names:\n"); + fprintf(out, "local variable names:\n"); } - printf(" R%d:%s\n", i, s); + fprintf(out, " R%d:%s\n", i, s); } } } if (irep->clen > 0) { - int i = irep->clen; const struct mrb_irep_catch_handler *e = mrb_irep_catch_handler_table(irep); - for (; i > 0; i --, e ++) { + for (int i = irep->clen; i > 0; i--,e++) { uint32_t begin = mrb_irep_catch_handler_unpack(e->begin); uint32_t end = mrb_irep_catch_handler_unpack(e->end); uint32_t target = mrb_irep_catch_handler_unpack(e->target); @@ -138,77 +139,77 @@ codedump(mrb_state *mrb, const mrb_irep *irep) type = buf; break; } - printf("catch type: %-8s begin: %04" PRIu32 " end: %04" PRIu32 " target: %04" PRIu32 "\n", type, begin, end, target); + fprintf(out, "catch type: %-8s begin: %04" PRIu32 " end: %04" PRIu32 " target: %04" PRIu32 "\n", type, begin, end, target); } } - pc = irep->iseq; - pcend = pc + irep->ilen; + const mrb_code *pc = irep->iseq; + const mrb_code *pcend = pc + irep->ilen; while (pc < pcend) { - ptrdiff_t i; uint32_t a; uint16_t b; uint16_t c; + mrb_code ins; - ai = mrb_gc_arena_save(mrb); + int ai = mrb_gc_arena_save(mrb); + ptrdiff_t i = pc - irep->iseq; - i = pc - irep->iseq; - next_file = mrb_debug_get_filename(mrb, irep, (uint32_t)i); + const char *next_file = mrb_debug_get_filename(mrb, irep, (uint32_t)i); if (next_file && file != next_file) { - printf("file: %s\n", next_file); + fprintf(out, "file: %s\n", next_file); file = next_file; } - print_header(mrb, irep, i); + print_header(mrb, irep, i, out); ins = READ_B(); switch (ins) { CASE(OP_NOP, Z): - printf("NOP\n"); + fprintf(out, "NOP\n"); break; CASE(OP_MOVE, BB): - printf("MOVE\t\tR%d\tR%d\t", a, b); - print_lv_ab(mrb, irep, a, b); + fprintf(out, "MOVE\t\tR%d\tR%d", a, b); + print_lv_ab(mrb, irep, a, b, out); break; CASE(OP_LOADL, BB): switch (irep->pool[b].tt) { #ifndef MRB_NO_FLOAT case IREP_TT_FLOAT: - printf("LOADL\t\tR%d\tL(%d)\t; %f", a, b, (double)irep->pool[b].u.f); + fprintf(out, "LOADL\t\tR%d\tL[%d]\t; %f", a, b, (double)irep->pool[b].u.f); break; #endif case IREP_TT_INT32: - printf("LOADL\t\tR%d\tL(%d)\t; %" PRId32, a, b, irep->pool[b].u.i32); + fprintf(out, "LOADL\t\tR%d\tL[%d]\t; %" PRId32, a, b, irep->pool[b].u.i32); break; #ifdef MRB_64BIT case IREP_TT_INT64: - printf("LOADL\t\tR%d\tL(%d)\t; %" PRId64, a, b, irep->pool[b].u.i64); + fprintf(out, "LOADL\t\tR%d\tL[%d]\t; %" PRId64, a, b, irep->pool[b].u.i64); break; #endif default: - printf("LOADL\t\tR%d\tL(%d)\t", a, b); + fprintf(out, "LOADL\t\tR%d\tL[%d]", a, b); break; } - print_lv_a(mrb, irep, a); + print_lv_a(mrb, irep, a, out); break; - CASE(OP_LOADI, BB): - printf("LOADI\t\tR%d\t%d\t", a, b); - print_lv_a(mrb, irep, a); + CASE(OP_LOADI8, BB): + fprintf(out, "LOADI8\tR%d\t%d", a, b); + print_lv_a(mrb, irep, a, out); break; CASE(OP_LOADINEG, BB): - printf("LOADI\tR%d\t-%d\t", a, b); - print_lv_a(mrb, irep, a); + fprintf(out, "LOADINEG\tR%d\t-%d", a, b); + print_lv_a(mrb, irep, a, out); break; CASE(OP_LOADI16, BS): - printf("LOADI16\tR%d\t%d\t", a, (int)(int16_t)b); - print_lv_a(mrb, irep, a); + fprintf(out, "LOADI16\tR%d\t%d", a, (int)(int16_t)b); + print_lv_a(mrb, irep, a, out); break; CASE(OP_LOADI32, BSS): - printf("LOADI32\tR%d\t%d\t", a, (int32_t)(((uint32_t)b<<16)+c)); - print_lv_a(mrb, irep, a); + fprintf(out, "LOADI32\tR%d\t%d", a, (int32_t)(((uint32_t)b<<16)+c)); + print_lv_a(mrb, irep, a, out); break; CASE(OP_LOADI__1, B): - printf("LOADI__1\tR%d\t\t", a); - print_lv_a(mrb, irep, a); + fprintf(out, "LOADI__1\tR%d\t(-1)", a); + print_lv_a(mrb, irep, a, out); break; CASE(OP_LOADI_0, B): goto L_LOADI; CASE(OP_LOADI_1, B): goto L_LOADI; @@ -219,421 +220,474 @@ codedump(mrb_state *mrb, const mrb_irep *irep) CASE(OP_LOADI_6, B): goto L_LOADI; CASE(OP_LOADI_7, B): L_LOADI: - printf("LOADI_%d\tR%d\t\t", ins-(int)OP_LOADI_0, a); - print_lv_a(mrb, irep, a); + b = ins-(int)OP_LOADI_0; + fprintf(out, "LOADI_%d\tR%d\t(%d)", b, a, b); + print_lv_a(mrb, irep, a, out); break; CASE(OP_LOADSYM, BB): - printf("LOADSYM\tR%d\t:%s\t", a, mrb_sym_dump(mrb, irep->syms[b])); - print_lv_a(mrb, irep, a); + fprintf(out, "LOADSYM\tR%d\t:%s", a, mrb_sym_dump(mrb, irep->syms[b])); + print_lv_a(mrb, irep, a, out); break; CASE(OP_LOADNIL, B): - printf("LOADNIL\tR%d\t\t", a); - print_lv_a(mrb, irep, a); + fprintf(out, "LOADNIL\tR%d\t(nil)", a); + print_lv_a(mrb, irep, a, out); break; CASE(OP_LOADSELF, B): - printf("LOADSELF\tR%d\t\t", a); - print_lv_a(mrb, irep, a); + fprintf(out, "LOADSELF\tR%d\t(R0)", a); + print_lv_a(mrb, irep, a, out); break; - CASE(OP_LOADT, B): - printf("LOADT\t\tR%d\t\t", a); - print_lv_a(mrb, irep, a); + CASE(OP_LOADTRUE, B): + fprintf(out, "LOADTRUE\tR%d\t(true)", a); + print_lv_a(mrb, irep, a, out); break; - CASE(OP_LOADF, B): - printf("LOADF\t\tR%d\t\t", a); - print_lv_a(mrb, irep, a); + CASE(OP_LOADFALSE, B): + fprintf(out, "LOADFALSE\tR%d\t(false)", a); + print_lv_a(mrb, irep, a, out); break; CASE(OP_GETGV, BB): - printf("GETGV\t\tR%d\t%s\t", a, mrb_sym_dump(mrb, irep->syms[b])); - print_lv_a(mrb, irep, a); + fprintf(out, "GETGV\t\tR%d\t%s", a, mrb_sym_dump(mrb, irep->syms[b])); + print_lv_a(mrb, irep, a, out); break; CASE(OP_SETGV, BB): - printf("SETGV\t\t%s\tR%d\t", mrb_sym_dump(mrb, irep->syms[b]), a); - print_lv_a(mrb, irep, a); + fprintf(out, "SETGV\t\t%s\tR%d", mrb_sym_dump(mrb, irep->syms[b]), a); + print_lv_a(mrb, irep, a, out); break; CASE(OP_GETSV, BB): - printf("GETSV\t\tR%d\t%s\t", a, mrb_sym_dump(mrb, irep->syms[b])); - print_lv_a(mrb, irep, a); + fprintf(out, "GETSV\t\tR%d\t%s", a, mrb_sym_dump(mrb, irep->syms[b])); + print_lv_a(mrb, irep, a, out); break; CASE(OP_SETSV, BB): - printf("SETSV\t\t%s\tR%d\t", mrb_sym_dump(mrb, irep->syms[b]), a); - print_lv_a(mrb, irep, a); + fprintf(out, "SETSV\t\t%s\tR%d", mrb_sym_dump(mrb, irep->syms[b]), a); + print_lv_a(mrb, irep, a, out); break; CASE(OP_GETCONST, BB): - printf("GETCONST\tR%d\t%s\t", a, mrb_sym_dump(mrb, irep->syms[b])); - print_lv_a(mrb, irep, a); + fprintf(out, "GETCONST\tR%d\t%s", a, mrb_sym_dump(mrb, irep->syms[b])); + print_lv_a(mrb, irep, a, out); break; CASE(OP_SETCONST, BB): - printf("SETCONST\t%s\tR%d\t", mrb_sym_dump(mrb, irep->syms[b]), a); - print_lv_a(mrb, irep, a); + fprintf(out, "SETCONST\t%s\tR%d", mrb_sym_dump(mrb, irep->syms[b]), a); + print_lv_a(mrb, irep, a, out); break; CASE(OP_GETMCNST, BB): - printf("GETMCNST\tR%d\tR%d::%s\t", a, a, mrb_sym_dump(mrb, irep->syms[b])); - print_lv_a(mrb, irep, a); + fprintf(out, "GETMCNST\tR%d\t(R%d)::%s", a, a, mrb_sym_dump(mrb, irep->syms[b])); + print_lv_a(mrb, irep, a, out); break; CASE(OP_SETMCNST, BB): - printf("SETMCNST\tR%d::%s\tR%d\t", a+1, mrb_sym_dump(mrb, irep->syms[b]), a); - print_lv_a(mrb, irep, a); + fprintf(out, "SETMCNST\t(R%d)::%s\tR%d", a+1, mrb_sym_dump(mrb, irep->syms[b]), a); + print_lv_a(mrb, irep, a, out); break; CASE(OP_GETIV, BB): - printf("GETIV\t\tR%d\t%s\t", a, mrb_sym_dump(mrb, irep->syms[b])); - print_lv_a(mrb, irep, a); + fprintf(out, "GETIV\t\tR%d\t%s", a, mrb_sym_dump(mrb, irep->syms[b])); + print_lv_a(mrb, irep, a, out); break; CASE(OP_SETIV, BB): - printf("SETIV\t\t%s\tR%d\t", mrb_sym_dump(mrb, irep->syms[b]), a); - print_lv_a(mrb, irep, a); + fprintf(out, "SETIV\t\t%s\tR%d", mrb_sym_dump(mrb, irep->syms[b]), a); + print_lv_a(mrb, irep, a, out); break; CASE(OP_GETUPVAR, BBB): - printf("GETUPVAR\tR%d\t%d\t%d\t", a, b, c); - print_lv_a(mrb, irep, a); + fprintf(out, "GETUPVAR\tR%d\t%d\t%d", a, b, c); + print_lv_a(mrb, irep, a, out); break; CASE(OP_SETUPVAR, BBB): - printf("SETUPVAR\tR%d\t%d\t%d\t", a, b, c); - print_lv_a(mrb, irep, a); + fprintf(out, "SETUPVAR\tR%d\t%d\t%d", a, b, c); + print_lv_a(mrb, irep, a, out); break; CASE(OP_GETCV, BB): - printf("GETCV\t\tR%d\t%s\t", a, mrb_sym_dump(mrb, irep->syms[b])); - print_lv_a(mrb, irep, a); + fprintf(out, "GETCV\t\tR%d\t%s", a, mrb_sym_dump(mrb, irep->syms[b])); + print_lv_a(mrb, irep, a, out); break; CASE(OP_SETCV, BB): - printf("SETCV\t\t%s\tR%d\t", mrb_sym_dump(mrb, irep->syms[b]), a); - print_lv_a(mrb, irep, a); + fprintf(out, "SETCV\t\t%s\tR%d", mrb_sym_dump(mrb, irep->syms[b]), a); + print_lv_a(mrb, irep, a, out); break; CASE(OP_GETIDX, B): - printf("GETIDX\tR%d\tR%d\n", a, a+1); + fprintf(out, "GETIDX\tR%d\t(R%d)\n", a, a+1); + break; + CASE(OP_GETIDX0, BB): + fprintf(out, "GETIDX0\tR%d\tR%d[0]\n", a, b); break; CASE(OP_SETIDX, B): - printf("SETIDX\tR%d\tR%d\tR%d\n", a, a+1, a+2); + fprintf(out, "SETIDX\tR%d\t(R%d)\t(R%d)\n", a, a+1, a+2); break; CASE(OP_JMP, S): i = pc - irep->iseq; - printf("JMP\t\t%03d\n", (int)i+(int16_t)a); + fprintf(out, "JMP\t\t%03d\n", (int)i+(int16_t)a); break; CASE(OP_JMPUW, S): i = pc - irep->iseq; - printf("JMPUW\t\t%03d\n", (int)i+(int16_t)a); + fprintf(out, "JMPUW\t\t%03d\n", (int)i+(int16_t)a); break; CASE(OP_JMPIF, BS): i = pc - irep->iseq; - printf("JMPIF\t\tR%d\t%03d\t", a, (int)i+(int16_t)b); - print_lv_a(mrb, irep, a); + fprintf(out, "JMPIF\t\tR%d\t%03d", a, (int)i+(int16_t)b); + print_lv_a(mrb, irep, a, out); break; CASE(OP_JMPNOT, BS): i = pc - irep->iseq; - printf("JMPNOT\tR%d\t%03d\t", a, (int)i+(int16_t)b); - print_lv_a(mrb, irep, a); + fprintf(out, "JMPNOT\tR%d\t%03d", a, (int)i+(int16_t)b); + print_lv_a(mrb, irep, a, out); break; CASE(OP_JMPNIL, BS): i = pc - irep->iseq; - printf("JMPNIL\tR%d\t%03d\t", a, (int)i+(int16_t)b); - print_lv_a(mrb, irep, a); + fprintf(out, "JMPNIL\tR%d\t%03d", a, (int)i+(int16_t)b); + print_lv_a(mrb, irep, a, out); break; CASE(OP_SSEND, BBB): - printf("SSEND\t\tR%d\t:%s\t", a, mrb_sym_dump(mrb, irep->syms[b])); - print_args(c); + fprintf(out, "SSEND\t\tR%d\t:%s\t", a, mrb_sym_dump(mrb, irep->syms[b])); + print_args(c, out); + break; + CASE(OP_SSEND0, BB): + fprintf(out, "SSEND0\tR%d\t:%s\n", a, mrb_sym_dump(mrb, irep->syms[b])); break; CASE(OP_SSENDB, BBB): - printf("SSENDB\tR%d\t:%s\t", a, mrb_sym_dump(mrb, irep->syms[b])); - print_args(c); + fprintf(out, "SSENDB\tR%d\t:%s\t", a, mrb_sym_dump(mrb, irep->syms[b])); + print_args(c, out); break; CASE(OP_SEND, BBB): - printf("SEND\t\tR%d\t:%s\t", a, mrb_sym_dump(mrb, irep->syms[b])); - print_args(c); + fprintf(out, "SEND\t\tR%d\t:%s\t", a, mrb_sym_dump(mrb, irep->syms[b])); + print_args(c, out); + break; + CASE(OP_SEND0, BB): + fprintf(out, "SEND0\t\tR%d\t:%s\n", a, mrb_sym_dump(mrb, irep->syms[b])); break; CASE(OP_SENDB, BBB): - printf("SENDB\t\tR%d\t:%s\t", a, mrb_sym_dump(mrb, irep->syms[b])); - print_args(c); + fprintf(out, "SENDB\t\tR%d\t:%s\t", a, mrb_sym_dump(mrb, irep->syms[b])); + print_args(c, out); break; CASE(OP_CALL, Z): - printf("CALL\n"); + fprintf(out, "CALL\n"); + break; + CASE(OP_BLKCALL, BB): + fprintf(out, "BLKCALL\t\tR%d\t%d\n", a, b); break; CASE(OP_SUPER, BB): - printf("SUPER\t\tR%d\t", a); - print_args(b); + fprintf(out, "SUPER\t\tR%d\t", a); + print_args(b, out); break; CASE(OP_ARGARY, BS): - printf("ARGARY\tR%d\t%d:%d:%d:%d (%d)\t", a, + fprintf(out, "ARGARY\tR%d\t%d:%d:%d:%d (%d)", a, (b>>11)&0x3f, (b>>10)&0x1, (b>>5)&0x1f, (b>>4)&0x1, (b>>0)&0xf); - print_lv_a(mrb, irep, a); + print_lv_a(mrb, irep, a, out); break; CASE(OP_ENTER, W): - printf("ENTER\t\t%d:%d:%d:%d:%d:%d:%d (0x%x)\n", - MRB_ASPEC_REQ(a), - MRB_ASPEC_OPT(a), - MRB_ASPEC_REST(a), - MRB_ASPEC_POST(a), - MRB_ASPEC_KEY(a), - MRB_ASPEC_KDICT(a), - MRB_ASPEC_BLOCK(a), a); + fprintf(out, "ENTER\t\t%d:%d:%d:%d:%d:%d:%d:%d (0x%x)\n", + MRB_ASPEC_REQ(a), + MRB_ASPEC_OPT(a), + MRB_ASPEC_REST(a), + MRB_ASPEC_POST(a), + MRB_ASPEC_KEY(a), + MRB_ASPEC_KDICT(a), + MRB_ASPEC_BLOCK(a), + MRB_ASPEC_NOBLOCK(a), a); break; CASE(OP_KEY_P, BB): - printf("KEY_P\t\tR%d\t:%s\t", a, mrb_sym_dump(mrb, irep->syms[b])); - print_lv_a(mrb, irep, a); + fprintf(out, "KEY_P\t\tR%d\t:%s", a, mrb_sym_dump(mrb, irep->syms[b])); + print_lv_a(mrb, irep, a, out); break; CASE(OP_KEYEND, Z): - printf("KEYEND\n"); + fprintf(out, "KEYEND\n"); break; CASE(OP_KARG, BB): - printf("KARG\t\tR%d\t:%s\t", a, mrb_sym_dump(mrb, irep->syms[b])); - print_lv_a(mrb, irep, a); + fprintf(out, "KARG\t\tR%d\t:%s", a, mrb_sym_dump(mrb, irep->syms[b])); + print_lv_a(mrb, irep, a, out); break; CASE(OP_RETURN, B): - printf("RETURN\tR%d\t\t", a); - print_lv_a(mrb, irep, a); + fprintf(out, "RETURN\tR%d\t", a); + print_lv_a(mrb, irep, a, out); break; CASE(OP_RETURN_BLK, B): - printf("RETURN_BLK\tR%d\t\t", a); - print_lv_a(mrb, irep, a); + fprintf(out, "RETURN_BLK\tR%d\t", a); + print_lv_a(mrb, irep, a, out); + break; + CASE(OP_RETSELF, Z): + fprintf(out, "RETSELF\n"); + break; + CASE(OP_RETNIL, Z): + fprintf(out, "RETNIL\n"); + break; + CASE(OP_RETTRUE, Z): + fprintf(out, "RETTRUE\n"); + break; + CASE(OP_RETFALSE, Z): + fprintf(out, "RETFALSE\n"); break; CASE(OP_BREAK, B): - printf("BREAK\t\tR%d\t\t", a); - print_lv_a(mrb, irep, a); + fprintf(out, "BREAK\t\tR%d\t", a); + print_lv_a(mrb, irep, a, out); break; CASE(OP_BLKPUSH, BS): - printf("BLKPUSH\tR%d\t%d:%d:%d:%d (%d)\t", a, + fprintf(out, "BLKPUSH\tR%d\t%d:%d:%d:%d (%d)", a, (b>>11)&0x3f, (b>>10)&0x1, (b>>5)&0x1f, (b>>4)&0x1, (b>>0)&0xf); - print_lv_a(mrb, irep, a); + print_lv_a(mrb, irep, a, out); break; CASE(OP_LAMBDA, BB): - printf("LAMBDA\tR%d\tI(%d:%p)\n", a, b, (void*)irep->reps[b]); + fprintf(out, "LAMBDA\tR%d\tI[%d]\n", a, b); break; CASE(OP_BLOCK, BB): - printf("BLOCK\t\tR%d\tI(%d:%p)\n", a, b, (void*)irep->reps[b]); + fprintf(out, "BLOCK\t\tR%d\tI[%d]\n", a, b); break; CASE(OP_METHOD, BB): - printf("METHOD\tR%d\tI(%d:%p)\n", a, b, (void*)irep->reps[b]); + fprintf(out, "METHOD\tR%d\tI[%d]\n", a, b); break; CASE(OP_RANGE_INC, B): - printf("RANGE_INC\tR%d\n", a); + fprintf(out, "RANGE_INC\tR%d\n", a); break; CASE(OP_RANGE_EXC, B): - printf("RANGE_EXC\tR%d\n", a); + fprintf(out, "RANGE_EXC\tR%d\n", a); break; CASE(OP_DEF, BB): - printf("DEF\t\tR%d\t:%s\n", a, mrb_sym_dump(mrb, irep->syms[b])); + fprintf(out, "DEF\t\tR%d\t:%s\t(R%d)\n", a, mrb_sym_dump(mrb, irep->syms[b]),a+1); + break; + CASE(OP_TDEF, BBB): + fprintf(out, "TDEF\t\tR%d\t:%s\tI[%d]\n", a, mrb_sym_dump(mrb, irep->syms[b]), c); + break; + CASE(OP_SDEF, BBB): + fprintf(out, "SDEF\t\tR%d\t:%s\tI[%d]\n", a, mrb_sym_dump(mrb, irep->syms[b]), c); break; CASE(OP_UNDEF, B): - printf("UNDEF\t\t:%s\n", mrb_sym_dump(mrb, irep->syms[a])); + fprintf(out, "UNDEF\t\t:%s\n", mrb_sym_dump(mrb, irep->syms[a])); break; CASE(OP_ALIAS, BB): - printf("ALIAS\t\t:%s\t%s\n", mrb_sym_dump(mrb, irep->syms[a]), mrb_sym_dump(mrb, irep->syms[b])); + fprintf(out, "ALIAS\t\t:%s\t%s\n", mrb_sym_dump(mrb, irep->syms[a]), mrb_sym_dump(mrb, irep->syms[b])); break; CASE(OP_ADD, B): - printf("ADD\t\tR%d\tR%d\n", a, a+1); + fprintf(out, "ADD\t\tR%d\t(R%d)\n", a, a+1); break; CASE(OP_ADDI, BB): - printf("ADDI\t\tR%d\t%d\t", a, b); - print_lv_a(mrb, irep, a); + fprintf(out, "ADDI\t\tR%d\t%d", a, b); + print_lv_a(mrb, irep, a, out); break; CASE(OP_SUB, B): - printf("SUB\t\tR%d\tR%d\n", a, a+1); + fprintf(out, "SUB\t\tR%d\t(R%d)\n", a, a+1); break; CASE(OP_SUBI, BB): - printf("SUBI\t\tR%d\t%d\t", a, b); - print_lv_a(mrb, irep, a); + fprintf(out, "SUBI\t\tR%d\t%d", a, b); + print_lv_a(mrb, irep, a, out); + break; + CASE(OP_ADDILV, BBB): + fprintf(out, "ADDILV\tR%d\tR%d\t%d", a, b, c); + print_lv_a(mrb, irep, a, out); + break; + CASE(OP_SUBILV, BBB): + fprintf(out, "SUBILV\tR%d\tR%d\t%d", a, b, c); + print_lv_a(mrb, irep, a, out); break; CASE(OP_MUL, B): - printf("MUL\t\tR%d\tR%d\n", a, a+1); + fprintf(out, "MUL\t\tR%d\t(R%d)\n", a, a+1); break; CASE(OP_DIV, B): - printf("DIV\t\tR%d\tR%d\n", a, a+1); + fprintf(out, "DIV\t\tR%d\t(R%d)\n", a, a+1); break; CASE(OP_LT, B): - printf("LT\t\tR%d\tR%d\n", a, a+1); + fprintf(out, "LT\t\tR%d\t(R%d)\n", a, a+1); break; CASE(OP_LE, B): - printf("LE\t\tR%d\tR%d\n", a, a+1); + fprintf(out, "LE\t\tR%d\t(R%d)\n", a, a+1); break; CASE(OP_GT, B): - printf("GT\t\tR%d\tR%d\n", a, a+1); + fprintf(out, "GT\t\tR%d\t(R%d)\n", a, a+1); break; CASE(OP_GE, B): - printf("GE\t\tR%d\tR%d\n", a, a+1); + fprintf(out, "GE\t\tR%d\t(R%d)\n", a, a+1); break; CASE(OP_EQ, B): - printf("EQ\t\tR%d\tR%d\n", a, a+1); + fprintf(out, "EQ\t\tR%d\t(R%d)\n", a, a+1); break; CASE(OP_ARRAY, BB): - printf("ARRAY\t\tR%d\tR%d\t%d", a, a, b); - print_lv_a(mrb, irep, a); + fprintf(out, "ARRAY\t\tR%d\t%d", a, b); + print_lv_a(mrb, irep, a, out); break; CASE(OP_ARRAY2, BBB): - printf("ARRAY\t\tR%d\tR%d\t%d", a, b, c); - print_lv_ab(mrb, irep, a, b); + fprintf(out, "ARRAY\t\tR%d\tR%d\t%d", a, b, c); + print_lv_ab(mrb, irep, a, b, out); break; CASE(OP_ARYCAT, B): - printf("ARYCAT\tR%d\tR%d\t", a, a+1); - print_lv_a(mrb, irep, a); + fprintf(out, "ARYCAT\tR%d\t(R%d)", a, a+1); + print_lv_a(mrb, irep, a, out); break; CASE(OP_ARYPUSH, BB): - printf("ARYPUSH\tR%d\t%d\t", a, b); - print_lv_a(mrb, irep, a); + fprintf(out, "ARYPUSH\tR%d\t%d", a, b); + print_lv_a(mrb, irep, a, out); break; - CASE(OP_ARYDUP, B): - printf("ARYDUP\tR%d\t", a); - print_lv_a(mrb, irep, a); + CASE(OP_ARYSPLAT, B): + fprintf(out, "ARYSPLAT\tR%d", a); + print_lv_a(mrb, irep, a, out); break; CASE(OP_AREF, BBB): - printf("AREF\t\tR%d\tR%d\t%d", a, b, c); - print_lv_ab(mrb, irep, a, b); + fprintf(out, "AREF\t\tR%d\tR%d\t%d", a, b, c); + print_lv_ab(mrb, irep, a, b, out); break; CASE(OP_ASET, BBB): - printf("ASET\t\tR%d\tR%d\t%d", a, b, c); - print_lv_ab(mrb, irep, a, b); + fprintf(out, "ASET\t\tR%d\tR%d\t%d", a, b, c); + print_lv_ab(mrb, irep, a, b, out); break; CASE(OP_APOST, BBB): - printf("APOST\t\tR%d\t%d\t%d", a, b, c); - print_lv_a(mrb, irep, a); + fprintf(out, "APOST\t\tR%d\t%d\t%d", a, b, c); + print_lv_a(mrb, irep, a, out); break; CASE(OP_INTERN, B): - printf("INTERN\tR%d\t\t", a); - print_lv_a(mrb, irep, a); + fprintf(out, "INTERN\tR%d\t", a); + print_lv_a(mrb, irep, a, out); break; CASE(OP_SYMBOL, BB): mrb_assert((irep->pool[b].tt&IREP_TT_NFLAG)==0); - printf("SYMBOL\tR%d\tL(%d)\t; %s", a, b, irep->pool[b].u.str); - print_lv_a(mrb, irep, a); + fprintf(out, "SYMBOL\tR%d\tL[%d]\t; %s", a, b, irep->pool[b].u.str); + print_lv_a(mrb, irep, a, out); break; CASE(OP_STRING, BB): mrb_assert((irep->pool[b].tt&IREP_TT_NFLAG)==0); - printf("STRING\tR%d\tL(%d)\t; %s", a, b, irep->pool[b].u.str); - print_lv_a(mrb, irep, a); + fprintf(out, "STRING\tR%d\tL[%d]", a, b); + if (irep->pool[b].u.str[0]) { + fprintf(out, "\t; %s", irep->pool[b].u.str); + } + print_lv_a(mrb, irep, a, out); break; CASE(OP_STRCAT, B): - printf("STRCAT\tR%d\tR%d\t", a, a+1); - print_lv_a(mrb, irep, a); + fprintf(out, "STRCAT\tR%d\t(R%d)", a, a+1); + print_lv_a(mrb, irep, a, out); break; CASE(OP_HASH, BB): - printf("HASH\t\tR%d\t%d\t", a, b); - print_lv_a(mrb, irep, a); + fprintf(out, "HASH\t\tR%d\t%d", a, b); + print_lv_a(mrb, irep, a, out); break; CASE(OP_HASHADD, BB): - printf("HASHADD\tR%d\t%d\t", a, b); - print_lv_a(mrb, irep, a); + fprintf(out, "HASHADD\tR%d\t%d", a, b); + print_lv_a(mrb, irep, a, out); break; CASE(OP_HASHCAT, B): - printf("HASHCAT\tR%d\tR%d\t", a, a+1); - print_lv_a(mrb, irep, a); + fprintf(out, "HASHCAT\tR%d\t(R%d)", a, a+1); + print_lv_a(mrb, irep, a, out); break; CASE(OP_OCLASS, B): - printf("OCLASS\tR%d\t\t", a); - print_lv_a(mrb, irep, a); + fprintf(out, "OCLASS\tR%d\t", a); + print_lv_a(mrb, irep, a, out); break; CASE(OP_CLASS, BB): - printf("CLASS\t\tR%d\t:%s", a, mrb_sym_dump(mrb, irep->syms[b])); - print_lv_a(mrb, irep, a); + fprintf(out, "CLASS\t\tR%d\t:%s", a, mrb_sym_dump(mrb, irep->syms[b])); + print_lv_a(mrb, irep, a, out); break; CASE(OP_MODULE, BB): - printf("MODULE\tR%d\t:%s", a, mrb_sym_dump(mrb, irep->syms[b])); - print_lv_a(mrb, irep, a); + fprintf(out, "MODULE\tR%d\t:%s", a, mrb_sym_dump(mrb, irep->syms[b])); + print_lv_a(mrb, irep, a, out); break; CASE(OP_EXEC, BB): - printf("EXEC\t\tR%d\tI(%d:%p)", a, b, (void*)irep->reps[b]); - print_lv_a(mrb, irep, a); + fprintf(out, "EXEC\t\tR%d\tI[%d]", a, b); + print_lv_a(mrb, irep, a, out); break; CASE(OP_SCLASS, B): - printf("SCLASS\t\tR%d\t", a); - print_lv_a(mrb, irep, a); + fprintf(out, "SCLASS\tR%d", a); + print_lv_a(mrb, irep, a, out); break; CASE(OP_TCLASS, B): - printf("TCLASS\tR%d\t\t", a); - print_lv_a(mrb, irep, a); + fprintf(out, "TCLASS\tR%d\t", a); + print_lv_a(mrb, irep, a, out); break; CASE(OP_ERR, B): if ((irep->pool[a].tt & IREP_TT_NFLAG) == 0) { - printf("ERR\t\t%s\n", irep->pool[a].u.str); + fprintf(out, "ERR\t\t%s\n", irep->pool[a].u.str); } else { - printf("ERR\tL(%d)\n", a); + fprintf(out, "ERR\tL[%d]\n", a); } break; CASE(OP_EXCEPT, B): - printf("EXCEPT\tR%d\t\t", a); - print_lv_a(mrb, irep, a); + fprintf(out, "EXCEPT\tR%d\t", a); + print_lv_a(mrb, irep, a, out); break; CASE(OP_RESCUE, BB): - printf("RESCUE\tR%d\tR%d", a, b); - print_lv_ab(mrb, irep, a, b); + fprintf(out, "RESCUE\tR%d\tR%d", a, b); + print_lv_ab(mrb, irep, a, b, out); break; CASE(OP_RAISEIF, B): - printf("RAISEIF\tR%d\t\t", a); - print_lv_a(mrb, irep, a); + fprintf(out, "RAISEIF\tR%d\t", a); + print_lv_a(mrb, irep, a, out); + break; + CASE(OP_MATCHERR, B): + fprintf(out, "MATCHERR\tR%d\n", a); break; CASE(OP_DEBUG, BBB): - printf("DEBUG\t\t%d\t%d\t%d\n", a, b, c); + fprintf(out, "DEBUG\t\t%d\t%d\t%d\n", a, b, c); break; CASE(OP_STOP, Z): - printf("STOP\n"); + fprintf(out, "STOP\n"); break; CASE(OP_EXT1, Z): - printf("EXT1\n"); - print_header(mrb, irep, pc-irep->iseq); + fprintf(out, "EXT1\n"); + print_header(mrb, irep, pc-irep->iseq, out); ins = READ_B(); switch (ins) { #define OPCODE(i,x) case OP_ ## i: FETCH_ ## x ## _1 (); goto L_OP_ ## i; -#include "mruby/ops.h" +#include #undef OPCODE } break; CASE(OP_EXT2, Z): - printf("EXT2\n"); - print_header(mrb, irep, pc-irep->iseq); + fprintf(out, "EXT2\n"); + print_header(mrb, irep, pc-irep->iseq, out); ins = READ_B(); switch (ins) { #define OPCODE(i,x) case OP_ ## i: FETCH_ ## x ## _2 (); goto L_OP_ ## i; -#include "mruby/ops.h" +#include #undef OPCODE } break; CASE(OP_EXT3, Z): - printf("EXT3\n"); - print_header(mrb, irep, pc-irep->iseq); + fprintf(out, "EXT3\n"); + print_header(mrb, irep, pc-irep->iseq, out); ins = READ_B(); switch (ins) { #define OPCODE(i,x) case OP_ ## i: FETCH_ ## x ## _3 (); goto L_OP_ ## i; -#include "mruby/ops.h" +#include #undef OPCODE } break; default: - printf("unknown_op (0x%x)\n", ins); + fprintf(out, "unknown_op (0x%x)\n", ins); break; } mrb_gc_arena_restore(mrb, ai); } - printf("\n"); + fprintf(out, "\n"); } static void -codedump_recur(mrb_state *mrb, const mrb_irep *irep) +codedump_recur(mrb_state *mrb, const mrb_irep *irep, FILE *out) { - int i; - - codedump(mrb, irep); + codedump(mrb, irep, out); if (irep->reps) { - for (i=0; irlen; i++) { - codedump_recur(mrb, irep->reps[i]); + for (int i=0; irlen; i++) { + codedump_recur(mrb, irep->reps[i], out); } } } + +void +mrb_codedump_all_file(mrb_state *mrb, struct RProc *proc, FILE *out) +{ + codedump_recur(mrb, proc->body.irep, out); + fflush(out); +} + #endif void mrb_codedump_all(mrb_state *mrb, struct RProc *proc) { #ifndef MRB_NO_STDIO - codedump_recur(mrb, proc->body.irep); + mrb_codedump_all_file(mrb, proc, stdout); #endif } +#undef CASE diff --git a/src/compar.c b/src/compar.c deleted file mode 100644 index 0032fc8592..0000000000 --- a/src/compar.c +++ /dev/null @@ -1,13 +0,0 @@ -/* -** compar.c - Comparable module -** -** See Copyright Notice in mruby.h -*/ - -#include - -void -mrb_init_comparable(mrb_state *mrb) -{ - mrb_define_module(mrb, "Comparable"); /* 15.3.3 */ -} diff --git a/src/debug.c b/src/debug.c index d0f5ab1244..6bc6c01f1c 100644 --- a/src/debug.c +++ b/src/debug.c @@ -6,13 +6,11 @@ static mrb_irep_debug_info_file* get_file(mrb_irep_debug_info *info, uint32_t pc) { - mrb_irep_debug_info_file **ret; - int32_t count; - if (pc >= info->pc_count) { return NULL; } /* get upper bound */ - ret = info->files; - count = info->flen; + mrb_irep_debug_info_file **ret = info->files; + int32_t count = info->flen; + while (count > 0) { int32_t step = count / 2; mrb_irep_debug_info_file **it = ret + step; @@ -35,6 +33,10 @@ get_file(mrb_irep_debug_info *info, uint32_t pc) return *ret; } +/* + * Calculates the number of bytes that `mrb_packed_int_encode` will write + * to store the given 32-bit unsigned integer `num`. + */ size_t mrb_packed_int_len(uint32_t num) { @@ -46,8 +48,17 @@ mrb_packed_int_len(uint32_t num) return llen; } +/* + * Encodes a 32-bit unsigned integer `num` into a variable-length packed format + * and writes it to the byte array `p`. + * The most significant bit of each byte is used as a continuation flag: + * - 1 indicates that more bytes follow. + * - 0 indicates the last byte. + * The lower 7 bits of each byte store parts of the number. + * Returns the number of bytes written to `p`. + */ size_t -mrb_packed_int_encode(uint32_t num, uint8_t *p, uint8_t *pend) +mrb_packed_int_encode(uint32_t num, uint8_t *p) { size_t llen = 0; @@ -55,13 +66,21 @@ mrb_packed_int_encode(uint32_t num, uint8_t *p, uint8_t *pend) uint8_t byte = num & 0x7f; num >>= 7; if (num != 0) byte |= 0x80; - if (p < pend) *p++ = byte; + *p++ = byte; llen++; } while (num != 0); return llen; } +/* + * Decodes a 32-bit unsigned integer from the variable-length packed format + * in the byte array `p`. It reads bytes until it finds one where the most + * significant bit is 0. + * If `newpos` is not NULL, it will be updated to point to the byte + * following the last byte read. + * Returns the decoded 32-bit unsigned integer. + */ uint32_t mrb_packed_int_decode(const uint8_t *p, const uint8_t **newpos) { @@ -77,100 +96,125 @@ mrb_packed_int_decode(const uint8_t *p, const uint8_t **newpos) return n; } +static char const* +debug_get_filename(mrb_state *mrb, mrb_irep_debug_info_file* f) +{ + if (f == NULL) return NULL; + return mrb_sym_name_len(mrb, f->filename_sym, NULL); +} + +static int32_t +debug_get_line(mrb_state *mrb, mrb_irep_debug_info_file* f, uint32_t pc) +{ + if (f == NULL) return -1; + switch (f->line_type) { + case mrb_debug_line_ary: + case mrb_debug_line_flat_map: + default: + break; + + case mrb_debug_line_packed_map: + { + const uint8_t *p = f->lines.packed_map; + const uint8_t *pend = p + f->line_entry_count; + uint32_t pos = 0, line = 0; + while (p < pend) { + pos += mrb_packed_int_decode(p, &p); + uint32_t line_diff = mrb_packed_int_decode(p, &p); + if (pc < pos) break; + line += line_diff; + } + return line; + } + } + return -1; +} + +/* + * Retrieves the filename for a given instruction pointer (pc) + * within a given mruby interpreter state (mrb) and mruby bytecode (irep). + * Returns NULL if the information is not available. + */ MRB_API char const* mrb_debug_get_filename(mrb_state *mrb, const mrb_irep *irep, uint32_t pc) { if (irep && pc < irep->ilen) { - mrb_irep_debug_info_file* f = NULL; if (!irep->debug_info) return NULL; - else if ((f = get_file(irep->debug_info, pc))) { - return mrb_sym_name_len(mrb, f->filename_sym, NULL); - } + return debug_get_filename(mrb, get_file(irep->debug_info, pc)); } return NULL; } +/* + * Retrieves the line number for a given instruction pointer (pc) + * within a given mruby interpreter state (mrb) and mruby bytecode (irep). + * Returns -1 if the information is not available. + */ MRB_API int32_t mrb_debug_get_line(mrb_state *mrb, const mrb_irep *irep, uint32_t pc) { if (irep && pc < irep->ilen) { - mrb_irep_debug_info_file* f = NULL; - if (!irep->debug_info) { - return -1; - } - else if ((f = get_file(irep->debug_info, pc))) { - switch (f->line_type) { - case mrb_debug_line_ary: - mrb_assert(f->start_pos <= pc && pc < (f->start_pos + f->line_entry_count)); - return f->lines.ary[pc - f->start_pos]; - - case mrb_debug_line_flat_map: { - /* get upper bound */ - const mrb_irep_debug_info_line *ret = f->lines.flat_map; - uint32_t count = f->line_entry_count; - while (count > 0) { - int32_t step = count / 2; - const mrb_irep_debug_info_line *it = ret + step; - if (!(pc < it->start_pos)) { - ret = it + 1; - count -= step + 1; - } - else { count = step; } - } - - --ret; - - /* check line entry pointer range */ - mrb_assert(f->lines.flat_map <= ret && ret < (f->lines.flat_map + f->line_entry_count)); - /* check pc range */ - mrb_assert(ret->start_pos <= pc && - pc < (((uint32_t)(ret + 1 - f->lines.flat_map) < f->line_entry_count) - ? (ret+1)->start_pos : irep->debug_info->pc_count)); - - return ret->line; - } - - case mrb_debug_line_packed_map: { - const uint8_t *p = f->lines.packed_map; - const uint8_t *pend = p + f->line_entry_count; - uint32_t pos = 0, line = 0, line_diff; - while (p < pend) { - pos += mrb_packed_int_decode(p, &p); - line_diff = mrb_packed_int_decode(p, &p); - if (pc < pos) break; - line += line_diff; - } - return line; - } - } - } + if (!irep->debug_info) return -1; + return debug_get_line(mrb, get_file(irep->debug_info, pc), pc); } return -1; } +/* + * Retrieves both the filename and line number for a given instruction pointer (pc) + * within a given mruby interpreter state (mrb) and mruby bytecode (irep). + * The line number is stored in the `lp` output parameter and the filename in the `fp` output parameter. + * Returns TRUE if the information is successfully retrieved, and FALSE otherwise. + * In case of failure, `lp` is set to -1 and `fp` is set to NULL. + */ +MRB_API mrb_bool +mrb_debug_get_position(mrb_state *mrb, const mrb_irep *irep, uint32_t pc, int32_t *lp, const char **fp) +{ + if (irep && pc < irep->ilen && irep->debug_info) { + mrb_irep_debug_info_file *f = get_file(irep->debug_info, pc); + *lp = debug_get_line(mrb, f, pc); + if (*lp > 0) { + *fp = debug_get_filename(mrb, f); + if (*fp) return TRUE; + } + } + *lp = -1; *fp = NULL; + return FALSE; +} + +/* + * Allocates and initializes a new `mrb_irep_debug_info` structure + * for a given mruby interpreter state (mrb) and mruby bytecode (irep). + * This function asserts that debug_info is not already allocated for the irep. + * Returns a pointer to the newly allocated `mrb_irep_debug_info` structure. + */ MRB_API mrb_irep_debug_info* mrb_debug_info_alloc(mrb_state *mrb, mrb_irep *irep) { static const mrb_irep_debug_info initial = { 0, 0, NULL }; - mrb_irep_debug_info *ret; mrb_assert(!irep->debug_info); - ret = (mrb_irep_debug_info *)mrb_malloc(mrb, sizeof(*ret)); + mrb_irep_debug_info *ret = (mrb_irep_debug_info*)mrb_malloc(mrb, sizeof(*ret)); *ret = initial; irep->debug_info = ret; return ret; } +/* + * Appends a new file's debug information to an existing `mrb_irep_debug_info` structure `d`. + * It takes the mruby state `mrb`, the debug info structure `d`, the `filename`, + * an array of `lines` numbers, the `start_pos` (starting program counter for this file), + * and `end_pos` (ending program counter for this file). + * `filename` and `lines` must not be NULL. + * Returns a pointer to the newly created `mrb_irep_debug_info_file` structure, + * or NULL if `d` is NULL, `start_pos` equals `end_pos`, or if the filename is + * the same as the previously appended file. + */ MRB_API mrb_irep_debug_info_file* mrb_debug_info_append_file(mrb_state *mrb, mrb_irep_debug_info *d, const char *filename, uint16_t *lines, uint32_t start_pos, uint32_t end_pos) { - mrb_irep_debug_info_file *f; - uint32_t file_pc_count; - size_t fn_len; - uint32_t i; - if (!d) return NULL; if (start_pos == end_pos) return NULL; @@ -183,16 +227,15 @@ mrb_debug_info_append_file(mrb_state *mrb, mrb_irep_debug_info *d, return NULL; } - f = (mrb_irep_debug_info_file*)mrb_malloc(mrb, sizeof(*f)); + mrb_irep_debug_info_file *f = (mrb_irep_debug_info_file*)mrb_malloc(mrb, sizeof(*f)); d->files = (mrb_irep_debug_info_file**)mrb_realloc(mrb, d->files, sizeof(mrb_irep_debug_info_file*) * (d->flen + 1)); d->files[d->flen++] = f; - file_pc_count = end_pos - start_pos; - + uint32_t file_pc_count = end_pos - start_pos; f->start_pos = start_pos; d->pc_count = end_pos; - fn_len = strlen(filename); + size_t fn_len = strlen(filename); f->filename_sym = mrb_intern(mrb, filename, fn_len); f->line_type = mrb_debug_line_packed_map; f->lines.ptr = NULL; @@ -200,9 +243,9 @@ mrb_debug_info_append_file(mrb_state *mrb, mrb_irep_debug_info *d, uint16_t prev_line = 0; uint32_t prev_pc = 0; size_t packed_size = 0; - uint8_t *p, *pend; + uint8_t *p; - for (i = 0; i < file_pc_count; ++i) { + for (uint32_t i = 0; i < file_pc_count; i++) { if (lines[start_pos + i] == prev_line) continue; packed_size += mrb_packed_int_len(start_pos+i-prev_pc); prev_pc = start_pos+i; @@ -210,13 +253,12 @@ mrb_debug_info_append_file(mrb_state *mrb, mrb_irep_debug_info *d, prev_line = lines[start_pos + i]; } f->lines.packed_map = p = (uint8_t*)mrb_malloc(mrb, packed_size); - pend = p + packed_size; prev_line = 0; prev_pc = 0; - for (i = 0; i < file_pc_count; ++i) { + for (uint32_t i = 0; i < file_pc_count; i++) { if (lines[start_pos + i] == prev_line) continue; - p += mrb_packed_int_encode(start_pos+i-prev_pc, p, pend); + p += mrb_packed_int_encode(start_pos+i-prev_pc, p); prev_pc = start_pos + i; - p += mrb_packed_int_encode(lines[start_pos + i]-prev_line, p, pend); + p += mrb_packed_int_encode(lines[start_pos + i]-prev_line, p); prev_line = lines[start_pos + i]; } f->line_entry_count = (uint32_t)packed_size; @@ -224,15 +266,19 @@ mrb_debug_info_append_file(mrb_state *mrb, mrb_irep_debug_info *d, return f; } +/* + * Frees the memory allocated for an `mrb_irep_debug_info` structure `d` + * and all its associated data, including file information and line data. + * It takes the mruby state `mrb` and the debug info structure `d` to be freed. + * If `d` is NULL, the function does nothing. + */ MRB_API void mrb_debug_info_free(mrb_state *mrb, mrb_irep_debug_info *d) { - uint32_t i; - if (!d) { return; } if (d->files) { - for (i = 0; i < d->flen; ++i) { + for (uint32_t i = 0; i < d->flen; i++) { if (d->files[i]) { mrb_free(mrb, d->files[i]->lines.ptr); mrb_free(mrb, d->files[i]); diff --git a/src/dump.c b/src/dump.c index c0ad4b6899..b6e6e90d64 100644 --- a/src/dump.c +++ b/src/dump.c @@ -32,6 +32,16 @@ get_irep_header_size(mrb_state *mrb) return size; } +/** + * Writes the header of an IREP (Intermediate Representation) record to the provided buffer. + * This header includes information like the record size, number of local variables, + * number of registers, and number of child IREPs. + * + * @param mrb The mruby state. (Primarily used for `get_irep_record_size_1`) + * @param irep Pointer to the IREP structure whose header is to be written. + * @param buf Pointer to the buffer where the header will be written. + * @return `ptrdiff_t` representing the number of bytes written to the buffer. + */ static ptrdiff_t write_irep_header(mrb_state *mrb, const mrb_irep *irep, uint8_t *buf) { @@ -58,6 +68,17 @@ get_iseq_block_size(mrb_state *mrb, const mrb_irep *irep) return size; } +/** + * Writes the instruction sequence (iseq) block of an IREP to the provided buffer. + * This block includes the number of catch handlers, the number of opcodes, + * and the instruction sequence itself along with catch handler data. + * + * @param mrb The mruby state (currently unused in the function body but good to document). + * @param irep Pointer to the IREP structure whose instruction sequence is to be written. + * @param buf Pointer to the buffer where the instruction sequence block will be written. + * @param flags Flags to control the dump process (currently unused in this specific function but part of its signature). + * @return `ptrdiff_t` representing the number of bytes written to the buffer. + */ static ptrdiff_t write_iseq_block(mrb_state *mrb, const mrb_irep *irep, uint8_t *buf, uint8_t flags) { @@ -74,6 +95,15 @@ write_iseq_block(mrb_state *mrb, const mrb_irep *irep, uint8_t *buf, uint8_t fla } #ifndef MRB_NO_FLOAT +/** + * Dumps an `mrb_float` value into the provided buffer as a `double` in IEEE 754 + * binary format, ensuring little-endian byte order. If the system is already + * little-endian, it uses `memcpy`. Otherwise, it manually reverses the bytes. + * + * @param mrb The mruby state (currently unused in the function body but good to document). + * @param buf Pointer to the buffer where the float data will be written. + * @param f The float value to be dumped. + */ static void dump_float(mrb_state *mrb, uint8_t *buf, mrb_float f) { @@ -81,31 +111,37 @@ dump_float(mrb_state *mrb, uint8_t *buf, mrb_float f) union { double f; char s[sizeof(double)]; - } u = {.f = (double)f}; + } u = {(double)f}; if (littleendian) { memcpy(buf, u.s, sizeof(double)); } else { - size_t i; - - for (i=0; iplen * sizeof(uint8_t); /* len(n) */ - for (pool_no = 0; pool_no < irep->plen; pool_no++) { + for (int pool_no = 0; pool_no < irep->plen; pool_no++) { int ai = mrb_gc_arena_save(mrb); switch (irep->pool[pool_no].tt) { @@ -124,12 +160,12 @@ get_pool_block_size(mrb_state *mrb, const mrb_irep *irep) /* fall through */ #endif case IREP_TT_INT32: - size += 4; /* 32bits = 4bytes */ + size += 4; /* 32 bits = 4 bytes */ break; case IREP_TT_BIGINT: { - mrb_int len = irep->pool[pool_no].u.str[0]; + mrb_int len = (uint8_t)irep->pool[pool_no].u.str[0]; mrb_assert_int_fit(mrb_int, len, size_t, SIZE_MAX); size += (size_t)len+2; } @@ -158,17 +194,28 @@ get_pool_block_size(mrb_state *mrb, const mrb_irep *irep) return size; } +/** + * Writes the literal pool of an IREP to the provided buffer. + * It iterates through each entry in the pool, determines its type + * (integer, float, string, bigint), and writes the type identifier + * and a binary representation of the value to the buffer. + * + * @param mrb The mruby state, used for garbage collection management + * (`mrb_gc_arena_save`/`restore`) and potentially for `dump_float`. + * @param irep Pointer to the IREP structure whose literal pool is to be written. + * @param buf Pointer to the buffer where the literal pool data will be written. + * @return `ptrdiff_t` representing the number of bytes written to the buffer. + */ static ptrdiff_t write_pool_block(mrb_state *mrb, const mrb_irep *irep, uint8_t *buf) { - int pool_no; uint8_t *cur = buf; mrb_int len; const char *ptr; cur += uint16_to_bin(irep->plen, cur); /* number of pool */ - for (pool_no = 0; pool_no < irep->plen; pool_no++) { + for (int pool_no = 0; pool_no < irep->plen; pool_no++) { int ai = mrb_gc_arena_save(mrb); switch (irep->pool[pool_no].tt) { @@ -195,7 +242,7 @@ write_pool_block(mrb_state *mrb, const mrb_irep *irep, uint8_t *buf) case IREP_TT_BIGINT: cur += uint8_to_bin(IREP_TT_BIGINT, cur); /* data type */ - len = irep->pool[pool_no].u.str[0]; + len = (uint8_t)irep->pool[pool_no].u.str[0]; memcpy(cur, irep->pool[pool_no].u.str, (size_t)len+2); cur += len+2; break; @@ -229,17 +276,24 @@ write_pool_block(mrb_state *mrb, const mrb_irep *irep, uint8_t *buf) return cur - buf; } +/** + * Calculates the total size in bytes required to store the symbol block of an IREP. + * This includes the count of symbols and, for each symbol, its length and + * the string representation (including a null terminator). + * + * @param mrb The mruby state, used for `mrb_sym_name_len` to get symbol details. + * @param irep Pointer to the IREP structure whose symbol block size is to be calculated. + * @return `size_t` representing the total calculated size of the symbol block in bytes. + */ static size_t get_syms_block_size(mrb_state *mrb, const mrb_irep *irep) { - size_t size = 0; - int sym_no; - mrb_int len; + size_t size = sizeof(uint16_t); /* slen */ - size += sizeof(uint16_t); /* slen */ - for (sym_no = 0; sym_no < irep->slen; sym_no++) { + for (int sym_no = 0; sym_no < irep->slen; sym_no++) { size += sizeof(uint16_t); /* snl(n) */ if (irep->syms[sym_no] != 0) { + mrb_int len; mrb_sym_name_len(mrb, irep->syms[sym_no], &len); size += len + 1; /* sn(n) + null char */ } @@ -248,20 +302,28 @@ get_syms_block_size(mrb_state *mrb, const mrb_irep *irep) return size; } +/** + * Writes the symbol block of an IREP to the provided buffer. + * It first writes the number of symbols. Then, for each symbol, it writes the + * length of the symbol's string representation followed by the string itself + * and a null terminator. Handles null symbols by writing `MRB_DUMP_NULL_SYM_LEN`. + * + * @param mrb The mruby state, used for `mrb_sym_name_len` to get symbol details. + * @param irep Pointer to the IREP structure whose symbol block is to be written. + * @param buf Pointer to the buffer where the symbol block data will be written. + * @return `ptrdiff_t` representing the number of bytes written to the buffer. + */ static ptrdiff_t write_syms_block(mrb_state *mrb, const mrb_irep *irep, uint8_t *buf) { - int sym_no; uint8_t *cur = buf; - const char *name; cur += uint16_to_bin(irep->slen, cur); /* number of symbol */ - for (sym_no = 0; sym_no < irep->slen; sym_no++) { + for (int sym_no = 0; sym_no < irep->slen; sym_no++) { if (irep->syms[sym_no] != 0) { mrb_int len; - - name = mrb_sym_name_len(mrb, irep->syms[sym_no], &len); + const char *name = mrb_sym_name_len(mrb, irep->syms[sym_no], &len); mrb_assert_int_fit(mrb_int, len, uint16_t, UINT16_MAX); cur += uint16_to_bin((uint16_t)len, cur); /* length of symbol name */ @@ -280,23 +342,29 @@ write_syms_block(mrb_state *mrb, const mrb_irep *irep, uint8_t *buf) static size_t get_irep_record_size_1(mrb_state *mrb, const mrb_irep *irep) { - size_t size = 0; - - size += get_irep_header_size(mrb); + size_t size = get_irep_header_size(mrb); size += get_iseq_block_size(mrb, irep); size += get_pool_block_size(mrb, irep); size += get_syms_block_size(mrb, irep); return size; } +/** + * Recursively calculates the total size in bytes of an IREP record. + * This includes the size of the current IREP's own data (header, iseq, pool, + * symbols - obtained via `get_irep_record_size_1`) and the sizes of all + * its child IREPs (reps). + * + * @param mrb The mruby state, passed through to helper functions. + * @param irep Pointer to the IREP structure for which the record size is to be calculated. + * @return `size_t` representing the total calculated size of the IREP record and its children in bytes. + */ static size_t get_irep_record_size(mrb_state *mrb, const mrb_irep *irep) { - size_t size = 0; - int irep_no; + size_t size = get_irep_record_size_1(mrb, irep); - size = get_irep_record_size_1(mrb, irep); - for (irep_no = 0; irep_no < irep->rlen; irep_no++) { + for (int irep_no = 0; irep_no < irep->rlen; irep_no++) { size += get_irep_record_size(mrb, irep->reps[irep_no]); } return size; @@ -305,7 +373,6 @@ get_irep_record_size(mrb_state *mrb, const mrb_irep *irep) static int write_irep_record(mrb_state *mrb, const mrb_irep *irep, uint8_t *bin, size_t *irep_record_size, uint8_t flags) { - int i; uint8_t *src = bin; if (irep == NULL) { @@ -317,7 +384,7 @@ write_irep_record(mrb_state *mrb, const mrb_irep *irep, uint8_t *bin, size_t *ir bin += write_pool_block(mrb, irep, bin); bin += write_syms_block(mrb, irep, bin); - for (i = 0; i < irep->rlen; i++) { + for (int i = 0; i < irep->rlen; i++) { int result; size_t rsize; @@ -361,8 +428,6 @@ write_section_irep_header(mrb_state *mrb, size_t section_size, uint8_t *bin) static int write_section_irep(mrb_state *mrb, const mrb_irep *irep, uint8_t *bin, size_t *len_p, uint8_t flags) { - int result; - size_t rsize = 0; uint8_t *cur = bin; if (mrb == NULL || bin == NULL) { @@ -371,7 +436,8 @@ write_section_irep(mrb_state *mrb, const mrb_irep *irep, uint8_t *bin, size_t *l cur += sizeof(struct rite_section_irep_header); - result = write_irep_record(mrb, irep, cur, &rsize, flags); + size_t rsize = 0; + int result = write_irep_record(mrb, irep, cur, &rsize, flags); if (result != MRB_DUMP_OK) { return result; } @@ -385,14 +451,10 @@ write_section_irep(mrb_state *mrb, const mrb_irep *irep, uint8_t *bin, size_t *l static size_t get_debug_record_size(mrb_state *mrb, const mrb_irep *irep) { - size_t ret = 0; - uint16_t f_idx; - int i; - - ret += sizeof(uint32_t); /* record size */ + size_t ret = sizeof(uint32_t); /* record size */ ret += sizeof(uint16_t); /* file count */ - for (f_idx = 0; f_idx < irep->debug_info->flen; ++f_idx) { + for (uint16_t f_idx = 0; f_idx < irep->debug_info->flen; f_idx++) { mrb_irep_debug_info_file const* file = irep->debug_info->files[f_idx]; ret += sizeof(uint32_t); /* position */ @@ -417,7 +479,7 @@ get_debug_record_size(mrb_state *mrb, const mrb_irep *irep) default: mrb_assert(0); break; } } - for (i=0; irlen; i++) { + for (int i=0; irlen; i++) { ret += get_debug_record_size(mrb, irep->reps[i]); } @@ -427,10 +489,8 @@ get_debug_record_size(mrb_state *mrb, const mrb_irep *irep) static int find_filename_index(const mrb_sym *ary, int ary_len, mrb_sym s) { - int i; - - for (i = 0; i < ary_len; ++i) { - if (ary[i] == s) { return i; } + for (int i = 0; i < ary_len; i++) { + if (ary[i] == s) return i; } return -1; } @@ -441,26 +501,23 @@ get_filename_table_size(mrb_state *mrb, const mrb_irep *irep, mrb_sym **fp, uint mrb_sym *filenames = *fp; size_t size = 0; const mrb_irep_debug_info *di = irep->debug_info; - int i; mrb_assert(lp); - for (i = 0; i < di->flen; ++i) { - mrb_irep_debug_info_file *file; - mrb_int filename_len; - - file = di->files[i]; + for (int i = 0; i < di->flen; i++) { + mrb_irep_debug_info_file *file = di->files[i]; if (find_filename_index(filenames, *lp, file->filename_sym) == -1) { /* register filename */ *lp += 1; - *fp = filenames = (mrb_sym *)mrb_realloc(mrb, filenames, sizeof(mrb_sym) * (*lp)); + *fp = filenames = (mrb_sym*)mrb_realloc(mrb, filenames, sizeof(mrb_sym) * (*lp)); filenames[*lp - 1] = file->filename_sym; /* filename */ + mrb_int filename_len; mrb_sym_name_len(mrb, file->filename_sym, &filename_len); size += sizeof(uint16_t) + (size_t)filename_len; } } - for (i=0; irlen; i++) { + for (int i=0; irlen; i++) { size += get_filename_table_size(mrb, irep->reps[i], fp, lp); } return size; @@ -470,13 +527,11 @@ static size_t write_debug_record_1(mrb_state *mrb, const mrb_irep *irep, uint8_t *bin, mrb_sym const* filenames, uint16_t filenames_len) { uint8_t *cur; - uint16_t f_idx; - ptrdiff_t ret; cur = bin + sizeof(uint32_t); /* skip record size */ cur += uint16_to_bin(irep->debug_info->flen, cur); /* file count */ - for (f_idx = 0; f_idx < irep->debug_info->flen; ++f_idx) { + for (int f_idx = 0; f_idx < irep->debug_info->flen; f_idx++) { int filename_idx; const mrb_irep_debug_info_file *file = irep->debug_info->files[f_idx]; @@ -495,14 +550,14 @@ write_debug_record_1(mrb_state *mrb, const mrb_irep *irep, uint8_t *bin, mrb_sym switch (file->line_type) { case mrb_debug_line_ary: { uint32_t l; - for (l = 0; l < file->line_entry_count; ++l) { + for (l = 0; l < file->line_entry_count; l++) { cur += uint16_to_bin(file->lines.ary[l], cur); } } break; case mrb_debug_line_flat_map: { uint32_t line; - for (line = 0; line < file->line_entry_count; ++line) { + for (line = 0; line < file->line_entry_count; line++) { cur += uint32_to_bin(file->lines.flat_map[line].start_pos, cur); cur += uint16_to_bin(file->lines.flat_map[line].line, cur); } @@ -517,7 +572,7 @@ write_debug_record_1(mrb_state *mrb, const mrb_irep *irep, uint8_t *bin, mrb_sym } } - ret = cur - bin; + ptrdiff_t ret = cur - bin; mrb_assert_int_fit(ptrdiff_t, ret, uint32_t, UINT32_MAX); uint32_to_bin((uint32_t)ret, bin); @@ -528,13 +583,11 @@ write_debug_record_1(mrb_state *mrb, const mrb_irep *irep, uint8_t *bin, mrb_sym static size_t write_debug_record(mrb_state *mrb, const mrb_irep *irep, uint8_t *bin, mrb_sym const* filenames, uint16_t filenames_len) { - size_t size, len; - int irep_no; + size_t size = write_debug_record_1(mrb, irep, bin, filenames, filenames_len); - size = len = write_debug_record_1(mrb, irep, bin, filenames, filenames_len); - bin += len; - for (irep_no = 0; irep_no < irep->rlen; irep_no++) { - len = write_debug_record(mrb, irep->reps[irep_no], bin, filenames, filenames_len); + bin += size; + for (int irep_no = 0; irep_no < irep->rlen; irep_no++) { + size_t len = write_debug_record(mrb, irep->reps[irep_no], bin, filenames, filenames_len); bin += len; size += len; } @@ -546,26 +599,23 @@ write_debug_record(mrb_state *mrb, const mrb_irep *irep, uint8_t *bin, mrb_sym c static int write_section_debug(mrb_state *mrb, const mrb_irep *irep, uint8_t *cur, mrb_sym const *filenames, uint16_t filenames_len) { - size_t section_size = 0; const uint8_t *bin = cur; - struct rite_section_debug_header *header; - size_t dlen; - uint16_t i; - char const *sym; mrb_int sym_len; if (mrb == NULL || cur == NULL) { return MRB_DUMP_INVALID_ARGUMENT; } - header = (struct rite_section_debug_header *)bin; - cur += sizeof(struct rite_section_debug_header); - section_size += sizeof(struct rite_section_debug_header); + struct rite_section_debug_header *header = (struct rite_section_debug_header*)bin; + size_t section_size = sizeof(struct rite_section_debug_header); + cur += section_size; /* filename table */ cur += uint16_to_bin(filenames_len, cur); section_size += sizeof(uint16_t); - for (i = 0; i < filenames_len; ++i) { - sym = mrb_sym_name_len(mrb, filenames[i], &sym_len); + for (int i = 0; i < filenames_len; i++) { + mrb_int sym_len; + char const *sym = mrb_sym_name_len(mrb, filenames[i], &sym_len); + mrb_assert(sym); cur += uint16_to_bin((uint16_t)sym_len, cur); memcpy(cur, sym, sym_len); @@ -574,7 +624,7 @@ write_section_debug(mrb_state *mrb, const mrb_irep *irep, uint8_t *cur, mrb_sym } /* debug records */ - dlen = write_debug_record(mrb, irep, cur, filenames, filenames_len); + size_t dlen = write_debug_record(mrb, irep, cur, filenames, filenames_len); section_size += dlen; memcpy(header->section_ident, RITE_SECTION_DEBUG_IDENT, sizeof(header->section_ident)); @@ -587,23 +637,21 @@ write_section_debug(mrb_state *mrb, const mrb_irep *irep, uint8_t *cur, mrb_sym static void create_lv_sym_table(mrb_state *mrb, const mrb_irep *irep, mrb_sym **syms, uint32_t *syms_len) { - int i; - if (*syms == NULL) { *syms = (mrb_sym*)mrb_malloc(mrb, sizeof(mrb_sym) * 1); } - for (i = 0; i + 1 < irep->nlocals; ++i) { + for (int i = 0; i + 1 < irep->nlocals; i++) { mrb_sym const name = irep->lv[i]; if (name == 0) continue; if (find_filename_index(*syms, *syms_len, name) != -1) continue; - ++(*syms_len); + (*syms_len)++; *syms = (mrb_sym*)mrb_realloc(mrb, *syms, sizeof(mrb_sym) * (*syms_len)); (*syms)[*syms_len - 1] = name; } - for (i = 0; i < irep->rlen; ++i) { + for (int i = 0; i < irep->rlen; i++) { create_lv_sym_table(mrb, irep->reps[i], syms, syms_len); } } @@ -612,14 +660,12 @@ static int write_lv_sym_table(mrb_state *mrb, uint8_t **start, mrb_sym const *syms, uint32_t syms_len) { uint8_t *cur = *start; - uint32_t i; - const char *str; - mrb_int str_len; cur += uint32_to_bin(syms_len, cur); - for (i = 0; i < syms_len; ++i) { - str = mrb_sym_name_len(mrb, syms[i], &str_len); + for (uint32_t i = 0; i < syms_len; i++) { + mrb_int str_len; + const char *str = mrb_sym_name_len(mrb, syms[i], &str_len); cur += uint16_to_bin((uint16_t)str_len, cur); memcpy(cur, str, str_len); cur += str_len; @@ -634,9 +680,8 @@ static int write_lv_record(mrb_state *mrb, const mrb_irep *irep, uint8_t **start, mrb_sym const *syms, uint32_t syms_len) { uint8_t *cur = *start; - int i; - for (i = 0; i + 1 < irep->nlocals; ++i) { + for (int i = 0; i + 1 < irep->nlocals; i++) { if (irep->lv[i] == 0) { cur += uint16_to_bin(RITE_LV_NULL_MARK, cur); } @@ -648,7 +693,7 @@ write_lv_record(mrb_state *mrb, const mrb_irep *irep, uint8_t **start, mrb_sym c } } - for (i = 0; i < irep->rlen; ++i) { + for (int i = 0; i < irep->rlen; i++) { write_lv_record(mrb, irep->reps[i], &cur, syms, syms_len); } @@ -660,12 +705,9 @@ write_lv_record(mrb_state *mrb, const mrb_irep *irep, uint8_t **start, mrb_sym c static size_t get_lv_record_size(mrb_state *mrb, const mrb_irep *irep) { - size_t ret = 0; - int i; + size_t ret = sizeof(uint16_t) * (irep->nlocals - 1); - ret += sizeof(uint16_t) * (irep->nlocals - 1); - - for (i = 0; i < irep->rlen; ++i) { + for (int i = 0; i < irep->rlen; i++) { ret += get_lv_record_size(mrb, irep->reps[i]); } @@ -675,11 +717,9 @@ get_lv_record_size(mrb_state *mrb, const mrb_irep *irep) static size_t get_lv_section_size(mrb_state *mrb, const mrb_irep *irep, mrb_sym const *syms, uint32_t syms_len) { - size_t ret = 0, i; - - ret += sizeof(uint32_t); /* syms_len */ + size_t ret = sizeof(uint32_t); /* syms_len */ ret += sizeof(uint16_t) * syms_len; /* symbol name lengths */ - for (i = 0; i < syms_len; ++i) { + for (uint32_t i = 0; i < syms_len; i++) { mrb_int str_len; mrb_sym_name_len(mrb, syms[i], &str_len); ret += str_len; @@ -694,41 +734,37 @@ static int write_section_lv(mrb_state *mrb, const mrb_irep *irep, uint8_t *start, mrb_sym const *syms, uint32_t const syms_len) { uint8_t *cur = start; - struct rite_section_lv_header *header; - ptrdiff_t diff; - int result = MRB_DUMP_OK; if (mrb == NULL || cur == NULL) { return MRB_DUMP_INVALID_ARGUMENT; } - header = (struct rite_section_lv_header*)cur; + struct rite_section_lv_header *header = (struct rite_section_lv_header*)cur; cur += sizeof(struct rite_section_lv_header); - result = write_lv_sym_table(mrb, &cur, syms, syms_len); + int result = write_lv_sym_table(mrb, &cur, syms, syms_len); if (result != MRB_DUMP_OK) { - goto lv_section_exit; + return result; } result = write_lv_record(mrb, irep, &cur, syms, syms_len); if (result != MRB_DUMP_OK) { - goto lv_section_exit; + return result; } memcpy(header->section_ident, RITE_SECTION_LV_IDENT, sizeof(header->section_ident)); - diff = cur - start; + ptrdiff_t diff = cur - start; mrb_assert_int_fit(ptrdiff_t, diff, size_t, SIZE_MAX); uint32_to_bin((uint32_t)diff, header->section_size); -lv_section_exit: return result; } static int write_rite_binary_header(mrb_state *mrb, size_t binary_size, uint8_t *bin, uint8_t flags) { - struct rite_binary_header *header = (struct rite_binary_header *)bin; + struct rite_binary_header *header = (struct rite_binary_header*)bin; memcpy(header->binary_ident, RITE_BINARY_IDENT, sizeof(header->binary_ident)); memcpy(header->major_version, RITE_BINARY_MAJOR_VER, sizeof(header->major_version)); @@ -744,10 +780,8 @@ write_rite_binary_header(mrb_state *mrb, size_t binary_size, uint8_t *bin, uint8 static mrb_bool debug_info_defined_p(const mrb_irep *irep) { - int i; - if (!irep->debug_info) return FALSE; - for (i=0; irlen; i++) { + for (int i = 0; i < irep->rlen; i++) { if (!debug_info_defined_p(irep->reps[i])) return FALSE; } return TRUE; @@ -756,26 +790,39 @@ debug_info_defined_p(const mrb_irep *irep) static mrb_bool lv_defined_p(const mrb_irep *irep) { - int i; - - if (irep->lv) { return TRUE; } - - for (i = 0; i < irep->rlen; ++i) { - if (lv_defined_p(irep->reps[i])) { return TRUE; } + if (irep->lv) return TRUE; + for (int i = 0; i < irep->rlen; i++) { + if (lv_defined_p(irep->reps[i])) return TRUE; } return FALSE; } -static int -dump_irep(mrb_state *mrb, const mrb_irep *irep, uint8_t flags, uint8_t **bin, size_t *bin_size) +/** + * Dumps an IREP (Intermediate Representation) into a binary format. + * + * This function takes an IREP and converts it into a binary representation that can be + * stored or transmitted. The binary format includes sections for the IREP data, + * debug information (if specified by flags), and local variable information. + * + * @param mrb The mruby state. + * @param irep The IREP to dump. + * @param flags Flags to control the dump process (e.g., MRB_DUMP_DEBUG_INFO). + * @param bin A pointer to a buffer where the binary data will be stored. + * The buffer is allocated by this function and must be freed by the caller + * using mrb_free(). + * @param bin_size A pointer to a variable where the size of the binary data will be stored. + * + * @return MRB_DUMP_OK on success, or an error code (e.g., MRB_DUMP_GENERAL_FAILURE, + * MRB_DUMP_INVALID_ARGUMENT) on failure. + */ +int +mrb_dump_irep(mrb_state *mrb, const mrb_irep *irep, uint8_t flags, uint8_t **bin, size_t *bin_size) { - int result = MRB_DUMP_GENERAL_FAILURE; - size_t malloc_size; - size_t section_irep_size; size_t section_lineno_size = 0, section_lv_size = 0; uint8_t *cur = NULL; - mrb_bool const debug_info_defined = debug_info_defined_p(irep), lv_defined = lv_defined_p(irep); + mrb_bool const debug_info_defined = (flags & MRB_DUMP_DEBUG_INFO) ? debug_info_defined_p(irep) : FALSE; + mrb_bool lv_defined = (flags & MRB_DUMP_NO_LVAR) ? FALSE : lv_defined_p(irep); mrb_sym *lv_syms = NULL; uint32_t lv_syms_len = 0; mrb_sym *filenames = NULL; uint16_t filenames_len = 0; @@ -784,22 +831,16 @@ dump_irep(mrb_state *mrb, const mrb_irep *irep, uint8_t flags, uint8_t **bin, si return MRB_DUMP_GENERAL_FAILURE; } - section_irep_size = sizeof(struct rite_section_irep_header); + size_t section_irep_size = sizeof(struct rite_section_irep_header); section_irep_size += get_irep_record_size(mrb, irep); /* DEBUG section size */ - if (flags & MRB_DUMP_DEBUG_INFO) { - if (debug_info_defined) { - section_lineno_size += sizeof(struct rite_section_debug_header); - /* filename table */ - filenames = (mrb_sym*)mrb_malloc(mrb, sizeof(mrb_sym) + 1); - - /* filename table size */ - section_lineno_size += sizeof(uint16_t); - section_lineno_size += get_filename_table_size(mrb, irep, &filenames, &filenames_len); - - section_lineno_size += get_debug_record_size(mrb, irep); - } + if (debug_info_defined) { + section_lineno_size += sizeof(struct rite_section_debug_header); + /* filename table size */ + section_lineno_size += sizeof(uint16_t); + section_lineno_size += get_filename_table_size(mrb, irep, &filenames, &filenames_len); + section_lineno_size += get_debug_record_size(mrb, irep); } if (lv_defined) { @@ -808,13 +849,13 @@ dump_irep(mrb_state *mrb, const mrb_irep *irep, uint8_t flags, uint8_t **bin, si section_lv_size += get_lv_section_size(mrb, irep, lv_syms, lv_syms_len); } - malloc_size = sizeof(struct rite_binary_header) + - section_irep_size + section_lineno_size + section_lv_size + - sizeof(struct rite_binary_footer); + size_t malloc_size = sizeof(struct rite_binary_header) + + section_irep_size + section_lineno_size + section_lv_size + + sizeof(struct rite_binary_footer); cur = *bin = (uint8_t*)mrb_malloc(mrb, malloc_size); cur += sizeof(struct rite_binary_header); - result = write_section_irep(mrb, irep, cur, §ion_irep_size, flags); + int result = write_section_irep(mrb, irep, cur, §ion_irep_size, flags); if (result != MRB_DUMP_OK) { goto error_exit; } @@ -824,12 +865,10 @@ dump_irep(mrb_state *mrb, const mrb_irep *irep, uint8_t flags, uint8_t **bin, si sizeof(struct rite_binary_footer); /* write DEBUG section */ - if (flags & MRB_DUMP_DEBUG_INFO) { - if (debug_info_defined) { - result = write_section_debug(mrb, irep, cur, filenames, filenames_len); - if (result != MRB_DUMP_OK) { - goto error_exit; - } + if ((flags & MRB_DUMP_DEBUG_INFO) && debug_info_defined) { + result = write_section_debug(mrb, irep, cur, filenames, filenames_len); + if (result != MRB_DUMP_OK) { + goto error_exit; } cur += section_lineno_size; } @@ -855,26 +894,33 @@ dump_irep(mrb_state *mrb, const mrb_irep *irep, uint8_t flags, uint8_t **bin, si return result; } -int -mrb_dump_irep(mrb_state *mrb, const mrb_irep *irep, uint8_t flags, uint8_t **bin, size_t *bin_size) -{ - return dump_irep(mrb, irep, flags, bin, bin_size); -} - #ifndef MRB_NO_STDIO +/** + * Dumps an IREP (Intermediate Representation) into a binary format and writes it to a file. + * + * This function first calls `mrb_dump_irep` to get the binary representation of the IREP, + * then writes the binary data to the specified file pointer. + * + * @param mrb The mruby state. + * @param irep The IREP to dump. + * @param flags Flags to control the dump process. + * @param fp The file pointer to write the binary data to. + * + * @return MRB_DUMP_OK on success, or an error code (e.g., MRB_DUMP_INVALID_ARGUMENT, + * MRB_DUMP_WRITE_FAULT) on failure. + */ int mrb_dump_irep_binary(mrb_state *mrb, const mrb_irep *irep, uint8_t flags, FILE* fp) { uint8_t *bin = NULL; - size_t bin_size = 0; - int result; if (fp == NULL) { return MRB_DUMP_INVALID_ARGUMENT; } - result = dump_irep(mrb, irep, flags, &bin, &bin_size); + size_t bin_size; + int result = mrb_dump_irep(mrb, irep, flags, &bin, &bin_size); if (result == MRB_DUMP_OK) { if (fwrite(bin, sizeof(bin[0]), bin_size, fp) != bin_size) { result = MRB_DUMP_WRITE_FAULT; @@ -885,51 +931,57 @@ mrb_dump_irep_binary(mrb_state *mrb, const mrb_irep *irep, uint8_t flags, FILE* return result; } +/** + * Dumps an IREP (Intermediate Representation) as a C source file. + * + * This function converts an IREP into a C source file. The generated file + * will contain a `uint8_t` array holding the binary representation of the IREP. + * + * @param mrb The mruby state. + * @param irep The IREP to dump. + * @param flags Flags to control the dump process (e.g., `MRB_DUMP_STATIC` to + * make the array static). + * @param fp The file pointer to write the C source code to. + * @param initname The name of the `uint8_t` array in the generated C code. + * + * @return MRB_DUMP_OK on success, or an error code (e.g., + * `MRB_DUMP_INVALID_ARGUMENT`, `MRB_DUMP_WRITE_FAULT`) on failure. + */ int mrb_dump_irep_cfunc(mrb_state *mrb, const mrb_irep *irep, uint8_t flags, FILE *fp, const char *initname) { uint8_t *bin = NULL; - size_t bin_size = 0, bin_idx = 0; - int result; if (fp == NULL || initname == NULL || initname[0] == '\0') { return MRB_DUMP_INVALID_ARGUMENT; } - result = dump_irep(mrb, irep, flags, &bin, &bin_size); - if (result == MRB_DUMP_OK) { - if (fprintf(fp, "#include \n") < 0) { /* for uint8_t under at least Darwin */ - mrb_free(mrb, bin); - return MRB_DUMP_WRITE_FAULT; - } - if (fprintf(fp, - "%s\n" - "const uint8_t %s[] = {", - (flags & MRB_DUMP_STATIC) ? "static" - : "#ifdef __cplusplus\n" - "extern\n" - "#endif", - initname) < 0) { - mrb_free(mrb, bin); - return MRB_DUMP_WRITE_FAULT; - } - while (bin_idx < bin_size) { - if (bin_idx % 16 == 0) { - if (fputs("\n", fp) == EOF) { - mrb_free(mrb, bin); - return MRB_DUMP_WRITE_FAULT; - } - } - if (fprintf(fp, "0x%02x,", bin[bin_idx++]) < 0) { - mrb_free(mrb, bin); - return MRB_DUMP_WRITE_FAULT; - } - } - if (fputs("\n};\n", fp) == EOF) { - mrb_free(mrb, bin); - return MRB_DUMP_WRITE_FAULT; + size_t bin_size, bin_idx = 0; + int result = mrb_dump_irep(mrb, irep, flags, &bin, &bin_size); + if (result != MRB_DUMP_OK) goto exit; + + if (fprintf(fp, "#include \n") < 0) /* for uint8_t under at least Darwin */ + goto write_error; + if (fprintf(fp, + "%s\n" + "const uint8_t %s[] = {", + (flags & MRB_DUMP_STATIC) ? "static" + : "#ifdef __cplusplus\n" + "extern\n" + "#endif", + initname) < 0) + goto write_error; + while (bin_idx < bin_size) { + if (bin_idx % 16 == 0) { + if (fputs("\n", fp) == EOF) goto write_error; } + if (fprintf(fp, "0x%02x,", bin[bin_idx++]) < 0) goto write_error; } + if (fputs("\n};\n", fp) == EOF) goto write_error; + goto exit; +write_error: + result = MRB_DUMP_WRITE_FAULT; +exit: mrb_free(mrb, bin); return result; } diff --git a/src/enum.c b/src/enum.c index b959567150..8ca563bc79 100644 --- a/src/enum.c +++ b/src/enum.c @@ -24,7 +24,6 @@ enum_update_hash(mrb_state *mrb, mrb_value self) void mrb_init_enumerable(mrb_state *mrb) { - struct RClass *enumerable; - enumerable = mrb_define_module(mrb, "Enumerable"); /* 15.3.2 */ - mrb_define_module_function(mrb, enumerable, "__update_hash", enum_update_hash, MRB_ARGS_REQ(3)); + struct RClass *enumerable = mrb_define_module_id(mrb, MRB_SYM(Enumerable)); /* 15.3.2 */ + mrb_define_module_function_id(mrb, enumerable, MRB_SYM(__update_hash), enum_update_hash, MRB_ARGS_REQ(3)); } diff --git a/src/error.c b/src/error.c index 8773f13477..9ffd7c3377 100644 --- a/src/error.c +++ b/src/error.c @@ -16,7 +16,6 @@ #include #include #include -#include void mrb_exc_mesg_set(mrb_state *mrb, struct RException *exc, mrb_value mesg) @@ -24,7 +23,7 @@ mrb_exc_mesg_set(mrb_state *mrb, struct RException *exc, mrb_value mesg) if (!mrb_string_p(mesg)) { mesg = mrb_obj_as_string(mrb, mesg); } - exc->mesg = mrb_obj_ptr(mesg); + exc->mesg = mrb_basic_ptr(mesg); mrb_field_write_barrier_value(mrb, (struct RBasic*)exc, mesg); } @@ -80,21 +79,20 @@ exc_initialize(mrb_state *mrb, mrb_value exc) * With no argument, or if the argument is the same as the receiver, * return the receiver. Otherwise, create a new * exception object of the same class as the receiver, but with a - * message equal to string. + * message equal to `string`. * */ static mrb_value exc_exception(mrb_state *mrb, mrb_value self) { - mrb_value exc; mrb_value a; - mrb_int argc; + mrb_int argc = mrb_get_args(mrb, "|o", &a); - argc = mrb_get_args(mrb, "|o", &a); if (argc == 0) return self; if (mrb_obj_equal(mrb, self, a)) return self; - exc = mrb_obj_clone(mrb, self); + + mrb_value exc = mrb_obj_clone(mrb, self); mrb_exc_mesg_set(mrb, mrb_exc_ptr(exc), a); return exc; @@ -112,12 +110,11 @@ static mrb_value exc_to_s(mrb_state *mrb, mrb_value exc) { mrb_value mesg = mrb_exc_mesg_get(mrb, mrb_exc_ptr(exc)); - struct RObject *p; if (!mrb_string_p(mesg)) { return mrb_str_new_cstr(mrb, mrb_obj_classname(mrb, exc)); } - p = mrb_obj_ptr(mesg); + struct RObject *p = mrb_obj_ptr(mesg); if (!p->c) { p->c = mrb->string_class; } @@ -128,9 +125,9 @@ exc_to_s(mrb_state *mrb, mrb_value exc) * call-seq: * exception.inspect -> string * - * Returns this exception's file name, line number, + * Returns this exception's filename, line number, * message and class name. - * If file name or line number is not set, + * If filename or line number is not set, * returns message and class name. */ @@ -139,6 +136,14 @@ mrb_exc_inspect(mrb_state *mrb, mrb_value exc) { mrb_value cname = mrb_mod_to_s(mrb, mrb_obj_value(mrb_obj_class(mrb, exc))); mrb_value mesg = mrb_exc_mesg_get(mrb, mrb_exc_ptr(exc)); /* string or nil */ + return (mrb_nil_p(mesg)||RSTRING_LEN(mesg)==0) ? cname : mrb_format(mrb, "#<%v: %v>", cname, mesg); +} + +mrb_value +mrb_exc_get_output(mrb_state *mrb, struct RObject *exc) +{ + mrb_value cname = mrb_mod_to_s(mrb, mrb_obj_value(mrb_class_real(exc->c))); + mrb_value mesg = mrb_exc_mesg_get(mrb, (struct RException*)exc); /* string or nil */ return (mrb_nil_p(mesg)||RSTRING_LEN(mesg)==0) ? cname : mrb_format(mrb, "%v (%v)", mesg, cname); } @@ -160,7 +165,7 @@ set_backtrace(mrb_state *mrb, mrb_value exc, mrb_value backtrace) p++; } } - mrb_exc_ptr(exc)->backtrace = mrb_obj_ptr(backtrace); + mrb_exc_ptr(exc)->backtrace = mrb_basic_ptr(backtrace); mrb_field_write_barrier_value(mrb, mrb_basic_ptr(exc), backtrace); } @@ -195,12 +200,25 @@ static mrb_noreturn void exc_throw(mrb_state *mrb, mrb_value exc) { if (!mrb->jmp) { - mrb_p(mrb, exc); + mrb_print_error(mrb); abort(); } MRB_THROW(mrb->jmp); } +/* + * Raises the given exception object. + * + * This function sets the provided exception object as the current + * exception in the mruby state and then triggers the exception + * handling mechanism (longjmp). + * + * If the provided object is a 'break' object, it's handled specially. + * If it's not an exception object, a TypeError is raised. + * + * mrb: The mruby state. + * exc: The exception object to raise. + */ MRB_API mrb_noreturn void mrb_exc_raise(mrb_state *mrb, mrb_value exc) { @@ -208,7 +226,7 @@ mrb_exc_raise(mrb_state *mrb, mrb_value exc) mrb->exc = mrb_obj_ptr(exc); } else { - if (!mrb_obj_is_kind_of(mrb, exc, mrb->eException_class)) { + if (mrb_type(exc) != MRB_TT_EXCEPTION) { mrb_raise(mrb, E_TYPE_ERROR, "exception object expected"); } mrb_exc_set(mrb, exc); @@ -216,6 +234,16 @@ mrb_exc_raise(mrb_state *mrb, mrb_value exc) exc_throw(mrb, exc); } +/* + * Creates a new exception of class `c` with the message `msg` and raises it. + * + * This is a convenience function that combines creating an exception + * from a C string and then raising it. + * + * mrb: The mruby state. + * c: The exception class to instantiate. + * msg: The C string message for the exception. + */ MRB_API mrb_noreturn void mrb_raise(mrb_state *mrb, struct RClass *c, const char *msg) { @@ -223,42 +251,41 @@ mrb_raise(mrb_state *mrb, struct RClass *c, const char *msg) } /* - * vsprintf like formatting. + * Formats arguments according to a format string, similar to vsprintf. + * This function is the core of mruby's string formatting capabilities. + * It takes a format string and a va_list of arguments and returns a + * new mruby string with the formatted result. * - * The syntax of a format sequence is as follows. + * The format string supports various specifiers to control how arguments + * are converted to strings. * + * Format Sequence Syntax: * %[modifier]specifier * - * The modifiers are: - * - * ----------+------------------------------------------------------------ - * Modifier | Meaning - * ----------+------------------------------------------------------------ - * ! | Convert to string by corresponding `inspect` instead of - * | corresponding `to_s`. - * ----------+------------------------------------------------------------ - * - * The specifiers are: - * - * ----------+----------------+-------------------------------------------- - * Specifier | Argument Type | Note - * ----------+----------------+-------------------------------------------- - * c | char | - * d | int | - * f | mrb_float | - * i | mrb_int | - * l | char*, size_t | Arguments are string and length. - * n | mrb_sym | - * s | char* | Argument is NUL terminated string. - * t | mrb_value | Convert to type (class) of object. - * v,S | mrb_value | - * C | struct RClass* | - * T | mrb_value | Convert to real type (class) of object. - * Y | mrb_value | Same as `!v` if argument is `true`, `false` - * | | or `nil`, otherwise same as `T`. - * % | - | Convert to percent sign itself (no argument - * | | taken). - * ----------+----------------+-------------------------------------------- + * Modifier: + * ! : Use the 'inspect' method for conversion instead of 'to_s'. + * + * Specifiers: + * c : char + * d : int (decimal) + * i : mrb_int (decimal) + * f : mrb_float + * l : char* and size_t (string with length) + * n : mrb_sym (symbol name) + * s : char* (NUL-terminated C string) + * t : mrb_value (type/class of the object) + * v,S: mrb_value (converted using to_s or inspect based on '!') + * C : struct RClass* (class name) + * T : mrb_value (real type/class of the object) + * Y : mrb_value (uses 'inspect' if true, false, or nil, otherwise same as 'T') + * % : Literal '%' character (no argument consumed) + * + * mrb: The mruby state. + * format: The format string. + * ap: The va_list of arguments. + * + * Returns a new mrb_value string containing the formatted output. + * Raises ArgumentError if the format string is malformed. */ MRB_API mrb_value mrb_vformat(mrb_state *mrb, const char *format, va_list ap) @@ -278,7 +305,7 @@ mrb_vformat(mrb_state *mrb, const char *format, va_list ap) if (c == '%') { if (*p == '!') { inspect = TRUE; - ++p; + p++; } if (!*p) break; switch (*p) { @@ -323,6 +350,7 @@ mrb_vformat(mrb_state *mrb, const char *format, va_list ap) goto L_cat_obj; case 's': chars = va_arg(ap, char*); + if (chars == NULL) chars = "(null)"; len = strlen(chars); goto L_cat; case 't': @@ -380,14 +408,27 @@ mrb_vformat(mrb_state *mrb, const char *format, va_list ap) return result; } +/* + * Formats arguments according to a format string, similar to sprintf. + * + * This function takes a format string and a variable number of arguments, + * then calls mrb_vformat to perform the actual formatting. + * See mrb_vformat for details on the format string specifiers. + * + * mrb: The mruby state. + * format: The format string. + * ...: Variable arguments to be formatted. + * + * Returns a new mrb_value string containing the formatted output. + */ MRB_API mrb_value mrb_format(mrb_state *mrb, const char *format, ...) { va_list ap; - mrb_value str; va_start(ap, format); - str = mrb_vformat(mrb, format, ap); + + mrb_value str = mrb_vformat(mrb, format, ap); va_end(ap); return str; @@ -396,45 +437,83 @@ mrb_format(mrb_state *mrb, const char *format, ...) static mrb_value error_va(mrb_state *mrb, struct RClass *c, const char *fmt, va_list ap) { - mrb_value mesg = mrb_vformat(mrb, fmt, ap); - return mrb_exc_new_str(mrb, c, mesg); + return mrb_exc_new_str(mrb, c, mrb_vformat(mrb, fmt, ap)); } +/* + * Creates a new exception of class `c` with a formatted message and raises it. + * + * This function formats a message string using `fmt` and the subsequent + * variable arguments, then creates an exception of class `c` with this + * message, and finally raises the exception. + * See mrb_vformat for details on the format string specifiers. + * + * mrb: The mruby state. + * c: The exception class to instantiate. + * fmt: The format string for the exception message. + * ...: Variable arguments for the format string. + */ MRB_API mrb_noreturn void mrb_raisef(mrb_state *mrb, struct RClass *c, const char *fmt, ...) { va_list ap; - mrb_value exc; va_start(ap, fmt); - exc = error_va(mrb, c, fmt, ap); + + mrb_value exc = error_va(mrb, c, fmt, ap); va_end(ap); mrb_exc_raise(mrb, exc); } +/* + * Raises a NameError exception with a formatted message. + * + * This function creates a NameError exception. The message is generated + * from `fmt` and the variable arguments. The symbol `id` (e.g., the name + * of a missing constant or variable) is associated with the exception object + * via an instance variable named 'name'. + * See mrb_vformat for details on the format string specifiers. + * + * mrb: The mruby state. + * id: The symbol representing the name that caused the error. + * fmt: The format string for the exception message. + * ...: Variable arguments for the format string. + */ MRB_API mrb_noreturn void mrb_name_error(mrb_state *mrb, mrb_sym id, const char *fmt, ...) { va_list ap; - mrb_value exc; va_start(ap, fmt); - exc = error_va(mrb, E_NAME_ERROR, fmt, ap); + + mrb_value exc = error_va(mrb, E_NAME_ERROR, fmt, ap); va_end(ap); mrb_iv_set(mrb, exc, MRB_IVSYM(name), mrb_symbol_value(id)); mrb_exc_raise(mrb, exc); } +/* + * Prints a warning message to stderr. + * + * The message is formatted using `fmt` and the subsequent variable arguments. + * The output is prefixed with "warning: " and followed by a newline. + * This function does nothing if MRB_NO_STDIO is defined. + * See mrb_vformat for details on the format string specifiers. + * + * mrb: The mruby state. + * fmt: The format string for the warning message. + * ...: Variable arguments for the format string. + */ MRB_API void mrb_warn(mrb_state *mrb, const char *fmt, ...) { #ifndef MRB_NO_STDIO va_list ap; - mrb_value str; va_start(ap, fmt); - str = mrb_vformat(mrb, fmt, ap); + + mrb_value str = mrb_vformat(mrb, fmt, ap); fputs("warning: ", stderr); fwrite(RSTRING_PTR(str), RSTRING_LEN(str), 1, stderr); putc('\n', stderr); @@ -442,112 +521,200 @@ mrb_warn(mrb_state *mrb, const char *fmt, ...) #endif } +/* + * Reports an internal mruby bug, prints a message to stderr, and terminates the program. + * + * This function is called when an unexpected internal error occurs within mruby. + * It prints the given message prefixed with "bug: " to stderr and then + * calls exit(EXIT_FAILURE). + * If MRB_NO_STDIO is defined, the message is not printed, but the program still exits. + * + * mrb: The mruby state (currently unused in the function body but part of the API). + * mesg: The C string message describing the bug. + */ MRB_API mrb_noreturn void -mrb_bug(mrb_state *mrb, const char *fmt, ...) +mrb_bug(mrb_state *mrb, const char *mesg) { #ifndef MRB_NO_STDIO - va_list ap; - mrb_value str; - - va_start(ap, fmt); - str = mrb_vformat(mrb, fmt, ap); fputs("bug: ", stderr); - fwrite(RSTRING_PTR(str), RSTRING_LEN(str), 1, stderr); - va_end(ap); + fputs(mesg, stderr); + fputs("\n", stderr); #endif exit(EXIT_FAILURE); } -MRB_API mrb_value -mrb_make_exception(mrb_state *mrb, mrb_int argc, const mrb_value *argv) +mrb_value +mrb_make_exception(mrb_state *mrb, mrb_value exc, mrb_value mesg) { - mrb_value mesg; - int n; - - mesg = mrb_nil_value(); - switch (argc) { - case 0: - break; - case 1: - if (mrb_nil_p(argv[0])) - break; - if (mrb_string_p(argv[0])) { - mesg = mrb_exc_new_str(mrb, E_RUNTIME_ERROR, argv[0]); - break; - } - n = 0; - goto exception_call; - - case 2: - case 3: - n = 1; -exception_call: - { - mrb_sym exc = MRB_SYM(exception); - if (mrb_respond_to(mrb, argv[0], exc)) { - mesg = mrb_funcall_argv(mrb, argv[0], exc, n, argv+1); - } - else { - /* undef */ - mrb_raise(mrb, E_TYPE_ERROR, "exception class/object expected"); - } - } + mrb_int n = 1; - break; - default: - mrb_argnum_error(mrb, argc, 0, 3); - break; + if (mrb_nil_p(mesg)) { + n = 0; } - if (argc > 0) { - if (!mrb_obj_is_kind_of(mrb, mesg, mrb->eException_class)) - mrb_raise(mrb, mrb->eException_class, "exception object expected"); - if (argc > 2) - set_backtrace(mrb, mesg, argv[2]); + if (mrb_class_p(exc)) { + exc = mrb_funcall_argv(mrb, exc, MRB_SYM(new), n, &mesg); } - - return mesg; + else if (mrb_exception_p(exc)) { + if (n > 0) { + exc = mrb_obj_clone(mrb, exc); + mrb_exc_mesg_set(mrb, mrb_exc_ptr(exc), mesg); + } + } + else { + mrb_raise(mrb, E_TYPE_ERROR, "exception class/object expected"); + } + if (mrb_type(exc) != MRB_TT_EXCEPTION) { + mrb_raise(mrb, E_EXCEPTION, "exception object expected"); + } + return exc; } +/* + * Raises a SystemCallError if available, otherwise a RuntimeError, + * based on the current `errno` value. + * + * If the SystemCallError class is defined, this function attempts to call + * its `_sys_fail` method with the current `errno` and an optional + * message. This typically results in a SystemCallError being raised. + * + * If SystemCallError is not defined, or if the call to `_sys_fail` + * itself fails (which shouldn't happen in normal circumstances but leads + * to mrb_raise), it falls back to raising a RuntimeError with the + * given message (or a default message if `mesg` is NULL, though the + * current implementation would pass NULL to mrb_raise which might be + * an issue). + * + * mrb: The mruby state. + * mesg: An optional C string message to append to the error. If NULL, + * a default message or no message might be used depending on the + * error path. + */ MRB_API mrb_noreturn void mrb_sys_fail(mrb_state *mrb, const char *mesg) { - struct RClass *sce; - mrb_int no; + mrb_int no = (mrb_int)errno; + mrb_value mesg_str = mesg ? mrb_str_new_cstr(mrb, mesg) : mrb_nil_value(); - no = (mrb_int)errno; if (mrb_class_defined_id(mrb, MRB_SYM(SystemCallError))) { - sce = mrb_class_get_id(mrb, MRB_SYM(SystemCallError)); + struct RClass *sce = mrb_class_get_id(mrb, MRB_SYM(SystemCallError)); if (mesg != NULL) { - mrb_funcall_id(mrb, mrb_obj_value(sce), MRB_SYM(_sys_fail), 2, mrb_fixnum_value(no), mrb_str_new_cstr(mrb, mesg)); + mrb_funcall_argv2(mrb, mrb_obj_value(sce), MRB_SYM(_sys_fail), mrb_fixnum_value(no), mesg_str); } else { - mrb_funcall_id(mrb, mrb_obj_value(sce), MRB_SYM(_sys_fail), 1, mrb_fixnum_value(no)); + mrb_funcall_argv1(mrb, mrb_obj_value(sce), MRB_SYM(_sys_fail), mrb_fixnum_value(no)); } } - mrb_raise(mrb, E_RUNTIME_ERROR, mesg); + mrb_exc_raise(mrb, mrb_exc_new_str(mrb, E_RUNTIME_ERROR, mesg ? mesg_str : mrb_str_new_lit(mrb, ""))); } +/* + * Raises a NoMethodError exception with a formatted message. + * + * This function creates a NoMethodError. The message is generated from + * `fmt` and the variable arguments. The symbol `id` (the name of the + * missing method) and `args` (the arguments passed to the method) + * are associated with the exception object via instance variables + * named 'name' and 'args', respectively. + * See mrb_vformat for details on the format string specifiers. + * + * mrb: The mruby state. + * id: The symbol representing the name of the undefined method. + * args: The arguments that were passed to the method call. + * fmt: The format string for the exception message. + * ...: Variable arguments for the format string. + */ MRB_API mrb_noreturn void mrb_no_method_error(mrb_state *mrb, mrb_sym id, mrb_value args, char const* fmt, ...) { va_list ap; - mrb_value exc; va_start(ap, fmt); - exc = error_va(mrb, E_NOMETHOD_ERROR, fmt, ap); + + mrb_value exc = error_va(mrb, E_NOMETHOD_ERROR, fmt, ap); va_end(ap); mrb_iv_set(mrb, exc, MRB_IVSYM(name), mrb_symbol_value(id)); mrb_iv_set(mrb, exc, MRB_IVSYM(args), args); mrb_exc_raise(mrb, exc); } +static mrb_noreturn void +frozen_error(mrb_state *mrb, mrb_value v) +{ + mrb_raisef(mrb, E_FROZEN_ERROR, "can't modify frozen %T", v); +} + +/* + * Raises a FrozenError for the given frozen object. + * + * This function is called when an attempt is made to modify an object + * that has been frozen. It constructs and raises a FrozenError, + * indicating the specific object that could not be modified. + * + * mrb: The mruby state. + * frozen_obj: A pointer to the RBasic structure of the frozen object. + */ MRB_API mrb_noreturn void mrb_frozen_error(mrb_state *mrb, void *frozen_obj) { - mrb_raisef(mrb, E_FROZEN_ERROR, "can't modify frozen %t", mrb_obj_value(frozen_obj)); + frozen_error(mrb, mrb_obj_value(frozen_obj)); } +/* + * Checks if the given object is frozen. If it is, raises a FrozenError. + * + * This utility function is used before attempting an operation that + * would modify an object, to ensure that the operation is allowed. + * + * mrb: The mruby state. + * o: A pointer to the RBasic structure of the object to check. + */ +MRB_API void +mrb_check_frozen(mrb_state *mrb, void *o) +{ + if (mrb_frozen_p((struct RBasic*)o)) { + mrb_frozen_error(mrb, o); + } +} + +/* + * Checks if the given mrb_value refers to a frozen object. + * If it is frozen, or if it's an immediate value (which are implicitly + * unmodifiable in a way that would trigger a FrozenError for heap objects), + * this function raises a FrozenError. + * + * Note: The check `mrb_immediate_p(v)` combined with `frozen_error` + * might be misleading. Immediate values are not "frozen" in the same + * sense as heap objects. This function effectively raises a FrozenError + * if an attempt is made to modify an immediate value or a + * heap-allocated object that is explicitly frozen. + * + * mrb: The mruby state. + * v: The mrb_value to check. + */ +MRB_API void +mrb_check_frozen_value(mrb_state *mrb, mrb_value v) +{ + if (mrb_immediate_p(v) || mrb_frozen_p(mrb_basic_ptr(v))) { + frozen_error(mrb, v); + } +} + +/* + * Raises an ArgumentError indicating a mismatch in the number of arguments. + * + * This function is used to report errors when a method receives an + * incorrect number of arguments. It formats a message specifying the + * number of arguments received (`argc`) and the expected number, + * which can be an exact number (`min` == `max`), a minimum (`max` < 0), + * or a range (`min` to `max`). + * + * mrb: The mruby state. + * argc: The number of arguments actually received. + * min: The minimum number of arguments expected. + * max: The maximum number of arguments expected. If negative, it means + * `min` or more arguments are expected. + */ MRB_API mrb_noreturn void mrb_argnum_error(mrb_state *mrb, mrb_int argc, int min, int max) { @@ -561,27 +728,21 @@ mrb_argnum_error(mrb_state *mrb, mrb_int argc, int min, int max) #undef FMT } -void mrb_core_init_printabort(void); +void mrb_core_init_printabort(mrb_state *mrb); int -mrb_core_init_protect(mrb_state *mrb, void (*body)(mrb_state *, void *), void *opaque) +mrb_core_init_protect(mrb_state *mrb, void (*body)(mrb_state*, void*), void *opaque) { struct mrb_jmpbuf *prev_jmp = mrb->jmp; struct mrb_jmpbuf c_jmp; - int err = 1; + volatile int err = 1; MRB_TRY(&c_jmp) { mrb->jmp = &c_jmp; body(mrb, opaque); err = 0; } MRB_CATCH(&c_jmp) { - if (mrb->exc) { - mrb_p(mrb, mrb_obj_value(mrb->exc)); - mrb->exc = NULL; - } - else { - mrb_core_init_printabort(); - } + /* Leave mrb->exc set for caller to inspect */ } MRB_END_EXC(&c_jmp); mrb->jmp = prev_jmp; @@ -600,21 +761,60 @@ void mrb_protect_atexit(mrb_state *mrb) { if (mrb->atexit_stack_len > 0) { + if (mrb->c && mrb->c->ci) { + // Even if the call stack is incomplete due to some fault, atexit to be executed at the top level is desirable. + // Clean-up also makes it easier to collect unnecessary objects. + mrb_callinfo zero = { 0 }; + struct mrb_context *c = mrb->c = mrb->root_c; + mrb_gc_arena_restore(mrb, 0); + + if (c->ci == c->cibase) { + // Since there is no problem with the ci, the env object is detached normally. + struct REnv *e = mrb_vm_ci_env(c->ci); + *c->ci = zero; + c->ci->stack = c->stbase; + if (e) { + c->ci->u.env = NULL; + mrb_env_unshare(mrb, e, TRUE); + } + } + else { + // Any env objects on the ci that are in the process of being executed are destroyed. + do { + struct REnv *e = mrb_vm_ci_env(c->ci); + if (e) { + e->stack = NULL; + MRB_ENV_SET_LEN(e, 0); + MRB_ENV_SET_BIDX(e, 0); + MRB_ENV_CLOSE(e); + } + } while (c->ci-- > c->cibase); + c->ci = c->cibase; + *c->ci = zero; + c->ci->stack = c->stbase; + } + } + struct mrb_jmpbuf *prev_jmp = mrb->jmp; struct mrb_jmpbuf c_jmp; - for (int i = mrb->atexit_stack_len; i > 0; --i) { + int i = mrb->atexit_stack_len; + while (i > 0) { MRB_TRY(&c_jmp) { mrb->jmp = &c_jmp; - mrb->atexit_stack[i - 1](mrb); + do { + mrb->atexit_stack[--i](mrb); + mrb_gc_arena_restore(mrb, 0); + } while (i > 0); mrb->jmp = prev_jmp; } MRB_CATCH(&c_jmp) { + mrb->jmp = prev_jmp; /* ignore atexit errors */ + mrb_gc_arena_restore(mrb, 0); } MRB_END_EXC(&c_jmp); } #ifndef MRB_FIXED_STATE_ATEXIT_STACK mrb_free(mrb, mrb->atexit_stack); #endif - mrb->jmp = prev_jmp; } } @@ -629,10 +829,28 @@ mrb_raise_nomemory(mrb_state *mrb) } } +/* + * Prints the current exception and its backtrace to stderr. + * + * If an exception is set in the mruby state (`mrb->exc`), this function + * attempts to print its details, including the class name, message, + * and backtrace. + * It takes precautions to handle potential errors during the backtrace + * printing itself, especially if called from a context without an active + * jump buffer (e.g., top-level error). + * This function does nothing if MRB_NO_STDIO is defined. + * + * mrb: The mruby state. + */ MRB_API void mrb_print_error(mrb_state *mrb) { #ifndef MRB_NO_STDIO + if (!mrb) { + /* mrb_open() returned NULL - allocation failed */ + fputs("Failed to allocate mrb_state\n", stderr); + return; + } if (mrb->jmp == NULL) { struct mrb_jmpbuf c_jmp; MRB_TRY(&c_jmp) { @@ -649,29 +867,70 @@ mrb_print_error(mrb_state *mrb) #endif } +/* + * Clears the current exception status in the mruby state. + * + * After this function is called, `mrb->exc` will be NULL, indicating + * that there is no pending exception. + * + * mrb: The mruby state. + */ +MRB_API void +mrb_clear_error(mrb_state *mrb) +{ + mrb->exc = NULL; +} + +/* returns TRUE if error in the previous call; internally calls mrb_clear_error() */ +MRB_API mrb_bool +mrb_check_error(mrb_state *mrb) +{ + if (mrb->exc) { + mrb_clear_error(mrb); + return TRUE; + } + return FALSE; +} + +/* ---------------------------*/ +static const mrb_mt_entry exception_rom_entries[] = { + MRB_MT_ENTRY(exc_exception, MRB_SYM(exception), MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(exc_initialize, MRB_SYM(initialize), MRB_ARGS_OPT(1) | MRB_MT_PRIVATE), + MRB_MT_ENTRY(exc_to_s, MRB_SYM(to_s), MRB_ARGS_NONE()), + MRB_MT_ENTRY(exc_to_s, MRB_SYM(message), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_exc_inspect, MRB_SYM(inspect), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_exc_backtrace, MRB_SYM(backtrace), MRB_ARGS_NONE()), + MRB_MT_ENTRY(exc_set_backtrace, MRB_SYM(set_backtrace), MRB_ARGS_REQ(1)), +}; + void mrb_init_exception(mrb_state *mrb) { - struct RClass *exception, *script_error, *stack_error, *nomem_error; - - mrb->eException_class = exception = mrb_define_class(mrb, "Exception", mrb->object_class); /* 15.2.22 */ + struct RClass *exception = mrb->eException_class = mrb_define_class_id(mrb, MRB_SYM(Exception), mrb->object_class); /* 15.2.22 */ MRB_SET_INSTANCE_TT(exception, MRB_TT_EXCEPTION); - mrb_define_class_method(mrb, exception, "exception", mrb_instance_new, MRB_ARGS_OPT(1)); - mrb_define_method(mrb, exception, "exception", exc_exception, MRB_ARGS_OPT(1)); - mrb_define_method(mrb, exception, "initialize", exc_initialize, MRB_ARGS_OPT(1)); - mrb_define_method(mrb, exception, "to_s", exc_to_s, MRB_ARGS_NONE()); - mrb_define_method(mrb, exception, "inspect", mrb_exc_inspect, MRB_ARGS_NONE()); - mrb_define_method(mrb, exception, "backtrace", mrb_exc_backtrace, MRB_ARGS_NONE()); - mrb_define_method(mrb, exception, "set_backtrace", exc_set_backtrace, MRB_ARGS_REQ(1)); - - mrb->eStandardError_class = mrb_define_class(mrb, "StandardError", mrb->eException_class); /* 15.2.23 */ - mrb_define_class(mrb, "RuntimeError", mrb->eStandardError_class); /* 15.2.28 */ - script_error = mrb_define_class(mrb, "ScriptError", mrb->eException_class); /* 15.2.37 */ - mrb_define_class(mrb, "SyntaxError", script_error); /* 15.2.38 */ - stack_error = mrb_define_class(mrb, "SystemStackError", exception); + mrb_define_class_method_id(mrb, exception, MRB_SYM(exception), mrb_instance_new, MRB_ARGS_OPT(1)); + MRB_MT_INIT_ROM(mrb, exception, exception_rom_entries); + + mrb->eStandardError_class = mrb_define_class_id(mrb, MRB_SYM(StandardError), mrb->eException_class); /* 15.2.23 */ + mrb_define_class_id(mrb, MRB_SYM(ArgumentError), E_STANDARD_ERROR); /* 15.2.24 */ + mrb_define_class_id(mrb, MRB_SYM(LocalJumpError), E_STANDARD_ERROR); /* 15.2.25 */ + struct RClass *range_error = mrb_define_class_id(mrb, MRB_SYM(RangeError), E_STANDARD_ERROR); /* 15.2.26 */ + mrb_define_class_id(mrb, MRB_SYM(FloatDomainError), range_error); + mrb_define_class_id(mrb, MRB_SYM(RegexpError), E_STANDARD_ERROR); /* 15.2.27 */ + struct RClass *runtime_error = mrb_define_class_id(mrb, MRB_SYM(RuntimeError), E_STANDARD_ERROR); /* 15.2.28 */ + mrb_define_class_id(mrb, MRB_SYM(FrozenError), runtime_error); + mrb_define_class_id(mrb, MRB_SYM(TypeError), E_STANDARD_ERROR); /* 15.2.29 */ + mrb_define_class_id(mrb, MRB_SYM(ZeroDivisionError), E_STANDARD_ERROR); /* 15.2.30 */ + struct RClass *script_error = mrb_define_class_id(mrb, MRB_SYM(ScriptError), exception); /* 15.2.37 */ + mrb_define_class_id(mrb, MRB_SYM(NotImplementedError), script_error); + mrb_define_class_id(mrb, MRB_SYM(SyntaxError), script_error); /* 15.2.38 */ + struct RClass *index_error = mrb_define_class_id(mrb, MRB_SYM(IndexError), E_STANDARD_ERROR); /* 15.2.33 */ + mrb_define_class_id(mrb, MRB_SYM(KeyError), index_error); + mrb_define_class_id(mrb, MRB_SYM(NoMatchingPatternError), E_STANDARD_ERROR); /* pattern matching */ + struct RClass *stack_error = mrb_define_class_id(mrb, MRB_SYM(SystemStackError), exception); mrb->stack_err = mrb_obj_ptr(mrb_exc_new_lit(mrb, stack_error, "stack level too deep")); - nomem_error = mrb_define_class(mrb, "NoMemoryError", exception); + struct RClass *nomem_error = mrb_define_class_id(mrb, MRB_SYM(NoMemoryError), exception); mrb->nomem_err = mrb_obj_ptr(mrb_exc_new_lit(mrb, nomem_error, "Out of memory")); #ifdef MRB_GC_FIXED_ARENA mrb->arena_err = mrb_obj_ptr(mrb_exc_new_lit(mrb, nomem_error, "arena overflow error")); diff --git a/src/error.h b/src/error.h deleted file mode 100644 index eb755ec7f0..0000000000 --- a/src/error.h +++ /dev/null @@ -1,3 +0,0 @@ -/* this header file is to be removed soon. - added for compatibility purpose (1.0.0) */ -#include diff --git a/src/etc.c b/src/etc.c index 28bfdbf8e7..528ce8f4cd 100644 --- a/src/etc.c +++ b/src/etc.c @@ -11,23 +11,31 @@ #include #include +/* + * Allocates an RData structure, initializes it with the given pointer and type, + * and assigns it to the given class. + */ MRB_API struct RData* mrb_data_object_alloc(mrb_state *mrb, struct RClass *klass, void *ptr, const mrb_data_type *type) { - struct RData *data; + struct RData *data = MRB_OBJ_ALLOC(mrb, MRB_TT_CDATA, klass); - data = MRB_OBJ_ALLOC(mrb, MRB_TT_DATA, klass); data->data = ptr; data->type = type; return data; } +/* + * Checks if the given mrb_value is a data object (MRB_TT_CDATA) and if its + * mrb_data_type matches the provided type. + * Raises an error if the checks fail. + */ MRB_API void mrb_data_check_type(mrb_state *mrb, mrb_value obj, const mrb_data_type *type) { if (!mrb_data_p(obj)) { - mrb_check_type(mrb, obj, MRB_TT_DATA); + mrb_check_type(mrb, obj, MRB_TT_CDATA); } if (DATA_TYPE(obj) != type) { const mrb_data_type *t2 = DATA_TYPE(obj); @@ -43,6 +51,11 @@ mrb_data_check_type(mrb_state *mrb, mrb_value obj, const mrb_data_type *type) } } +/* + * Checks if the given mrb_value is a data object and if its mrb_data_type + * matches the provided type. + * Returns a pointer to the data if the checks pass, otherwise returns NULL. + */ MRB_API void* mrb_data_check_get_ptr(mrb_state *mrb, mrb_value obj, const mrb_data_type *type) { @@ -55,6 +68,11 @@ mrb_data_check_get_ptr(mrb_state *mrb, mrb_value obj, const mrb_data_type *type) return DATA_PTR(obj); } +/* + * Retrieves a pointer to the data within a data object. + * Calls `mrb_data_check_type` to ensure the object is of the correct type, + * raising an error if the type check fails. + */ MRB_API void* mrb_data_get_ptr(mrb_state *mrb, mrb_value obj, const mrb_data_type *type) { @@ -62,6 +80,12 @@ mrb_data_get_ptr(mrb_state *mrb, mrb_value obj, const mrb_data_type *type) return DATA_PTR(obj); } +/* + * Converts an object to a symbol. + * If the object is already a symbol, it is returned directly. + * If the object is a string, it is interned to a symbol. + * Otherwise, a type error is raised. + */ MRB_API mrb_sym mrb_obj_to_sym(mrb_state *mrb, mrb_value name) { @@ -71,7 +95,7 @@ mrb_obj_to_sym(mrb_state *mrb, mrb_value name) return 0; /* not reached */ } -#ifndef MRB_NO_FLOAT +#if !defined(MRB_NO_FLOAT) && !defined(MRB_NAN_BOXING) static mrb_int mrb_float_id(mrb_float f) { @@ -81,6 +105,11 @@ mrb_float_id(mrb_float f) } #endif +/* + * Returns a unique identifier (mrb_int) for the given object. + * The method of generating the ID varies based on the object's type and + * boxing model (NaN boxing, word boxing, or no boxing). + */ MRB_API mrb_int mrb_obj_id(mrb_value obj) { @@ -137,7 +166,7 @@ mrb_obj_id(mrb_value obj) case MRB_TT_HASH: case MRB_TT_RANGE: case MRB_TT_EXCEPTION: - case MRB_TT_DATA: + case MRB_TT_CDATA: case MRB_TT_ISTRUCT: default: return MakeID(mrb_ptr(obj), tt); @@ -147,48 +176,267 @@ mrb_obj_id(mrb_value obj) #ifdef MRB_WORD_BOXING #ifndef MRB_NO_FLOAT +/* + * Boxes a `mrb_float` into an `mrb_value` using word boxing. + * - If `MRB_WORDBOX_NO_INLINE_FLOAT` is defined, it allocates a new + * RFloat object on the heap. + * - If `MRB_64BIT` and `MRB_USE_FLOAT32` are defined, it stores the float + * in the lower bits of the word, shifted and tagged. + * - 64-bit float64: rotation encoding, lossless for exponents [-255, +256]. + * - 32-bit float32: rotation encoding, lossless for exponents [-32, +31]. + * Floats outside the inline range are heap-allocated as RFloat. + */ + +#if !defined(MRB_WORDBOX_NO_INLINE_FLOAT) && \ + (!defined(MRB_USE_FLOAT32) || !defined(MRB_64BIT)) +/* + * Rotation-based float encoding (shared between 64-bit float64 and + * 32-bit float32 paths). + * + * Encode: rotl(bits - ADDEND, 3) produces a tagged value with bottom + * 2 bits == 10 (WORDBOX_FLOAT_FLAG). + * Decode: rotl(tagged_value, N-3) + ADDEND recovers the original bits. + * + * Special values (0.0, -0.0, +Inf, -Inf, NaN) are encoded as small + * sentinel constants that also have bottom 2 bits == 10. This avoids + * heap allocation for these common values. + */ +#define WORDBOX_FLOAT_ROTATE 3 + +/* sentinel values for special floats (all have & 3 == 2) */ +#define WORDBOX_FLOAT_PZERO 0x02 /* +0.0 */ +#define WORDBOX_FLOAT_NZERO 0x06 /* -0.0 */ +#define WORDBOX_FLOAT_PINF 0x0a /* +Infinity */ +#define WORDBOX_FLOAT_NINF 0x0e /* -Infinity */ +#define WORDBOX_FLOAT_NAN 0x12 /* NaN (all NaN bit patterns normalize to this) */ +#define WORDBOX_FLOAT_SENTINEL_MAX WORDBOX_FLOAT_NAN + +#if defined(MRB_USE_FLOAT32) && !defined(MRB_64BIT) +/* + * 32-bit + float32 rotation. + * + * Biased exponents [95, 158] (actual [-32, +31]) produce the correct + * tag pattern after rotation, covering values ~2.3e-10 to ~4.3e9. + * Out-of-range floats are heap-allocated as RFloat. + */ +#define WORDBOX_FLOAT32_EXP_MIN 95 /* biased, actual -32 */ +#define WORDBOX_FLOAT32_EXP_MAX 158 /* biased, actual +31 */ +#define WORDBOX_FLOAT32_ADDEND ((uint32_t)(WORDBOX_FLOAT32_EXP_MIN - (WORDBOX_FLOAT_FLAG << 6)) << 23) + +static uint32_t +wordbox_rotl32(uint32_t a, int n) +{ + return (a << n) | (a >> (32 - n)); +} + +static uint32_t +wordbox_float32_to_u32(float f) +{ + union { float f; uint32_t u; } u; + u.f = f; + return u.u; +} + +static float +wordbox_u32_to_float32(uint32_t v) +{ + union { float f; uint32_t u; } u; + u.u = v; + return u.f; +} + +#else /* 64-bit + float64 */ +/* + * 64-bit + float64 rotation. + * + * Biased exponents [768, 1279] (actual [-255, +256]) produce the + * correct tag pattern. Obscure floats whose rotation would collide + * with a sentinel are heap-allocated instead. + */ +#define WORDBOX_FLOAT_EXP_MIN (1023 - 255) /* 768 */ +#define WORDBOX_FLOAT_EXP_MAX (1023 + 256) /* 1279 */ +#define WORDBOX_FLOAT_ADDEND ((uint64_t)(WORDBOX_FLOAT_EXP_MIN - (WORDBOX_FLOAT_FLAG << 9)) << 52) + +static uint64_t +wordbox_rotl64(uint64_t a, int n) +{ + return (a << n) | (a >> (64 - n)); +} + +static uint64_t +wordbox_float64_to_u64(double d) +{ + union { double d; uint64_t u; } u; + u.d = d; + return u.u; +} + +static double +wordbox_u64_to_float64(uint64_t v) +{ + union { double d; uint64_t u; } u; + u.u = v; + return u.d; +} +#endif /* MRB_USE_FLOAT32 && !MRB_64BIT */ +#endif + MRB_API mrb_value mrb_word_boxing_float_value(mrb_state *mrb, mrb_float f) { union mrb_value_ v; -#ifdef MRB_WORDBOX_NO_FLOAT_TRUNCATE +#ifdef MRB_WORDBOX_NO_INLINE_FLOAT v.p = mrb_obj_alloc(mrb, MRB_TT_FLOAT, mrb->float_class); - v.fp->f = f; - MRB_SET_FROZEN_FLAG(v.bp); + mrb_rfloat_set(v.fp, f); + v.bp->frozen = 1; #elif defined(MRB_64BIT) && defined(MRB_USE_FLOAT32) v.w = 0; v.f = f; v.w = (v.w<<2) | 2; +#elif defined(MRB_64BIT) + { + uint64_t bits = wordbox_float64_to_u64((double)f); + uint64_t exp = (bits >> 52) & 0x7FF; + if (exp == 0) { + /* +0.0 or -0.0 (subnormals also fall here, go to heap) */ + if (bits == UINT64_C(0)) + v.w = WORDBOX_FLOAT_PZERO; + else if (bits == UINT64_C(0x8000000000000000)) + v.w = WORDBOX_FLOAT_NZERO; + else goto float_heap; + } + else if (exp == 0x7FF) { + /* +Inf, -Inf, or NaN */ + if (bits == UINT64_C(0x7FF0000000000000)) + v.w = WORDBOX_FLOAT_PINF; + else if (bits == UINT64_C(0xFFF0000000000000)) + v.w = WORDBOX_FLOAT_NINF; + else + v.w = WORDBOX_FLOAT_NAN; + } + else if (exp >= WORDBOX_FLOAT_EXP_MIN && exp <= WORDBOX_FLOAT_EXP_MAX) { + uintptr_t w = (uintptr_t)wordbox_rotl64(bits - WORDBOX_FLOAT_ADDEND, WORDBOX_FLOAT_ROTATE); + if (w <= WORDBOX_FLOAT_SENTINEL_MAX) goto float_heap; + v.w = w; + } + else { + float_heap: + v.p = mrb_obj_alloc(mrb, MRB_TT_FLOAT, mrb->float_class); + mrb_rfloat_set(v.fp, f); + v.bp->frozen = 1; + } + } #else - v.f = f; - v.w = (v.w & ~3) | 2; + /* 32-bit + float32: rotation encoding */ + { + uint32_t bits = wordbox_float32_to_u32(f); + uint32_t exp = (bits >> 23) & 0xFF; + if (exp == 0) { + /* +0.0 or -0.0 (subnormals also fall here, go to heap) */ + if (bits == 0u) + v.w = WORDBOX_FLOAT_PZERO; + else if (bits == 0x80000000u) + v.w = WORDBOX_FLOAT_NZERO; + else goto float_heap; + } + else if (exp == 0xFF) { + /* +Inf, -Inf, or NaN */ + if (bits == 0x7F800000u) + v.w = WORDBOX_FLOAT_PINF; + else if (bits == 0xFF800000u) + v.w = WORDBOX_FLOAT_NINF; + else + v.w = WORDBOX_FLOAT_NAN; + } + else if (exp >= WORDBOX_FLOAT32_EXP_MIN && exp <= WORDBOX_FLOAT32_EXP_MAX) { + uintptr_t w = (uintptr_t)wordbox_rotl32(bits - WORDBOX_FLOAT32_ADDEND, WORDBOX_FLOAT_ROTATE); + if (w <= WORDBOX_FLOAT_SENTINEL_MAX) goto float_heap; + v.w = w; + } + else { + float_heap: + v.p = mrb_obj_alloc(mrb, MRB_TT_FLOAT, mrb->float_class); + mrb_rfloat_set(v.fp, f); + v.bp->frozen = 1; + } + } #endif return v.value; } -#ifndef MRB_WORDBOX_NO_FLOAT_TRUNCATE +#ifndef MRB_WORDBOX_NO_INLINE_FLOAT +/* + * Unboxes an `mrb_value` to an `mrb_float`. + * - 64-bit + float32: right-shift by 2 to retrieve the float. + * - 64-bit + float64 / 32-bit + float32 (rotation encoding): + * decode inline floats via rotation, or read from heap RFloat. + */ MRB_API mrb_float mrb_word_boxing_value_float(mrb_value v) { +#if defined(MRB_64BIT) && defined(MRB_USE_FLOAT32) union mrb_value_ u; u.value = v; -#if defined(MRB_64BIT) && defined(MRB_USE_FLOAT32) u.w >>= 2; + return u.f; +#elif defined(MRB_64BIT) + if ((v.w & WORDBOX_FLOAT_MASK) == WORDBOX_FLOAT_FLAG) { + if (v.w <= WORDBOX_FLOAT_SENTINEL_MAX) { + switch (v.w) { + case WORDBOX_FLOAT_PZERO: return (mrb_float)( 0.0); + case WORDBOX_FLOAT_NZERO: return (mrb_float)(-0.0); + case WORDBOX_FLOAT_PINF: return (mrb_float)( INFINITY); + case WORDBOX_FLOAT_NINF: return (mrb_float)(-INFINITY); + case WORDBOX_FLOAT_NAN: return (mrb_float) NAN; + default: break; /* not reached */ + } + } + return (mrb_float)wordbox_u64_to_float64( + wordbox_rotl64((uint64_t)v.w, 64 - WORDBOX_FLOAT_ROTATE) + WORDBOX_FLOAT_ADDEND); + } + else { + union mrb_value_ u; + u.value = v; + return mrb_rfloat_value(u.fp); + } #else - u.w &= ~3; + /* 32-bit + float32: rotation decoding */ + if ((v.w & WORDBOX_FLOAT_MASK) == WORDBOX_FLOAT_FLAG) { + if (v.w <= WORDBOX_FLOAT_SENTINEL_MAX) { + switch (v.w) { + case WORDBOX_FLOAT_PZERO: return (mrb_float)( 0.0f); + case WORDBOX_FLOAT_NZERO: return (mrb_float)(-0.0f); + case WORDBOX_FLOAT_PINF: return (mrb_float)( INFINITY); + case WORDBOX_FLOAT_NINF: return (mrb_float)(-INFINITY); + case WORDBOX_FLOAT_NAN: return (mrb_float) NAN; + default: break; /* not reached */ + } + } + return (mrb_float)wordbox_u32_to_float32( + wordbox_rotl32((uint32_t)v.w, 32 - WORDBOX_FLOAT_ROTATE) + WORDBOX_FLOAT32_ADDEND); + } + else { + union mrb_value_ u; + u.value = v; + return mrb_rfloat_value(u.fp); + } #endif - return u.f; } #endif #endif /* MRB_NO_FLOAT */ +/* + * Boxes a C pointer (void*) into an `mrb_value` using word boxing. + * It allocates an `RCptr` object, sets its internal pointer `p` to the + * given C pointer, and then sets the `mrb_value` to this `RCptr` object. + */ MRB_API mrb_value mrb_word_boxing_cptr_value(mrb_state *mrb, void *p) { - mrb_value v; struct RCptr *cptr = MRB_OBJ_ALLOC(mrb, MRB_TT_CPTR, mrb->object_class); + mrb_value v; SET_OBJ_VALUE(v, cptr); cptr->p = p; @@ -197,17 +445,24 @@ mrb_word_boxing_cptr_value(mrb_state *mrb, void *p) #endif /* MRB_WORD_BOXING */ #if defined(MRB_WORD_BOXING) || (defined(MRB_NAN_BOXING) && defined(MRB_INT64)) +/* + * Boxes an `mrb_int` into an `mrb_value`. + * If the integer `n` can be represented as a fixnum (checked by `FIXABLE(n)`), + * it returns a fixnum-tagged `mrb_value`. Otherwise, it allocates an + * `RInteger` object on the heap, stores `n` in it, marks the object as + * frozen, and returns an object-tagged `mrb_value`. + * This function is used when word boxing is enabled or when NaN boxing is + * enabled for 64-bit integers. + */ MRB_API mrb_value mrb_boxing_int_value(mrb_state *mrb, mrb_int n) { if (FIXABLE(n)) return mrb_fixnum_value(n); else { mrb_value v; - struct RInteger *p; - - p = (struct RInteger*)mrb_obj_alloc(mrb, MRB_TT_INTEGER, mrb->integer_class); + struct RInteger *p = (struct RInteger*)mrb_obj_alloc(mrb, MRB_TT_INTEGER, mrb->integer_class); p->i = n; - MRB_SET_FROZEN_FLAG((struct RBasic*)p); + p->frozen = 1; SET_OBJ_VALUE(v, p); return v; } @@ -242,9 +497,9 @@ MRB_API int mrb_msvc_snprintf(char *s, size_t n, const char *format, ...) { va_list arg; - int ret; va_start(arg, format); - ret = mrb_msvc_vsnprintf(s, n, format, arg); + + int ret = mrb_msvc_vsnprintf(s, n, format, arg); va_end(arg); return ret; } diff --git a/src/fmt_fp.c b/src/fmt_fp.c deleted file mode 100644 index 32b7936742..0000000000 --- a/src/fmt_fp.c +++ /dev/null @@ -1,363 +0,0 @@ -#include -#include - -#ifndef MRB_NO_FLOAT -/*********************************************************************** - - Routine for converting a single-precision - floating point number into a string. - - The code in this function was inspired from Fred Bayer's pdouble.c. - Since pdouble.c was released as Public Domain, I'm releasing this - code as public domain as well. - - Dave Hylands - - The original code can be found in https://github.com/dhylands/format-float -***********************************************************************/ - -/*********************************************************************** - - I modified the routine for mruby: - - * support `double` - * support `#` (alt_form) modifier - - My modifications in this file are also placed in the public domain. - - Matz (Yukihiro Matsumoto) - -***********************************************************************/ - -#include - -#ifdef MRB_USE_FLOAT32 - -// 1 sign bit, 8 exponent bits, and 23 mantissa bits. -// exponent values 0 and 255 are reserved, exponent can be 1 to 254. -// exponent is stored with a bias of 127. -// The min and max floats are on the order of 1x10^37 and 1x10^-37 - -#define FLT_DECEXP 32 -#define FLT_ROUND_TO_ONE 0.9999995F -#define FLT_MIN_BUF_SIZE 6 // -9e+99 - -#else - -// 1 sign bit, 11 exponent bits, and 52 mantissa bits. - -#define FLT_DECEXP 256 -#define FLT_ROUND_TO_ONE 0.999999999995 -#define FLT_MIN_BUF_SIZE 7 // -9e+199 - -#endif /* MRB_USE_FLOAT32 */ - -static const mrb_float g_pos_pow[] = { -#ifndef MRB_USE_FLOAT32 - 1e256, 1e128, 1e64, -#endif - 1e32, 1e16, 1e8, 1e4, 1e2, 1e1 -}; -static const mrb_float g_neg_pow[] = { -#ifndef MRB_USE_FLOAT32 - 1e-256, 1e-128, 1e-64, -#endif - 1e-32, 1e-16, 1e-8, 1e-4, 1e-2, 1e-1 -}; - -/* - * mrb_format_float(mrb_float f, char *buf, size_t buf_size, char fmt, int prec, char sign) - * - * fmt: should be one of 'e', 'E', 'f', 'F', 'g', or 'G'. (|0x80 for '#') - * prec: is the precision (as specified in printf) - * sign: should be '\0', '+', or ' ' ('\0' is the normal one - only print - * a sign if ```f``` is negative. Anything else is printed as the - * sign character for positive numbers. - */ - -int -mrb_format_float(mrb_float f, char *buf, size_t buf_size, char fmt, int prec, char sign) { - char *s = buf; - int buf_remaining = (int)buf_size - 1; - int alt_form = 0; - - if ((uint8_t)fmt & 0x80) { - fmt &= 0x7f; /* turn off alt_form flag */ - alt_form = 1; - } - if (buf_size <= FLT_MIN_BUF_SIZE) { - // Smallest exp notion is -9e+99 (-9e+199) which is 6 (7) chars plus terminating - // null. - - if (buf_size >= 2) { - *s++ = '?'; - } - if (buf_size >= 1) { - *s++ = '\0'; - } - return buf_size >= 2; - } - if (signbit(f)) { - *s++ = '-'; - f = -f; - } else if (sign) { - *s++ = sign; - } - buf_remaining -= (int)(s - buf); // Adjust for sign - - { - char uc = fmt & 0x20; - if (isinf(f)) { - *s++ = 'I' ^ uc; - *s++ = 'N' ^ uc; - *s++ = 'F' ^ uc; - goto ret; - } else if (isnan(f)) { - *s++ = 'N' ^ uc; - *s++ = 'A' ^ uc; - *s++ = 'N' ^ uc; - ret: - *s = '\0'; - return (int)(s - buf); - } - } - - if (prec < 0) { - prec = 6; - } - char e_char = 'E' | (fmt & 0x20); // e_char will match case of fmt - fmt |= 0x20; // Force fmt to be lowercase - char org_fmt = fmt; - if (fmt == 'g' && prec == 0) { - prec = 1; - } - int e, e1; - int dec = 0; - char e_sign = '\0'; - int num_digits = 0; - const mrb_float *pos_pow = g_pos_pow; - const mrb_float *neg_pow = g_neg_pow; - - if (f == 0.0) { - e = 0; - if (fmt == 'e') { - e_sign = '+'; - } else if (fmt == 'f') { - num_digits = prec + 1; - } - } else if (f < 1.0) { // f < 1.0 - char first_dig = '0'; - if (f >= FLT_ROUND_TO_ONE) { - first_dig = '1'; - } - - // Build negative exponent - for (e = 0, e1 = FLT_DECEXP; e1; e1 >>= 1, pos_pow++, neg_pow++) { - if (*neg_pow > f) { - e += e1; - f *= *pos_pow; - } - } - char e_sign_char = '-'; - if (f < 1.0) { - if (f >= FLT_ROUND_TO_ONE) { - f = 1.0; - if (e == 0) { - e_sign_char = '+'; - } - } else { - e++; - f *= 10.0; - } - } - - // If the user specified 'g' format, and e is <= 4, then we'll switch - // to the fixed format ('f') - - if (fmt == 'f' || (fmt == 'g' && e <= 4)) { - fmt = 'f'; - dec = -1; - *s++ = first_dig; - - if (org_fmt == 'g') { - prec += (e - 1); - } - // truncate precision to prevent buffer overflow - if (prec + 2 > buf_remaining) { - prec = buf_remaining - 2; - } - num_digits = prec; - if (num_digits || alt_form) { - *s++ = '.'; - while (--e && num_digits) { - *s++ = '0'; - num_digits--; - } - } - } else { - // For e & g formats, we'll be printing the exponent, so set the - // sign. - e_sign = e_sign_char; - dec = 0; - - if (prec > (buf_remaining - FLT_MIN_BUF_SIZE)) { - prec = buf_remaining - FLT_MIN_BUF_SIZE; - if (fmt == 'g') { - prec++; - } - } - } - } else { - // Build positive exponent - for (e = 0, e1 = FLT_DECEXP; e1; e1 >>= 1, pos_pow++, neg_pow++) { - if (*pos_pow <= f) { - e += e1; - f *= *neg_pow; - } - } - - // If the user specified fixed format (fmt == 'f') and e makes the - // number too big to fit into the available buffer, then we'll - // switch to the 'e' format. - - if (fmt == 'f') { - if (e >= buf_remaining) { - fmt = 'e'; - } else if ((e + prec + 2) > buf_remaining) { - prec = buf_remaining - e - 2; - if (prec < 0) { - // This means no decimal point, so we can add one back - // for the decimal. - prec++; - } - } - } - if (fmt == 'e' && prec > (buf_remaining - 6)) { - prec = buf_remaining - 6; - } - // If the user specified 'g' format, and e is < prec, then we'll switch - // to the fixed format. - - if (fmt == 'g' && e < prec) { - fmt = 'f'; - prec -= (e + 1); - } - if (fmt == 'f') { - dec = e; - num_digits = prec + e + 1; - } else { - e_sign = '+'; - } - } - if (prec < 0) { - // This can happen when the prec is trimmed to prevent buffer overflow - prec = 0; - } - - // We now have f as a floating point number between >= 1 and < 10 - // (or equal to zero), and e contains the absolute value of the power of - // 10 exponent. and (dec + 1) == the number of dgits before the decimal. - - // For e, prec is # digits after the decimal - // For f, prec is # digits after the decimal - // For g, prec is the max number of significant digits - // - // For e & g there will be a single digit before the decimal - // for f there will be e digits before the decimal - - if (fmt == 'e') { - num_digits = prec + 1; - } else if (fmt == 'g') { - if (prec == 0) { - prec = 1; - } - num_digits = prec; - } - - // Print the digits of the mantissa - for (int i = 0; i < num_digits; ++i, --dec) { - int8_t d = (int8_t)((int)f)%10; - *s++ = '0' + d; - if (dec == 0 && (prec > 0 || alt_form)) { - *s++ = '.'; - } - f -= (mrb_float)d; - f *= 10.0; - } - - // Round - if (f >= 5.0) { - char *rs = s; - rs--; - while (1) { - if (*rs == '.') { - rs--; - continue; - } - if (*rs < '0' || *rs > '9') { - // + or - - rs++; // So we sit on the digit to the right of the sign - break; - } - if (*rs < '9') { - (*rs)++; - break; - } - *rs = '0'; - if (rs == buf) { - break; - } - rs--; - } - if (*rs == '0') { - // We need to insert a 1 - if (rs[1] == '.' && fmt != 'f') { - // We're going to round 9.99 to 10.00 - // Move the decimal point - rs[0] = '.'; - rs[1] = '0'; - if (e_sign == '-') { - e--; - } else { - e++; - } - } - s++; - char *ss = s; - while (ss > rs) { - *ss = ss[-1]; - ss--; - } - *rs = '1'; - if (f < 1.0 && fmt == 'f') { - // We rounded up to 1.0 - prec--; - } - } - } - - if (org_fmt == 'g' && prec > 0 && !alt_form) { - // Remove trailing zeros and a trailing decimal point - while (s[-1] == '0') { - s--; - } - if (s[-1] == '.') { - s--; - } - } - // Append the exponent - if (e_sign) { - *s++ = e_char; - *s++ = e_sign; - if (e >= 100) { - *s++ = '0' + (e / 100); - e %= 100; - } - *s++ = '0' + (e / 10); - *s++ = '0' + (e % 10); - } - *s = '\0'; - - return (int)(s - buf); -} -#endif diff --git a/src/fp_uscale.c b/src/fp_uscale.c new file mode 100644 index 0000000000..03d22ec44b --- /dev/null +++ b/src/fp_uscale.c @@ -0,0 +1,1514 @@ +/* +** fp_uscale.c - Unrounded Scaling float conversion +** +** Unified float formatting and parsing using the uscale algorithm. +** Based on Russ Cox's "Unrounded Scaling" approach. +** See https://research.swtch.com/fp +** +** Replaces fmt_fp.c (formatting) and readfloat.c (parsing). +*/ + +#include + +#ifndef MRB_NO_FLOAT + +#include +#include +#include + +/* ======== Platform support ======== */ + +#if defined(__SIZEOF_INT128__) +static inline void mul64(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo) +{ + __uint128_t r = (__uint128_t)a * b; + *hi = (uint64_t)(r >> 64); + *lo = (uint64_t)r; +} +#elif defined(_MSC_VER) && defined(_M_X64) +#include +static inline void mul64(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo) +{ + *lo = _umul128(a, b, hi); +} +#else +static inline void mul64(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo) +{ + uint64_t a_lo = (uint32_t)a, a_hi = a >> 32; + uint64_t b_lo = (uint32_t)b, b_hi = b >> 32; + uint64_t p0 = a_lo * b_lo; + uint64_t p1 = a_lo * b_hi; + uint64_t p2 = a_hi * b_lo; + uint64_t p3 = a_hi * b_hi; + uint64_t mid = (p0 >> 32) + (uint32_t)p1 + (uint32_t)p2; + *lo = (p0 & 0xFFFFFFFFULL) | (mid << 32); + *hi = p3 + (p1 >> 32) + (p2 >> 32) + (mid >> 32); +} +#endif + +#if defined(__GNUC__) || defined(__clang__) +#define clz64(x) __builtin_clzll(x) +#elif defined(_MSC_VER) && defined(_M_X64) +static inline int clz64(uint64_t x) +{ + unsigned long idx; + _BitScanReverse64(&idx, x); + return 63 - (int)idx; +} +#else +static inline int clz64(uint64_t x) +{ + int n = 0; + if (x <= 0x00000000FFFFFFFFULL) { n += 32; x <<= 32; } + if (x <= 0x0000FFFFFFFFFFFFULL) { n += 16; x <<= 16; } + if (x <= 0x00FFFFFFFFFFFFFFULL) { n += 8; x <<= 8; } + if (x <= 0x0FFFFFFFFFFFFFFFULL) { n += 4; x <<= 4; } + if (x <= 0x3FFFFFFFFFFFFFFFULL) { n += 2; x <<= 2; } + if (x <= 0x7FFFFFFFFFFFFFFFULL) { n += 1; } + return n; +} +#endif + +#define bits_len64(x) (64 - clz64(x)) + +/* ======== pow10 table ======== */ + +typedef struct { + uint64_t hi; + uint64_t lo; +} pow10_entry; + +#define POW10_MIN (-343) +#define POW10_MAX 341 +#define POW10_TAB_SIZE (POW10_MAX - POW10_MIN + 1) + +/* generated by tools/gen_pow10_tab.rb */ +static const pow10_entry pow10_tab[POW10_TAB_SIZE] = { + {0xbf29dcaba82fdeafULL, 0x8bcd1178c77f03ccULL}, + {0xeef453d6923bd65bULL, 0xeec055d6f95ec4c0ULL}, + {0x9558b4661b6565f9ULL, 0xb53835a65bdb3af8ULL}, + {0xbaaee17fa23ebf77ULL, 0xa286430ff2d209b6ULL}, + {0xe95a99df8ace6f54ULL, 0x0b27d3d3ef868c23ULL}, + {0x91d8a02bb6c10595ULL, 0x86f8e46475b41796ULL}, + {0xb64ec836a47146faULL, 0x68b71d7d93211d7bULL}, + {0xe3e27a444d8d98b8ULL, 0x02e4e4dcf7e964daULL}, + {0x8e6d8c6ab0787f73ULL, 0x01cf0f0a1af1df08ULL}, + {0xb208ef855c969f50ULL, 0x4242d2cca1ae56caULL}, + {0xde8b2b66b3bc4724ULL, 0x52d3877fca19ec7dULL}, + {0x8b16fb203055ac77ULL, 0xb3c434afde5033ceULL}, + {0xaddcb9e83c6b1794ULL, 0x20b541dbd5e440c2ULL}, + {0xd953e8624b85dd79ULL, 0x28e29252cb5d50f2ULL}, + {0x87d4713d6f33aa6cULL, 0x798d9b73bf1a5297ULL}, + {0xa9c98d8ccb009507ULL, 0x97f10250aee0e73dULL}, + {0xd43bf0effdc0ba49ULL, 0xfded42e4da99210dULL}, + {0x84a57695fe98746eULL, 0xfeb449cf089fb4a8ULL}, + {0xa5ced43b7e3e9189ULL, 0xbe615c42cac7a1d2ULL}, + {0xcf42894a5dce35ebULL, 0xadf9b3537d798a46ULL}, + {0x818995ce7aa0e1b3ULL, 0x8cbc10142e6bf66cULL}, + {0xa1ebfb4219491a20ULL, 0xefeb14193a06f407ULL}, + {0xca66fa129f9b60a7ULL, 0x2be5d91f8888b109ULL}, + {0xfd00b897478238d1ULL, 0x76df4f676aaadd4bULL}, + {0x9e20735e8cb16383ULL, 0xaa4b91a0a2aaca4fULL}, + {0xc5a890362fddbc63ULL, 0x14de7608cb557ce2ULL}, + {0xf712b443bbd52b7cULL, 0x5a16138afe2adc1bULL}, + {0x9a6bb0aa55653b2eULL, 0xb84dcc36dedac991ULL}, + {0xc1069cd4eabe89f9ULL, 0x66613f4496917bf5ULL}, + {0xf148440a256e2c77ULL, 0x3ff98f15bc35daf2ULL}, + {0x96cd2a865764dbcbULL, 0xc7fbf96d95a1a8d7ULL}, + {0xbc807527ed3e12bdULL, 0x39faf7c8fb0a130dULL}, + {0xeba09271e88d976cULL, 0x0879b5bb39cc97d1ULL}, + {0x93445b8731587ea4ULL, 0x854c1195041fdee2ULL}, + {0xb8157268fdae9e4dULL, 0xa69f15fa4527d69bULL}, + {0xe61acf033d1a45e0ULL, 0x9046db78d671cc42ULL}, + {0x8fd0c16206306bacULL, 0x5a2c492b86071fa9ULL}, + {0xb3c4f1ba87bc8697ULL, 0x70b75b766788e793ULL}, + {0xe0b62e2929aba83dULL, 0xcce53254016b2178ULL}, + {0x8c71dcd9ba0b4926ULL, 0x600f3f7480e2f4ebULL}, + {0xaf8e5410288e1b70ULL, 0xf8130f51a11bb226ULL}, + {0xdb71e91432b1a24bULL, 0x3617d32609629eafULL}, + {0x892731ac9faf056fULL, 0x41cee3f7c5dda32dULL}, + {0xab70fe17c79ac6cbULL, 0x92429cf5b7550bf9ULL}, + {0xd64d3d9db981787eULL, 0xf6d34433252a4ef7ULL}, + {0x85f0468293f0eb4fULL, 0xda440a9ff73a715aULL}, + {0xa76c582338ed2622ULL, 0x50d50d47f5090db1ULL}, + {0xd1476e2c07286fabULL, 0xe50a5099f24b511eULL}, + {0x82cca4db847945cbULL, 0xaf267260376f12b2ULL}, + {0xa37fce126597973dULL, 0x1af00ef8454ad75fULL}, + {0xcc5fc196fefd7d0dULL, 0xe1ac12b6569d8d37ULL}, + {0xff77b1fcbebcdc50ULL, 0xda171763ec44f085ULL}, + {0x9faacf3df73609b2ULL, 0x884e6e9e73ab1653ULL}, + {0xc795830d75038c1eULL, 0x2a620a461095dbe8ULL}, + {0xf97ae3d0d2446f26ULL, 0xb4fa8cd794bb52e2ULL}, + {0x9becce62836ac578ULL, 0xb11c9806bcf513cdULL}, + {0xc2e801fb244576d6ULL, 0xdd63be086c3258c0ULL}, + {0xf3a20279ed56d48bULL, 0x94bcad8a873eeef0ULL}, + {0x9845418c345644d7ULL, 0x7cf5ec7694875556ULL}, + {0xbe5691ef416bd60dULL, 0xdc33679439a92aacULL}, + {0xedec366b11c6cb90ULL, 0xd340417948137557ULL}, + {0x94b3a202eb1c3f3aULL, 0x840828ebcd0c2956ULL}, + {0xb9e08a83a5e34f08ULL, 0x250a3326c04f33acULL}, + {0xe858ad248f5c22caULL, 0x2e4cbff070630097ULL}, + {0x91376c36d99995bfULL, 0xdceff7f6463de05eULL}, + {0xb58547448ffffb2eULL, 0x542bf5f3d7cd5875ULL}, + {0xe2e69915b3fff9faULL, 0xe936f370cdc0ae93ULL}, + {0x8dd01fad907ffc3cULL, 0x51c2582680986d1cULL}, + {0xb1442798f49ffb4bULL, 0x6632ee3020be8863ULL}, + {0xdd95317f31c7fa1eULL, 0xbfbfa9bc28ee2a7cULL}, + {0x8a7d3eef7f1cfc53ULL, 0xb7d7ca159994da8dULL}, + {0xad1c8eab5ee43b67ULL, 0x25cdbc9afffa1130ULL}, + {0xd863b256369d4a41ULL, 0x6f412bc1bff8957dULL}, + {0x873e4f75e2224e69ULL, 0xa588bb5917fb5d6eULL}, + {0xa90de3535aaae203ULL, 0x8eeaea2f5dfa34c9ULL}, + {0xd3515c2831559a84ULL, 0xf2a5a4bb3578c1fcULL}, + {0x8412d9991ed58092ULL, 0x17a786f5016b793dULL}, + {0xa5178fff668ae0b7ULL, 0x9d9168b241c6578dULL}, + {0xce5d73ff402d98e4ULL, 0x04f5c2ded237ed70ULL}, + {0x80fa687f881c7f8fULL, 0x831999cb4362f466ULL}, + {0xa139029f6a239f73ULL, 0xe3e0003e143bb17fULL}, + {0xc987434744ac874fULL, 0x5cd8004d994a9ddfULL}, + {0xfbe9141915d7a923ULL, 0xb40e0060ff9d4557ULL}, + {0x9d71ac8fada6c9b6ULL, 0x9088c03c9fc24b56ULL}, + {0xc4ce17b399107c23ULL, 0x34aaf04bc7b2de2cULL}, + {0xf6019da07f549b2cULL, 0x81d5ac5eb99f95b7ULL}, + {0x99c102844f94e0fcULL, 0xd1258bbb3403bd92ULL}, + {0xc0314325637a193aULL, 0x056eeeaa0104acf7ULL}, + {0xf03d93eebc589f89ULL, 0x86caaa548145d835ULL}, + {0x96267c7535b763b6ULL, 0xb43eaa74d0cba721ULL}, + {0xbbb01b9283253ca3ULL, 0x614e551204fe90e9ULL}, + {0xea9c227723ee8bccULL, 0xb9a1ea56863e3523ULL}, + {0x92a1958a76751760ULL, 0xf405327613e6e136ULL}, + {0xb749faed14125d37ULL, 0x31067f1398e09984ULL}, + {0xe51c79a85916f485ULL, 0x7d481ed87f18bfe5ULL}, + {0x8f31cc0937ae58d3ULL, 0x2e4d13474f6f77efULL}, + {0xb2fe3f0b8599ef08ULL, 0x79e05819234b55eaULL}, + {0xdfbdcece67006acaULL, 0x98586e1f6c1e2b65ULL}, + {0x8bd6a141006042beULL, 0x1f3744d3a392db1fULL}, + {0xaecc49914078536eULL, 0xa70516088c7791e7ULL}, + {0xda7f5bf590966849ULL, 0x50c65b8aaf957661ULL}, + {0x888f99797a5e012eULL, 0x927bf936adbd69fcULL}, + {0xaab37fd7d8f58179ULL, 0x371af784592cc47cULL}, + {0xd5605fcdcf32e1d7ULL, 0x04e1b5656f77f59bULL}, + {0x855c3be0a17fcd27ULL, 0xa30d115f65aaf980ULL}, + {0xa6b34ad8c9dfc070ULL, 0x0bd055b73f15b7e1ULL}, + {0xd0601d8efc57b08cULL, 0x0ec46b250edb25d9ULL}, + {0x823c12795db6ce58ULL, 0x893ac2f72948f7a7ULL}, + {0xa2cb1717b52481eeULL, 0xab8973b4f39b3591ULL}, + {0xcb7ddcdda26da269ULL, 0x566bd0a2308202f6ULL}, + {0xfe5d54150b090b03ULL, 0x2c06c4cabca283b3ULL}, + {0x9efa548d26e5a6e2ULL, 0x3b843afeb5e59250ULL}, + {0xc6b8e9b0709f109bULL, 0xca6549be635ef6e4ULL}, + {0xf867241c8cc6d4c1ULL, 0x3cfe9c2dfc36b49dULL}, + {0x9b407691d7fc44f9ULL, 0x861f219cbda230e2ULL}, + {0xc21094364dfb5637ULL, 0x67a6ea03ed0abd1bULL}, + {0xf294b943e17a2bc5ULL, 0xc190a484e84d6c62ULL}, + {0x979cf3ca6cec5b5bULL, 0x58fa66d3113063bdULL}, + {0xbd8430bd08277232ULL, 0xaf390087d57c7cacULL}, + {0xece53cec4a314ebeULL, 0x5b0740a9cadb9bd7ULL}, + {0x940f4613ae5ed137ULL, 0x78e4886a1ec94166ULL}, + {0xb913179899f68585ULL, 0xd71daa84a67b91c0ULL}, + {0xe757dd7ec07426e6ULL, 0xcce51525d01a7630ULL}, + {0x9096ea6f38489850ULL, 0xc00f2d37a21089deULL}, + {0xb4bca50b065abe64ULL, 0xf012f8858a94ac56ULL}, + {0xe1ebce4dc7f16dfcULL, 0x2c17b6a6ed39d76bULL}, + {0x8d3360f09cf6e4beULL, 0x9b8ed228544426a3ULL}, + {0xb080392cc4349dedULL, 0x427286b26955304cULL}, + {0xdca04777f541c568ULL, 0x130f285f03aa7c5fULL}, + {0x89e42caaf9491b61ULL, 0x0be9793b624a8dbbULL}, + {0xac5d37d5b79b623aULL, 0xcee3d78a3add312aULL}, + {0xd77485cb25823ac8ULL, 0x829ccd6cc9947d74ULL}, + {0x86a8d39ef77164bdULL, 0x51a20063fdfcce68ULL}, + {0xa8530886b54dbdecULL, 0x260a807cfd7c0203ULL}, + {0xd267caa862a12d67ULL, 0x2f8d209c3cdb0284ULL}, + {0x8380dea93da4bc61ULL, 0xbdb83461a608e192ULL}, + {0xa46116538d0deb79ULL, 0xad26417a0f8b19f7ULL}, + {0xcd795be870516657ULL, 0x986fd1d8936de074ULL}, + {0x806bd9714632dff7ULL, 0xff45e3275c24ac49ULL}, + {0xa086cfcd97bf97f4ULL, 0x7f175bf1332dd75bULL}, + {0xc8a883c0fdaf7df1ULL, 0x9edd32ed7ff94d32ULL}, + {0xfad2a4b13d1b5d6dULL, 0x86947fa8dff7a07eULL}, + {0x9cc3a6eec6311a64ULL, 0x341ccfc98bfac44fULL}, + {0xc3f490aa77bd60fdULL, 0x412403bbeef97563ULL}, + {0xf4f1b4d515acb93cULL, 0x116d04aaeab7d2bbULL}, + {0x991711052d8bf3c6ULL, 0x8ae422ead2b2e3b5ULL}, + {0xbf5cd54678eef0b7ULL, 0x2d9d2ba5875f9ca2ULL}, + {0xef340a98172aace5ULL, 0x7904768ee93783cbULL}, + {0x9580869f0e7aac0fULL, 0x2ba2ca1951c2b25fULL}, + {0xbae0a846d2195713ULL, 0x768b7c9fa6335ef6ULL}, + {0xe998d258869facd8ULL, 0xd42e5bc78fc036b4ULL}, + {0x91ff83775423cc07ULL, 0x849cf95cb9d82230ULL}, + {0xb67f6455292cbf09ULL, 0xe5c437b3e84e2abdULL}, + {0xe41f3d6a7377eecbULL, 0xdf3545a0e261b56cULL}, + {0x8e938662882af53fULL, 0xab814b848d7d1163ULL}, + {0xb23867fb2a35b28eULL, 0x16619e65b0dc55bcULL}, + {0xdec681f9f4c31f32ULL, 0x9bfa05ff1d136b2bULL}, + {0x8b3c113c38f9f37fULL, 0x217c43bf722c22fbULL}, + {0xae0b158b4738705fULL, 0x69db54af4eb72bbaULL}, + {0xd98ddaee19068c77ULL, 0xc45229db2264f6a8ULL}, + {0x87f8a8d4cfa417caULL, 0x1ab35a28f57f1a29ULL}, + {0xa9f6d30a038d1dbdULL, 0xa16030b332dee0b3ULL}, + {0xd47487cc8470652cULL, 0x89b83cdfff9698e0ULL}, + {0x84c8d4dfd2c63f3cULL, 0xd613260bffbe1f8cULL}, + {0xa5fb0a17c777cf0aULL, 0x0b97ef8effada76fULL}, + {0xcf79cc9db955c2cdULL, 0x8e7deb72bf99114bULL}, + {0x81ac1fe293d599c0ULL, 0x390eb327b7bfaacfULL}, + {0xa21727db38cb0030ULL, 0x47525ff1a5af9583ULL}, + {0xca9cf1d206fdc03cULL, 0x5926f7ee0f1b7ae3ULL}, + {0xfd442e4688bd304bULL, 0x6f70b5e992e2599cULL}, + {0x9e4a9cec15763e2fULL, 0x65a671b1fbcd7801ULL}, + {0xc5dd44271ad3cdbbULL, 0xbf100e1e7ac0d602ULL}, + {0xf7549530e188c129ULL, 0x2ed411a619710b83ULL}, + {0x9a94dd3e8cf578baULL, 0x7d448b07cfe6a731ULL}, + {0xc13a148e3032d6e8ULL, 0x1c95adc9c3e050feULL}, + {0xf18899b1bc3f8ca2ULL, 0x23bb193c34d8653eULL}, + {0x96f5600f15a7b7e6ULL, 0xd654efc5a1073f46ULL}, + {0xbcb2b812db11a5dfULL, 0x8bea2bb709490f18ULL}, + {0xebdf661791d60f57ULL, 0xeee4b6a4cb9b52deULL}, + {0x936b9fcebb25c996ULL, 0x354ef226ff4113cbULL}, + {0xb84687c269ef3bfcULL, 0xc2a2aeb0bf1158bdULL}, + {0xe65829b3046b0afbULL, 0xf34b5a5ceed5aeedULL}, + {0x8ff71a0fe2c2e6ddULL, 0xb80f187a15458d54ULL}, + {0xb3f4e093db73a094ULL, 0xa612de989a96f0a9ULL}, + {0xe0f218b8d25088b9ULL, 0xcf97963ec13cacd3ULL}, + {0x8c974f7383725574ULL, 0xe1bebde738c5ec04ULL}, + {0xafbd2350644eead0ULL, 0x1a2e6d6106f76705ULL}, + {0xdbac6c247d62a584ULL, 0x20ba08b948b540c6ULL}, + {0x894bc396ce5da773ULL, 0x94744573cd71487cULL}, + {0xab9eb47c81f51150ULL, 0xf99156d0c0cd9a9bULL}, + {0xd686619ba27255a3ULL, 0x37f5ac84f1010142ULL}, + {0x8613fd0145877586ULL, 0x42f98bd316a0a0c9ULL}, + {0xa798fc4196e952e8ULL, 0xd3b7eec7dc48c8fbULL}, + {0xd17f3b51fca3a7a1ULL, 0x08a5ea79d35afb3aULL}, + {0x82ef85133de648c5ULL, 0x6567b28c2418dd04ULL}, + {0xa3ab66580d5fdaf6ULL, 0x3ec19f2f2d1f1445ULL}, + {0xcc963fee10b7d1b4ULL, 0xce7206faf866d957ULL}, + {0xffbbcfe994e5c620ULL, 0x020e88b9b6808fadULL}, + {0x9fd561f1fd0f9bd4ULL, 0x01491574121059ccULL}, + {0xc7caba6e7c5382c9ULL, 0x019b5ad11694703fULL}, + {0xf9bd690a1b68637cULL, 0xc20231855c398c4fULL}, + {0x9c1661a651213e2eULL, 0xf9415ef359a3f7b1ULL}, + {0xc31bfa0fe5698db9ULL, 0xb791b6b0300cf59dULL}, + {0xf3e2f893dec3f127ULL, 0xa576245c3c103305ULL}, + {0x986ddb5c6b3a76b8ULL, 0x0769d6b9a58a1fe3ULL}, + {0xbe89523386091466ULL, 0x09444c680eeca7dcULL}, + {0xee2ba6c0678b5980ULL, 0x8b955f8212a7d1d3ULL}, + {0x94db483840b717f0ULL, 0x573d5bb14ba8e323ULL}, + {0xba121a4650e4ddecULL, 0x6d0cb29d9e931becULL}, + {0xe896a0d7e51e1567ULL, 0x884fdf450637e2e8ULL}, + {0x915e2486ef32cd61ULL, 0xf531eb8b23e2edd1ULL}, + {0xb5b5ada8aaff80b9ULL, 0xf27e666decdba945ULL}, + {0xe3231912d5bf60e7ULL, 0xef1e000968129396ULL}, + {0x8df5efabc5979c90ULL, 0x3572c005e10b9c3eULL}, + {0xb1736b96b6fd83b4ULL, 0x42cf7007594e834dULL}, + {0xddd0467c64bce4a1ULL, 0x53834c092fa22421ULL}, + {0x8aa22c0dbef60ee5ULL, 0x94320f85bdc55694ULL}, + {0xad4ab7112eb3929eULL, 0x793e93672d36ac39ULL}, + {0xd89d64d57a607745ULL, 0x178e3840f8845748ULL}, + {0x87625f056c7c4a8cULL, 0xeeb8e3289b52b68dULL}, + {0xa93af6c6c79b5d2eULL, 0x2a671bf2c2276430ULL}, + {0xd389b4787982347aULL, 0xb500e2ef72b13d3cULL}, + {0x843610cb4bf160ccULL, 0x31208dd5a7aec645ULL}, + {0xa54394fe1eedb8ffULL, 0x3d68b14b119a77d7ULL}, + {0xce947a3da6a9273fULL, 0x8cc2dd9dd60115cdULL}, + {0x811ccc668829b888ULL, 0xf7f9ca82a5c0ada0ULL}, + {0xa163ff802a3426a9ULL, 0x35f83d234f30d908ULL}, + {0xc9bcff6034c13053ULL, 0x03764c6c22fd0f4aULL}, + {0xfc2c3f3841f17c68ULL, 0x4453df872bbc531dULL}, + {0x9d9ba7832936edc1ULL, 0x2ab46bb47b55b3f2ULL}, + {0xc5029163f384a932ULL, 0xf56186a19a2b20eeULL}, + {0xf64335bcf065d37eULL, 0xb2b9e84a00b5e92aULL}, + {0x99ea0196163fa42fULL, 0xafb4312e4071b1baULL}, + {0xc06481fb9bcf8d3aULL, 0x1ba13d79d08e1e29ULL}, + {0xf07da27a82c37089ULL, 0xa2898cd844b1a5b3ULL}, + {0x964e858c91ba2656ULL, 0xc595f8072aef0790ULL}, + {0xbbe226efb628afebULL, 0x76fb7608f5aac974ULL}, + {0xeadab0aba3b2dbe6ULL, 0xd4ba538b33157bd1ULL}, + {0x92c8ae6b464fc970ULL, 0xc4f47436ffed6d62ULL}, + {0xb77ada0617e3bbccULL, 0xf6319144bfe8c8bbULL}, + {0xe55990879ddcaabeULL, 0x33bdf595efe2faeaULL}, + {0x8f57fa54c2a9eab7ULL, 0x6056b97db5eddcd2ULL}, + {0xb32df8e9f3546565ULL, 0xb86c67dd23695406ULL}, + {0xdff9772470297ebeULL, 0xa68781d46c43a908ULL}, + {0x8bfbea76c619ef37ULL, 0xa814b124c3aa49a5ULL}, + {0xaefae51477a06b04ULL, 0x1219dd6df494dc0eULL}, + {0xdab99e59958885c5ULL, 0x16a054c971ba1312ULL}, + {0x88b402f7fd75539cULL, 0xee2434fde7144bebULL}, + {0xaae103b5fcd2a882ULL, 0x29ad423d60d95ee6ULL}, + {0xd59944a37c0752a3ULL, 0xb41892ccb90fb6a0ULL}, + {0x857fcae62d8493a6ULL, 0x908f5bbff3a9d224ULL}, + {0xa6dfbd9fb8e5b88fULL, 0x34b332aff09446adULL}, + {0xd097ad07a71f26b3ULL, 0x81dfff5becb95858ULL}, + {0x825ecc24c8737830ULL, 0x712bff9973f3d737ULL}, + {0xa2f67f2dfa90563cULL, 0x8d76ff7fd0f0cd05ULL}, + {0xcbb41ef979346bcbULL, 0xb0d4bf5fc52d0046ULL}, + {0xfea126b7d78186bdULL, 0x1d09ef37b6784057ULL}, + {0x9f24b832e6b0f437ULL, 0xf2263582d20b2836ULL}, + {0xc6ede63fa05d3144ULL, 0x6eafc2e3868df244ULL}, + {0xf8a95fcf88747d95ULL, 0x8a5bb39c68316ed5ULL}, + {0x9b69dbe1b548ce7dULL, 0x36795041c11ee545ULL}, + {0xc24452da229b021cULL, 0x0417a45231669e97ULL}, + {0xf2d56790ab41c2a3ULL, 0x051d8d66bdc0463cULL}, + {0x97c560ba6b0919a6ULL, 0x2332786036982be5ULL}, + {0xbdb6b8e905cb6010ULL, 0xabff1678443e36dfULL}, + {0xed246723473e3814ULL, 0xd6fedc16554dc497ULL}, + {0x9436c0760c86e30cULL, 0x065f498df5509adeULL}, + {0xb94470938fa89bcfULL, 0x07f71bf172a4c196ULL}, + {0xe7958cb87392c2c3ULL, 0x49f4e2edcf4df1fbULL}, + {0x90bd77f3483bb9baULL, 0x4e390dd4a190b73dULL}, + {0xb4ecd5f01a4aa829ULL, 0xe1c75149c9f4e50cULL}, + {0xe2280b6c20dd5233ULL, 0xda39259c3c721e4fULL}, + {0x8d590723948a5360ULL, 0xa863b781a5c752f1ULL}, + {0xb0af48ec79ace838ULL, 0xd27ca5620f3927aeULL}, + {0xdcdb1b2798182245ULL, 0x071bceba9307719aULL}, + {0x8a08f0f8bf0f156cULL, 0xe47161349be4a700ULL}, + {0xac8b2d36eed2dac6ULL, 0x1d8db981c2ddd0c0ULL}, + {0xd7adf884aa879178ULL, 0xa4f127e2339544f0ULL}, + {0x86ccbb52ea94baebULL, 0x6716b8ed603d4b16ULL}, + {0xa87fea27a539e9a6ULL, 0xc0dc6728b84c9ddbULL}, + {0xd29fe4b18e88640fULL, 0x711380f2e65fc552ULL}, + {0x83a3eeeef9153e8aULL, 0xe6ac3097cffbdb53ULL}, + {0xa48ceaaab75a8e2cULL, 0xa0573cbdc3fad228ULL}, + {0xcdb02555653131b7ULL, 0xc86d0bed34f986b2ULL}, + {0x808e17555f3ebf12ULL, 0x1d442774411bf42fULL}, + {0xa0b19d2ab70e6ed7ULL, 0xa49531515162f13bULL}, + {0xc8de047564d20a8cULL, 0x0dba7da5a5bbad8aULL}, + {0xfb158592be068d2fULL, 0x11291d0f0f2a98edULL}, + {0x9ced737bb6c4183eULL, 0xaab9b229697a9f94ULL}, + {0xc428d05aa4751e4dULL, 0x55681eb3c3d94779ULL}, + {0xf53304714d9265e0ULL, 0x2ac22660b4cf9957ULL}, + {0x993fe2c6d07b7facULL, 0x1ab957fc7101bfd6ULL}, + {0xbf8fdb78849a5f97ULL, 0x2167adfb8d422fccULL}, + {0xef73d256a5c0f77dULL, 0x69c1997a7092bbbfULL}, + {0x95a8637627989aaeULL, 0x2218ffec865bb557ULL}, + {0xbb127c53b17ec15aULL, 0xaa9f3fe7a7f2a2adULL}, + {0xe9d71b689dde71b0ULL, 0x55470fe191ef4b59ULL}, + {0x9226712162ab070eULL, 0x354c69ecfb358f17ULL}, + {0xb6b00d69bb55c8d2ULL, 0xc29f84683a02f2ddULL}, + {0xe45c10c42a2b3b06ULL, 0x734765824883af95ULL}, + {0x8eb98a7a9a5b04e4ULL, 0x880c9f716d524dbdULL}, + {0xb267ed1940f1c61dULL, 0xaa0fc74dc8a6e12cULL}, + {0xdf01e85f912e37a4ULL, 0x9493b9213ad09977ULL}, + {0x8b61313bbabce2c7ULL, 0xdcdc53b4c4c25feaULL}, + {0xae397d8aa96c1b78ULL, 0x541368a1f5f2f7e5ULL}, + {0xd9c7dced53c72256ULL, 0x691842ca736fb5deULL}, + {0x881cea14545c7576ULL, 0x81af29be8825d1abULL}, + {0xaa242499697392d3ULL, 0x221af42e2a2f4616ULL}, + {0xd4ad2dbfc3d07788ULL, 0x6aa1b139b4bb179bULL}, + {0x84ec3c97da624ab5ULL, 0x42a50ec410f4eec1ULL}, + {0xa6274bbdd0fadd62ULL, 0x134e527515322a71ULL}, + {0xcfb11ead453994bbULL, 0x9821e7125a7eb50dULL}, + {0x81ceb32c4b43fcf5ULL, 0x7f15306b788f3128ULL}, + {0xa2425ff75e14fc32ULL, 0x5eda7c8656b2fd72ULL}, + {0xcad2f7f5359a3b3fULL, 0xf6911ba7ec5fbccfULL}, + {0xfd87b5f28300ca0eULL, 0x74356291e777ac03ULL}, + {0x9e74d1b791e07e49ULL, 0x88a15d9b30aacb82ULL}, + {0xc612062576589ddbULL, 0x6ac9b501fcd57e62ULL}, + {0xf79687aed3eec552ULL, 0xc57c22427c0addfbULL}, + {0x9abe14cd44753b53ULL, 0x3b6d95698d86cabdULL}, + {0xc16d9a0095928a28ULL, 0x8a48fac3f0e87d6cULL}, + {0xf1c90080baf72cb2ULL, 0xacdb3974ed229cc7ULL}, + {0x971da05074da7befULL, 0x2c0903e91435a1fcULL}, + {0xbce5086492111aebULL, 0x770b44e359430a7bULL}, + {0xec1e4a7db69561a6ULL, 0xd4ce161c2f93cd1aULL}, + {0x9392ee8e921d5d08ULL, 0xc500cdd19dbc6030ULL}, + {0xb877aa3236a4b44aULL, 0xf6410146052b783dULL}, + {0xe69594bec44de15cULL, 0xb3d141978676564cULL}, + {0x901d7cf73ab0acdaULL, 0xf062c8feb409f5efULL}, + {0xb424dc35095cd810ULL, 0xac7b7b3e610c736bULL}, + {0xe12e13424bb40e14ULL, 0xd79a5a0df94f9046ULL}, + {0x8cbccc096f5088ccULL, 0x06c07848bbd1ba2cULL}, + {0xafebff0bcb24aaffULL, 0x0870965aeac628b7ULL}, + {0xdbe6fecebdedd5bfULL, 0x4a8cbbf1a577b2e4ULL}, + {0x89705f4136b4a598ULL, 0xce97f577076acfcfULL}, + {0xabcc77118461cefdULL, 0x023df2d4c94583c2ULL}, + {0xd6bf94d5e57a42bdULL, 0xc2cd6f89fb96e4b3ULL}, + {0x8637bd05af6c69b6ULL, 0x59c065b63d3e4ef0ULL}, + {0xa7c5ac471b478424ULL, 0xf0307f23cc8de2acULL}, + {0xd1b71758e219652cULL, 0x2c3c9eecbfb15b57ULL}, + {0x83126e978d4fdf3cULL, 0x9ba5e353f7ced916ULL}, + {0xa3d70a3d70a3d70bULL, 0xc28f5c28f5c28f5cULL}, + {0xcccccccccccccccdULL, 0x3333333333333333ULL}, + {0x8000000000000000ULL, 0x0000000000000000ULL}, + {0xa000000000000000ULL, 0x0000000000000000ULL}, + {0xc800000000000000ULL, 0x0000000000000000ULL}, + {0xfa00000000000000ULL, 0x0000000000000000ULL}, + {0x9c40000000000000ULL, 0x0000000000000000ULL}, + {0xc350000000000000ULL, 0x0000000000000000ULL}, + {0xf424000000000000ULL, 0x0000000000000000ULL}, + {0x9896800000000000ULL, 0x0000000000000000ULL}, + {0xbebc200000000000ULL, 0x0000000000000000ULL}, + {0xee6b280000000000ULL, 0x0000000000000000ULL}, + {0x9502f90000000000ULL, 0x0000000000000000ULL}, + {0xba43b74000000000ULL, 0x0000000000000000ULL}, + {0xe8d4a51000000000ULL, 0x0000000000000000ULL}, + {0x9184e72a00000000ULL, 0x0000000000000000ULL}, + {0xb5e620f480000000ULL, 0x0000000000000000ULL}, + {0xe35fa931a0000000ULL, 0x0000000000000000ULL}, + {0x8e1bc9bf04000000ULL, 0x0000000000000000ULL}, + {0xb1a2bc2ec5000000ULL, 0x0000000000000000ULL}, + {0xde0b6b3a76400000ULL, 0x0000000000000000ULL}, + {0x8ac7230489e80000ULL, 0x0000000000000000ULL}, + {0xad78ebc5ac620000ULL, 0x0000000000000000ULL}, + {0xd8d726b7177a8000ULL, 0x0000000000000000ULL}, + {0x878678326eac9000ULL, 0x0000000000000000ULL}, + {0xa968163f0a57b400ULL, 0x0000000000000000ULL}, + {0xd3c21bcecceda100ULL, 0x0000000000000000ULL}, + {0x84595161401484a0ULL, 0x0000000000000000ULL}, + {0xa56fa5b99019a5c8ULL, 0x0000000000000000ULL}, + {0xcecb8f27f4200f3aULL, 0x0000000000000000ULL}, + {0x813f3978f8940985ULL, 0xc000000000000000ULL}, + {0xa18f07d736b90be6ULL, 0xb000000000000000ULL}, + {0xc9f2c9cd04674edfULL, 0x5c00000000000000ULL}, + {0xfc6f7c4045812297ULL, 0xb300000000000000ULL}, + {0x9dc5ada82b70b59eULL, 0x0fe0000000000000ULL}, + {0xc5371912364ce306ULL, 0x93d8000000000000ULL}, + {0xf684df56c3e01bc7ULL, 0x38ce000000000000ULL}, + {0x9a130b963a6c115dULL, 0xc380c00000000000ULL}, + {0xc097ce7bc90715b4ULL, 0xb460f00000000000ULL}, + {0xf0bdc21abb48db21ULL, 0xe1792c0000000000ULL}, + {0x96769950b50d88f5ULL, 0xecebbb8000000000ULL}, + {0xbc143fa4e250eb32ULL, 0xe826aa6000000000ULL}, + {0xeb194f8e1ae525feULL, 0xa23054f800000000ULL}, + {0x92efd1b8d0cf37bfULL, 0xa55e351b00000000ULL}, + {0xb7abc627050305aeULL, 0x0eb5c261c0000000ULL}, + {0xe596b7b0c643c71aULL, 0x926332fa30000000ULL}, + {0x8f7e32ce7bea5c70ULL, 0x1b7dffdc5e000000ULL}, + {0xb35dbf821ae4f38cULL, 0x225d7fd375800000ULL}, + {0xe0352f62a19e306fULL, 0x2af4dfc852e00000ULL}, + {0x8c213d9da502de46ULL, 0xbad90bdd33cc0000ULL}, + {0xaf298d050e4395d7ULL, 0x698f4ed480bf0000ULL}, + {0xdaf3f04651d47b4dULL, 0xc3f32289a0eec000ULL}, + {0x88d8762bf324cd10ULL, 0x5a77f59604953800ULL}, + {0xab0e93b6efee0054ULL, 0x7115f2fb85ba8600ULL}, + {0xd5d238a4abe98069ULL, 0x8d5b6fba67292780ULL}, + {0x85a36366eb71f042ULL, 0xb85925d48079b8b0ULL}, + {0xa70c3c40a64e6c52ULL, 0x666f6f49a09826dcULL}, + {0xd0cf4b50cfe20766ULL, 0x000b4b1c08be3093ULL}, + {0x82818f1281ed44a0ULL, 0x40070ef18576de5bULL}, + {0xa321f2d7226895c8ULL, 0x5008d2ade6d495f2ULL}, + {0xcbea6f8ceb02bb3aULL, 0x640b07596089bb6fULL}, + {0xfee50b7025c36a09ULL, 0xfd0dc92fb8ac2a4bULL}, + {0x9f4f2726179a2246ULL, 0xfe289dbdd36b9a6fULL}, + {0xc722f0ef9d80aad7ULL, 0xbdb2c52d4846810aULL}, + {0xf8ebad2b84e0d58cULL, 0x2d1f76789a58214dULL}, + {0x9b934c3b330c8578ULL, 0x9c33aa0b607714d0ULL}, + {0xc2781f49ffcfa6d6ULL, 0xc340948e3894da04ULL}, + {0xf316271c7fc3908bULL, 0x7410b9b1c6ba1085ULL}, + {0x97edd871cfda3a57ULL, 0x688a740f1c344a53ULL}, + {0xbde94e8e43d0c8edULL, 0xc2ad1112e3415ce8ULL}, + {0xed63a231d4c4fb28ULL, 0xb35855579c11b422ULL}, + {0x945e455f24fb1cf9ULL, 0x70173556c18b1095ULL}, + {0xb975d6b6ee39e437ULL, 0x4c1d02ac71edd4bbULL}, + {0xe7d34c64a9c85d45ULL, 0x9f2443578e6949e9ULL}, + {0x90e40fbeea1d3a4bULL, 0x4376aa16b901ce32ULL}, + {0xb51d13aea4a488deULL, 0x9454549c674241beULL}, + {0xe264589a4dcdab15ULL, 0x396969c38112d22eULL}, + {0x8d7eb76070a08aedULL, 0x03e1e21a30abc35dULL}, + {0xb0de65388cc8ada9ULL, 0xc4da5aa0bcd6b434ULL}, + {0xdd15fe86affad913ULL, 0xb610f148ec0c6141ULL}, + {0x8a2dbf142dfcc7acULL, 0x91ca96cd9387bcc8ULL}, + {0xacb92ed9397bf997ULL, 0xb63d3c80f869abfbULL}, + {0xd7e77a8f87daf7fcULL, 0x23cc8ba1368416f9ULL}, + {0x86f0ac99b4e8dafeULL, 0x965fd744c2128e5cULL}, + {0xa8acd7c0222311bdULL, 0x3bf7cd15f29731f3ULL}, + {0xd2d80db02aabd62cULL, 0x0af5c05b6f3cfe6fULL}, + {0x83c7088e1aab65dcULL, 0x86d9983925861f05ULL}, + {0xa4b8cab1a1563f53ULL, 0xa88ffe476ee7a6c7ULL}, + {0xcde6fd5e09abcf27ULL, 0x12b3fdd94aa19079ULL}, + {0x80b05e5ac60b6179ULL, 0xabb07ea7cea4fa4bULL}, + {0xa0dc75f1778e39d7ULL, 0x969c9e51c24e38deULL}, + {0xc913936dd571c84dULL, 0xfc43c5e632e1c716ULL}, + {0xfb5878494ace3a60ULL, 0xfb54b75fbf9a38dcULL}, + {0x9d174b2dcec0e47cULL, 0x9d14f29bd7c06389ULL}, + {0xc45d1df942711d9bULL, 0xc45a2f42cdb07c6bULL}, + {0xf5746577930d6501ULL, 0x3570bb13811c9b86ULL}, + {0x9968bf6abbe85f21ULL, 0x816674ec30b1e134ULL}, + {0xbfc2ef456ae276e9ULL, 0x61c012273cde5981ULL}, + {0xefb3ab16c59b14a3ULL, 0x3a3016b10c15efe1ULL}, + {0x95d04aee3b80ece6ULL, 0x445e0e2ea78db5edULL}, + {0xbb445da9ca612820ULL, 0xd57591ba51712368ULL}, + {0xea1575143cf97227ULL, 0x0ad2f628e5cd6c42ULL}, + {0x924d692ca61be759ULL, 0xa6c3d9d98fa063a9ULL}, + {0xb6e0c377cfa2e12fULL, 0x9074d04ff3887c93ULL}, + {0xe498f455c38b997bULL, 0xf4920463f06a9bb8ULL}, + {0x8edf98b59a373fedULL, 0xb8db42be7642a153ULL}, + {0xb2977ee300c50fe8ULL, 0xa712136e13d349a8ULL}, + {0xdf3d5e9bc0f653e2ULL, 0xd0d6984998c81c12ULL}, + {0x8b865b215899f46dULL, 0x42861f2dff7d118bULL}, + {0xae67f1e9aec07188ULL, 0x1327a6f97f5c55eeULL}, + {0xda01ee641a708deaULL, 0x17f190b7df336b6aULL}, + {0x884134fe908658b3ULL, 0xcef6fa72eb802322ULL}, + {0xaa51823e34a7eedfULL, 0x42b4b90fa6602beaULL}, + {0xd4e5e2cdc1d1ea97ULL, 0x9361e7538ff836e5ULL}, + {0x850fadc09923329fULL, 0xfc1d309439fb224fULL}, + {0xa6539930bf6bff46ULL, 0x7b247cb94879eae3ULL}, + {0xcfe87f7cef46ff17ULL, 0x19ed9be79a98659cULL}, + {0x81f14fae158c5f6fULL, 0xb0348170c09f3f81ULL}, + {0xa26da3999aef774aULL, 0x1c41a1ccf0c70f62ULL}, + {0xcb090c8001ab551dULL, 0xa3520a402cf8d33aULL}, + {0xfdcb4fa002162a64ULL, 0x8c268cd038370809ULL}, + {0x9e9f11c4014dda7fULL, 0xd798180223226505ULL}, + {0xc646d63501a1511eULL, 0x4d7e1e02abeafe47ULL}, + {0xf7d88bc24209a566ULL, 0xe0dda58356e5bdd9ULL}, + {0x9ae7575969460760ULL, 0xcc8a8772164f96a7ULL}, + {0xc1a12d2fc3978938ULL, 0xffad294e9be37c51ULL}, + {0xf209787bb47d6b85ULL, 0x3f9873a242dc5b65ULL}, + {0x9745eb4d50ce6333ULL, 0x07bf484569c9b91fULL}, + {0xbd176620a501fc00ULL, 0x49af1a56c43c2767ULL}, + {0xec5d3fa8ce427b00ULL, 0x5c1ae0ec754b3141ULL}, + {0x93ba47c980e98ce0ULL, 0x3990cc93c94efec8ULL}, + {0xb8a8d9bbe123f018ULL, 0x47f4ffb8bba2be7bULL}, + {0xe6d3102ad96cec1eULL, 0x59f23fa6ea8b6e1aULL}, + {0x9043ea1ac7e41393ULL, 0x783767c8529724d0ULL}, + {0xb454e4a179dd1878ULL, 0xd64541ba673cee04ULL}, + {0xe16a1dc9d8545e95ULL, 0x0bd69229010c2985ULL}, + {0x8ce2529e2734bb1eULL, 0xe7661b59a0a799f3ULL}, + {0xb01ae745b101e9e5ULL, 0xa13fa23008d18070ULL}, + {0xdc21a1171d42645eULL, 0x898f8abc0b05e08cULL}, + {0x899504ae72497ebbULL, 0x95f9b6b586e3ac57ULL}, + {0xabfa45da0edbde6aULL, 0xfb782462e89c976dULL}, + {0xd6f8d7509292d604ULL, 0xba562d7ba2c3bd49ULL}, + {0x865b86925b9bc5c3ULL, 0xf475dc6d45ba564dULL}, + {0xa7f26836f282b733ULL, 0x719353889728ebe1ULL}, + {0xd1ef0244af236500ULL, 0xcdf8286abcf326d9ULL}, + {0x8335616aed761f20ULL, 0x80bb1942b617f847ULL}, + {0xa402b9c5a8d3a6e8ULL, 0xa0e9df93639df659ULL}, + {0xcd036837130890a2ULL, 0xc92457783c8573f0ULL}, + {0x802221226be55a65ULL, 0x3db6b6ab25d36876ULL}, + {0xa02aa96b06deb0feULL, 0x0d246455ef484293ULL}, + {0xc83553c5c8965d3eULL, 0x906d7d6b6b1a5338ULL}, + {0xfa42a8b73abbf48dULL, 0x3488dcc645e0e806ULL}, + {0x9c69a97284b578d8ULL, 0x00d589fbebac9104ULL}, + {0xc38413cf25e2d70eULL, 0x010aec7ae697b545ULL}, + {0xf46518c2ef5b8cd2ULL, 0x814da799a03da296ULL}, + {0x98bf2f79d5993803ULL, 0x10d088c00426859eULL}, + {0xbeeefb584aff8604ULL, 0x5504aaf005302705ULL}, + {0xeeaaba2e5dbf6785ULL, 0x6a45d5ac067c30c7ULL}, + {0x952ab45cfa97a0b3ULL, 0x226ba58b840d9e7cULL}, + {0xba756174393d88e0ULL, 0x6b068eee6511061bULL}, + {0xe912b9d1478ceb18ULL, 0x85c832a9fe5547a2ULL}, + {0x91abb422ccb812efULL, 0x539d1faa3ef54cc5ULL}, + {0xb616a12b7fe617abULL, 0xa8846794ceb29ff6ULL}, + {0xe39c49765fdf9d95ULL, 0x12a5817a025f47f4ULL}, + {0x8e41ade9fbebc27eULL, 0xeba770ec417b8cf8ULL}, + {0xb1d219647ae6b31dULL, 0xa6914d2751da7037ULL}, + {0xde469fbd99a05fe4ULL, 0x9035a07126510c44ULL}, + {0x8aec23d680043befULL, 0xda218446b7f2a7abULL}, + {0xada72ccc20054aeaULL, 0x50a9e55865ef5195ULL}, + {0xd910f7ff28069da5ULL, 0xe4d45eae7f6b25fbULL}, + {0x87aa9aff79042287ULL, 0x6f04bb2d0fa2f7bdULL}, + {0xa99541bf57452b29ULL, 0xcac5e9f8538bb5acULL}, + {0xd3fa922f2d1675f3ULL, 0xbd776476686ea317ULL}, + {0x847c9b5d7c2e09b8ULL, 0x966a9eca014525eeULL}, + {0xa59bc234db398c26ULL, 0xbc05467c81966f6aULL}, + {0xcf02b2c21207ef2fULL, 0x6b06981ba1fc0b44ULL}, + {0x8161afb94b44f57eULL, 0xe2e41f11453d870aULL}, + {0xa1ba1ba79e1632ddULL, 0x9b9d26d5968ce8cdULL}, + {0xca28a291859bbf94ULL, 0x8284708afc302301ULL}, + {0xfcb2cb35e702af79ULL, 0xa3258cadbb3c2bc1ULL}, + {0x9defbf01b061adacULL, 0xc5f777ec95059b58ULL}, + {0xc56baec21c7a1917ULL, 0xf77555e7ba47022fULL}, + {0xf6c69a72a3989f5cULL, 0x7552ab61a8d8c2baULL}, + {0x9a3c2087a63f639aULL, 0xc953ab1d098779b4ULL}, + {0xc0cb28a98fcf3c80ULL, 0x7ba895e44be95822ULL}, + {0xf0fdf2d3f3c30ba0ULL, 0x9a92bb5d5ee3ae2aULL}, + {0x969eb7c47859e744ULL, 0x609bb51a5b4e4cdaULL}, + {0xbc4665b596706115ULL, 0x78c2a260f221e011ULL}, + {0xeb57ff22fc0c795aULL, 0x56f34af92eaa5815ULL}, + {0x9316ff75dd87cbd9ULL, 0xf6580edbbd2a770dULL}, + {0xb7dcbf5354e9becfULL, 0xf3ee1292ac7514d0ULL}, + {0xe5d3ef282a242e82ULL, 0x70e9973757925a05ULL}, + {0x8fa475791a569d11ULL, 0x0691fe8296bb7843ULL}, + {0xb38d92d760ec4456ULL, 0xc8367e233c6a5653ULL}, + {0xe070f78d3927556bULL, 0x7a441dac0b84ebe8ULL}, + {0x8c469ab843b89563ULL, 0x6c6a928b87331371ULL}, + {0xaf58416654a6babcULL, 0xc785372e68ffd84dULL}, + {0xdb2e51bfe9d0696bULL, 0xf96684fa033fce61ULL}, + {0x88fcf317f22241e3ULL, 0xbbe0131c4207e0fcULL}, + {0xab3c2fddeeaad25bULL, 0x2ad817e35289d93cULL}, + {0xd60b3bd56a5586f2ULL, 0x758e1ddc272c4f8bULL}, + {0x85c7056562757457ULL, 0x0978d2a9987bb1b6ULL}, + {0xa738c6bebb12d16dULL, 0x4bd70753fe9a9e24ULL}, + {0xd106f86e69d785c8ULL, 0x1eccc928fe4145adULL}, + {0x82a45b450226b39dULL, 0x133ffdb99ee8cb8cULL}, + {0xa34d721642b06085ULL, 0xd80ffd2806a2fe6fULL}, + {0xcc20ce9bd35c78a6ULL, 0xce13fc72084bbe0bULL}, + {0xff290242c83396cfULL, 0x8198fb8e8a5ead8eULL}, + {0x9f79a169bd203e42ULL, 0xf0ff9d39167b2c79ULL}, + {0xc75809c42c684dd2ULL, 0xad3f84875c19f797ULL}, + {0xf92e0c3537826146ULL, 0x588f65a93320757dULL}, + {0x9bbcc7a142b17cccULL, 0x77599f89bff4496eULL}, + {0xc2abf989935ddbffULL, 0x9530076c2ff15bcaULL}, + {0xf356f7ebf83552ffULL, 0xfa7c09473bedb2bcULL}, + {0x98165af37b2153dfULL, 0x3c8d85cc85748fb5ULL}, + {0xbe1bf1b059e9a8d7ULL, 0x8bb0e73fa6d1b3a3ULL}, + {0xeda2ee1c7064130dULL, 0xee9d210f9086208cULL}, + {0x9485d4d1c63e8be8ULL, 0x752234a9ba53d457ULL}, + {0xb9a74a0637ce2ee2ULL, 0x926ac1d428e8c96dULL}, + {0xe8111c87c5c1ba9aULL, 0x370572493322fbc8ULL}, + {0x910ab1d4db9914a1ULL, 0xe263676dbff5dd5dULL}, + {0xb54d5e4a127f59c9ULL, 0xdafc41492ff354b4ULL}, + {0xe2a0b5dc971f303bULL, 0xd1bb519b7bf029e2ULL}, + {0x8da471a9de737e25ULL, 0xa31513012d761a2dULL}, + {0xb10d8e1456105daeULL, 0x8bda57c178d3a0b8ULL}, + {0xdd50f1996b947519ULL, 0x2ed0edb1d70888e6ULL}, + {0x8a5296ffe33cc930ULL, 0x7d42948f26655590ULL}, + {0xace73cbfdc0bfb7cULL, 0x9c9339b2effeaaf4ULL}, + {0xd8210befd30efa5bULL, 0xc3b8081fabfe55b1ULL}, + {0x8714a775e3e95c79ULL, 0x9a530513cb7ef58eULL}, + {0xa8d9d1535ce3b397ULL, 0x80e7c658be5eb2f2ULL}, + {0xd31045a8341ca07dULL, 0xe121b7eeedf65fafULL}, + {0x83ea2b892091e44eULL, 0x6cb512f554b9fbcdULL}, + {0xa4e4b66b68b65d61ULL, 0x07e257b2a9e87ac0ULL}, + {0xce1de40642e3f4baULL, 0xc9daed9f54629971ULL}, + {0x80d2ae83e9ce78f4ULL, 0x3e28d48394bd9fe6ULL}, + {0xa1075a24e4421731ULL, 0x4db309a479ed07e0ULL}, + {0xc94930ae1d529cfdULL, 0x211fcc0d986849d8ULL}, + {0xfb9b7cd9a4a7443dULL, 0xe967bf10fe825c4eULL}, + {0x9d412e0806e88aa6ULL, 0x71e0d76a9f1179b1ULL}, + {0xc491798a08a2ad4fULL, 0x0e590d4546d5d81dULL}, + {0xf5b5d7ec8acb58a3ULL, 0x51ef5096988b4e24ULL}, + {0x9991a6f3d6bf1766ULL, 0x5335925e1f5710d6ULL}, + {0xbff610b0cc6edd40ULL, 0xe802f6f5a72cd50cULL}, + {0xeff394dcff8a948fULL, 0x2203b4b310f80a4fULL}, + {0x95f83d0a1fb69cdaULL, 0xb54250efea9b0671ULL}, + {0xbb764c4ca7a44410ULL, 0x6292e52be541c80eULL}, + {0xea53df5fd18d5514ULL, 0x7b379e76de923a12ULL}, + {0x92746b9be2f8552dULL, 0xcd02c30a4b1b644bULL}, + {0xb7118682dbb66a78ULL, 0xc04373ccdde23d5eULL}, + {0xe4d5e82392a40516ULL, 0xf05450c0155accb5ULL}, + {0x8f05b1163ba6832eULL, 0xd634b2780d58bff1ULL}, + {0xb2c71d5bca9023f9ULL, 0x8bc1df1610aeefedULL}, + {0xdf78e4b2bd342cf7ULL, 0x6eb256db94daabe9ULL}, + {0x8bab8eefb6409c1bULL, 0xe52f76493d08ab71ULL}, + {0xae9672aba3d0c321ULL, 0x5e7b53db8c4ad64eULL}, + {0xda3c0f568cc4f3e9ULL, 0x361a28d26f5d8be1ULL}, + {0x8865899617fb1872ULL, 0x81d05983859a776dULL}, + {0xaa7eebfb9df9de8eULL, 0x22446fe467011548ULL}, + {0xd51ea6fa85785632ULL, 0xaad58bdd80c15a9aULL}, + {0x8533285c936b35dfULL, 0x2ac5776a7078d8a0ULL}, + {0xa67ff273b8460357ULL, 0x7576d5450c970ec8ULL}, + {0xd01fef10a657842dULL, 0xd2d48a964fbcd27aULL}, + {0x8213f56a67f6b29cULL, 0x63c4d69df1d6038cULL}, + {0xa298f2c501f45f43ULL, 0x7cb60c456e4b8470ULL}, + {0xcb3f2f7642717714ULL, 0xdbe38f56c9de658cULL}, + {0xfe0efb53d30dd4d8ULL, 0x12dc732c7c55feefULL}, + {0x9ec95d1463e8a507ULL, 0x0bc9c7fbcdb5bf55ULL}, + {0xc67bb4597ce2ce49ULL, 0x4ebc39fac1232f2aULL}, + {0xf81aa16fdc1b81dbULL, 0x226b4879716bfaf5ULL}, + {0x9b10a4e5e9913129ULL, 0x35830d4be6e37cd9ULL}, + {0xc1d4ce1f63f57d73ULL, 0x02e3d09ee09c5c0fULL}, + {0xf24a01a73cf2dcd0ULL, 0x439cc4c698c37313ULL}, + {0x976e41088617ca02ULL, 0x2a41fafc1f7a27ecULL}, + {0xbd49d14aa79dbc83ULL, 0xb4d279bb2758b1e7ULL}, + {0xec9c459d51852ba3ULL, 0x22071829f12ede61ULL}, + {0x93e1ab8252f33b46ULL, 0x35446f1a36bd4afcULL}, + {0xb8da1662e7b00a18ULL, 0xc2958ae0c46c9dbcULL}, + {0xe7109bfba19c0c9eULL, 0xf33aed98f587c52bULL}, + {0x906a617d450187e3ULL, 0xd804d47f9974db3aULL}, + {0xb484f9dc9641e9dbULL, 0x4e06099f7fd21209ULL}, + {0xe1a63853bbd26452ULL, 0xa1878c075fc6968cULL}, + {0x8d07e33455637eb3ULL, 0x24f4b7849bdc1e17ULL}, + {0xb049dc016abc5e60ULL, 0x6e31e565c2d3259dULL}, + {0xdc5c5301c56b75f8ULL, 0x89be5ebf3387ef04ULL}, + {0x89b9b3e11b6329bbULL, 0x5616fb378034f562ULL}, + {0xac2820d9623bf42aULL, 0xab9cba05604232bbULL}, + {0xd732290fbacaf134ULL, 0x5683e886b852bf6aULL}, + {0x867f59a9d4bed6c1ULL, 0xb61271543333b7a2ULL}, + {0xa81f301449ee8c71ULL, 0xa3970da94000a58bULL}, + {0xd226fc195c6a2f8dULL, 0x8c7cd1139000ceeeULL}, + {0x83585d8fd9c25db8ULL, 0x37ce02ac3a008154ULL}, + {0xa42e74f3d032f526ULL, 0x45c183574880a1aaULL}, + {0xcd3a1230c43fb270ULL, 0xd731e42d1aa0ca14ULL}, + {0x80444b5e7aa7cf86ULL, 0x867f2e9c30a47e4cULL}, + {0xa0555e361951c367ULL, 0x281efa433ccd9de0ULL}, + {0xc86ab5c39fa63441ULL, 0x7226b8d40c010558ULL}, + {0xfa856334878fc151ULL, 0x4eb067090f0146aeULL}, + {0x9c935e00d4b9d8d3ULL, 0x912e4065a960cc2cULL}, + {0xc3b8358109e84f08ULL, 0xf579d07f13b8ff37ULL}, + {0xf4a642e14c6262c9ULL, 0x32d8449ed8a73f05ULL}, + {0x98e7e9cccfbd7dbeULL, 0x7fc72ae347688763ULL}, + {0xbf21e44003acdd2dULL, 0x1fb8f59c1942a93cULL}, + {0xeeea5d5004981479ULL, 0xe7a733031f93538bULL}, + {0x95527a5202df0cccULL, 0xf0c87fe1f3bc1437ULL}, + {0xbaa718e68396cffeULL, 0x2cfa9fda70ab1945ULL}, + {0xe950df20247c83feULL, 0xb83947d10cd5df96ULL}, + {0x91d28b7416cdd27fULL, 0xb323cce2a805abbeULL}, + {0xb6472e511c81471eULL, 0x1fecc01b520716adULL}, + {0xe3d8f9e563a198e6ULL, 0xa7e7f0222688dc59ULL}, + {0x8e679c2f5e44ff90ULL, 0xa8f0f615581589b7ULL}, + {0xb201833b35d63f74ULL, 0xd32d339aae1aec25ULL}, + {0xde81e40a034bcf50ULL, 0x07f8808159a1a72eULL}, + {0x8b112e86420f6192ULL, 0x04fb5050d805087dULL}, + {0xadd57a27d29339f7ULL, 0x863a24650e064a9cULL}, + {0xd94ad8b1c7380875ULL, 0xe7c8ad7e5187dd43ULL}, + {0x87cec76f1c830549ULL, 0x70dd6c6ef2f4ea4aULL}, + {0xa9c2794ae3a3c69bULL, 0x4d14c78aafb224ddULL}, + {0xd433179d9c8cb842ULL, 0xa059f96d5b9eae14ULL}, + {0x849feec281d7f329ULL, 0x24383be459432cccULL}, + {0xa5c7ea73224deff4ULL, 0xed464add6f93f7ffULL}, + {0xcf39e50feae16bf0ULL, 0x2897dd94cb78f5ffULL}, + {0x81842f29f2cce376ULL, 0x195eea7cff2b99bfULL}, + {0xa1e53af46f801c54ULL, 0x9fb6a51c3ef6802fULL}, + {0xca5e89b18b602369ULL, 0xc7a44e634eb4203bULL}, + {0xfcf62c1dee382c43ULL, 0xb98d61fc2261284aULL}, + {0x9e19db92b4e31baaULL, 0x93f85d3d957cb92eULL}, + {0xc5a05277621be294ULL, 0x38f6748cfadbe77aULL}, + {0xf70867153aa2db39ULL, 0x473411b03992e158ULL}, + {0x9a65406d44a5c904ULL, 0x8c808b0e23fbccd7ULL}, + {0xc0fe908895cf3b45ULL, 0xafa0add1acfac00dULL}, + {0xf13e34aabb430a16ULL, 0x9b88d94618397010ULL}, + {0x96c6e0eab509e64eULL, 0xa13587cbcf23e60aULL}, + {0xbc789925624c5fe1ULL, 0x4982e9bec2ecdf8dULL}, + {0xeb96bf6ebadf77d9ULL, 0x1be3a42e73a81770ULL}, + {0x933e37a534cbaae8ULL, 0x716e469d08490ea6ULL}, + {0xb80dc58e81fe95a2ULL, 0x8dc9d8444a5b524fULL}, + {0xe61136f2227e3b0aULL, 0x313c4e555cf226e3ULL}, + {0x8fcac257558ee4e7ULL, 0xdec5b0f55a17584eULL}, + {0xb3bd72ed2af29e20ULL, 0x56771d32b09d2e62ULL}, + {0xe0accfa875af45a8ULL, 0x6c14e47f5cc479faULL}, + {0x8c6c01c9498d8b89ULL, 0x438d0ecf99facc3cULL}, + {0xaf87023b9bf0ee6bULL, 0x1470528380797f4bULL}, + {0xdb68c2ca82ed2a06ULL, 0x598c67246097df1eULL}, +}; + +/* ======== Uscale core ======== */ + +static inline int log10_pow2(int x) +{ + return (x * 78913) >> 18; +} + +static inline int log2_pow10(int x) +{ + return (x * 108853) >> 15; +} + +typedef struct { + pow10_entry pm; + int s; +} scaler; + +static inline scaler prescale(int e, int p, int lp) +{ + scaler c; + c.pm = pow10_tab[p - POW10_MIN]; + c.s = -(e + lp + 3); + return c; +} + +typedef uint64_t unrounded; + +static inline uint64_t ur_floor(unrounded u) { return (u + 0) >> 2; } +static inline uint64_t ur_round(unrounded u) { return (u + 1 + ((u >> 2) & 1)) >> 2; } +static inline uint64_t ur_ceil(unrounded u) { return (u + 3) >> 2; } + +static inline unrounded ur_div(unrounded u, uint64_t d) +{ + return (u / d) | (u & 1) | (u % d != 0 ? 1 : 0); +} + +static inline unrounded ur_nudge(unrounded u, int delta) +{ + return u + delta; +} + +static unrounded uscale(uint64_t x, scaler c) +{ + uint64_t hi, mid, mid2, lo_unused; + mul64(x, c.pm.hi, &hi, &mid); + + uint64_t sticky = 1; + + if (c.s >= 64) { + /* x * 10^p < 2^(c.s-64), i.e. < 1 in the unrounded "1.0 = 4" encoding; + rounds to 0 with sticky=1 */ + return sticky; + } + + uint64_t mask = (1ULL << c.s) - 1; + + if ((hi & mask) == 0) { + mul64(x, c.pm.lo, &mid2, &lo_unused); + sticky = (mid - mid2) > 1 ? 1 : 0; + hi -= (mid < mid2) ? 1 : 0; + } + + return (hi >> c.s) | sticky; +} + +static void unpack64(double f, uint64_t *m, int *e) +{ + const int shift = 64 - 53; + const int min_exp = -(1074 + shift); + union { double d; uint64_t u; } u; + u.d = f; + uint64_t bits = u.u; + int exp; + + *m = (1ULL << 63) | ((bits & ((1ULL << 52) - 1)) << shift); + exp = (int)((bits >> 52) & ((1 << shift) - 1)); + + if (exp == 0) { + int s; + *m &= ~(1ULL << 63); + *e = min_exp; + s = clz64(*m); + *m <<= s; + *e -= s; + } + else { + *e = (exp - 1) + min_exp; + } +} + +static double pack64(uint64_t m, int e) +{ + union { double d; uint64_t u; } u; + if ((m & (1ULL << 52)) == 0) { + /* subnormal */ + u.u = m; + } + else { + int biased = 1075 + e; + if (biased >= 2047) { + /* exponent overflow -> infinity */ + u.u = 0x7FF0000000000000ULL; + } + else { + u.u = (m & ~(1ULL << 52)) | ((uint64_t)biased << 52); + } + } + return u.d; +} + +/* ======== Algorithm helpers ======== */ + +static const uint64_t uint64_pow10[20] = { + 1ULL, 10ULL, 100ULL, 1000ULL, 10000ULL, + 100000ULL, 1000000ULL, 10000000ULL, 100000000ULL, 1000000000ULL, + 10000000000ULL, 100000000000ULL, 1000000000000ULL, 10000000000000ULL, + 100000000000000ULL, 1000000000000000ULL, 10000000000000000ULL, + 100000000000000000ULL, 1000000000000000000ULL, 10000000000000000000ULL, +}; + +static int count_digits(uint64_t d) +{ + if (d == 0) return 1; /* clz64(0) is UB; "0" is one digit */ + int nd = log10_pow2(bits_len64(d)); + return nd + (d >= uint64_pow10[nd] ? 1 : 0); +} + +static void fixed_width(double f, int n, uint64_t *d, int *p) +{ + uint64_t m; + int e; + unrounded u; + + unpack64(f, &m, &e); + *p = n - 1 - log10_pow2(e + 63); + u = uscale(m, prescale(e, *p, log2_pow10(*p))); + *d = ur_round(u); + + if (*d >= uint64_pow10[n]) { + *d = ur_round(ur_div(u, 10)); + (*p)--; + } + *p = -(*p); +} + +/* + * skewed returns floor(log10(3/4 * 2^e)) + * Used for shortest-width printing at powers of 2 + */ +static inline int skewed(int e) +{ + return (e * 631305 - 261663) >> 21; +} + +/* + * trim_zeros removes trailing zeros from x * 10^p + */ +static void trim_zeros(uint64_t *x, int *p) +{ + const uint64_t inv5 = 0xcccccccccccccccdULL; + const uint64_t inv5p2 = 0x8f5c28f5c28f5c29ULL; + const uint64_t inv5p4 = 0xd288ce703afb7e91ULL; + const uint64_t inv5p8 = 0xc767074b22e90e21ULL; + uint64_t d; + + /* cut 1 zero, or else return */ + d = (*x * inv5); + d = (d >> 1) | (d << 63); + if (d <= UINT64_MAX / 10) { + *x = d; + (*p)++; + } + else { + return; + } + + /* cut 8 zeros */ + d = (*x * inv5p8); + d = (d >> 8) | (d << 56); + if (d <= UINT64_MAX / 100000000ULL) { + *x = d; + *p += 8; + } + + /* cut 4 zeros */ + d = (*x * inv5p4); + d = (d >> 4) | (d << 60); + if (d <= UINT64_MAX / 10000) { + *x = d; + *p += 4; + } + + /* cut 2 zeros */ + d = (*x * inv5p2); + d = (d >> 2) | (d << 62); + if (d <= UINT64_MAX / 100) { + *x = d; + *p += 2; + } + + /* cut 1 zero */ + d = (*x * inv5); + d = (d >> 1) | (d << 63); + if (d <= UINT64_MAX / 10) { + *x = d; + (*p)++; + } +} + +/* + * shortest computes the shortest formatting of f + * Returns d and p such that d * 10^p equals f when parsed + */ +static void shortest(double f, uint64_t *d, int *p) +{ + const int min_exp = -1085; + uint64_t m, min, max; + int e, z, odd, lp; + scaler pre; + uint64_t dmin, dmax; + + unpack64(f, &m, &e); + z = 11; + + if (m == (1ULL << 63) && e > min_exp) { + *p = -skewed(e + z); + min = m - (1ULL << (z - 2)); + } + else { + if (e < min_exp) { + z = 11 + (min_exp - e); + } + *p = -log10_pow2(e + z); + min = m - (1ULL << (z - 1)); + } + max = m + (1ULL << (z - 1)); + odd = (int)((m >> z) & 1); + + lp = log2_pow10(*p); + pre = prescale(e, *p, lp); + + dmin = ur_ceil(ur_nudge(uscale(min, pre), +odd)); + dmax = ur_floor(ur_nudge(uscale(max, pre), -odd)); + + /* check if a multiple of 10 is in range */ + *d = dmax / 10; + if (*d * 10 >= dmin) { + int new_p = -(*p - 1); + trim_zeros(d, &new_p); + *p = new_p; + return; + } + + /* multiple valid values: pick the rounded one */ + if (dmin < dmax) { + *d = ur_round(uscale(m, pre)); + } + else { + *d = dmin; + } + *p = -(*p); +} + +static double parse_decimal(uint64_t d, int p) +{ + int b, lp, e; + unrounded u; + uint64_t m; + + if (d == 0) return 0.0; + + b = bits_len64(d); + lp = log2_pow10(p); + e = 53 - b - lp; + if (e > 1074) e = 1074; + + u = uscale(d << (64 - b), prescale(e - (64 - b), p, lp)); + m = ur_round(u); + + if (m >= (1ULL << 53)) { + u = (u >> 1) | (u & 1); + m = ur_round(u); + e--; + } + + return pack64(m, -e); +} + +/* ======== Formatting helpers ======== */ + +static const char i2a[] = + "00010203040506070809" + "10111213141516171819" + "20212223242526272829" + "30313233343536373839" + "40414243444546474849" + "50515253545556575859" + "60616263646566676869" + "70717273747576777879" + "80818283848586878889" + "90919293949596979899"; + +static void format_base10(char *buf, int nd, uint64_t u) +{ + int idx; + while (nd >= 2) { + nd -= 2; + idx = (int)(u % 100) * 2; + u /= 100; + buf[nd] = i2a[idx]; + buf[nd + 1] = i2a[idx + 1]; + } + if (nd > 0) { + buf[0] = '0' + (char)u; + } +} + +static int +emit_exp(char *s, char e_char, int exp) +{ + char *p = s; + *p++ = e_char; + if (exp < 0) { + *p++ = '-'; + exp = -exp; + } + else { + *p++ = '+'; + } + if (exp >= 100) { + *p++ = '0' + exp / 100; + exp %= 100; + } + *p++ = '0' + exp / 10; + *p++ = '0' + exp % 10; + return (int)(p - s); +} + +/* ======== mrb_format_float ======== */ + +#ifdef MRB_USE_FLOAT32 +#define FLT_MIN_BUF_SIZE 6 +#else +#define FLT_MIN_BUF_SIZE 7 +#endif + +int +mrb_format_float(mrb_float f, char *buf, size_t buf_size, char fmt, int prec, char sign) +{ + char *s = buf; + int buf_remaining = (int)buf_size - 1; + int alt_form = 0; + char e_char; + + if ((uint8_t)fmt & 0x80) { + fmt &= 0x7f; + alt_form = 1; + } + if (buf_size <= (size_t)FLT_MIN_BUF_SIZE) { + if (buf_size >= 2) *s++ = '?'; + if (buf_size >= 1) *s = '\0'; + return buf_size >= 2; + } + if (signbit(f)) { + *s++ = '-'; + f = -f; + } + else if (sign) { + *s++ = sign; + } + buf_remaining -= (int)(s - buf); + + { + char uc = fmt & 0x20; + if (isinf(f)) { + *s++ = 'I' ^ uc; + *s++ = 'N' ^ uc; + *s++ = 'F' ^ uc; + goto done; + } + if (isnan(f)) { + *s++ = 'N' ^ uc; + *s++ = 'A' ^ uc; + *s++ = 'N' ^ uc; + goto done; + } + } + + { + int use_shortest = (prec == -2); + if (prec < 0) prec = 6; + e_char = 'E' | (fmt & 0x20); + fmt |= 0x20; + if (fmt == 'g') { + if (use_shortest) fmt = 'S'; /* shortest mode */ + else if (prec == 0) prec = 1; + } + } + + if (f == 0.0) { + /* zero */ + if (fmt == 'e') { + *s++ = '0'; + if (prec > 0 || alt_form) { + int i; + if (prec > buf_remaining - 5) prec = buf_remaining - 5; + *s++ = '.'; + for (i = 0; i < prec; i++) *s++ = '0'; + } + s += emit_exp(s, e_char, 0); + } + else if (fmt == 'f') { + int i; + if (prec > buf_remaining - 2) prec = buf_remaining - 2; + *s++ = '0'; + if (prec > 0 || alt_form) { + *s++ = '.'; + for (i = 0; i < prec; i++) *s++ = '0'; + } + } + else { /* g */ + *s++ = '0'; + if (alt_form && prec > 1) { + int i; + *s++ = '.'; + for (i = 1; i < prec; i++) *s++ = '0'; + } + } + } + else { + /* nonzero finite */ + uint64_t d; + int p, nd, exp; + char digs[20] = {0}; /* gcc -Wmaybe-uninitialized false positive: count_digits() always returns >= 1 */ + + if (fmt == 'S') { + /* shortest representation for to_s */ + int i; + shortest((double)f, &d, &p); + nd = count_digits(d); + exp = p + nd - 1; + format_base10(digs, nd, d); + +#ifdef MRB_USE_FLOAT32 + if (exp < -4 || exp >= 7) { +#else + if (exp < -4 || exp >= 15) { +#endif + /* e format */ + *s++ = digs[0]; + if (nd > 1) { + *s++ = '.'; + for (i = 1; i < nd; i++) *s++ = digs[i]; + } + s += emit_exp(s, e_char, exp); + } + else { + /* f format */ + if (exp < 0) { + *s++ = '0'; + *s++ = '.'; + for (i = 0; i < -(exp + 1); i++) *s++ = '0'; + for (i = 0; i < nd; i++) *s++ = digs[i]; + } + else { + for (i = 0; i <= exp && i < nd; i++) *s++ = digs[i]; + for (; i <= exp; i++) *s++ = '0'; + if (nd > exp + 1) { + *s++ = '.'; + for (i = exp + 1; i < nd; i++) *s++ = digs[i]; + } + } + } + } + else if (fmt == 'g') { + /* g/G format */ + int fprec, n = prec; + if (n > 18) n = 18; + if (n < 1) n = 1; + fixed_width((double)f, n, &d, &p); + nd = count_digits(d); + exp = p + nd - 1; + + if (exp < -4 || exp >= prec) { + /* use e format with prec-1 fractional digits */ + fprec = prec - 1; + format_base10(digs, nd, d); + *s++ = digs[0]; + if (fprec > 0 || alt_form) { + int i, end = fprec < nd - 1 ? fprec : nd - 1; + *s++ = '.'; + for (i = 0; i < end; i++) *s++ = digs[1 + i]; + for (; i < fprec; i++) *s++ = '0'; + } + if (!alt_form) { + /* strip trailing zeros */ + while (s > buf && s[-1] == '0') s--; + if (s > buf && s[-1] == '.') s--; + } + s += emit_exp(s, e_char, exp); + } + else { + /* use f format with prec-(exp+1) fractional digits */ + int i; + fprec = prec - (exp + 1); + format_base10(digs, nd, d); + /* integer part */ + if (exp < 0) { + *s++ = '0'; + } + else { + for (i = 0; i <= exp && i < nd; i++) *s++ = digs[i]; + for (; i <= exp; i++) *s++ = '0'; + } + if (fprec > 0 || alt_form) { + int frac_avail; + *s++ = '.'; + if (exp < 0) { + int zeros = -(exp + 1); + for (i = 0; i < zeros && i < fprec; i++) *s++ = '0'; + frac_avail = fprec - zeros; + for (i = 0; i < frac_avail && i < nd; i++) *s++ = digs[i]; + for (; i < frac_avail; i++) *s++ = '0'; + } + else { + frac_avail = nd - exp - 1; + if (frac_avail < 0) frac_avail = 0; + for (i = 0; i < frac_avail && i < fprec; i++) *s++ = digs[exp + 1 + i]; + for (; i < fprec; i++) *s++ = '0'; + } + } + if (!alt_form && fprec > 0) { + while (s[-1] == '0') s--; + if (s[-1] == '.') s--; + } + } + } + else if (fmt == 'e') { + /* e/E format */ + int n = prec + 1; + int i; + if (n > 18) n = 18; + if (n < 1) n = 1; + fixed_width((double)f, n, &d, &p); + nd = count_digits(d); + exp = p + nd - 1; + format_base10(digs, nd, d); + *s++ = digs[0]; + if (prec > 0 || alt_form) { + *s++ = '.'; + for (i = 1; i < nd; i++) *s++ = digs[i]; + for (i = nd - 1; i < prec; i++) *s++ = '0'; + } + s += emit_exp(s, e_char, exp); + } + else { + /* f/F format */ + int exp_est, i; + union { double d; uint64_t u; } uf; + uf.d = (double)f; + exp_est = log10_pow2((int)((uf.u >> 52) & 0x7FF) - 1023); + + if (exp_est >= buf_remaining) { + /* too big for f, use e */ + int n = prec + 1; + if (n > 18) n = 18; + if (n < 1) n = 1; + fixed_width((double)f, n, &d, &p); + nd = count_digits(d); + exp = p + nd - 1; + format_base10(digs, nd, d); + *s++ = digs[0]; + if (prec > 0 || alt_form) { + *s++ = '.'; + for (i = 1; i < nd; i++) *s++ = digs[i]; + for (i = nd - 1; i < prec; i++) *s++ = '0'; + } + s += emit_exp(s, e_char, exp); + } + else { + if ((exp_est + prec + 2) > buf_remaining) { + prec = buf_remaining - exp_est - 2; + if (prec < 0) prec = 0; + } + + if (exp_est + prec <= 17) { + /* Direct uscale: compute round(f * 10^prec) */ + uint64_t m; + int e, lp; + unrounded u; + unpack64((double)f, &m, &e); + lp = log2_pow10(prec); + u = uscale(m, prescale(e, prec, lp)); + d = ur_round(u); + nd = count_digits(d); + exp = nd - prec - 1; + } + else { + /* Large number: use fixed_width, pad with zeros */ + int n = 18; + fixed_width((double)f, n, &d, &p); + nd = count_digits(d); + exp = p + nd - 1; + } + format_base10(digs, nd, d); + + if (exp >= 0) { + for (i = 0; i < nd && i <= exp; i++) *s++ = digs[i]; + for (; i <= exp; i++) *s++ = '0'; + if (prec > 0 || alt_form) { + int frac_avail = nd - exp - 1; + if (frac_avail < 0) frac_avail = 0; + *s++ = '.'; + for (i = 0; i < frac_avail && i < prec; i++) *s++ = digs[exp + 1 + i]; + for (; i < prec; i++) *s++ = '0'; + } + } + else { + int zeros, frac_avail; + *s++ = '0'; + if (prec > 0 || alt_form) { + *s++ = '.'; + zeros = -(exp + 1); + for (i = 0; i < zeros && i < prec; i++) *s++ = '0'; + frac_avail = prec - zeros; + for (i = 0; i < frac_avail && i < nd; i++) *s++ = digs[i]; + for (; i < frac_avail; i++) *s++ = '0'; + } + } + } + } + } + +done: + *s = '\0'; + return (int)(s - buf); +} + +/* ======== mrb_read_float ======== */ + +MRB_API mrb_bool +mrb_read_float(const char *str, char **endp, double *fp) +{ + const char *p = str; + const char *a = p; + uint64_t d = 0; + int nd = 0; + int dp = 0; + int trunc = 0; + int sign = 1; + int any_digits = 0; + + while (ISSPACE((unsigned char)*p)) p++; + + if (*p == '-') { sign = -1; p++; } + else if (*p == '+') p++; + + /* skip leading zeros */ + while (*p == '0') { p++; any_digits = 1; } + + /* integer part */ + while (ISDIGIT(*p)) { + if (nd < 19) { + d = d * 10 + (*p - '0'); + nd++; + } + else { + trunc++; + } + any_digits = 1; + a = ++p; + } + + /* fractional part */ + if (*p == '.') { + p++; + if (nd == 0) { + while (*p == '0') { dp++; p++; any_digits = 1; } + } + while (ISDIGIT(*p)) { + if (nd < 19) { + d = d * 10 + (*p - '0'); + nd++; + dp++; + } + any_digits = 1; + p++; + } + a = p; + } + + if (!any_digits) { + if (endp) *endp = (char*)str; + *fp = 0.0; + return FALSE; + } + + /* exponent (optional). On malformed exponent ("5e", "5e+"), keep `a` + pointing at the 'e' so the caller's *endp reflects where parsing + stopped, and fall through using the mantissa-only result. */ + int final_p = trunc - dp; + if ((*p | 32) == 'e') { + int e = 0; + int exp_sign = 1; + p++; + if (*p == '-') { exp_sign = -1; p++; } + else if (*p == '+') p++; + + if (ISDIGIT(*p)) { + while (ISDIGIT(*p)) { + if (e < 10000) e = e * 10 + (*p - '0'); + p++; + } + final_p += e * exp_sign; + a = p; + } + } + + { + double res; + if (d == 0) { + res = 0.0; + } + else if (final_p > 308) { + res = HUGE_VAL; + } + else if (final_p < POW10_MIN) { + res = 0.0; + } + else { + res = parse_decimal(d, final_p); + } + if (sign < 0) res = -res; + *fp = res; + } + + if (endp) *endp = (char*)a; + return TRUE; +} + +#endif /* MRB_NO_FLOAT */ diff --git a/src/gc.c b/src/gc.c index c0751102f9..f8021c6b16 100644 --- a/src/gc.c +++ b/src/gc.c @@ -22,12 +22,16 @@ #include #include #include -#include #ifdef MRB_GC_STRESS #include #endif +#ifdef MRB_USE_TASK_SCHEDULER +/* Forward declaration - actual implementation in task.c */ +void mrb_task_mark_all(mrb_state *mrb); +#endif + /* = Tri-color Incremental Garbage Collection @@ -88,7 +92,7 @@ == Generational Mode - mruby's GC offers an Generational Mode while re-using the tri-color GC + mruby's GC offers an Generational Mode while reusing the tri-color GC infrastructure. It will treat the Black objects as Old objects after each sweep phase, instead of painting them White. The key ideas are still the same as traditional generational GC: @@ -107,23 +111,37 @@ */ +typedef struct RVALUE RVALUE; + struct free_obj { MRB_OBJECT_HEADER; - struct RBasic *next; + RVALUE *next; }; struct RVALUE_initializer { MRB_OBJECT_HEADER; - char padding[sizeof(void*) * 4 - sizeof(uint32_t)]; +#if defined(MRB_WORD_BOXING) && defined(MRB_32BIT) && defined(MRB_USE_FLOAT32) && !defined(MRB_WORDBOX_NO_INLINE_FLOAT) + /* inline float word boxing needs 8-byte aligned objects; + pad RVALUE to 24 bytes (multiple of 8) on 32-bit */ + char padding[sizeof(void*) * 4]; +#else + char padding[sizeof(void*) * 3]; +#endif }; -typedef struct { +struct RVALUE { union { struct RVALUE_initializer init; /* must be first member to ensure initialization */ struct free_obj free; struct RBasic basic; struct RObject object; struct RClass klass; +#if defined(MRB_WORD_BOXING) || (defined(MRB_NAN_BOXING) && defined(MRB_INT64)) + struct RInteger integer; +#endif +#if defined(MRB_WORD_BOXING) && !defined(MRB_NO_FLOAT) && defined(MRB_WORDBOX_NO_INLINE_FLOAT) + struct RFloat flt; +#endif struct RString string; struct RArray array; struct RHash hash; @@ -136,50 +154,7 @@ typedef struct { struct RException exc; struct RBreak brk; } as; -} RVALUE; - -#ifdef GC_PROFILE -#include -#include - -static double program_invoke_time = 0; -static double gc_time = 0; -static double gc_total_time = 0; - -static double -gettimeofday_time(void) -{ - struct timeval tv; - gettimeofday(&tv, NULL); - return tv.tv_sec + tv.tv_usec * 1e-6; -} - -#define GC_INVOKE_TIME_REPORT(with) do {\ - fprintf(stderr, "%s\n", with);\ - fprintf(stderr, "gc_invoke: %19.3f\n", gettimeofday_time() - program_invoke_time);\ - fprintf(stderr, "is_generational: %d\n", is_generational(gc));\ - fprintf(stderr, "is_major_gc: %d\n", is_major_gc(gc));\ -} while(0) - -#define GC_TIME_START do {\ - gc_time = gettimeofday_time();\ -} while(0) - -#define GC_TIME_STOP_AND_REPORT do {\ - gc_time = gettimeofday_time() - gc_time;\ - gc_total_time += gc_time;\ - fprintf(stderr, "gc_state: %d\n", gc->state);\ - fprintf(stderr, "live: %zu\n", gc->live);\ - fprintf(stderr, "majorgc_old_threshold: %zu\n", gc->majorgc_old_threshold);\ - fprintf(stderr, "gc_threshold: %zu\n", gc->threshold);\ - fprintf(stderr, "gc_time: %30.20f\n", gc_time);\ - fprintf(stderr, "gc_total_time: %30.20f\n\n", gc_total_time);\ -} while(0) -#else -#define GC_INVOKE_TIME_REPORT(s) -#define GC_TIME_START -#define GC_TIME_STOP_AND_REPORT -#endif +}; #ifdef GC_DEBUG #define DEBUG(x) (x) @@ -191,35 +166,45 @@ gettimeofday_time(void) #define MRB_HEAP_PAGE_SIZE 1024 #endif +typedef struct mrb_heap_page { + RVALUE *freelist; + struct mrb_heap_page *next; + struct mrb_heap_page *free_next; + mrb_bool old:1; + mrb_bool region:1; /* from contiguous region, not malloc */ + RVALUE objects[MRB_HEAP_PAGE_SIZE]; +} mrb_heap_page; + +typedef struct mrb_heap_region { + struct mrb_heap_region *next; + uint8_t *base; /* start of user buffer */ + size_t size; /* buffer size in bytes */ + uint16_t page_count; /* pages carved from region */ +} mrb_heap_region; + #define GC_STEP_SIZE 1024 /* white: 001 or 010, black: 100, gray: 000, red:111 */ #define GC_GRAY 0 #define GC_WHITE_A 1 -#define GC_WHITE_B (1 << 1) -#define GC_BLACK (1 << 2) +#define GC_WHITE_B 2 +#define GC_BLACK 4 #define GC_RED MRB_GC_RED #define GC_WHITES (GC_WHITE_A | GC_WHITE_B) #define GC_COLOR_MASK 7 mrb_static_assert(MRB_GC_RED <= GC_COLOR_MASK); -#define paint_gray(o) ((o)->color = GC_GRAY) -#define paint_black(o) ((o)->color = GC_BLACK) -#define paint_white(o) ((o)->color = GC_WHITES) -#define paint_partial_white(s, o) ((o)->color = (s)->current_white_part) -#define is_gray(o) ((o)->color == GC_GRAY) -#define is_white(o) ((o)->color & GC_WHITES) -#define is_black(o) ((o)->color == GC_BLACK) -#define is_red(o) ((o)->color == GC_RED) +#define paint_gray(o) ((o)->gc_color = GC_GRAY) +#define paint_black(o) ((o)->gc_color = GC_BLACK) +#define paint_white(o) ((o)->gc_color = GC_WHITES) +#define paint_partial_white(s, o) ((o)->gc_color = (s)->current_white_part) +#define is_gray(o) ((o)->gc_color == GC_GRAY) +#define is_white(o) ((o)->gc_color & GC_WHITES) +#define is_black(o) ((o)->gc_color == GC_BLACK) +#define is_red(o) ((o)->gc_color == GC_RED) #define flip_white_part(s) ((s)->current_white_part = other_white_part(s)) #define other_white_part(s) ((s)->current_white_part ^ GC_WHITES) -#define is_dead(s, o) (((o)->color & other_white_part(s) & GC_WHITES) || (o)->tt == MRB_TT_FREE) - -/* We have removed `objects[]` from `mrb_heap_page` since it was not C++ - * compatible. Using array index to get pointer after structure instead. */ - -/* #define objects(p) ((RVALUE *)p->objects) */ -#define objects(p) ((RVALUE *)&p[1]) +#define is_dead(s, o) (((o)->gc_color & other_white_part(s) & GC_WHITES) || (o)->tt == MRB_TT_FREE) mrb_noreturn void mrb_raise_nomemory(mrb_state *mrb); @@ -229,12 +214,25 @@ mrb_realloc_simple(mrb_state *mrb, void *p, size_t len) void *p2; #if defined(MRB_GC_STRESS) && defined(MRB_DEBUG) - mrb_full_gc(mrb); + if (mrb->gc.state != MRB_GC_STATE_SWEEP) { + mrb_full_gc(mrb); + } #endif - p2 = (mrb->allocf)(mrb, p, len, mrb->allocf_ud); - if (!p2 && len > 0 && mrb->gc.heaps) { + p2 = mrb_basic_alloc_func(p, len); + if (!p2 && len > 0 && mrb->gc.heaps && mrb->gc.state != MRB_GC_STATE_SWEEP) { mrb_full_gc(mrb); - p2 = (mrb->allocf)(mrb, p, len, mrb->allocf_ud); + p2 = mrb_basic_alloc_func(p, len); + } + + if (p2 && len > 0) { + mrb->gc.malloc_increase += len; + if (mrb->gc.malloc_threshold > 0 && + mrb->gc.malloc_increase >= mrb->gc.malloc_threshold && + mrb->gc.state == MRB_GC_STATE_ROOT && + !mrb->gc.disabled && !mrb->gc.iterating) { + mrb->gc.malloc_increase = 0; + mrb_incremental_gc(mrb); + } } return p2; @@ -275,16 +273,17 @@ mrb_calloc(mrb_state *mrb, size_t nelem, size_t len) { void *p; - if (nelem > 0 && len > 0 && - nelem <= SIZE_MAX / len) { - size_t size; - size = nelem * len; + if (nelem == 0 || len == 0) { + p = NULL; + } + else if (nelem <= SIZE_MAX / len) { + size_t size = nelem * len; p = mrb_malloc(mrb, size); memset(p, 0, size); } else { - p = NULL; + mrb_raise(mrb, E_ARGUMENT_ERROR, "memory allocation overflow"); } return p; @@ -293,11 +292,11 @@ mrb_calloc(mrb_state *mrb, size_t nelem, size_t len) MRB_API void mrb_free(mrb_state *mrb, void *p) { - (mrb->allocf)(mrb, p, 0, mrb->allocf_ud); + mrb_basic_alloc_func(p, 0); } MRB_API void* -mrb_alloca(mrb_state *mrb, size_t size) +mrb_temp_alloc(mrb_state *mrb, size_t size) { struct RString *s; s = MRB_OBJ_ALLOC(mrb, MRB_TT_STRING, NULL); @@ -305,16 +304,27 @@ mrb_alloca(mrb_state *mrb, size_t size) } static mrb_bool -heap_p(mrb_gc *gc, struct RBasic *object) +heap_p(mrb_gc *gc, const struct RBasic *object) { mrb_heap_page* page; + mrb_heap_region *region; + + /* fast path: check contiguous regions via arithmetic */ + for (region = gc->regions; region; region = region->next) { + uintptr_t addr = (uintptr_t)object; + uintptr_t base = (uintptr_t)region->base; + uintptr_t end = base + (size_t)region->page_count * sizeof(mrb_heap_page); + if (addr >= base && addr < end) { + return TRUE; + } + } page = gc->heaps; while (page) { RVALUE *p; - p = objects(page); - if (&p[0].as.basic <= object && object <= &p[MRB_HEAP_PAGE_SIZE - 1].as.basic) { + p = page->objects; + if ((uintptr_t)object - (uintptr_t)p <= (MRB_HEAP_PAGE_SIZE - 1) * sizeof(RVALUE)) { return TRUE; } page = page->next; @@ -323,7 +333,8 @@ heap_p(mrb_gc *gc, struct RBasic *object) } MRB_API mrb_bool -mrb_object_dead_p(mrb_state *mrb, struct RBasic *object) { +mrb_object_dead_p(mrb_state *mrb, struct RBasic *object) +{ mrb_gc *gc = &mrb->gc; if (!heap_p(gc, object)) return TRUE; return is_dead(gc, object); @@ -333,63 +344,67 @@ static void link_heap_page(mrb_gc *gc, mrb_heap_page *page) { page->next = gc->heaps; - if (gc->heaps) - gc->heaps->prev = page; gc->heaps = page; -} - -static void -unlink_heap_page(mrb_gc *gc, mrb_heap_page *page) -{ - if (page->prev) - page->prev->next = page->next; - if (page->next) - page->next->prev = page->prev; - if (gc->heaps == page) - gc->heaps = page->next; - page->prev = NULL; - page->next = NULL; -} - -static void -link_free_heap_page(mrb_gc *gc, mrb_heap_page *page) -{ page->free_next = gc->free_heaps; - if (gc->free_heaps) { - gc->free_heaps->free_prev = page; - } gc->free_heaps = page; } static void -unlink_free_heap_page(mrb_gc *gc, mrb_heap_page *page) -{ - if (page->free_prev) - page->free_prev->free_next = page->free_next; - if (page->free_next) - page->free_next->free_prev = page->free_prev; - if (gc->free_heaps == page) - gc->free_heaps = page->free_next; - page->free_prev = NULL; - page->free_next = NULL; -} - -static void -add_heap(mrb_state *mrb, mrb_gc *gc) +init_heap_page(mrb_heap_page *page) { - mrb_heap_page *page = (mrb_heap_page *)mrb_calloc(mrb, 1, sizeof(mrb_heap_page) + MRB_HEAP_PAGE_SIZE * sizeof(RVALUE)); RVALUE *p, *e; - struct RBasic *prev = NULL; + RVALUE *prev = NULL; - for (p = objects(page), e=p+MRB_HEAP_PAGE_SIZE; pobjects, e=p+MRB_HEAP_PAGE_SIZE; pas.free.tt = MRB_TT_FREE; p->as.free.next = prev; - prev = &p->as.basic; + prev = p; } page->freelist = prev; +} +static void +add_heap(mrb_state *mrb, mrb_gc *gc) +{ + mrb_heap_page *page = (mrb_heap_page*)mrb_calloc(mrb, 1, sizeof(mrb_heap_page)); + init_heap_page(page); link_heap_page(gc, page); - link_free_heap_page(gc, page); +} + +MRB_API int +mrb_gc_add_region(mrb_state *mrb, void *start, size_t size) +{ + mrb_gc *gc = &mrb->gc; + uint8_t *base = (uint8_t*)start; + mrb_heap_region *region; + uint16_t page_count; + uint16_t i; + + /* align base to pointer size */ + uintptr_t align = sizeof(void*); + uintptr_t offset = ((uintptr_t)base + align - 1) & ~(align - 1); + size -= (size_t)(offset - (uintptr_t)base); + base = (uint8_t*)offset; + + page_count = (uint16_t)(size / sizeof(mrb_heap_page)); + if (page_count == 0) return 0; + + region = (mrb_heap_region*)mrb_malloc(mrb, sizeof(mrb_heap_region)); + region->base = base; + region->size = size; + region->page_count = page_count; + region->next = gc->regions; + gc->regions = region; + + /* carve pages from the contiguous buffer */ + for (i = 0; i < page_count; i++) { + mrb_heap_page *page = (mrb_heap_page*)(base + (size_t)i * sizeof(mrb_heap_page)); + memset(page, 0, sizeof(mrb_heap_page)); + page->region = TRUE; + init_heap_page(page); + link_heap_page(gc, page); + } + return page_count; } #define DEFAULT_GC_INTERVAL_RATIO 200 @@ -411,6 +426,7 @@ mrb_gc_init(mrb_state *mrb, mrb_gc *gc) gc->current_white_part = GC_WHITE_A; gc->heaps = NULL; gc->free_heaps = NULL; + gc->regions = NULL; add_heap(mrb, gc); gc->interval_ratio = DEFAULT_GC_INTERVAL_RATIO; gc->step_ratio = DEFAULT_GC_STEP_RATIO; @@ -418,13 +434,9 @@ mrb_gc_init(mrb_state *mrb, mrb_gc *gc) gc->generational = TRUE; gc->full = TRUE; #endif - -#ifdef GC_PROFILE - program_invoke_time = gettimeofday_time(); -#endif } -static void obj_free(mrb_state *mrb, struct RBasic *obj, int end); +static void obj_free(mrb_state *mrb, struct RBasic *obj, mrb_bool end); static void free_heap(mrb_state *mrb, mrb_gc *gc) @@ -436,11 +448,13 @@ free_heap(mrb_state *mrb, mrb_gc *gc) while (page) { tmp = page; page = page->next; - for (p = objects(tmp), e=p+MRB_HEAP_PAGE_SIZE; pobjects, e=p+MRB_HEAP_PAGE_SIZE; pas.free.tt != MRB_TT_FREE) obj_free(mrb, &p->as.basic, TRUE); } - mrb_free(mrb, tmp); + if (!tmp->region) { + mrb_free(mrb, tmp); + } } } @@ -448,13 +462,22 @@ void mrb_gc_destroy(mrb_state *mrb, mrb_gc *gc) { free_heap(mrb, gc); + /* free region descriptors (buffer memory belongs to the caller) */ + { + mrb_heap_region *region = gc->regions; + while (region) { + mrb_heap_region *next = region->next; + mrb_free(mrb, region); + region = next; + } + } #ifndef MRB_GC_FIXED_ARENA mrb_free(mrb, gc->arena); #endif } static void -gc_protect(mrb_state *mrb, mrb_gc *gc, struct RBasic *p) +gc_arena_keep(mrb_state *mrb, mrb_gc *gc) { #ifdef MRB_GC_FIXED_ARENA if (gc->arena_idx >= MRB_GC_ARENA_SIZE) { @@ -469,6 +492,16 @@ gc_protect(mrb_state *mrb, mrb_gc *gc, struct RBasic *p) gc->arena = (struct RBasic**)mrb_realloc(mrb, gc->arena, sizeof(struct RBasic*)*newcapa); gc->arena_capa = newcapa; } +#endif +} + +static inline void +gc_protect(mrb_state *mrb, mrb_gc *gc, struct RBasic *p) +{ +#ifdef MRB_GC_FIXED_ARENA + mrb_assert(gc->arena_idx < MRB_GC_ARENA_SIZE); +#else + mrb_assert(gc->arena_idx < gc->arena_capa); #endif gc->arena[gc->arena_idx++] = p; } @@ -480,6 +513,7 @@ mrb_gc_protect(mrb_state *mrb, mrb_value obj) if (mrb_immediate_p(obj)) return; struct RBasic *p = mrb_basic_ptr(obj); if (is_red(p)) return; + gc_arena_keep(mrb, &mrb->gc); gc_protect(mrb, &mrb->gc, p); } @@ -496,57 +530,80 @@ mrb_gc_protect(mrb_state *mrb, mrb_value obj) MRB_API void mrb_gc_register(mrb_state *mrb, mrb_value obj) { - mrb_sym root; - mrb_value table; - if (mrb_immediate_p(obj)) return; - root = GC_ROOT_SYM; - table = mrb_gv_get(mrb, root); - if (mrb_nil_p(table) || !mrb_array_p(table)) { + mrb_value table = mrb_gv_get(mrb, GC_ROOT_SYM); + int ai = mrb_gc_arena_save(mrb); + mrb_gc_protect(mrb, obj); + if (!mrb_array_p(table)) { table = mrb_ary_new(mrb); - mrb_gv_set(mrb, root, table); + mrb_obj_ptr(table)->c = NULL; /* hide from ObjectSpace.each_object */ + mrb_gv_set(mrb, GC_ROOT_SYM, table); } mrb_ary_push(mrb, table, obj); + mrb_gc_arena_restore(mrb, ai); } /* mrb_gc_unregister() removes the object from GC root. */ MRB_API void mrb_gc_unregister(mrb_state *mrb, mrb_value obj) { - mrb_sym root; - mrb_value table; - struct RArray *a; - mrb_int i; - if (mrb_immediate_p(obj)) return; - root = GC_ROOT_SYM; - table = mrb_gv_get(mrb, root); - if (mrb_nil_p(table)) return; - if (!mrb_array_p(table)) { - mrb_gv_set(mrb, root, mrb_nil_value()); - return; - } - a = mrb_ary_ptr(table); + mrb_value table = mrb_gv_get(mrb, GC_ROOT_SYM); + if (!mrb_array_p(table)) return; + struct RArray *a = mrb_ary_ptr(table); mrb_ary_modify(mrb, a); - for (i = 0; i < ARY_LEN(a); i++) { - if (mrb_ptr(ARY_PTR(a)[i]) == mrb_ptr(obj)) { - mrb_int len = ARY_LEN(a)-1; - mrb_value *ptr = ARY_PTR(a); - - ARY_SET_LEN(a, len); - memmove(&ptr[i], &ptr[i + 1], (len - i) * sizeof(mrb_value)); - break; + mrb_int len = ARY_LEN(a); + mrb_value *ptr = ARY_PTR(a); + mrb_int w = 0; + for (mrb_int r = 0; r < len; r++) { + if (mrb_ptr(ptr[r]) != mrb_ptr(obj)) { + ptr[w++] = ptr[r]; } } + ARY_SET_LEN(a, w); } -MRB_API struct RBasic* -mrb_obj_alloc(mrb_state *mrb, enum mrb_vtype ttype, struct RClass *cls) +/* Core allocation without type validation. + Used internally by mrb_proc_new, mrb_env_new, etc. */ +struct RBasic* +mrb_obj_alloc_core(mrb_state *mrb, enum mrb_vtype ttype, struct RClass *cls) { - struct RBasic *p; - static const RVALUE RVALUE_zero = { { { NULL, NULL, MRB_TT_FALSE } } }; + static const RVALUE RVALUE_zero = { { { NULL, MRB_TT_FALSE } } }; mrb_gc *gc = &mrb->gc; +#ifdef MRB_GC_STRESS + mrb_full_gc(mrb); +#endif + gc->gc_debt++; + if (gc->gc_debt > 0) { + mrb_incremental_gc(mrb); + } + gc_arena_keep(mrb, gc); + if (gc->free_heaps == NULL) { + add_heap(mrb, gc); + } + + RVALUE *p = gc->free_heaps->freelist; + gc->free_heaps->freelist = p->as.free.next; + if (gc->free_heaps->freelist == NULL) { + gc->free_heaps = gc->free_heaps->free_next; + } + + gc->live++; + gc_protect(mrb, gc, &p->as.basic); + *p = RVALUE_zero; + p->as.basic.tt = ttype; + p->as.basic.c = cls; + if (ttype == MRB_TT_OBJECT) { + p->as.basic.flags |= MRB_FL_OBJ_SHAPED; + } + paint_partial_white(gc, &p->as.basic); + return &p->as.basic; +} + +MRB_API struct RBasic* +mrb_obj_alloc(mrb_state *mrb, enum mrb_vtype ttype, struct RClass *cls) +{ if (cls) { enum mrb_vtype tt; @@ -560,46 +617,23 @@ mrb_obj_alloc(mrb_state *mrb, enum mrb_vtype ttype, struct RClass *cls) mrb_raise(mrb, E_TYPE_ERROR, "allocation failure"); } tt = MRB_INSTANCE_TT(cls); - if (tt != MRB_TT_FALSE && - ttype != MRB_TT_SCLASS && + if (ttype != MRB_TT_SCLASS && ttype != MRB_TT_ICLASS && ttype != MRB_TT_ENV && ttype != MRB_TT_BIGINT && - ttype != tt) { + ttype != tt && + !(cls == mrb->object_class && (ttype == MRB_TT_CPTR || ttype == MRB_TT_CDATA || ttype == MRB_TT_ISTRUCT))) { mrb_raisef(mrb, E_TYPE_ERROR, "allocation failure of %C", cls); } } if (ttype <= MRB_TT_FREE) { mrb_raisef(mrb, E_TYPE_ERROR, "allocation failure of %C (type %d)", cls, (int)ttype); } - -#ifdef MRB_GC_STRESS - mrb_full_gc(mrb); -#endif - if (gc->threshold < gc->live) { - mrb_incremental_gc(mrb); - } - if (gc->free_heaps == NULL) { - add_heap(mrb, gc); - } - - p = gc->free_heaps->freelist; - gc->free_heaps->freelist = ((struct free_obj*)p)->next; - if (gc->free_heaps->freelist == NULL) { - unlink_free_heap_page(gc, gc->free_heaps); - } - - gc->live++; - gc_protect(mrb, gc, p); - *(RVALUE *)p = RVALUE_zero; - p->tt = ttype; - p->c = cls; - paint_partial_white(gc, p); - return p; + return mrb_obj_alloc_core(mrb, ttype, cls); } static inline void -add_gray_list(mrb_state *mrb, mrb_gc *gc, struct RBasic *obj) +add_gray_list(mrb_gc *gc, struct RBasic *obj) { #ifdef MRB_GC_STRESS if (obj->tt > MRB_TT_MAXDEFINE) { @@ -607,18 +641,18 @@ add_gray_list(mrb_state *mrb, mrb_gc *gc, struct RBasic *obj) } #endif paint_gray(obj); - obj->gcnext = gc->gray_list; - gc->gray_list = obj; + if (gc->gray_stack_top < MRB_GRAY_STACK_SIZE) { + gc->gray_stack[gc->gray_stack_top++] = obj; + } + else { + gc->gray_overflow = TRUE; + } } -mrb_int mrb_ci_nregs(mrb_callinfo *ci); - static void mark_context_stack(mrb_state *mrb, struct mrb_context *c) { - size_t i; - size_t e; - mrb_value nil; + size_t i, e; if (c->stbase == NULL) return; if (c->ci) { @@ -637,9 +671,8 @@ mark_context_stack(mrb_state *mrb, struct mrb_context *c) } } e = c->stend - c->stbase; - nil = mrb_nil_value(); for (; istbase[i] = nil; + SET_NIL_VALUE(c->stbase[i]); } } @@ -669,9 +702,11 @@ mark_context(mrb_state *mrb, struct mrb_context *c) } } -static void +static size_t gc_mark_children(mrb_state *mrb, mrb_gc *gc, struct RBasic *obj) { + size_t children = 0; + mrb_assert(is_gray(obj)); paint_black(obj); mrb_gc_mark(mrb, (struct RBasic*)obj->c); @@ -679,9 +714,11 @@ gc_mark_children(mrb_state *mrb, mrb_gc *gc, struct RBasic *obj) case MRB_TT_ICLASS: { struct RClass *c = (struct RClass*)obj; - if (MRB_FLAG_TEST(c, MRB_FL_CLASS_IS_ORIGIN)) - mrb_gc_mark_mt(mrb, c); + if (MRB_FLAG_TEST(c, MRB_FL_CLASS_IS_ORIGIN)) { + children += mrb_gc_mark_mt(mrb, c); + } mrb_gc_mark(mrb, (struct RBasic*)((struct RClass*)obj)->super); + children++; } break; @@ -693,12 +730,14 @@ gc_mark_children(mrb_state *mrb, mrb_gc *gc, struct RBasic *obj) mrb_gc_mark_mt(mrb, c); mrb_gc_mark(mrb, (struct RBasic*)c->super); + children += mrb_gc_mark_mt(mrb, c); + children++; } /* fall through */ case MRB_TT_OBJECT: - case MRB_TT_DATA: - mrb_gc_mark_iv(mrb, (struct RObject*)obj); + case MRB_TT_CDATA: + children += mrb_gc_mark_iv(mrb, (struct RObject*)obj); break; case MRB_TT_PROC: @@ -707,21 +746,21 @@ gc_mark_children(mrb_state *mrb, mrb_gc *gc, struct RBasic *obj) mrb_gc_mark(mrb, (struct RBasic*)p->upper); mrb_gc_mark(mrb, (struct RBasic*)p->e.env); + children+=2; } break; case MRB_TT_ENV: { struct REnv *e = (struct REnv*)obj; - mrb_int i, len; - if (MRB_ENV_ONSTACK_P(e) && e->cxt && e->cxt->fib) { - mrb_gc_mark(mrb, (struct RBasic*)e->cxt->fib); - } - len = MRB_ENV_LEN(e); - for (i=0; istack[i]); } + children += len; } break; @@ -729,7 +768,20 @@ gc_mark_children(mrb_state *mrb, mrb_gc *gc, struct RBasic *obj) { struct mrb_context *c = ((struct RFiber*)obj)->cxt; - if (c) mark_context(mrb, c); + if (!c || c->status == MRB_FIBER_TERMINATED) break; + mark_context(mrb, c); + if (!c->ci) break; + + /* mark stack */ + size_t i = c->ci->stack - c->stbase; + i += mrb_ci_nregs(c->ci); + if (c->stbase + i > c->stend) i = c->stend - c->stbase; + children += i; + + /* mark closure */ + if (c->cibase) { + children += c->ci - c->cibase + 1; + } } break; @@ -737,18 +789,19 @@ gc_mark_children(mrb_state *mrb, mrb_gc *gc, struct RBasic *obj) case MRB_TT_ARRAY: { struct RArray *a = (struct RArray*)obj; - size_t i, e=ARY_LEN(a); + size_t len = ARY_LEN(a); mrb_value *p = ARY_PTR(a); - for (i=0; imesg) { mrb_gc_mark(mrb, (struct RBasic*)((struct RException*)obj)->mesg); + children++; + } + if (((struct RException*)obj)->backtrace) { + mrb_gc_mark(mrb, (struct RBasic*)((struct RException*)obj)->backtrace); + children++; } - mrb_gc_mark(mrb, (struct RBasic*)((struct RException*)obj)->backtrace); break; + case MRB_TT_BACKTRACE: + children += ((struct RBacktrace*)obj)->len; + break; + +#if defined(MRB_USE_RATIONAL) && defined(MRB_USE_BIGINT) + case MRB_TT_RATIONAL: + children += mrb_rational_mark(mrb, obj); + break; +#endif +#ifdef MRB_USE_SET + case MRB_TT_SET: + children += mrb_gc_mark_set(mrb, obj); + break; +#endif + default: break; } + return children; } MRB_API void @@ -790,11 +863,36 @@ mrb_gc_mark(mrb_state *mrb, struct RBasic *obj) if (!is_white(obj)) return; if (is_red(obj)) return; mrb_assert((obj)->tt != MRB_TT_FREE); - add_gray_list(mrb, &mrb->gc, obj); + switch (obj->tt) { + case MRB_TT_STRING: + /* most strings have no children; handle fshared inline */ + paint_black(obj); + mrb_gc_mark(mrb, (struct RBasic*)obj->c); + if (RSTR_FSHARED_P(obj)) { + struct RString *s = (struct RString*)obj; + mrb_gc_mark(mrb, (struct RBasic*)s->as.heap.aux.fshared); + } + return; + case MRB_TT_INTEGER: + case MRB_TT_CPTR: +#ifdef MRB_USE_BIGINT + case MRB_TT_BIGINT: +#endif +#ifdef MRB_USE_COMPLEX + case MRB_TT_COMPLEX: +#endif + /* leaf types: no children besides class */ + paint_black(obj); + mrb_gc_mark(mrb, (struct RBasic*)obj->c); + return; + default: + break; + } + add_gray_list(&mrb->gc, obj); } static void -obj_free(mrb_state *mrb, struct RBasic *obj, int end) +obj_free(mrb_state *mrb, struct RBasic *obj, mrb_bool end) { DEBUG(fprintf(stderr, "obj_free(%p,tt=%d)\n",obj,obj->tt)); switch (obj->tt) { @@ -824,13 +922,9 @@ obj_free(mrb_state *mrb, struct RBasic *obj, int end) { struct REnv *e = (struct REnv*)obj; - if (MRB_ENV_ONSTACK_P(e)) { - /* cannot be freed */ - e->stack = NULL; - break; + if (!MRB_ENV_ONSTACK_P(e)) { + mrb_free(mrb, e->stack); } - mrb_free(mrb, e->stack); - e->stack = NULL; } break; @@ -845,7 +939,7 @@ obj_free(mrb_state *mrb, struct RBasic *obj, int end) while (ce <= ci) { struct REnv *e = ci->u.env; - if (e && !mrb_object_dead_p(mrb, (struct RBasic*)e) && + if (e && heap_p(&mrb->gc, (struct RBasic*)e) && !is_dead(&mrb->gc, (struct RBasic*)e) && e->tt == MRB_TT_ENV && MRB_ENV_ONSTACK_P(e)) { mrb_env_unshare(mrb, e, TRUE); } @@ -878,7 +972,7 @@ obj_free(mrb_state *mrb, struct RBasic *obj, int end) { struct RProc *p = (struct RProc*)obj; - if (!MRB_PROC_CFUNC_P(p) && p->body.irep) { + if (!MRB_PROC_CFUNC_P(p) && !MRB_PROC_ALIAS_P(p) && p->body.irep) { mrb_irep *irep = (mrb_irep*)p->body.irep; if (end) { mrb_irep_cutref(mrb, irep); @@ -892,7 +986,13 @@ obj_free(mrb_state *mrb, struct RBasic *obj, int end) mrb_gc_free_range(mrb, ((struct RRange*)obj)); break; - case MRB_TT_DATA: +#ifdef MRB_USE_SET + case MRB_TT_SET: + mrb_gc_free_set(mrb, obj); + break; +#endif + + case MRB_TT_CDATA: { struct RData *d = (struct RData*)obj; if (d->type && d->type->dfree) { @@ -926,6 +1026,17 @@ obj_free(mrb_state *mrb, struct RBasic *obj, int end) break; #endif + case MRB_TT_BACKTRACE: + { + struct RBacktrace *bt = (struct RBacktrace*)obj; + for (size_t i = 0; i < bt->len; i++) { + const mrb_irep *irep = bt->locations[i].irep; + if (irep == NULL) continue; + mrb_irep_decref(mrb, (mrb_irep*)irep); + } + mrb_free(mrb, bt->locations); + } + default: break; } @@ -942,8 +1053,8 @@ root_scan_phase(mrb_state *mrb, mrb_gc *gc) int i, e; if (!is_minor_gc(gc)) { - gc->gray_list = NULL; - gc->atomic_gray_list = NULL; + gc->gray_stack_top = 0; + gc->gray_overflow = FALSE; } mrb_gc_mark_gv(mrb); @@ -985,105 +1096,44 @@ root_scan_phase(mrb_state *mrb, mrb_gc *gc) if (mrb->root_c != mrb->c) { mark_context(mrb, mrb->root_c); } + +#ifdef MRB_USE_TASK_SCHEDULER + /* mark tasks - calls into task.c to mark all task queues */ + mrb_task_mark_all(mrb); +#endif } -/* rough estimation of number of GC marks (non recursive) */ -static size_t -gc_gray_counts(mrb_state *mrb, mrb_gc *gc, struct RBasic *obj) +static void +gc_gray_rescan(mrb_state *mrb, mrb_gc *gc) { - size_t children = 0; - - switch (obj->tt) { - case MRB_TT_ICLASS: - children++; - break; - - case MRB_TT_CLASS: - case MRB_TT_SCLASS: - case MRB_TT_MODULE: - { - struct RClass *c = (struct RClass*)obj; - - children += mrb_gc_mark_iv_size(mrb, (struct RObject*)obj); - children += mrb_gc_mark_mt_size(mrb, c); - children++; - } - break; - - case MRB_TT_OBJECT: - case MRB_TT_DATA: - children += mrb_gc_mark_iv_size(mrb, (struct RObject*)obj); - break; - - case MRB_TT_ENV: - children += MRB_ENV_LEN(obj); - break; - - case MRB_TT_FIBER: - { - struct mrb_context *c = ((struct RFiber*)obj)->cxt; - size_t i; - mrb_callinfo *ci; - - if (!c || c->status == MRB_FIBER_TERMINATED) break; - if (!c->ci) break; - - /* mark stack */ - i = c->ci->stack - c->stbase; - i += mrb_ci_nregs(c->ci); - if (c->stbase + i > c->stend) i = c->stend - c->stbase; - children += i; + mrb_heap_page *page = gc->heaps; - /* mark closure */ - if (c->cibase) { - for (i=0, ci = c->cibase; ci <= c->ci; i++, ci++) - ; + gc->gray_overflow = FALSE; + while (page) { + RVALUE *p = page->objects; + RVALUE *e = p + MRB_HEAP_PAGE_SIZE; + for (; p < e; p++) { + if (is_gray(&p->as.basic) && p->as.basic.tt != MRB_TT_FREE) { + if (gc->gray_stack_top >= MRB_GRAY_STACK_SIZE) { + gc->gray_overflow = TRUE; + return; + } + gc->gray_stack[gc->gray_stack_top++] = &p->as.basic; } - children += i; - } - break; - - case MRB_TT_STRUCT: - case MRB_TT_ARRAY: - { - struct RArray *a = (struct RArray*)obj; - children += ARY_LEN(a); } - break; - - case MRB_TT_HASH: - children += mrb_gc_mark_iv_size(mrb, (struct RObject*)obj); - children += mrb_gc_mark_hash_size(mrb, (struct RHash*)obj); - break; - - case MRB_TT_PROC: - case MRB_TT_RANGE: - case MRB_TT_BREAK: - children+=2; - break; - - case MRB_TT_EXCEPTION: - children += mrb_gc_mark_iv_size(mrb, (struct RObject*)obj); - if (((struct RException*)obj)->mesg) { - children++; - } - if (((struct RException*)obj)->backtrace) { - children++; - } - break; - - default: - break; + page = page->next; } - return children; } static void gc_mark_gray_list(mrb_state *mrb, mrb_gc *gc) { - while (gc->gray_list) { - struct RBasic *obj = gc->gray_list; - gc->gray_list = obj->gcnext; - gc_mark_children(mrb, gc, obj); + for (;;) { + while (gc->gray_stack_top > 0) { + struct RBasic *obj = gc->gray_stack[--gc->gray_stack_top]; + gc_mark_children(mrb, gc, obj); + } + if (!gc->gray_overflow) break; + gc_gray_rescan(mrb, gc); } } @@ -1092,11 +1142,18 @@ incremental_marking_phase(mrb_state *mrb, mrb_gc *gc, size_t limit) { size_t tried_marks = 0; - while (gc->gray_list && tried_marks < limit) { - struct RBasic *obj = gc->gray_list; - gc->gray_list = obj->gcnext; - gc_mark_children(mrb, gc, obj); - tried_marks += gc_gray_counts(mrb, gc, obj); + while (tried_marks < limit) { + if (gc->gray_stack_top > 0) { + struct RBasic *obj = gc->gray_stack[--gc->gray_stack_top]; + tried_marks += gc_mark_children(mrb, gc, obj); + } + else if (gc->gray_overflow) { + gc_gray_rescan(mrb, gc); + if (gc->gray_stack_top == 0) break; + } + else { + break; + } } return tried_marks; @@ -1131,92 +1188,97 @@ final_marking_phase(mrb_state *mrb, mrb_gc *gc) mark_context(mrb, mrb->root_c); } mrb_gc_mark(mrb, (struct RBasic*)mrb->exc); + /* mark pre-allocated exception */ clear_error_object(mrb, mrb->nomem_err); clear_error_object(mrb, mrb->stack_err); #ifdef MRB_GC_FIXED_ARENA clear_error_object(mrb, mrb->arena_err); #endif + gc_mark_gray_list(mrb, gc); - mrb_assert(gc->gray_list == NULL); - gc->gray_list = gc->atomic_gray_list; - gc->atomic_gray_list = NULL; - gc_mark_gray_list(mrb, gc); - mrb_assert(gc->gray_list == NULL); } static void prepare_incremental_sweep(mrb_state *mrb, mrb_gc *gc) { + // mrb_assert(gc->gray_stack_top == 0); gc->state = MRB_GC_STATE_SWEEP; - gc->sweeps = gc->heaps; + gc->sweeps = NULL; gc->live_after_mark = gc->live; } static size_t incremental_sweep_phase(mrb_state *mrb, mrb_gc *gc, size_t limit) { - mrb_heap_page *page = gc->sweeps; + mrb_heap_page *prev = gc->sweeps; + mrb_heap_page *page = prev ? prev->next : gc->heaps; size_t tried_sweep = 0; while (page && (tried_sweep < limit)) { - RVALUE *p = objects(page); - RVALUE *e = p + MRB_HEAP_PAGE_SIZE; size_t freed = 0; mrb_bool dead_slot = TRUE; - mrb_bool full = (page->freelist == NULL); if (is_minor_gc(gc) && page->old) { /* skip a slot which doesn't contain any young object */ - p = e; dead_slot = FALSE; } - while (pas.basic)) { - if (p->as.basic.tt != MRB_TT_FREE) { - obj_free(mrb, &p->as.basic, FALSE); - if (p->as.basic.tt == MRB_TT_FREE) { + else { + RVALUE *p = page->objects; + RVALUE *e = p + MRB_HEAP_PAGE_SIZE; + while (pas.basic)) { + if (p->as.basic.tt != MRB_TT_FREE) { + obj_free(mrb, &p->as.basic, FALSE); + mrb_assert(p->as.basic.tt == MRB_TT_FREE); p->as.free.next = page->freelist; - page->freelist = (struct RBasic*)p; + page->freelist = p; freed++; } - else { - dead_slot = FALSE; - } } + else { + if (!is_generational(gc)) + paint_partial_white(gc, &p->as.basic); /* next gc target */ + dead_slot = FALSE; + } + p++; } - else { - if (!is_generational(gc)) - paint_partial_white(gc, &p->as.basic); /* next gc target */ - dead_slot = FALSE; - } - p++; } /* free dead slot */ - if (dead_slot && freed < MRB_HEAP_PAGE_SIZE) { + if (dead_slot && !page->region) { mrb_heap_page *next = page->next; - unlink_heap_page(gc, page); - unlink_free_heap_page(gc, page); + if (prev) prev->next = next; + if (gc->heaps == page) + gc->heaps = page->next; + mrb_free(mrb, page); page = next; } else { - if (full && freed > 0) { - link_free_heap_page(gc, page); - } if (page->freelist == NULL && is_minor_gc(gc)) page->old = TRUE; else page->old = FALSE; + prev = page; page = page->next; } tried_sweep += MRB_HEAP_PAGE_SIZE; gc->live -= freed; gc->live_after_mark -= freed; } - gc->sweeps = page; + gc->sweeps = prev; + + /* rebuild free_heaps link */ + gc->free_heaps = NULL; + for (mrb_heap_page *p = gc->heaps; p; p=p->next) { + if (p->freelist) { + p->free_next = gc->free_heaps; + gc->free_heaps = p; + } + } + return tried_sweep; } @@ -1230,7 +1292,7 @@ incremental_gc(mrb_state *mrb, mrb_gc *gc, size_t limit) flip_white_part(gc); return 0; case MRB_GC_STATE_MARK: - if (gc->gray_list) { + if (gc->gray_stack_top > 0 || gc->gray_overflow) { return incremental_marking_phase(mrb, gc, limit); } else { @@ -1265,35 +1327,35 @@ incremental_gc_step(mrb_state *mrb, mrb_gc *gc) { size_t limit = 0, result = 0; limit = (GC_STEP_SIZE/100) * gc->step_ratio; + if (gc->step_limit > 0 && limit > gc->step_limit) { + limit = gc->step_limit; + } while (result < limit) { result += incremental_gc(mrb, gc, limit); if (gc->state == MRB_GC_STATE_ROOT) break; } - gc->threshold = gc->live + GC_STEP_SIZE; + gc->gc_debt -= (mrb_int)result; } static void clear_all_old(mrb_state *mrb, mrb_gc *gc) { - mrb_bool origin_mode = gc->generational; - mrb_assert(is_generational(gc)); - if (is_major_gc(gc)) { + if (gc->full) { /* finish the half baked GC */ incremental_gc_finish(mrb, gc); } - /* Sweep the dead objects, then reset all the live objects * (including all the old objects, of course) to white. */ gc->generational = FALSE; prepare_incremental_sweep(mrb, gc); incremental_gc_finish(mrb, gc); - gc->generational = origin_mode; - + gc->generational = TRUE; /* The gray objects have already been painted as white */ - gc->atomic_gray_list = gc->gray_list = NULL; + gc->gray_stack_top = 0; + gc->gray_overflow = FALSE; } MRB_API void @@ -1303,21 +1365,31 @@ mrb_incremental_gc(mrb_state *mrb) if (gc->disabled || gc->iterating) return; - GC_INVOKE_TIME_REPORT("mrb_incremental_gc()"); - GC_TIME_START; - if (is_minor_gc(gc)) { +#ifdef MRB_GC_STATS + gc->gc_total_count++; + gc->minor_gc_count++; +#endif incremental_gc_finish(mrb, gc); } else { +#ifdef MRB_GC_STATS + if (gc->state == MRB_GC_STATE_ROOT) { + gc->gc_total_count++; + gc->major_gc_count++; + } +#endif incremental_gc_step(mrb, gc); } if (gc->state == MRB_GC_STATE_ROOT) { + gc->malloc_increase = 0; mrb_assert(gc->live >= gc->live_after_mark); - gc->threshold = (gc->live_after_mark/100) * gc->interval_ratio; - if (gc->threshold < GC_STEP_SIZE) { - gc->threshold = GC_STEP_SIZE; + { + mrb_int credit = (mrb_int)((gc->live_after_mark/100) * gc->interval_ratio) + - (mrb_int)gc->live_after_mark; + if (credit < (mrb_int)GC_STEP_SIZE) credit = (mrb_int)GC_STEP_SIZE; + gc->gc_debt = -credit; } if (is_major_gc(gc)) { @@ -1325,7 +1397,7 @@ mrb_incremental_gc(mrb_state *mrb) gc->full = FALSE; if (threshold < MAJOR_GC_TOOMANY) { - gc->majorgc_old_threshold = threshold; + gc->oldgen_threshold = threshold; } else { /* too many objects allocated during incremental GC, */ @@ -1333,15 +1405,11 @@ mrb_incremental_gc(mrb_state *mrb) mrb_full_gc(mrb); } } - else if (is_minor_gc(gc)) { - if (gc->live > gc->majorgc_old_threshold) { - clear_all_old(mrb, gc); - gc->full = TRUE; - } + else if (is_minor_gc(gc) && gc->live > gc->oldgen_threshold) { + clear_all_old(mrb, gc); + gc->full = TRUE; } } - - GC_TIME_STOP_AND_REPORT; } /* Perform a full gc cycle */ @@ -1353,9 +1421,10 @@ mrb_full_gc(mrb_state *mrb) if (!mrb->c) return; if (gc->disabled || gc->iterating) return; - GC_INVOKE_TIME_REPORT("mrb_full_gc()"); - GC_TIME_START; - +#ifdef MRB_GC_STATS + gc->gc_total_count++; + gc->major_gc_count++; +#endif if (is_generational(gc)) { /* clear all the old objects back to young */ clear_all_old(mrb, gc); @@ -1367,17 +1436,21 @@ mrb_full_gc(mrb_state *mrb) } incremental_gc_finish(mrb, gc); - gc->threshold = (gc->live_after_mark/100) * gc->interval_ratio; + { + mrb_int credit = (mrb_int)((gc->live_after_mark/100) * gc->interval_ratio) + - (mrb_int)gc->live_after_mark; + if (credit < (mrb_int)GC_STEP_SIZE) credit = (mrb_int)GC_STEP_SIZE; + gc->gc_debt = -credit; + } if (is_generational(gc)) { - gc->majorgc_old_threshold = gc->live_after_mark/100 * MAJOR_GC_INC_RATIO; + gc->oldgen_threshold = gc->live_after_mark/100 * MAJOR_GC_INC_RATIO; gc->full = FALSE; } #ifdef MRB_USE_MALLOC_TRIM malloc_trim(0); #endif - GC_TIME_STOP_AND_REPORT; } MRB_API void @@ -1396,6 +1469,7 @@ mrb_field_write_barrier(mrb_state *mrb, struct RBasic *obj, struct RBasic *value { mrb_gc *gc = &mrb->gc; + if (!value) return; if (!is_black(obj)) return; if (!is_white(value)) return; if (is_red(value)) return; @@ -1404,7 +1478,7 @@ mrb_field_write_barrier(mrb_state *mrb, struct RBasic *obj, struct RBasic *value mrb_assert(is_generational(gc) || gc->state != MRB_GC_STATE_ROOT); if (is_generational(gc) || gc->state == MRB_GC_STATE_MARK) { - add_gray_list(mrb, gc, value); + add_gray_list(gc, value); } else { mrb_assert(gc->state == MRB_GC_STATE_SWEEP); @@ -1431,8 +1505,12 @@ mrb_write_barrier(mrb_state *mrb, struct RBasic *obj) mrb_assert(!is_dead(gc, obj)); mrb_assert(is_generational(gc) || gc->state != MRB_GC_STATE_ROOT); paint_gray(obj); - obj->gcnext = gc->atomic_gray_list; - gc->atomic_gray_list = obj; + if (gc->gray_stack_top < MRB_GRAY_STACK_SIZE) { + gc->gray_stack[gc->gray_stack_top++] = obj; + } + else { + gc->gray_overflow = TRUE; + } } /* @@ -1454,7 +1532,7 @@ gc_start(mrb_state *mrb, mrb_value obj) * call-seq: * GC.enable -> true or false * - * Enables garbage collection, returning true if garbage + * Enables garbage collection, returning `true` if garbage * collection was previously disabled. * * GC.disable #=> false @@ -1477,7 +1555,7 @@ gc_enable(mrb_state *mrb, mrb_value obj) * call-seq: * GC.disable -> true or false * - * Disables garbage collection, returning true if garbage + * Disables garbage collection, returning `true` if garbage * collection was already disabled. * * GC.disable #=> false @@ -1561,6 +1639,44 @@ gc_step_ratio_set(mrb_state *mrb, mrb_value obj) return mrb_nil_value(); } +static mrb_value +gc_step_limit_get(mrb_state *mrb, mrb_value obj) +{ + return mrb_int_value(mrb, (mrb_int)mrb->gc.step_limit); +} + +static mrb_value +gc_step_limit_set(mrb_state *mrb, mrb_value obj) +{ + mrb_int limit; + + mrb_get_args(mrb, "i", &limit); + if (limit < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "step_limit must be non-negative"); + } + mrb->gc.step_limit = (size_t)limit; + return mrb_int_value(mrb, limit); +} + +static mrb_value +gc_malloc_threshold_get(mrb_state *mrb, mrb_value obj) +{ + return mrb_int_value(mrb, (mrb_int)mrb->gc.malloc_threshold); +} + +static mrb_value +gc_malloc_threshold_set(mrb_state *mrb, mrb_value obj) +{ + mrb_int threshold; + + mrb_get_args(mrb, "i", &threshold); + if (threshold < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "malloc_threshold must be non-negative"); + } + mrb->gc.malloc_threshold = (size_t)threshold; + return mrb_int_value(mrb, threshold); +} + static void change_gen_gc_mode(mrb_state *mrb, mrb_gc *gc, mrb_bool enable) { @@ -1575,7 +1691,7 @@ change_gen_gc_mode(mrb_state *mrb, mrb_gc *gc, mrb_bool enable) } else if (!is_generational(gc) && enable) { incremental_gc_finish(mrb, gc); - gc->majorgc_old_threshold = gc->live_after_mark/100 * MAJOR_GC_INC_RATIO; + gc->oldgen_threshold = gc->live_after_mark/100 * MAJOR_GC_INC_RATIO; gc->full = FALSE; } gc->generational = enable; @@ -1624,10 +1740,9 @@ gc_each_objects(mrb_state *mrb, mrb_gc *gc, mrb_each_object_callback *callback, page = gc->heaps; while (page != NULL) { RVALUE *p; - int i; - p = objects(page); - for (i=0; i < MRB_HEAP_PAGE_SIZE; i++) { + p = page->objects; + for (int i=0; i < MRB_HEAP_PAGE_SIZE; i++) { if ((*callback)(mrb, &p[i].as.basic, data) == MRB_EACH_OBJ_BREAK) return; } @@ -1638,11 +1753,8 @@ gc_each_objects(mrb_state *mrb, mrb_gc *gc, mrb_each_object_callback *callback, void mrb_objspace_each_objects(mrb_state *mrb, mrb_each_object_callback *callback, void *data) { - mrb_bool iterating = mrb->gc.iterating; - mrb_full_gc(mrb); - mrb->gc.iterating = TRUE; - if (iterating) { + if (mrb->gc.iterating) { gc_each_objects(mrb, &mrb->gc, callback, data); } else { @@ -1651,11 +1763,12 @@ mrb_objspace_each_objects(mrb_state *mrb, mrb_each_object_callback *callback, vo MRB_TRY(&c_jmp) { mrb->jmp = &c_jmp; + mrb->gc.iterating = TRUE; gc_each_objects(mrb, &mrb->gc, callback, data); mrb->jmp = prev_jmp; - mrb->gc.iterating = iterating; - } MRB_CATCH(&c_jmp) { - mrb->gc.iterating = iterating; + mrb->gc.iterating = FALSE; + } MRB_CATCH(&c_jmp) { + mrb->gc.iterating = FALSE; mrb->jmp = prev_jmp; MRB_THROW(prev_jmp); } MRB_END_EXC(&c_jmp); @@ -1669,22 +1782,70 @@ mrb_objspace_page_slot_size(void) } +/* + * call-seq: + * GC.stat -> Hash + * + * Returns a Hash with GC statistics. + * Keys: :live, :debt, :state, :generational, :full, + * :step_limit, :malloc_increase, :malloc_threshold + * With MRB_GC_STATS: :total, :minor, :major + * + */ + +static mrb_value +gc_stat(mrb_state *mrb, mrb_value self) +{ + mrb_gc *gc = &mrb->gc; + mrb_value hash = mrb_hash_new_capa(mrb, 8); + + mrb_hash_set(mrb, hash, mrb_symbol_value(MRB_SYM(live)), mrb_int_value(mrb, (mrb_int)gc->live)); + mrb_hash_set(mrb, hash, mrb_symbol_value(MRB_SYM(debt)), mrb_int_value(mrb, gc->gc_debt)); + mrb_hash_set(mrb, hash, mrb_symbol_value(MRB_SYM(state)), mrb_int_value(mrb, (mrb_int)gc->state)); + mrb_hash_set(mrb, hash, mrb_symbol_value(MRB_SYM(generational)), mrb_bool_value(gc->generational)); + mrb_hash_set(mrb, hash, mrb_symbol_value(MRB_SYM(full)), mrb_bool_value(gc->full)); + mrb_hash_set(mrb, hash, mrb_symbol_value(MRB_SYM(step_limit)), mrb_int_value(mrb, (mrb_int)gc->step_limit)); + mrb_hash_set(mrb, hash, mrb_symbol_value(MRB_SYM(malloc_increase)), mrb_int_value(mrb, (mrb_int)gc->malloc_increase)); + mrb_hash_set(mrb, hash, mrb_symbol_value(MRB_SYM(malloc_threshold)), mrb_int_value(mrb, (mrb_int)gc->malloc_threshold)); + mrb_hash_set(mrb, hash, mrb_symbol_value(MRB_SYM(symbol_count)), mrb_int_value(mrb, (mrb_int)(MRB_PRESYM_MAX + mrb->symidx))); + mrb_hash_set(mrb, hash, mrb_symbol_value(MRB_SYM(dynamic_symbol_count)), mrb_int_value(mrb, (mrb_int)mrb->dynamic_sym_count)); + +#ifdef MRB_GC_STATS + mrb_hash_set(mrb, hash, mrb_symbol_value(MRB_SYM(total)), mrb_int_value(mrb, (mrb_int)gc->gc_total_count)); + mrb_hash_set(mrb, hash, mrb_symbol_value(MRB_SYM(minor)), mrb_int_value(mrb, (mrb_int)gc->minor_gc_count)); + mrb_hash_set(mrb, hash, mrb_symbol_value(MRB_SYM(major)), mrb_int_value(mrb, (mrb_int)gc->major_gc_count)); +#endif + + return hash; +} + void mrb_init_gc(mrb_state *mrb) { struct RClass *gc; +#if defined(MRB_WORD_BOXING) && defined(MRB_32BIT) && defined(MRB_USE_FLOAT32) && !defined(MRB_WORDBOX_NO_INLINE_FLOAT) + /* 6 words: padded to 8-byte alignment for inline float word boxing */ + mrb_static_assert(sizeof(RVALUE) <= sizeof(void*) * 6, + "RVALUE size must be within 6 words"); +#else mrb_static_assert_object_size(RVALUE); +#endif - gc = mrb_define_module(mrb, "GC"); - - mrb_define_class_method(mrb, gc, "start", gc_start, MRB_ARGS_NONE()); - mrb_define_class_method(mrb, gc, "enable", gc_enable, MRB_ARGS_NONE()); - mrb_define_class_method(mrb, gc, "disable", gc_disable, MRB_ARGS_NONE()); - mrb_define_class_method(mrb, gc, "interval_ratio", gc_interval_ratio_get, MRB_ARGS_NONE()); - mrb_define_class_method(mrb, gc, "interval_ratio=", gc_interval_ratio_set, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, gc, "step_ratio", gc_step_ratio_get, MRB_ARGS_NONE()); - mrb_define_class_method(mrb, gc, "step_ratio=", gc_step_ratio_set, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, gc, "generational_mode=", gc_generational_mode_set, MRB_ARGS_REQ(1)); - mrb_define_class_method(mrb, gc, "generational_mode", gc_generational_mode_get, MRB_ARGS_NONE()); + gc = mrb_define_module_id(mrb, MRB_SYM(GC)); + + mrb_define_class_method_id(mrb, gc, MRB_SYM(stat), gc_stat, MRB_ARGS_NONE()); + mrb_define_class_method_id(mrb, gc, MRB_SYM(start), gc_start, MRB_ARGS_NONE()); + mrb_define_class_method_id(mrb, gc, MRB_SYM(enable), gc_enable, MRB_ARGS_NONE()); + mrb_define_class_method_id(mrb, gc, MRB_SYM(disable), gc_disable, MRB_ARGS_NONE()); + mrb_define_class_method_id(mrb, gc, MRB_SYM(interval_ratio), gc_interval_ratio_get, MRB_ARGS_NONE()); + mrb_define_class_method_id(mrb, gc, MRB_SYM_E(interval_ratio), gc_interval_ratio_set, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, gc, MRB_SYM(step_ratio), gc_step_ratio_get, MRB_ARGS_NONE()); + mrb_define_class_method_id(mrb, gc, MRB_SYM_E(step_ratio), gc_step_ratio_set, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, gc, MRB_SYM(step_limit), gc_step_limit_get, MRB_ARGS_NONE()); + mrb_define_class_method_id(mrb, gc, MRB_SYM_E(step_limit), gc_step_limit_set, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, gc, MRB_SYM(malloc_threshold), gc_malloc_threshold_get, MRB_ARGS_NONE()); + mrb_define_class_method_id(mrb, gc, MRB_SYM_E(malloc_threshold), gc_malloc_threshold_set, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, gc, MRB_SYM_E(generational_mode), gc_generational_mode_set, MRB_ARGS_REQ(1)); + mrb_define_class_method_id(mrb, gc, MRB_SYM(generational_mode), gc_generational_mode_get, MRB_ARGS_NONE()); } diff --git a/src/hash.c b/src/hash.c index aac9ab5707..3d82df6f0b 100644 --- a/src/hash.c +++ b/src/hash.c @@ -11,8 +11,9 @@ #include #include #include +#include #include -#include + /* * === Glossary @@ -55,7 +56,7 @@ */ #define EA_N_RESERVED_INDICES 2 /* empty and deleted */ -#define EA_INCREASE_RATIO 6 / 5 + 6 + #define EA_MAX_INCREASE UINT16_MAX #define EA_MAX_CAPA U32(lesser(IB_MAX_CAPA - EA_N_RESERVED_INDICES, MRB_INT_MAX)) #define IB_MAX_CAPA (U32(1) << IB_MAX_BIT) @@ -91,6 +92,7 @@ typedef struct index_buckets_iter { struct RHash *h; uint32_t bit; uint32_t mask; + uint32_t initial_pos; uint32_t pos; uint32_t ary_index; uint32_t ea_index; @@ -145,78 +147,57 @@ typedef struct index_buckets_iter { } #ifdef MRB_64BIT -DEFINE_ACCESSOR(ar, ea_capa, uint32_t, ea_capa) -DEFINE_ACCESSOR(ar, ea_n_used, uint32_t, ea_n_used) -DEFINE_ACCESSOR(ht, ea_capa, uint32_t, ea_capa) -DEFINE_ACCESSOR(ht, ea_n_used, uint32_t, ea_n_used) +DEFINE_ACCESSOR(ar, ea_capa, uint32_t, ea_capa) /* ar_ea_capa ar_set_ea_capa */ +DEFINE_ACCESSOR(ar, ea_n_used, uint32_t, ea_n_used) /* ar_ea_n_used ar_set_ea_n_used */ +DEFINE_ACCESSOR(ht, ea_capa, uint32_t, ea_capa) /* ht_ea_capa ht_set_ea_capa */ +DEFINE_ACCESSOR(ht, ea_n_used, uint32_t, ea_n_used) /* ht_ea_n_used ht_set_ea_n_used */ #else -DEFINE_FLAG_ACCESSOR(ar, ea_capa, uint32_t, AR_EA_CAPA) -DEFINE_FLAG_ACCESSOR(ar, ea_n_used, uint32_t, AR_EA_N_USED) -DEFINE_ACCESSOR(ht, ea_capa, uint32_t, hsh.ht->ea_capa) -DEFINE_ACCESSOR(ht, ea_n_used, uint32_t, hsh.ht->ea_n_used) +DEFINE_FLAG_ACCESSOR(ar, ea_capa, uint32_t, AR_EA_CAPA) /* ar_ea_capa ar_set_ea_capa */ +DEFINE_FLAG_ACCESSOR(ar, ea_n_used, uint32_t, AR_EA_N_USED) /* ar_ea_n_used ar_set_ea_n_used */ +DEFINE_ACCESSOR(ht, ea_capa, uint32_t, hsh.ht->ea_capa) /* ht_ea_capa ht_set_ea_capa */ +DEFINE_ACCESSOR(ht, ea_n_used, uint32_t, hsh.ht->ea_n_used) /* ht_ea_n_used ht_set_ea_n_used */ #endif -DEFINE_FLAG_ACCESSOR(ib, bit, uint32_t, IB_BIT) -DEFINE_ACCESSOR(ar, size, uint32_t, size) -DEFINE_ACCESSOR(ar, ea, hash_entry*, hsh.ea) -DEFINE_DECREMENTER(ar, size) -DEFINE_ACCESSOR(ht, size, uint32_t, size) -DEFINE_ACCESSOR(ht, ea, hash_entry*, hsh.ht->ea) -DEFINE_GETTER(ht, ib, uint32_t*, hsh.ht->ib) -DEFINE_INCREMENTER(ht, size) -DEFINE_DECREMENTER(ht, size) -DEFINE_GETTER(h, size, uint32_t, size) -DEFINE_ACCESSOR(h, ht, hash_table*, hsh.ht) -DEFINE_SWITCHER(ht, HT) - -#define ea_each_used(ea, n_used, entry_var, code) do { \ - hash_entry *entry_var = ea, *ea_end__ = entry_var + (n_used); \ - for (; entry_var < ea_end__; ++entry_var) { \ - code; \ - } \ -} while (0) - -#define ea_each(ea, size, entry_var, code) do { \ - hash_entry *entry_var = ea; \ - uint32_t size__ = size; \ - for (; 0 < size__; ++entry_var) { \ - if (entry_deleted_p(entry_var)) continue; \ - --size__; \ - code; \ - } \ -} while (0) - -#define ib_cycle_by_key(mrb, h, key, it_var, code) do { \ - index_buckets_iter it_var[1]; \ - ib_it_init(mrb, it_var, h, key); \ - for (;;) { \ - ib_it_next(it_var); \ - code; \ - } \ -} while (0) - -#define ib_find_by_key(mrb, h_, key_, it_var, code) do { \ - mrb_value ib_fbk_key__ = key_; \ - ib_cycle_by_key(mrb, h_, ib_fbk_key__, it_var, { \ - if (ib_it_empty_p(it_var)) break; \ - if (ib_it_deleted_p(it_var)) continue; \ - if (obj_eql(mrb, ib_fbk_key__, ib_it_entry(it_var)->key, it_var->h)) { \ - code; \ - break; \ - } \ - }); \ -} while (0) - -#define h_each(h, entry_var, code) do { \ - struct RHash *h__ = h; \ - hash_entry *h_e_ea__; \ - uint32_t h_e_size__; \ - h_ar_p(h) ? (h_e_ea__ = ar_ea(h__), h_e_size__ = ar_size(h__)) : \ - (h_e_ea__ = ht_ea(h__), h_e_size__ = ht_size(h__)); \ - ea_each(h_e_ea__, h_e_size__, entry_var, code); \ -} while (0) +DEFINE_FLAG_ACCESSOR(ib, bit, uint32_t, IB_BIT) /* ib_bit ib_set_bit */ +DEFINE_ACCESSOR(ar, size, uint32_t, size) /* ar_size ar_set_size */ +DEFINE_ACCESSOR(ar, ea, hash_entry*, hsh.ea) /* ar_ea ar_set_ea */ +DEFINE_DECREMENTER(ar, size) /* ar_dec_size */ +DEFINE_ACCESSOR(ht, size, uint32_t, size) /* ht_size ht_set_size */ +DEFINE_ACCESSOR(ht, ea, hash_entry*, hsh.ht->ea) /* ht_ea ht_set_ea */ +DEFINE_GETTER(ht, ib, uint32_t*, hsh.ht->ib) /* ht_ib */ +DEFINE_INCREMENTER(ht, size) /* ht_inc_size */ +DEFINE_DECREMENTER(ht, size) /* ht_dec_size */ +DEFINE_GETTER(h, size, uint32_t, size) /* h_size */ +DEFINE_ACCESSOR(h, ht, hash_table*, hsh.ht) /* h_ht h_set_ht */ +DEFINE_SWITCHER(ht, HT) /* h_ht_on h_ht_off h_ht_p */ + +#define EA_EACH_USED(ea, n_used, entry_var) \ + for (hash_entry *entry_var = (ea), *ea_end__ = (entry_var) + (n_used); \ + entry_var < ea_end__; \ + entry_var++) + +#define EA_EACH(ea, size, entry_var) \ + for (uint32_t ea_size__ = (size); ea_size__; ea_size__ = 0) \ + for (hash_entry *entry_var = (ea); \ + ea_size__ && (entry_var = entry_skip_deleted(entry_var), TRUE); \ + entry_var++, ea_size__--) + +#define IB_CYCLE_BY_KEY(mrb, h, key, it_var) \ + for (index_buckets_iter it_var[1] = { ib_it_init(mrb, h, key) }; \ + (ib_it_next(it_var), TRUE); \ + /* do nothing */) + +#define IB_FIND_BY_KEY(mrb, h, key, it_var) \ + for (index_buckets_iter it_var[1] = { ib_it_init(mrb, h, key) }; \ + ib_it_find_by_key(mrb, it_var, key); \ + it_var[0].h = NULL) + +#define H_EACH(h, entry_var) \ + EA_EACH((h_ar_p(h) ? ar_ea(h) : ht_ea(h)), \ + (h_ar_p(h) ? ar_size(h) : ht_size(h)), \ + entry_var) /* - * In `h_check_modified()`, in the case of `MRB_NO_BOXING`, `ht_ea()` or + * In `H_CHECK_MODIFIED()`, in the case of `MRB_NO_BOXING`, `ht_ea()` or * `ht_ea_capa()` for AR may read uninitialized area (#5332). Therefore, do * not use those macros for AR in `MRB_NO_BOXING` (but in the case of * `MRB_64BIT`, `ht_ea_capa()` is the same as `ar_ea_capa()`, so use it). @@ -232,10 +213,10 @@ DEFINE_SWITCHER(ht, HT) # define H_CHECK_MODIFIED_USE_HT_EA_FOR_AR TRUE # define H_CHECK_MODIFIED_USE_HT_EA_CAPA_FOR_AR TRUE /* - * `h_check_modified` raises an exception when a dangerous modification is + * `H_CHECK_MODIFIED` raises an exception when a dangerous modification is * made to `h` by executing `code`. * - * `h_check_modified` macro is not called if `h->hsh.ht` (`h->hsh.ea`) is `NULL` + * `H_CHECK_MODIFIED` macro is not called if `h->hsh.ht` (`h->hsh.ea`) is `NULL` * (`Hash` size is zero). And because the `hash_entry` is rather large, * `h->hsh.ht->ea` and `h->hsh.ht->ea_capa` are able to be safely accessed even for * AR. This nature is used to eliminate branch of AR or HT. @@ -255,32 +236,13 @@ HT_ASSERT_SAFE_READ(ea_capa); #endif /* MRB_NO_BOXING */ /* - * `h_check_modified` raises an exception when a dangerous modification is - * made to `h` by executing `code`. + * `H_CHECK_MODIFIED` raises an exception when a dangerous modification is + * made to `h` by executing code block. */ -#define h_check_modified(mrb, h, code) do { \ - struct RHash *h__ = h; \ - uint32_t mask__ = MRB_HASH_HT|MRB_HASH_IB_BIT_MASK|MRB_HASH_AR_EA_CAPA_MASK; \ - uint32_t flags__ = h__->flags & mask__; \ - void* tbl__ = (mrb_assert(h__->hsh.ht), h__->hsh.ht); \ - uint32_t ht_ea_capa__ = 0; \ - hash_entry *ht_ea__ = NULL; \ - if (H_CHECK_MODIFIED_USE_HT_EA_CAPA_FOR_AR || h_ht_p(h__)) { \ - ht_ea_capa__ = ht_ea_capa(h__); \ - } \ - if (H_CHECK_MODIFIED_USE_HT_EA_FOR_AR || h_ht_p(h__)) { \ - ht_ea__ = ht_ea(h__); \ - } \ - code; \ - if (flags__ != (h__->flags & mask__) || \ - tbl__ != h__->hsh.ht || \ - ((H_CHECK_MODIFIED_USE_HT_EA_CAPA_FOR_AR || h_ht_p(h__)) && \ - ht_ea_capa__ != ht_ea_capa(h__)) || \ - ((H_CHECK_MODIFIED_USE_HT_EA_FOR_AR || h_ht_p(h__)) && \ - ht_ea__ != ht_ea(h__))) { \ - mrb_raise(mrb, E_RUNTIME_ERROR, "hash modified"); \ - } \ -} while (0) +#define H_CHECK_MODIFIED(mrb, h) \ + for (struct h_check_modified h_checker__ = h_check_modified_init(mrb, h); \ + h_checker__.tbl; \ + h_check_modified_validate(mrb, &h_checker__, h), h_checker__.tbl = NULL) #define U32(v) ((uint32_t)(v)) #define h_ar_p(h) (!h_ht_p(h)) @@ -291,12 +253,11 @@ HT_ASSERT_SAFE_READ(ea_capa); static uint32_t ib_upper_bound_for(uint32_t capa); static uint32_t ib_bit_to_capa(uint32_t bit); +static hash_entry *ib_it_entry(index_buckets_iter *it); static void ht_init( mrb_state *mrb, struct RHash *h, uint32_t size, hash_entry *ea, uint32_t ea_capa, hash_table *ht, uint32_t ib_bit); static void ht_set(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value val); -static void ht_set_without_ib_adjustment( - mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value val); static uint32_t next_power2(uint32_t v) @@ -310,13 +271,58 @@ next_power2(uint32_t v) v |= v >> 4; v |= v >> 8; v |= v >> 16; - ++v; + v++; return v; #endif } +struct h_check_modified { + uint32_t flags; + void *tbl; + uint32_t ht_ea_capa; + hash_entry *ht_ea; +}; + +#define H_CHECK_MODIFIED_FLAGS_MASK (MRB_HASH_HT | MRB_HASH_IB_BIT_MASK | MRB_HASH_AR_EA_CAPA_MASK) + +static struct h_check_modified +h_check_modified_init(mrb_state *mrb, struct RHash *h) +{ + mrb_assert(h->hsh.ht); + + struct h_check_modified checker; + checker.flags = h->flags & H_CHECK_MODIFIED_FLAGS_MASK; + checker.tbl = h->hsh.ht; + checker.ht_ea_capa = (H_CHECK_MODIFIED_USE_HT_EA_CAPA_FOR_AR || h_ht_p(h)) ? ht_ea_capa(h) : 0; + checker.ht_ea = (H_CHECK_MODIFIED_USE_HT_EA_FOR_AR || h_ht_p(h)) ? ht_ea(h) : NULL; + return checker; +} + +static void +h_check_modified_validate(mrb_state *mrb, struct h_check_modified *checker, struct RHash *h) +{ + if (checker->flags != (h->flags & H_CHECK_MODIFIED_FLAGS_MASK) || + checker->tbl != h->hsh.ht || + ((H_CHECK_MODIFIED_USE_HT_EA_CAPA_FOR_AR || h_ht_p(h)) && + checker->ht_ea_capa != ht_ea_capa(h)) || + ((H_CHECK_MODIFIED_USE_HT_EA_FOR_AR || h_ht_p(h)) && + checker->ht_ea != ht_ea(h))) { + mrb_raise(mrb, E_RUNTIME_ERROR, "hash modified"); + } +} + +#ifndef MRB_NO_FLOAT static uint32_t -obj_hash_code(mrb_state *mrb, mrb_value key, struct RHash *h) +float_hash_code(mrb_float f) +{ + /* normalize -0.0 to 0.0 */ + if (f == 0.0) f = 0.0; + return mrb_byte_hash((const uint8_t*)&f, sizeof(f)); +} +#endif + +uint32_t +mrb_obj_hash_code(mrb_state *mrb, mrb_value key) { enum mrb_vtype tt = mrb_type(key); uint32_t hash_code; @@ -333,32 +339,50 @@ obj_hash_code(mrb_state *mrb, mrb_value key, struct RHash *h) case MRB_TT_INTEGER: if (mrb_fixnum_p(key)) { hash_code = U32(mrb_fixnum(key)); - break; } + else { +#ifdef MRB_USE_BIGINT + hash_code = U32(mrb_integer(mrb_bint_hash(mrb, key))); +#else + /* This path should not be reached if bignum is not configured. + * Hashing object_id is a fallback to avoid uninitialized value. */ + hash_code = U32(mrb_obj_id(key)); +#endif + } + break; #ifndef MRB_NO_FLOAT - /* fall through */ case MRB_TT_FLOAT: -#endif - hash_code = U32(mrb_obj_id(key)); + hash_code = float_hash_code(mrb_float(key)); break; +#endif default: - h_check_modified(mrb, h, { - hash_code_obj = mrb_funcall_argv(mrb, key, MRB_SYM(hash), 0, NULL); - }); - + hash_code_obj = mrb_funcall_argv(mrb, key, MRB_SYM(hash), 0, NULL); hash_code = U32(tt) ^ U32(mrb_integer(hash_code_obj)); break; } - return hash_code ^ (hash_code << 2) ^ (hash_code >> 2); + hash_code ^= hash_code >> 16; + hash_code *= 0x45d9f3b; + hash_code ^= hash_code >> 16; + return hash_code; +} + +static uint32_t +obj_hash_code(mrb_state *mrb, mrb_value key, struct RHash *h) +{ + uint32_t hash_code = 0; + + H_CHECK_MODIFIED(mrb, h) { + hash_code = mrb_obj_hash_code(mrb, key); + } + return hash_code; } static mrb_bool obj_eql(mrb_state *mrb, mrb_value a, mrb_value b, struct RHash *h) { - enum mrb_vtype tt = mrb_type(a); - mrb_bool eql; + mrb_bool eql = FALSE; - switch (tt) { + switch (mrb_type(a)) { case MRB_TT_STRING: return mrb_str_equal(mrb, a, b); @@ -377,12 +401,12 @@ obj_eql(mrb_state *mrb, mrb_value a, mrb_value b, struct RHash *h) #endif default: - h_check_modified(mrb, h, {eql = mrb_eql(mrb, a, b);}); + H_CHECK_MODIFIED(mrb, h) {eql = mrb_eql(mrb, a, b);} return eql; } } -static mrb_bool +static inline mrb_bool entry_deleted_p(const hash_entry* entry) { return mrb_undef_p(entry->key); @@ -394,6 +418,14 @@ entry_delete(hash_entry* entry) entry->key = mrb_undef_value(); } +static hash_entry* +entry_skip_deleted(hash_entry *e) +{ + for (; entry_deleted_p(e); e++) + ; + return e; +} + static uint32_t ea_next_capa_for(uint32_t size, uint32_t max_capa) { @@ -407,7 +439,8 @@ ea_next_capa_for(uint32_t size, uint32_t max_capa) * `EA_INCREASE_RATIO` is the current value, 32-bit range will not be * exceeded during the calculation of `capa`, so `size_t` is used. */ - size_t capa = (size_t)size * EA_INCREASE_RATIO, inc = capa - size; + size_t capa = ((size_t)size * 6) / 5 + 6; + size_t inc = capa - size; if (EA_MAX_INCREASE < inc) capa = size + EA_MAX_INCREASE; return capa <= max_capa ? U32(capa) : max_capa; } @@ -423,11 +456,11 @@ static void ea_compress(hash_entry *ea, uint32_t n_used) { hash_entry *w_entry = ea; - ea_each_used(ea, n_used, r_entry, { + EA_EACH_USED(ea, n_used, r_entry) { if (entry_deleted_p(r_entry)) continue; if (r_entry != w_entry) *w_entry = *r_entry; - ++w_entry; - }); + w_entry++; + } } /* @@ -454,9 +487,9 @@ static hash_entry* ea_get_by_key(mrb_state *mrb, hash_entry *ea, uint32_t size, mrb_value key, struct RHash *h) { - ea_each(ea, size, entry, { + EA_EACH(ea, size, entry) { if (obj_eql(mrb, key, entry->key, h)) return entry; - }); + } return NULL; } @@ -511,11 +544,11 @@ ar_compress(mrb_state *mrb, struct RHash *h) static mrb_bool ar_get(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value *valp) { - ea_each(ar_ea(h), ar_size(h), entry, { + EA_EACH(ar_ea(h), ar_size(h), entry) { if (!obj_eql(mrb, key, entry->key, h)) continue; *valp = entry->val; return TRUE; - }); + } return FALSE; } @@ -566,13 +599,13 @@ static void ar_shift(mrb_state *mrb, struct RHash *h, mrb_value *keyp, mrb_value *valp) { uint32_t size = ar_size(h); - ea_each(ar_ea(h), size, entry, { + EA_EACH(ar_ea(h), size, entry) { *keyp = entry->key; *valp = entry->val; entry_delete(entry); ar_set_size(h, --size); return; - }); + } } static void @@ -581,7 +614,7 @@ ar_rehash(mrb_state *mrb, struct RHash *h) /* see comments in `h_rehash` */ uint32_t size = ar_size(h), w_size = 0, ea_capa = ar_ea_capa(h); hash_entry *ea = ar_ea(h), *w_entry; - ea_each(ea, size, r_entry, { + EA_EACH(ea, size, r_entry) { if ((w_entry = ea_get_by_key(mrb, ea, w_size, r_entry->key, h))) { w_entry->val = r_entry->val; ar_set_size(h, --size); @@ -592,9 +625,9 @@ ar_rehash(mrb_state *mrb, struct RHash *h) ea_set(ea, w_size, r_entry->key, r_entry->val); entry_delete(r_entry); } - ++w_size; + w_size++; } - }); + } mrb_assert(size == w_size); ar_set_ea_n_used(h, size); ar_adjust_ea(mrb, h, size, ea_capa); @@ -636,14 +669,17 @@ ib_it_active_p(const index_buckets_iter *it) return it->ea_index < ib_it_deleted_value(it); } -static void -ib_it_init(mrb_state *mrb, index_buckets_iter *it, struct RHash *h, mrb_value key) +static index_buckets_iter +ib_it_init(mrb_state *mrb, struct RHash *h, mrb_value key) { - it->h = h; - it->bit = ib_bit(h); - it->mask = ib_bit_to_capa(it->bit) - 1; - it->pos = ib_it_pos_for(it, obj_hash_code(mrb, key, h)); - it->step = 0; + index_buckets_iter it; + it.h = h; + it.bit = ib_bit(h); + it.mask = ib_bit_to_capa(it.bit) - 1; + it.initial_pos = ib_it_pos_for(&it, obj_hash_code(mrb, key, h)); + it.pos = it.initial_pos; + it.step = 0; + return it; } static void @@ -682,7 +718,23 @@ ib_it_next(index_buckets_iter *it) else { it->shift1 = 0; } - it->pos = ib_it_pos_for(it, it->pos + (++it->step)); + it->step++; + it->pos = ib_it_pos_for(it, it->initial_pos + (it->step * it->step + it->step) / 2); +} + +static mrb_bool +ib_it_find_by_key(mrb_state *mrb, index_buckets_iter *it, mrb_value key) +{ + if (!it->h) return FALSE; + + for (;;) { + ib_it_next(it); + if (ib_it_empty_p(it)) return FALSE; + if (!ib_it_deleted_p(it) && + obj_eql(mrb, key, ib_it_entry(it)->key, it->h)) { + return TRUE; + } + } } static uint32_t @@ -769,13 +821,13 @@ ib_init(mrb_state *mrb, struct RHash *h, uint32_t ib_bit, size_t ib_byte_size) hash_entry *ea = ht_ea(h); memset(ht_ib(h), 0xff, ib_byte_size); ib_set_bit(h, ib_bit); - ea_each_used(ea, ht_ea_n_used(h), entry, { - ib_cycle_by_key(mrb, h, entry->key, it, { + EA_EACH_USED(ea, ht_ea_n_used(h), entry) { + IB_CYCLE_BY_KEY(mrb, h, entry->key, it) { if (!ib_it_empty_p(it)) continue; ib_it_set(it, U32(entry - ea)); break; - }); - }); + } + } } static void @@ -833,10 +885,10 @@ ht_to_ar(mrb_state *mrb, struct RHash *h) static mrb_bool ht_get(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value *valp) { - ib_find_by_key(mrb, h, key, it, { + IB_FIND_BY_KEY(mrb, h, key, it) { *valp = ib_it_entry(it)->val; return TRUE; - }); + } return FALSE; } @@ -848,15 +900,39 @@ ht_set_as_ar(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value val) } static void -ht_set_without_ib_adjustment(mrb_state *mrb, struct RHash *h, - mrb_value key, mrb_value val) +ht_set(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value val) { + uint32_t size = ht_size(h); + uint32_t ib_bit_width = ib_bit(h), ib_capa = ib_bit_to_capa(ib_bit_width); + if (ib_upper_bound_for(ib_capa) <= size) { + if (size != ht_ea_n_used(h)) ea_compress(ht_ea(h), ht_ea_n_used(h)); + ht_init(mrb, h, size, ht_ea(h), ht_ea_capa(h), h_ht(h), ++ib_bit_width); + } + else if (size != ht_ea_n_used(h)) { + if (ib_capa - EA_N_RESERVED_INDICES <= ht_ea_n_used(h)) goto compress; + if (ht_ea_capa(h) == ht_ea_n_used(h)) { + if (size <= AR_MAX_SIZE) { + ht_set_as_ar(mrb, h, key, val); + return; + } + if (ea_next_capa_for(size, EA_MAX_CAPA) <= ht_ea_capa(h)) { + compress: + ea_compress(ht_ea(h), ht_ea_n_used(h)); + ht_adjust_ea(mrb, h, size, ht_ea_capa(h)); + ht_init(mrb, h, size, ht_ea(h), ht_ea_capa(h), h_ht(h), ib_bit_width); + } + } + } + mrb_assert(ht_size(h) < ib_bit_to_capa(ib_bit(h))); - ib_cycle_by_key(mrb, h, key, it, { + IB_CYCLE_BY_KEY(mrb, h, key, it) { if (ib_it_active_p(it)) { if (!obj_eql(mrb, key, ib_it_entry(it)->key, h)) continue; ib_it_entry(it)->val = val; } + else if (ib_it_deleted_p(it)) { + continue; + } else { uint32_t ea_n_used = ht_ea_n_used(h); if (ea_n_used == H_MAX_SIZE) { @@ -870,44 +946,20 @@ ht_set_without_ib_adjustment(mrb_state *mrb, struct RHash *h, ht_set_ea_n_used(h, ++ea_n_used); } return; - }); -} - -static void -ht_set(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value val) -{ - uint32_t size = ht_size(h); - uint32_t ib_bit_width = ib_bit(h), ib_capa = ib_bit_to_capa(ib_bit_width); - if (ib_upper_bound_for(ib_capa) <= size) { - if (size != ht_ea_n_used(h)) ea_compress(ht_ea(h), ht_ea_n_used(h)); - ht_init(mrb, h, size, ht_ea(h), ht_ea_capa(h), h_ht(h), ++ib_bit_width); - } - else if (size != ht_ea_n_used(h)) { - if (ib_capa - EA_N_RESERVED_INDICES <= ht_ea_n_used(h)) goto compress; - if (ht_ea_capa(h) == ht_ea_n_used(h)) { - if (size <= AR_MAX_SIZE) {ht_set_as_ar(mrb, h, key, val); return;} - if (ea_next_capa_for(size, EA_MAX_CAPA) <= ht_ea_capa(h)) { - compress: - ea_compress(ht_ea(h), ht_ea_n_used(h)); - ht_adjust_ea(mrb, h, size, ht_ea_capa(h)); - ht_init(mrb, h, size, ht_ea(h), ht_ea_capa(h), h_ht(h), ib_bit_width); - } - } } - ht_set_without_ib_adjustment(mrb, h, key, val); } static mrb_bool ht_delete(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value *valp) { - ib_find_by_key(mrb, h, key, it, { + IB_FIND_BY_KEY(mrb, h, key, it) { hash_entry *entry = ib_it_entry(it); *valp = entry->val; ib_it_delete(it); entry_delete(entry); ht_dec_size(h); return TRUE; - }); + } return FALSE; } @@ -915,8 +967,8 @@ static void ht_shift(mrb_state *mrb, struct RHash *h, mrb_value *keyp, mrb_value *valp) { hash_entry *ea = ht_ea(h); - ea_each(ea, ht_size(h), entry, { - ib_cycle_by_key(mrb, h, entry->key, it, { + EA_EACH(ea, ht_size(h), entry) { + IB_CYCLE_BY_KEY(mrb, h, entry->key, it) { if (ib_it_get(it) != U32(entry - ea)) continue; *keyp = entry->key; *valp = entry->val; @@ -924,8 +976,8 @@ ht_shift(mrb_state *mrb, struct RHash *h, mrb_value *keyp, mrb_value *valp) entry_delete(entry); ht_dec_size(h); return; - }); - }); + } + } } static void @@ -943,8 +995,8 @@ ht_rehash(mrb_state *mrb, struct RHash *h) ht_init(mrb, h, 0, ea, ea_capa, h_ht(h), ib_bit_for(size)); ht_set_size(h, size); ht_set_ea_n_used(h, ht_ea_n_used(h)); - ea_each(ea, size, r_entry, { - ib_cycle_by_key(mrb, h, r_entry->key, it, { + EA_EACH(ea, size, r_entry) { + IB_CYCLE_BY_KEY(mrb, h, r_entry->key, it) { if (ib_it_active_p(it)) { if (!obj_eql(mrb, r_entry->key, ib_it_entry(it)->key, h)) continue; ib_it_entry(it)->val = r_entry->val; @@ -959,8 +1011,8 @@ ht_rehash(mrb_state *mrb, struct RHash *h) ib_it_set(it, w_size++); } break; - }); - }); + } + } mrb_assert(size == w_size); ht_set_ea_n_used(h, size); size <= AR_MAX_SIZE ? ht_to_ar(mrb, h) : ht_adjust_ea(mrb, h, size, ea_capa); @@ -969,9 +1021,9 @@ ht_rehash(mrb_state *mrb, struct RHash *h) static mrb_value h_key_for(mrb_state *mrb, mrb_value key) { - if (mrb_string_p(key) && !MRB_FROZEN_P(mrb_str_ptr(key))) { + if (mrb_string_p(key) && !mrb_frozen_p(mrb_str_ptr(key))) { key = mrb_str_dup(mrb, key); - MRB_SET_FROZEN_FLAG(mrb_str_ptr(key)); + mrb_str_ptr(key)->frozen = 1; } return key; } @@ -1072,18 +1124,13 @@ h_replace(mrb_state *mrb, struct RHash *h, struct RHash *orig_h) } } -void +size_t mrb_gc_mark_hash(mrb_state *mrb, struct RHash *h) { - h_each(h, entry, { + H_EACH(h, entry) { mrb_gc_mark_value(mrb, entry->key); mrb_gc_mark_value(mrb, entry->val); - }); -} - -size_t -mrb_gc_mark_hash_size(mrb_state *mrb, struct RHash *h) -{ + } return h_size(h) * 2; } @@ -1104,15 +1151,63 @@ mrb_hash_memsize(mrb_value self) ib_byte_size_for(ib_bit(h)))); } -/* Iterates over the key/value pairs. */ +/** + * Iterates over each key-value pair in the given hash. + * + * This function calls the provided callback function `func` for each entry + * in the hash `h`. The iteration order is the insertion order. + * + * The callback function `func` has the signature: + * `int callback(mrb_state *mrb, mrb_value key, mrb_value value, void *data)` + * - `mrb`: The mruby state. + * - `key`: The key of the current hash entry. + * - `value`: The value of the current hash entry. + * - `data`: The user-supplied data pointer passed to `mrb_hash_foreach`. + * + * If the callback function returns a non-zero value, the iteration stops. + * + * Important: Modifying the hash `h` within the callback function can lead + * to undefined behavior if not handled carefully (e.g., using `H_CHECK_MODIFIED` + * as done internally for Ruby methods, though direct C API users must be cautious). + * The `H_CHECK_MODIFIED` macro within this function is for internal safety + * when this function is used to implement Ruby methods that might call + * arbitrary Ruby code during iteration. + * + * @param mrb The mruby state. + * @param h A pointer to the RHash structure to iterate over. + * @param func The callback function to be called for each key-value pair. + * @param data A void pointer that will be passed to the callback function. + */ MRB_API void mrb_hash_foreach(mrb_state *mrb, struct RHash *h, mrb_hash_foreach_func *func, void *data) { - h_each(h, entry, { - if (func(mrb, entry->key, entry->val, data) != 0) return; - }); + H_EACH(h, entry) { + int n; + H_CHECK_MODIFIED(mrb, h) { + n = func(mrb, entry->key, entry->val, data); + } + if (n != 0) return; + } } +mrb_value +mrb_hash_first_key(mrb_state *mrb, mrb_value h) +{ + H_EACH(mrb_hash_ptr(h), entry) { + return entry->key; + } + return mrb_nil_value(); +} + +/** + * Creates a new, empty hash. + * + * This function allocates and initializes a new hash object. + * The returned hash is empty and ready to have elements added to it. + * + * @param mrb The mruby state. + * @return An mrb_value representing the new empty hash. + */ MRB_API mrb_value mrb_hash_new(mrb_state *mrb) { @@ -1120,9 +1215,21 @@ mrb_hash_new(mrb_state *mrb) return mrb_obj_value(h); } -/* - * Set the capacity of EA and IB to minimum capacity (and appropriate load - * factor) that does not cause expansion when inserting `capa` elements. +/** + * Creates a new, empty hash with a specified initial capacity. + * + * This function allocates and initializes a new hash object, pre-allocating + * internal structures to hold at least `capa` elements. This can be an + * optimization if the number of elements to be stored is known in advance, + * as it can prevent reallocations. + * + * If `capa` is 0, it behaves like `mrb_hash_new()`. + * An error will be raised if `capa` is negative or excessively large. + * + * @param mrb The mruby state. + * @param capa The initial capacity (number of elements) the hash should be + * able to hold without needing to resize. + * @return An mrb_value representing the new empty hash with preallocated capacity. */ MRB_API mrb_value mrb_hash_new_capa(mrb_state *mrb, mrb_int capa) @@ -1161,7 +1268,7 @@ hash_default(mrb_state *mrb, mrb_value hash, mrb_value key) { if (MRB_RHASH_DEFAULT_P(hash)) { if (MRB_RHASH_PROCDEFAULT_P(hash)) { - return mrb_funcall_id(mrb, RHASH_PROCDEFAULT(hash), MRB_SYM(call), 2, hash, key); + return mrb_funcall_argv2(mrb, RHASH_PROCDEFAULT(hash), MRB_SYM(call), hash, key); } else { return RHASH_IFNONE(hash); @@ -1198,6 +1305,18 @@ mrb_hash_init_copy(mrb_state *mrb, mrb_value self) return self; } +/** + * Creates a new hash that is a duplicate of the given hash. + * + * This function creates a shallow copy of the original hash `self`. + * The keys and values themselves are not duplicated, but the internal + * structure of the hash (entry array, hash table, default values/procs) + * is copied. + * + * @param mrb The mruby state. + * @param self The hash object (mrb_value) to duplicate. + * @return An mrb_value representing the new duplicated hash. + */ MRB_API mrb_value mrb_hash_dup(mrb_state *mrb, mrb_value self) { @@ -1208,6 +1327,24 @@ mrb_hash_dup(mrb_state *mrb, mrb_value self) return copy; } +/** + * Retrieves the value associated with a given key from the hash. + * + * If the key is found in the hash, its corresponding value is returned. + * If the key is not found, this function considers the hash's default settings: + * - If a default proc is set for the hash, it is called with the hash and key, + * and its result is returned. + * - If a default value is set, that value is returned. + * - Otherwise (no key found and no default settings), nil is returned. + * This function may also invoke a user-defined `default` method on the hash + * if it has been overridden and no basic default proc/value handles the lookup. + * + * @param mrb The mruby state. + * @param hash The hash object (mrb_value) to search. + * @param key The key (mrb_value) to look up. + * @return The associated mrb_value, or the result of the default proc, + * or the default value, or nil if not found and no defaults apply. + */ MRB_API mrb_value mrb_hash_get(mrb_state *mrb, mrb_value hash, mrb_value key) { @@ -1226,6 +1363,21 @@ mrb_hash_get(mrb_state *mrb, mrb_value hash, mrb_value key) return mrb_funcall_argv(mrb, hash, mid, 1, &key); } +/** + * Retrieves the value associated with a given key from the hash, + * returning a C-provided default value if the key is not found. + * + * If the `key` is found in the `hash`, its corresponding value is returned. + * If the `key` is not found, the `def` mrb_value provided to this function + * is returned. This function does *not* use the hash's own default proc + * or default value. + * + * @param mrb The mruby state. + * @param hash The hash object (mrb_value) to search. + * @param key The key (mrb_value) to look up. + * @param def The default mrb_value to return if the key is not found. + * @return The associated mrb_value if the key is found, otherwise `def`. + */ MRB_API mrb_value mrb_hash_fetch(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value def) { @@ -1238,6 +1390,22 @@ mrb_hash_fetch(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value def) return def; } +/** + * Sets or updates a key-value pair in the hash. + * + * Associates `val` with `key` in the `hash`. If `key` already exists, + * its value is updated. If `key` does not exist, a new entry is created. + * The hash is modified in place. + * + * If the `key` is a `MRB_TT_STRING` and not frozen, it will be duplicated + * and the duplicate will be frozen before use. + * Write barriers are triggered for garbage collection purposes for the key and value. + * + * @param mrb The mruby state. + * @param hash The hash object (mrb_value) to modify. + * @param key The key (mrb_value) for the entry. + * @param val The value (mrb_value) to associate with the key. + */ MRB_API void mrb_hash_set(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value val) { @@ -1248,6 +1416,22 @@ mrb_hash_set(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value val) mrb_field_write_barrier_value(mrb, mrb_basic_ptr(hash), val); } +static void +hash_set_default_proc(mrb_state *mrb, mrb_value hash, mrb_value proc) +{ + struct RProc *p = mrb_proc_ptr(proc); + if (MRB_PROC_STRICT_P(p)) { + mrb_int n = mrb_proc_arity(p); + if (n != 2 && (n >= 0 || n < -3)) { + if (n < 0) n = -n-1; + mrb_raisef(mrb, E_TYPE_ERROR, "default_proc takes two arguments (2 for %d)", n); + } + } + mrb_iv_set(mrb, hash, MRB_SYM(ifnone), proc); + RHASH(hash)->flags |= MRB_HASH_PROC_DEFAULT; + RHASH(hash)->flags |= MRB_HASH_DEFAULT; +} + /* 15.2.13.4.16 */ /* * call-seq: @@ -1257,10 +1441,10 @@ mrb_hash_set(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value val) * * Returns a new, empty hash. If this hash is subsequently accessed by * a key that doesn't correspond to a hash entry, the value returned - * depends on the style of new used to create the hash. In - * the first form, the access returns nil. If - * obj is specified, this single object will be used for - * all default values. If a block is specified, it will be + * depends on the style of `new` used to create the hash. In + * the first form, the access returns `nil`. If + * `obj` is specified, this single object will be used for + * all default values. If a block is specified, it will be * called with the hash object and the key, and should return the * default value. It is the block's responsibility to store the value * in the hash if required. @@ -1297,10 +1481,10 @@ mrb_hash_init(mrb_state *mrb, mrb_value hash) if (ifnone_p) { mrb_argnum_error(mrb, 1, 0, 0); } - RHASH(hash)->flags |= MRB_HASH_PROC_DEFAULT; - ifnone = block; + hash_set_default_proc(mrb, hash, block); + return hash; } - if (!mrb_nil_p(ifnone)) { + if (ifnone_p && !mrb_nil_p(ifnone)) { RHASH(hash)->flags |= MRB_HASH_DEFAULT; mrb_iv_set(mrb, hash, MRB_SYM(ifnone), ifnone); } @@ -1312,9 +1496,9 @@ mrb_hash_init(mrb_state *mrb, mrb_value hash) * call-seq: * hsh[key] -> value * - * Element Reference---Retrieves the value object corresponding - * to the key object. If not found, returns the default value (see - * Hash::new for details). + * Element Reference---Retrieves the `value` object corresponding + * to the `key` object. If not found, returns the default value (see + * `Hash::new` for details). * * h = { "a" => 100, "b" => 200 } * h["a"] #=> 100 @@ -1335,8 +1519,8 @@ mrb_hash_aget(mrb_state *mrb, mrb_value self) * hsh.default(key=nil) -> obj * * Returns the default value, the value that would be returned by - * hsh[key] if key did not exist in hsh. - * See also Hash::new and Hash#default=. + * `hsh`[`key`] if `key` did not exist in `hsh`. + * See also `Hash::new` and `Hash#default=`. * * h = Hash.new #=> {} * h.default #=> nil @@ -1361,7 +1545,7 @@ mrb_hash_default(mrb_state *mrb, mrb_value hash) if (MRB_RHASH_DEFAULT_P(hash)) { if (MRB_RHASH_PROCDEFAULT_P(hash)) { if (!given) return mrb_nil_value(); - return mrb_funcall_id(mrb, RHASH_PROCDEFAULT(hash), MRB_SYM(call), 2, hash, key); + return mrb_funcall_argv2(mrb, RHASH_PROCDEFAULT(hash), MRB_SYM(call), hash, key); } else { return RHASH_IFNONE(hash); @@ -1377,7 +1561,7 @@ mrb_hash_default(mrb_state *mrb, mrb_value hash) * * Sets the default value, the value returned for a key that does not * exist in the hash. It is not possible to set the default to a - * Proc that will be executed on each key lookup. + * `Proc` that will be executed on each key lookup. * * h = { "a" => 100, "b" => 200 } * h.default = "Go fish" @@ -1413,8 +1597,8 @@ mrb_hash_set_default(mrb_state *mrb, mrb_value hash) * call-seq: * hsh.default_proc -> anObject * - * If Hash::new was invoked with a block, return that - * block, otherwise return nil. + * If `Hash::new` was invoked with a block, return that + * block, otherwise return `nil`. * * h = Hash.new {|h,k| h[k] = k*k } #=> {} * p = h.default_proc #=> # @@ -1451,19 +1635,32 @@ mrb_hash_set_default_proc(mrb_state *mrb, mrb_value hash) mrb_value ifnone = mrb_get_arg1(mrb); hash_modify(mrb, hash); + mrb_bool has_ifnone = !mrb_nil_p(ifnone); + if (has_ifnone) { + mrb_check_type(mrb, ifnone, MRB_TT_PROC); + } mrb_iv_set(mrb, hash, MRB_SYM(ifnone), ifnone); - if (!mrb_nil_p(ifnone)) { - RHASH(hash)->flags |= MRB_HASH_PROC_DEFAULT; - RHASH(hash)->flags |= MRB_HASH_DEFAULT; + if (has_ifnone) { + hash_set_default_proc(mrb, hash, ifnone); } else { RHASH(hash)->flags &= ~MRB_HASH_DEFAULT; RHASH(hash)->flags &= ~MRB_HASH_PROC_DEFAULT; } - return ifnone; } +/** + * Deletes a key-value pair from the hash. + * + * Removes the entry associated with `key` from the `hash`. + * The hash is modified in place. + * + * @param mrb The mruby state. + * @param hash The hash object (mrb_value) to modify. + * @param key The key (mrb_value) of the entry to delete. + * @return The value associated with the deleted key if found, otherwise nil. + */ MRB_API mrb_value mrb_hash_delete_key(mrb_state *mrb, mrb_value hash, mrb_value key) { @@ -1491,8 +1688,8 @@ mrb_hash_delete(mrb_state *mrb, mrb_value self) * call-seq: * hsh.shift -> anArray or obj * - * Removes a key-value pair from hsh and returns it as the - * two-item array [ key, value ], or + * Removes a key-value pair from `hsh` and returns it as the + * two-item array [ `key`, `value` ], or * the hash's default value if the hash is empty. * * h = { 1 => "a", 2 => "b", 3 => "c" } @@ -1546,9 +1743,9 @@ mrb_hash_clear(mrb_state *mrb, mrb_value hash) * hsh.store(key, value) -> value * * Element Assignment---Associates the value given by - * value with the key given by key. - * key should not have its value changed while it is in - * use as a key (a String passed as a key will be + * `value` with the key given by `key`. + * `key` should not have its value changed while it is in + * use as a key (a `String` passed as a key will be * duplicated and frozen). * * h = { "a" => 100, "b" => 200 } @@ -1560,13 +1757,27 @@ mrb_hash_clear(mrb_state *mrb, mrb_value hash) static mrb_value mrb_hash_aset(mrb_state *mrb, mrb_value self) { - mrb_value key, val; + mrb_int argc = mrb_get_argc(mrb); + + if (argc != 2) { + mrb_argnum_error(mrb, argc, 2, 2); + } + + const mrb_value *argv = mrb_get_argv(mrb); + mrb_value key = argv[0]; + mrb_value val = argv[1]; - mrb_get_args(mrb, "oo", &key, &val); mrb_hash_set(mrb, self, key, val); return val; } +/** + * Returns the number of key-value pairs in the hash. + * + * @param mrb The mruby state (unused in the current implementation, but part of MRB_API convention). + * @param hash The hash object (mrb_value) to get the size of. + * @return An mrb_int representing the number of entries in the hash. + */ MRB_API mrb_int mrb_hash_size(mrb_state *mrb, mrb_value hash) { @@ -1605,7 +1816,7 @@ mrb_hash_empty_p(mrb_state *mrb, mrb_value self) * call-seq: * hsh.empty? -> true or false * - * Returns true if hsh contains no key-value pairs. + * Returns `true` if `hsh` contains no key-value pairs. * * {}.empty? #=> true * @@ -1622,7 +1833,7 @@ mrb_hash_empty_m(mrb_state *mrb, mrb_value self) * hsh.keys -> array * * Returns a new array populated with the keys from this hash. See also - * Hash#values. + * `Hash#values`. * * h = { "a" => 100, "b" => 200, "c" => 300, "d" => 400 } * h.keys #=> ["a", "b", "c", "d"] @@ -1634,9 +1845,9 @@ mrb_hash_keys(mrb_state *mrb, mrb_value hash) { struct RHash *h = mrb_hash_ptr(hash); mrb_value ary = mrb_ary_new_capa(mrb, (mrb_int)h_size(h)); - h_each(h, entry, { + H_EACH(h, entry) { mrb_ary_push(mrb, ary, entry->key); - }); + } return ary; } @@ -1645,8 +1856,8 @@ mrb_hash_keys(mrb_state *mrb, mrb_value hash) * call-seq: * hsh.values -> array * - * Returns a new array populated with the values from hsh. See - * also Hash#keys. + * Returns a new array populated with the values from `hsh`. See + * also `Hash#keys`. * * h = { "a" => 100, "b" => 200, "c" => 300 } * h.values #=> [100, 200, 300] @@ -1658,9 +1869,9 @@ mrb_hash_values(mrb_state *mrb, mrb_value hash) { struct RHash *h = mrb_hash_ptr(hash); mrb_value ary = mrb_ary_new_capa(mrb, (mrb_int)h_size(h)); - h_each(h, entry, { + H_EACH(h, entry) { mrb_ary_push(mrb, ary, entry->val); - }); + } return ary; } @@ -1675,7 +1886,7 @@ mrb_hash_values(mrb_state *mrb, mrb_value hash) * hsh.key?(key) -> true or false * hsh.member?(key) -> true or false * - * Returns true if the given key is present in hsh. + * Returns `true` if the given key is present in `hsh`. * * h = { "a" => 100, "b" => 200 } * h.has_key?("a") #=> true @@ -1707,8 +1918,8 @@ mrb_hash_has_key(mrb_state *mrb, mrb_value hash) * hsh.has_value?(value) -> true or false * hsh.value?(value) -> true or false * - * Returns true if the given value is present for some key - * in hsh. + * Returns `true` if the given value is present for some key + * in `hsh`. * * h = { "a" => 100, "b" => 200 } * h.has_value?(100) #=> true @@ -1720,14 +1931,32 @@ mrb_hash_has_value(mrb_state *mrb, mrb_value hash) { mrb_value val = mrb_get_arg1(mrb); struct RHash *h = mrb_hash_ptr(hash); - h_each(h, entry, { - h_check_modified(mrb, h, { + H_EACH(h, entry) { + H_CHECK_MODIFIED(mrb, h) { if (mrb_equal(mrb, val, entry->val)) return mrb_true_value(); - }); - }); + } + } return mrb_false_value(); } +/** + * Merges the contents of `hash2` into `hash1`. + * + * Iterates over `hash2` and for each key-value pair, sets it in `hash1`. + * If a key from `hash2` already exists in `hash1`, its value in `hash1` + * will be overwritten. `hash1` is modified in place. + * + * - `hash1` must not be frozen. + * - `hash2` must be a hash. + * - If `hash1` and `hash2` are the same object, or if `hash2` is empty, + * the function returns without doing anything. + * - Write barriers are triggered for keys and values from `hash2` as they + * are inserted into `hash1`. + * + * @param mrb The mruby state. + * @param hash1 The hash object (mrb_value) to be modified. + * @param hash2 The hash object (mrb_value) whose contents will be merged into `hash1`. + */ MRB_API void mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2) { @@ -1740,11 +1969,11 @@ mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2) if (h1 == h2) return; if (h_size(h2) == 0) return; - h_each(h2, entry, { - h_check_modified(mrb, h2, {h_set(mrb, h1, entry->key, entry->val);}); - mrb_field_write_barrier_value(mrb, (struct RBasic *)h1, entry->key); - mrb_field_write_barrier_value(mrb, (struct RBasic *)h1, entry->val); - }); + H_EACH(h2, entry) { + H_CHECK_MODIFIED(mrb, h2) {h_set(mrb, h1, entry->key, entry->val);} + mrb_field_write_barrier_value(mrb, (struct RBasic*)h1, entry->key); + mrb_field_write_barrier_value(mrb, (struct RBasic*)h1, entry->val); + } } static mrb_value @@ -1766,7 +1995,7 @@ mrb_hash_merge_m(mrb_state *mrb, mrb_value hash) * * Rebuilds the hash based on the current hash values for each key. If * values of key objects have changed since they were inserted, this - * method will reindex hsh. + * method will reindex `hsh`. * * keys = (1..17).map{|n| [n]} * k = keys[0] @@ -1783,42 +2012,345 @@ mrb_hash_merge_m(mrb_state *mrb, mrb_value hash) static mrb_value mrb_hash_rehash(mrb_state *mrb, mrb_value self) { + hash_modify(mrb, self); h_rehash(mrb, mrb_hash_ptr(self)); return self; } +static mrb_value +mrb_hash_compact(mrb_state *mrb, mrb_value hash) +{ + struct RHash *h = mrb_hash_ptr(hash); + mrb_bool ht_p = h_ht_p(h); + uint32_t size = ht_p ? ht_size(h) : ar_size(h); + uint32_t dec = 0; + + hash_modify(mrb, hash); + H_EACH(h, entry) { + if (mrb_nil_p(entry->val)) { + entry_delete(entry); + dec++; + } + } + if (dec == 0) return mrb_nil_value(); + size -= dec; + if (ht_p) { + ht_set_size(h, size); + } + else { + ar_set_size(h, size); + } + return hash; +} + +/* + * Internal method for pattern matching key check + value extraction. + * Returns an array of values if all keys exist, false otherwise. + * + * {a: 1, b: 2}.__pat_values([:a, :b]) #=> [1, 2] + * {a: 1}.__pat_values([:a, :b]) #=> false + */ +static mrb_value +mrb_hash_pat_values(mrb_state *mrb, mrb_value hash) +{ + mrb_value keys; + mrb_get_args(mrb, "A", &keys); + + const mrb_value *ary = RARRAY_PTR(keys); + mrb_int klen = RARRAY_LEN(keys); + struct RHash *h = mrb_hash_ptr(hash); + mrb_value result = mrb_ary_new_capa(mrb, klen); + int ai = mrb_gc_arena_save(mrb); + + for (mrb_int i = 0; i < klen; i++) { + mrb_value val; + if (!h_get(mrb, h, ary[i], &val)) { + return mrb_false_value(); + } + mrb_ary_push(mrb, result, val); + mrb_gc_arena_restore(mrb, ai); + } + return result; +} + +/* + * Internal method for pattern matching **rest. + * Returns a new hash excluding keys in the given array. + * + * {a: 1, b: 2, c: 3}.__except([:a, :c]) #=> {b: 2} + */ +static mrb_value +mrb_hash_except_keys(mrb_state *mrb, mrb_value hash) +{ + mrb_value keys; + mrb_get_args(mrb, "A", &keys); + + const mrb_value *ary = RARRAY_PTR(keys); + mrb_int klen = RARRAY_LEN(keys); + mrb_value result = mrb_hash_new(mrb); + struct RHash *h = mrb_hash_ptr(hash); + int ai = mrb_gc_arena_save(mrb); + + H_EACH(h, entry) { + mrb_bool found = FALSE; + for (mrb_int i = 0; i < klen; i++) { + if (mrb_equal(mrb, entry->key, ary[i])) { + found = TRUE; + break; + } + } + if (!found) { + mrb_hash_set(mrb, result, entry->key, entry->val); + } + mrb_gc_arena_restore(mrb, ai); + } + return result; +} + +/* + * call-seq: + * hash.to_s -> string + * hash.inspect -> string + * + * Return the contents of this hash as a string. + */ +static mrb_value +mrb_hash_to_s(mrb_state *mrb, mrb_value self) +{ + mrb->c->ci->mid = MRB_SYM(inspect); + mrb_value ret = mrb_str_new_lit(mrb, "{"); + int ai = mrb_gc_arena_save(mrb); + if (MRB_RECURSIVE_UNARY_P(mrb, MRB_SYM(inspect), self)) { + mrb_str_cat_lit(mrb, ret, "...}"); + return ret; + } + + mrb_int i = 0; + struct RHash *h = mrb_hash_ptr(self); + H_EACH(h, entry) { + if (i++ > 0) mrb_str_cat_lit(mrb, ret, ", "); + if (mrb_symbol_p(entry->key)) { + mrb_str_cat_str(mrb, ret, mrb_obj_as_string(mrb, entry->key)); + mrb_gc_arena_restore(mrb, ai); + mrb_str_cat_lit(mrb, ret, ": "); + } + else { + H_CHECK_MODIFIED(mrb, h) { + mrb_str_cat_str(mrb, ret, mrb_inspect(mrb, entry->key)); + } + mrb_gc_arena_restore(mrb, ai); + mrb_str_cat_lit(mrb, ret, " => "); + } + H_CHECK_MODIFIED(mrb, h) { + mrb_str_cat_str(mrb, ret, mrb_inspect(mrb, entry->val)); + } + mrb_gc_arena_restore(mrb, ai); + } + mrb_str_cat_lit(mrb, ret, "}"); + + return ret; +} + +/* + * call-seq: + * hash.to_hash -> self + * + * Returns self. + */ +static mrb_value +mrb_hash_to_hash(mrb_state *mrb, mrb_value self) +{ + return self; +} + +/* + * call-seq: + * hash.assoc(key) -> new_array or nil + * + * If the given key is found, returns a 2-element Array containing that key + * and its value: + * + * h = {foo: 0, bar: 1, baz: 2} + * h.assoc(:bar) # => [:bar, 1] + * + * Returns nil if key key is not found. + */ +static mrb_value +mrb_hash_assoc(mrb_state *mrb, mrb_value hash) +{ + mrb_value key = mrb_get_arg1(mrb); + struct RHash *h = mrb_hash_ptr(hash); + H_EACH(h, entry) { + if (obj_eql(mrb, entry->key, key, h)) { + return mrb_assoc_new(mrb, entry->key, entry->val); + } + } + return mrb_nil_value(); +} + +/* + * call-seq: + * hash.rassoc(value) -> new_array or nil + * + * Returns a new 2-element Array consisting of the key and value of the + * first-found entry whose value is == to value. + * + * h = {foo: 0, bar: 1, baz: 1} + * h.rassoc(1) # => [:bar, 1] + * + * Returns nil if no such value found. + */ +static mrb_value +mrb_hash_rassoc(mrb_state *mrb, mrb_value hash) +{ + mrb_value value = mrb_get_arg1(mrb); + struct RHash *h = mrb_hash_ptr(hash); + H_EACH(h, entry) { + if (obj_eql(mrb, entry->val, value, h)) { + return mrb_assoc_new(mrb, entry->key, entry->val); + } + } + return mrb_nil_value(); +} + +/* 15.2.13.4.1 */ +static mrb_value +mrb_hash_equal(mrb_state *mrb, mrb_value hash) +{ + mrb_value hash2 = mrb_get_arg1(mrb); + + if (mrb_obj_equal(mrb, hash, hash2)) return mrb_true_value(); + if (!mrb_hash_p(hash2)) { + return mrb_false_value(); + } + if (mrb_hash_size(mrb, hash) != mrb_hash_size(mrb, hash2)) { + return mrb_false_value(); + } + + /* Check for recursion */ + if (MRB_RECURSIVE_BINARY_FUNC_P(mrb, MRB_OPSYM(eq), hash, hash2)) { + return mrb_false_value(); + } + + struct RHash *h1 = mrb_hash_ptr(hash); + struct RHash *h2 = mrb_hash_ptr(hash2); + + H_EACH(h1, entry) { + mrb_value val2; + mrb_bool found; + + H_CHECK_MODIFIED(mrb, h1) { + found = h_get(mrb, h2, entry->key, &val2); + } + if (!found) { + return mrb_false_value(); + } + H_CHECK_MODIFIED(mrb, h1) { + if (!mrb_equal(mrb, entry->val, val2)) { + return mrb_false_value(); + } + } + } + + return mrb_true_value(); +} + +/* + * call-seq: + * hash.eql?(other) -> true or false + * + * Returns true if hash and other are both hashes with the same content + * compared by eql?. + */ +static mrb_value +mrb_hash_eql(mrb_state *mrb, mrb_value hash) +{ + mrb_value hash2 = mrb_get_arg1(mrb); + + if (mrb_obj_equal(mrb, hash, hash2)) return mrb_true_value(); + if (!mrb_hash_p(hash2)) { + return mrb_false_value(); + } + if (mrb_hash_size(mrb, hash) != mrb_hash_size(mrb, hash2)) { + return mrb_false_value(); + } + + /* Check for recursion */ + if (MRB_RECURSIVE_BINARY_FUNC_P(mrb, MRB_SYM_Q(eql), hash, hash2)) { + return mrb_false_value(); + } + + struct RHash *h1 = mrb_hash_ptr(hash); + struct RHash *h2 = mrb_hash_ptr(hash2); + + H_EACH(h1, entry) { + mrb_value val2; + mrb_bool found; + + H_CHECK_MODIFIED(mrb, h1) { + found = h_get(mrb, h2, entry->key, &val2); + } + if (!found) { + return mrb_false_value(); + } + H_CHECK_MODIFIED(mrb, h1) { + if (!mrb_eql(mrb, entry->val, val2)) { + return mrb_false_value(); + } + } + } + + return mrb_true_value(); +} + +/* ---------------------------*/ +static const mrb_mt_entry hash_rom_entries[] = { + MRB_MT_ENTRY(mrb_hash_equal, MRB_OPSYM(eq), MRB_ARGS_REQ(1)), /* 15.2.13.4.1 */ + MRB_MT_ENTRY(mrb_hash_aget, MRB_OPSYM(aref), MRB_ARGS_REQ(1)), /* 15.2.13.4.2 */ + MRB_MT_ENTRY(mrb_hash_aset, MRB_OPSYM(aset), MRB_ARGS_REQ(2)), /* 15.2.13.4.3 */ + MRB_MT_ENTRY(mrb_hash_clear, MRB_SYM(clear), MRB_ARGS_NONE()), /* 15.2.13.4.4 */ + MRB_MT_ENTRY(mrb_hash_default, MRB_SYM(default), MRB_ARGS_OPT(1)), /* 15.2.13.4.5 */ + MRB_MT_ENTRY(mrb_hash_set_default, MRB_SYM_E(default), MRB_ARGS_REQ(1)), /* 15.2.13.4.6 */ + MRB_MT_ENTRY(mrb_hash_default_proc, MRB_SYM(default_proc), MRB_ARGS_NONE()), /* 15.2.13.4.7 */ + MRB_MT_ENTRY(mrb_hash_set_default_proc, MRB_SYM_E(default_proc), MRB_ARGS_REQ(1)), /* 15.2.13.4.7 */ + MRB_MT_ENTRY(mrb_hash_delete, MRB_SYM(__delete), MRB_ARGS_REQ(1)), /* core of 15.2.13.4.8 */ + MRB_MT_ENTRY(mrb_hash_eql, MRB_SYM_Q(eql), MRB_ARGS_REQ(1)), /* Hash#eql? */ + MRB_MT_ENTRY(mrb_hash_empty_m, MRB_SYM_Q(empty), MRB_ARGS_NONE()), /* 15.2.13.4.12 */ + MRB_MT_ENTRY(mrb_hash_has_key, MRB_SYM_Q(has_key), MRB_ARGS_REQ(1)), /* 15.2.13.4.13 */ + MRB_MT_ENTRY(mrb_hash_has_value, MRB_SYM_Q(has_value), MRB_ARGS_REQ(1)), /* 15.2.13.4.14 */ + MRB_MT_ENTRY(mrb_hash_has_key, MRB_SYM_Q(include), MRB_ARGS_REQ(1)), /* 15.2.13.4.15 */ + MRB_MT_ENTRY(mrb_hash_init, MRB_SYM(initialize), MRB_ARGS_OPT(1)|MRB_ARGS_BLOCK() | MRB_MT_PRIVATE), /* 15.2.13.4.16 */ + MRB_MT_ENTRY(mrb_hash_init_copy, MRB_SYM(initialize_copy), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), /* 15.2.13.4.17 */ + MRB_MT_ENTRY(mrb_hash_has_key, MRB_SYM_Q(key), MRB_ARGS_REQ(1)), /* 15.2.13.4.18 */ + MRB_MT_ENTRY(mrb_hash_keys, MRB_SYM(keys), MRB_ARGS_NONE()), /* 15.2.13.4.19 */ + MRB_MT_ENTRY(mrb_hash_size_m, MRB_SYM(length), MRB_ARGS_NONE()), /* 15.2.13.4.20 */ + MRB_MT_ENTRY(mrb_hash_has_key, MRB_SYM_Q(member), MRB_ARGS_REQ(1)), /* 15.2.13.4.21 */ + MRB_MT_ENTRY(mrb_hash_init_copy, MRB_SYM(replace), MRB_ARGS_REQ(1)), /* 15.2.13.4.23 */ + MRB_MT_ENTRY(mrb_hash_shift, MRB_SYM(shift), MRB_ARGS_NONE()), /* 15.2.13.4.24 */ + MRB_MT_ENTRY(mrb_hash_size_m, MRB_SYM(size), MRB_ARGS_NONE()), /* 15.2.13.4.25 */ + MRB_MT_ENTRY(mrb_hash_aset, MRB_SYM(store), MRB_ARGS_REQ(2)), /* 15.2.13.4.26 */ + MRB_MT_ENTRY(mrb_hash_has_value, MRB_SYM_Q(value), MRB_ARGS_REQ(1)), /* 15.2.13.4.27 */ + MRB_MT_ENTRY(mrb_hash_values, MRB_SYM(values), MRB_ARGS_NONE()), /* 15.2.13.4.28 */ + MRB_MT_ENTRY(mrb_hash_to_s, MRB_SYM(to_s), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_hash_to_s, MRB_SYM(inspect), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_hash_rehash, MRB_SYM(rehash), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_hash_to_hash, MRB_SYM(to_hash), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_hash_assoc, MRB_SYM(assoc), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(mrb_hash_rassoc, MRB_SYM(rassoc), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(mrb_hash_merge_m, MRB_SYM(__merge), MRB_ARGS_ANY()), + MRB_MT_ENTRY(mrb_hash_compact, MRB_SYM(__compact), MRB_ARGS_NONE()), /* implementation of Hash#compact! */ + MRB_MT_ENTRY(mrb_hash_pat_values, MRB_SYM(__pat_values), MRB_ARGS_REQ(1)), /* for pattern matching keys */ + MRB_MT_ENTRY(mrb_hash_except_keys, MRB_SYM(__except), MRB_ARGS_REQ(1)), /* for pattern matching **rest */ +}; + void mrb_init_hash(mrb_state *mrb) { struct RClass *h; - mrb->hash_class = h = mrb_define_class(mrb, "Hash", mrb->object_class); /* 15.2.13 */ + mrb->hash_class = h = mrb_define_class_id(mrb, MRB_SYM(Hash), mrb->object_class); /* 15.2.13 */ MRB_SET_INSTANCE_TT(h, MRB_TT_HASH); - mrb_define_method(mrb, h, "[]", mrb_hash_aget, MRB_ARGS_REQ(1)); /* 15.2.13.4.2 */ - mrb_define_method(mrb, h, "[]=", mrb_hash_aset, MRB_ARGS_REQ(2)); /* 15.2.13.4.3 */ - mrb_define_method(mrb, h, "clear", mrb_hash_clear, MRB_ARGS_NONE()); /* 15.2.13.4.4 */ - mrb_define_method(mrb, h, "default", mrb_hash_default, MRB_ARGS_OPT(1)); /* 15.2.13.4.5 */ - mrb_define_method(mrb, h, "default=", mrb_hash_set_default, MRB_ARGS_REQ(1)); /* 15.2.13.4.6 */ - mrb_define_method(mrb, h, "default_proc", mrb_hash_default_proc,MRB_ARGS_NONE()); /* 15.2.13.4.7 */ - mrb_define_method(mrb, h, "default_proc=", mrb_hash_set_default_proc,MRB_ARGS_REQ(1)); /* 15.2.13.4.7 */ - mrb_define_method(mrb, h, "__delete", mrb_hash_delete, MRB_ARGS_REQ(1)); /* core of 15.2.13.4.8 */ - mrb_define_method(mrb, h, "empty?", mrb_hash_empty_m, MRB_ARGS_NONE()); /* 15.2.13.4.12 */ - mrb_define_method(mrb, h, "has_key?", mrb_hash_has_key, MRB_ARGS_REQ(1)); /* 15.2.13.4.13 */ - mrb_define_method(mrb, h, "has_value?", mrb_hash_has_value, MRB_ARGS_REQ(1)); /* 15.2.13.4.14 */ - mrb_define_method(mrb, h, "include?", mrb_hash_has_key, MRB_ARGS_REQ(1)); /* 15.2.13.4.15 */ - mrb_define_method(mrb, h, "initialize", mrb_hash_init, MRB_ARGS_OPT(1)|MRB_ARGS_BLOCK()); /* 15.2.13.4.16 */ - mrb_define_method(mrb, h, "initialize_copy", mrb_hash_init_copy, MRB_ARGS_REQ(1)); /* 15.2.13.4.17 */ - mrb_define_method(mrb, h, "key?", mrb_hash_has_key, MRB_ARGS_REQ(1)); /* 15.2.13.4.18 */ - mrb_define_method(mrb, h, "keys", mrb_hash_keys, MRB_ARGS_NONE()); /* 15.2.13.4.19 */ - mrb_define_method(mrb, h, "length", mrb_hash_size_m, MRB_ARGS_NONE()); /* 15.2.13.4.20 */ - mrb_define_method(mrb, h, "member?", mrb_hash_has_key, MRB_ARGS_REQ(1)); /* 15.2.13.4.21 */ - mrb_define_method(mrb, h, "replace", mrb_hash_init_copy, MRB_ARGS_REQ(1)); /* 15.2.13.4.23 */ - mrb_define_method(mrb, h, "shift", mrb_hash_shift, MRB_ARGS_NONE()); /* 15.2.13.4.24 */ - mrb_define_method(mrb, h, "size", mrb_hash_size_m, MRB_ARGS_NONE()); /* 15.2.13.4.25 */ - mrb_define_method(mrb, h, "store", mrb_hash_aset, MRB_ARGS_REQ(2)); /* 15.2.13.4.26 */ - mrb_define_method(mrb, h, "value?", mrb_hash_has_value, MRB_ARGS_REQ(1)); /* 15.2.13.4.27 */ - mrb_define_method(mrb, h, "values", mrb_hash_values, MRB_ARGS_NONE()); /* 15.2.13.4.28 */ - mrb_define_method(mrb, h, "rehash", mrb_hash_rehash, MRB_ARGS_NONE()); - mrb_define_method(mrb, h, "__merge", mrb_hash_merge_m, MRB_ARGS_REQ(1)); + MRB_MT_INIT_ROM(mrb, h, hash_rom_entries); } +#undef lesser diff --git a/src/init.c b/src/init.c index afd69975af..53e29a6a5c 100644 --- a/src/init.c +++ b/src/init.c @@ -6,46 +6,32 @@ #include -void mrb_init_symtbl(mrb_state*); -void mrb_init_class(mrb_state*); -void mrb_init_object(mrb_state*); -void mrb_init_kernel(mrb_state*); -void mrb_init_comparable(mrb_state*); -void mrb_init_enumerable(mrb_state*); -void mrb_init_symbol(mrb_state*); -void mrb_init_string(mrb_state*); -void mrb_init_exception(mrb_state*); -void mrb_init_proc(mrb_state*); -void mrb_init_array(mrb_state*); -void mrb_init_hash(mrb_state*); -void mrb_init_numeric(mrb_state*); -void mrb_init_range(mrb_state*); -void mrb_init_gc(mrb_state*); -void mrb_init_math(mrb_state*); -void mrb_init_version(mrb_state*); -void mrb_init_mrblib(mrb_state*); +#define INIT_FUNC_FOREACH(def) \ + def(mrb_init_symtbl) \ + def(mrb_init_class) \ + def(mrb_init_object) \ + def(mrb_init_kernel) \ + def(mrb_init_enumerable) \ + def(mrb_init_symbol) \ + def(mrb_init_string) \ + def(mrb_init_exception) \ + def(mrb_init_proc) \ + def(mrb_init_array) \ + def(mrb_init_hash) \ + def(mrb_init_numeric) \ + def(mrb_init_range) \ + def(mrb_init_gc) \ + def(mrb_init_version) \ + def(mrb_init_mrblib) + +#define INIT_FUNC_DECLS(func) void func(mrb_state*); +INIT_FUNC_FOREACH(INIT_FUNC_DECLS) #define DONE mrb_gc_arena_restore(mrb, 0); void mrb_init_core(mrb_state *mrb) { - mrb_init_symtbl(mrb); DONE; - - mrb_init_class(mrb); DONE; - mrb_init_object(mrb); DONE; - mrb_init_kernel(mrb); DONE; - mrb_init_comparable(mrb); DONE; - mrb_init_enumerable(mrb); DONE; - - mrb_init_symbol(mrb); DONE; - mrb_init_string(mrb); DONE; - mrb_init_exception(mrb); DONE; - mrb_init_proc(mrb); DONE; - mrb_init_array(mrb); DONE; - mrb_init_hash(mrb); DONE; - mrb_init_numeric(mrb); DONE; - mrb_init_range(mrb); DONE; - mrb_init_gc(mrb); DONE; - mrb_init_version(mrb); DONE; - mrb_init_mrblib(mrb); DONE; +#define INIT_FUNC_CALL(func) func(mrb); DONE; + INIT_FUNC_FOREACH(INIT_FUNC_CALL) } +#undef DONE diff --git a/src/kernel.c b/src/kernel.c index 73e28f4689..98be70cd5b 100644 --- a/src/kernel.c +++ b/src/kernel.c @@ -14,8 +14,11 @@ #include #include #include -#include +/* + * Checks if the method `mid` for object `obj` is implemented by + * the C function `func`. + */ MRB_API mrb_bool mrb_func_basic_p(mrb_state *mrb, mrb_value obj, mrb_sym mid, mrb_func_t func) { @@ -38,15 +41,59 @@ mrb_obj_basic_to_s_p(mrb_state *mrb, mrb_value obj) return mrb_func_basic_p(mrb, obj, MRB_SYM(to_s), mrb_any_to_s); } +struct inspect_i { + mrb_value obj, str; +}; + +static int +inspect_i(mrb_state *mrb, mrb_sym sym, mrb_value v, void *p) +{ + struct inspect_i *a = (struct inspect_i*)p; + if (mrb_nil_p(a->str)) { + const char *cn = mrb_obj_classname(mrb, a->obj); + a->str = mrb_str_new_capa(mrb, 30); + + mrb_str_cat_lit(mrb, a->str, "-<"); + mrb_str_cat_cstr(mrb, a->str, cn); + mrb_str_cat_lit(mrb, a->str, ":"); + mrb_str_cat_str(mrb, a->str, mrb_ptr_to_str(mrb, mrb_obj_ptr(a->obj))); + + if (MRB_RECURSIVE_UNARY_P(mrb, MRB_SYM(inspect), a->obj)) { + mrb_str_cat_lit(mrb, a->str, " ..."); + return 1; + } + } + + char *sp = RSTRING_PTR(a->str); + + /* need not to show internal data */ + if (sp[0] == '-') { /* first element */ + sp[0] = '#'; + mrb_str_cat_lit(mrb, a->str, " "); + } + else { + mrb_str_cat_lit(mrb, a->str, ", "); + } + + mrb_int len; + const char *s = mrb_sym_name_len(mrb, sym, &len); + mrb_str_cat(mrb, a->str, s, len); + mrb_str_cat_lit(mrb, a->str, "="); + + mrb_value ins = mrb_inspect(mrb, v); + mrb_str_cat_str(mrb, a->str, ins); + return 0; +} + /* 15.3.1.3.17 */ /* * call-seq: * obj.inspect -> string * * Returns a string containing a human-readable representation of - * obj. If not overridden and no instance variables, uses the - * to_s method to generate the string. - * obj. If not overridden, uses the to_s method to + * *obj*. If not overridden and no instance variables, uses the + * `to_s` method to generate the string. + * *obj*. If not overridden, uses the `to_s` method to * generate the string. * * [ 1, 2, 3..4, 'five' ].inspect #=> "[1, 2, 3..4, \"five\"]" @@ -56,7 +103,13 @@ MRB_API mrb_value mrb_obj_inspect(mrb_state *mrb, mrb_value obj) { if (mrb_object_p(obj) && mrb_obj_basic_to_s_p(mrb, obj)) { - return mrb_obj_iv_inspect(mrb, mrb_obj_ptr(obj)); + struct inspect_i a = { obj, mrb_nil_value() }; + mrb_iv_foreach(mrb, obj, inspect_i, &a); + if (!mrb_nil_p(a.str)) { + mrb_assert(mrb_string_p(a.str)); + mrb_str_cat_lit(mrb, a.str, ">"); + return a.str; + } } return mrb_any_to_s(mrb, obj); } @@ -66,18 +119,97 @@ mrb_obj_inspect(mrb_state *mrb, mrb_value obj) * call-seq: * obj === other -> true or false * - * Case Equality---For class Object, effectively the same - * as calling #==, but typically overridden by descendants - * to provide meaningful semantics in case statements. + * Case Equality---For class `Object`, effectively the same + * as calling `#==`, but typically overridden by descendants + * to provide meaningful semantics in `case` statements. */ static mrb_value -mrb_equal_m(mrb_state *mrb, mrb_value self) +mrb_eqq_m(mrb_state *mrb, mrb_value self) { mrb_value arg = mrb_get_arg1(mrb); return mrb_bool_value(mrb_equal(mrb, self, arg)); } +static mrb_value +mrb_cmp_m(mrb_state *mrb, mrb_value self) +{ + mrb_value arg = mrb_get_arg1(mrb); + + /* recursion check */ + for (mrb_callinfo *ci=&mrb->c->ci[-1]; ci>=mrb->c->cibase; ci--) { + if (ci->mid == MRB_OPSYM(cmp) && + mrb_obj_eq(mrb, self, ci->stack[0]) && + mrb_obj_eq(mrb, arg, ci->stack[1])) { + /* recursive <=> calling returns `nil` */ + return mrb_nil_value(); + } + } + + if (mrb_equal(mrb, self, arg)) + return mrb_fixnum_value(0); + return mrb_nil_value(); +} + +MRB_API mrb_bool +mrb_recursive_method_p(mrb_state *mrb, mrb_sym mid, mrb_value obj1, mrb_value obj2) +{ + for (mrb_callinfo *ci=&mrb->c->ci[-1]; ci>=mrb->c->cibase; ci--) { + if (ci->mid == mid && mrb_obj_eq(mrb, obj1, ci->stack[0])) { + /* For unary methods, only check first argument */ + if (mrb_nil_p(obj2)) return TRUE; + + /* For binary methods, check both arguments */ + if (mrb_obj_eq(mrb, obj2, ci->stack[1])) return TRUE; + } + } + return FALSE; +} + +/** + * Check if a C function call is recursive. + * + * Like mrb_recursive_method_p, but starts from ci[-2] to skip the immediate + * parent frame. Use this from C functions implementing Ruby methods that can + * be overridden with super calls. + */ +MRB_API mrb_bool +mrb_recursive_func_p(mrb_state *mrb, mrb_sym mid, mrb_value obj1, mrb_value obj2) +{ + /* Start from ci[-2] to skip immediate parent frame which may be a + Ruby override calling super */ + for (mrb_callinfo *ci=&mrb->c->ci[-2]; ci>=mrb->c->cibase; ci--) { + if (ci->mid == mid && mrb_obj_eq(mrb, obj1, ci->stack[0])) { + /* For unary methods, only check first argument */ + if (mrb_nil_p(obj2)) return TRUE; + + /* For binary methods, check both arguments */ + if (mrb_obj_eq(mrb, obj2, ci->stack[1])) return TRUE; + } + } + return FALSE; +} + +static mrb_value +mrb_obj_method_recursive_p(mrb_state *mrb, mrb_value obj) +{ + mrb_sym mid; + mrb_value arg2 = mrb_nil_value(); + mrb_int argc = mrb_get_args(mrb, "n|o", &mid, &arg2); + + /* Use frame-skipping version for Ruby method calls */ + for (mrb_callinfo *ci=&mrb->c->ci[-2]; ci>=mrb->c->cibase; ci--) { + if (ci->mid == mid && mrb_obj_eq(mrb, obj, ci->stack[0])) { + /* For unary methods, only check first argument */ + if (argc == 1 || mrb_nil_p(arg2)) return mrb_true_value(); + + /* For binary methods, check both arguments */ + if (mrb_obj_eq(mrb, arg2, ci->stack[1])) return mrb_true_value(); + } + } + return mrb_false_value(); +} + /* 15.3.1.3.3 */ /* 15.3.1.3.33 */ /* @@ -88,12 +220,12 @@ mrb_equal_m(mrb_state *mrb, mrb_value self) * obj.__id__ -> int * obj.object_id -> int * - * Returns an integer identifier for obj. The same number will - * be returned on all calls to id for a given object, and + * Returns an integer identifier for *obj*. The same number will + * be returned on all calls to `id` for a given object, and * no two active objects will share an id. - * Object#object_id is a different concept from the - * :name notation, which returns the symbol id of - * name. Replaces the deprecated Object#id. + * `Object#object_id` is a different concept from the + * `:name` notation, which returns the symbol id of + * `name`. Replaces the deprecated `Object#id`. */ mrb_value mrb_obj_id_m(mrb_state *mrb, mrb_value self) @@ -122,8 +254,8 @@ env_bidx(struct REnv *e) * block_given? -> true or false * iterator? -> true or false * - * Returns true if yield would execute a - * block in the current context. The iterator? form + * Returns `true` if `yield` would execute a + * block in the current context. The `iterator?` form * is mildly deprecated. * * def try @@ -203,8 +335,8 @@ mrb_f_block_given_p_m(mrb_state *mrb, mrb_value self) * call-seq: * obj.class -> class * - * Returns the class of obj. This method must always be - * called with an explicit receiver, as class is also a + * Returns the class of *obj*. This method must always be + * called with an explicit receiver, as `class` is also a * reserved word in Ruby. * * 1.class #=> Integer @@ -216,14 +348,18 @@ mrb_obj_class_m(mrb_state *mrb, mrb_value self) return mrb_obj_value(mrb_obj_class(mrb, self)); } +/* + * Freezes the object `self`, preventing further modifications. + * Immediate values cannot be frozen. + */ MRB_API mrb_value mrb_obj_freeze(mrb_state *mrb, mrb_value self) { if (!mrb_immediate_p(self)) { struct RBasic *b = mrb_basic_ptr(self); if (!mrb_frozen_p(b)) { - MRB_SET_FROZEN_FLAG(b); - if (b->c->tt == MRB_TT_SCLASS) MRB_SET_FROZEN_FLAG(b->c); + b->frozen = 1; + if (b->c->tt == MRB_TT_SCLASS) b->c->frozen = 1; } } return self; @@ -240,11 +376,11 @@ mrb_obj_frozen(mrb_state *mrb, mrb_value self) * call-seq: * obj.hash -> int * - * Generates a Integer hash value for this object. This - * function must have the property that a.eql?(b) implies - * a.hash == b.hash. The hash value is used by class - * Hash. Any hash value that exceeds the capacity of a - * Integer will be truncated before being used. + * Generates a `Integer` hash value for this object. This + * function must have the property that `a.eql?(b)` implies + * `a.hash == b.hash`. The hash value is used by class + * `Hash`. Any hash value that exceeds the capacity of a + * `Integer` will be truncated before being used. */ static mrb_value mrb_obj_hash(mrb_state *mrb, mrb_value self) @@ -270,8 +406,11 @@ mrb_obj_init_copy(mrb_state *mrb, mrb_value self) return self; } +/* + * Checks if the object `obj` is an instance of the class `c`. + */ MRB_API mrb_bool -mrb_obj_is_instance_of(mrb_state *mrb, mrb_value obj, struct RClass* c) +mrb_obj_is_instance_of(mrb_state *mrb, mrb_value obj, const struct RClass* c) { if (mrb_obj_class(mrb, obj) == c) return TRUE; return FALSE; @@ -282,8 +421,8 @@ mrb_obj_is_instance_of(mrb_state *mrb, mrb_value obj, struct RClass* c) * call-seq: * obj.instance_of?(class) -> true or false * - * Returns true if obj is an instance of the given - * class. See also Object#kind_of?. + * Returns `true` if *obj* is an instance of the given + * class. See also `Object#kind_of?`. */ static mrb_value obj_is_instance_of(mrb_state *mrb, mrb_value self) @@ -302,9 +441,9 @@ obj_is_instance_of(mrb_state *mrb, mrb_value self) * obj.is_a?(class) -> true or false * obj.kind_of?(class) -> true or false * - * Returns true if class is the class of - * obj, or if class is one of the superclasses of - * obj or modules included in obj. + * Returns `true` if *class* is the class of + * *obj*, or if *class* is one of the superclasses of + * *obj* or modules included in *obj*. * * module M; end * class A @@ -338,7 +477,7 @@ mrb_obj_is_kind_of_m(mrb_state *mrb, mrb_value self) * nil.nil? -> true * .nil? -> false * - * Only the object nil responds true to nil?. + * Only the object *nil* responds `true` to `nil?`. */ static mrb_value mrb_false(mrb_state *mrb, mrb_value self) @@ -354,40 +493,41 @@ mrb_false(mrb_state *mrb, mrb_value self) * raise(string) * raise(exception [, string]) * - * With no arguments, raises a RuntimeError - * With a single +String+ argument, raises a - * +RuntimeError+ with the string as a message. Otherwise, - * the first parameter should be the name of an +Exception+ - * class (or an object that returns an +Exception+ object when sent - * an +exception+ message). The optional second parameter sets the + * With no arguments, raises a `RuntimeError` + * With a single `String` argument, raises a + * `RuntimeError` with the string as a message. Otherwise, + * the first parameter should be the name of an `Exception` + * class (or an object that returns an `Exception` object when sent + * an `exception` message). The optional second parameter sets the * message associated with the exception, and the third parameter is an * array of callback information. Exceptions are caught by the - * +rescue+ clause of begin...end blocks. + * `rescue` clause of `begin...end` blocks. * * raise "Failed to create socket" * raise ArgumentError, "No parameters", caller */ -MRB_API mrb_value +mrb_value mrb_f_raise(mrb_state *mrb, mrb_value self) { - mrb_value a[2], exc; - mrb_int argc; + mrb_value exc, mesg; + mrb_int argc = mrb_get_args(mrb, "|oo", &exc, &mesg); - argc = mrb_get_args(mrb, "|oo", &a[0], &a[1]); mrb->c->ci->mid = 0; switch (argc) { case 0: mrb_raise(mrb, E_RUNTIME_ERROR, ""); break; case 1: - if (mrb_string_p(a[0])) { - a[1] = a[0]; - argc = 2; - a[0] = mrb_obj_value(E_RUNTIME_ERROR); + if (mrb_string_p(exc)) { + mesg = exc; + exc = mrb_obj_value(E_RUNTIME_ERROR); + } + else { + mesg = mrb_nil_value(); } /* fall through */ default: - exc = mrb_make_exception(mrb, argc, a); + exc = mrb_make_exception(mrb, exc, mesg); mrb_exc_raise(mrb, exc); break; } @@ -399,7 +539,7 @@ mrb_f_raise(mrb_state *mrb, mrb_value self) * call-seq: * obj.remove_instance_variable(symbol) -> obj * - * Removes the named instance variable from obj, returning that + * Removes the named instance variable from *obj*, returning that * variable's value. * * class Dummy @@ -420,54 +560,44 @@ static mrb_value mrb_obj_remove_instance_variable(mrb_state *mrb, mrb_value self) { mrb_sym sym; - mrb_value val; mrb_get_args(mrb, "n", &sym); mrb_iv_name_sym_check(mrb, sym); - val = mrb_iv_remove(mrb, self, sym); + mrb_value val = mrb_iv_remove(mrb, self, sym); if (mrb_undef_p(val)) { mrb_name_error(mrb, sym, "instance variable %n not defined", sym); } return val; } -static inline mrb_bool -basic_obj_respond_to(mrb_state *mrb, mrb_value obj, mrb_sym id, int pub) -{ - return mrb_respond_to(mrb, obj, id); -} - /* 15.3.1.3.43 */ /* * call-seq: * obj.respond_to?(symbol, include_private=false) -> true or false * - * Returns +true+ if _obj_ responds to the given + * Returns `true` if _obj_ responds to the given * method. Private methods are included in the search only if the - * optional second parameter evaluates to +true+. + * optional second parameter evaluates to `true`. * * If the method is not implemented, * as Process.fork on Windows, File.lchmod on GNU/Linux, etc., * false is returned. * - * If the method is not defined, respond_to_missing? + * If the method is not defined, `respond_to_missing?` * method is called and the result is returned. */ static mrb_value obj_respond_to(mrb_state *mrb, mrb_value self) { - mrb_sym id, rtm_id; + mrb_sym id; mrb_bool priv = FALSE, respond_to_p; mrb_get_args(mrb, "n|b", &id, &priv); - respond_to_p = basic_obj_respond_to(mrb, self, id, !priv); + respond_to_p = mrb_respond_to(mrb, self, id); if (!respond_to_p) { - rtm_id = MRB_SYM_Q(respond_to_missing); - if (basic_obj_respond_to(mrb, self, rtm_id, !priv)) { - mrb_value args[2], v; - args[0] = mrb_symbol_value(id); - args[1] = mrb_bool_value(priv); - v = mrb_funcall_argv(mrb, self, rtm_id, 2, args); + mrb_sym rtm_id = MRB_SYM_Q(respond_to_missing); + if (!mrb_func_basic_p(mrb, self, rtm_id, mrb_false)) { + mrb_value v = mrb_funcall_argv2(mrb, self, rtm_id, mrb_symbol_value(id), mrb_bool_value(priv)); return mrb_bool_value(mrb_bool(v)); } } @@ -478,7 +608,6 @@ static mrb_value mrb_obj_ceqq(mrb_state *mrb, mrb_value self) { mrb_value v = mrb_get_arg1(mrb); - mrb_int i, len; mrb_sym eqq = MRB_OPSYM(eqq); mrb_value ary; @@ -495,69 +624,94 @@ mrb_obj_ceqq(mrb_state *mrb, mrb_value self) return mrb_false_value(); } else { - ary = mrb_funcall_id(mrb, self, MRB_SYM(to_a), 0); + ary = mrb_funcall_argv(mrb, self, MRB_SYM(to_a), 0, NULL); if (mrb_nil_p(ary)) { return mrb_funcall_argv(mrb, self, eqq, 1, &v); } mrb_ensure_array_type(mrb, ary); } - len = RARRAY_LEN(ary); - for (i=0; ikernel_module = krn = mrb_define_module(mrb, "Kernel"); /* 15.3.1 */ - mrb_define_class_method(mrb, krn, "block_given?", mrb_f_block_given_p_m, MRB_ARGS_NONE()); /* 15.3.1.2.2 */ - mrb_define_class_method(mrb, krn, "iterator?", mrb_f_block_given_p_m, MRB_ARGS_NONE()); /* 15.3.1.2.5 */ - mrb_define_class_method(mrb, krn, "raise", mrb_f_raise, MRB_ARGS_OPT(2)); /* 15.3.1.2.12 */ - - - mrb_define_method(mrb, krn, "===", mrb_equal_m, MRB_ARGS_REQ(1)); /* 15.3.1.3.2 */ - mrb_define_method(mrb, krn, "block_given?", mrb_f_block_given_p_m, MRB_ARGS_NONE()); /* 15.3.1.3.6 */ - mrb_define_method(mrb, krn, "class", mrb_obj_class_m, MRB_ARGS_NONE()); /* 15.3.1.3.7 */ - mrb_define_method(mrb, krn, "clone", mrb_obj_clone, MRB_ARGS_NONE()); /* 15.3.1.3.8 */ - mrb_define_method(mrb, krn, "dup", mrb_obj_dup, MRB_ARGS_NONE()); /* 15.3.1.3.9 */ - mrb_define_method(mrb, krn, "eql?", mrb_obj_equal_m, MRB_ARGS_REQ(1)); /* 15.3.1.3.10 */ - mrb_define_method(mrb, krn, "freeze", mrb_obj_freeze, MRB_ARGS_NONE()); - mrb_define_method(mrb, krn, "frozen?", mrb_obj_frozen, MRB_ARGS_NONE()); - mrb_define_method(mrb, krn, "hash", mrb_obj_hash, MRB_ARGS_NONE()); /* 15.3.1.3.15 */ - mrb_define_method(mrb, krn, "initialize_copy", mrb_obj_init_copy, MRB_ARGS_REQ(1)); /* 15.3.1.3.16 */ - mrb_define_method(mrb, krn, "inspect", mrb_obj_inspect, MRB_ARGS_NONE()); /* 15.3.1.3.17 */ - mrb_define_method(mrb, krn, "instance_of?", obj_is_instance_of, MRB_ARGS_REQ(1)); /* 15.3.1.3.19 */ - - mrb_define_method(mrb, krn, "is_a?", mrb_obj_is_kind_of_m, MRB_ARGS_REQ(1)); /* 15.3.1.3.24 */ - mrb_define_method(mrb, krn, "iterator?", mrb_f_block_given_p_m, MRB_ARGS_NONE()); /* 15.3.1.3.25 */ - mrb_define_method(mrb, krn, "kind_of?", mrb_obj_is_kind_of_m, MRB_ARGS_REQ(1)); /* 15.3.1.3.26 */ - mrb_define_method(mrb, krn, "nil?", mrb_false, MRB_ARGS_NONE()); /* 15.3.1.3.32 */ - mrb_define_method(mrb, krn, "object_id", mrb_obj_id_m, MRB_ARGS_NONE()); /* 15.3.1.3.33 */ - mrb_define_method(mrb, krn, "raise", mrb_f_raise, MRB_ARGS_ANY()); /* 15.3.1.3.40 */ - mrb_define_method(mrb, krn, "remove_instance_variable", mrb_obj_remove_instance_variable,MRB_ARGS_REQ(1)); /* 15.3.1.3.41 */ - mrb_define_method(mrb, krn, "respond_to?", obj_respond_to, MRB_ARGS_ARG(1,1)); /* 15.3.1.3.43 */ - mrb_define_method(mrb, krn, "to_s", mrb_any_to_s, MRB_ARGS_NONE()); /* 15.3.1.3.46 */ - mrb_define_method(mrb, krn, "__case_eqq", mrb_obj_ceqq, MRB_ARGS_REQ(1)); /* internal */ - mrb_define_method(mrb, krn, "__to_int", mrb_ensure_int_type, MRB_ARGS_NONE()); /* internal */ - mrb_define_method(mrb, krn, "__ENCODING__", mrb_encoding, MRB_ARGS_NONE()); + mrb->kernel_module = krn = mrb_define_module_id(mrb, MRB_SYM(Kernel)); /* 15.3.1 */ +#if 0 + mrb_define_class_method_id(mrb, krn, MRB_SYM_Q(block_given), mrb_f_block_given_p_m, MRB_ARGS_NONE()); /* 15.3.1.2.2 */ + mrb_define_class_method_id(mrb, krn, MRB_SYM_Q(iterator), mrb_f_block_given_p_m, MRB_ARGS_NONE()); /* 15.3.1.2.5 */ +#endif + mrb_define_class_method_id(mrb, krn, MRB_SYM(raise), mrb_f_raise, MRB_ARGS_OPT(2)); /* 15.3.1.2.12 */ + + MRB_MT_INIT_ROM(mrb, krn, kernel_rom_entries); mrb_include_module(mrb, mrb->object_class, mrb->kernel_module); } diff --git a/src/load.c b/src/load.c index 1916c6c9d9..f106c7e6ec 100644 --- a/src/load.c +++ b/src/load.c @@ -59,6 +59,95 @@ str_to_double(mrb_state *mrb, const char *p) } #endif +#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((size_t)(a) - 1)) + +/* + * Lightweight prescan of pool/syms binary data to learn plen and slen + * without allocating. Must stay in sync with the pool/syms parsing + * in read_irep_record_1(). + */ +static mrb_bool +prescan_pool_syms(const uint8_t *src, const uint8_t *end, uint16_t *plenp, uint16_t *slenp) +{ + uint16_t plen; + int i; + + if (src + sizeof(uint16_t) > end) return FALSE; + plen = bin_to_uint16(src); + src += sizeof(uint16_t); + + for (i = 0; i < plen; i++) { + if (src >= end) return FALSE; + switch (*src++) { + case IREP_TT_INT32: + src += sizeof(uint32_t); + break; + case IREP_TT_INT64: + src += sizeof(uint32_t) * 2; + break; + case IREP_TT_BIGINT: + if (src >= end) return FALSE; + src += bin_to_uint8(src) + 2; + break; + case IREP_TT_FLOAT: + src += sizeof(double); + break; + case IREP_TT_STR: + if (src + sizeof(uint16_t) > end) return FALSE; + src += sizeof(uint16_t) + bin_to_uint16(src) + 1; + break; + default: + return FALSE; + } + if (src > end) return FALSE; + } + + if (src + sizeof(uint16_t) > end) return FALSE; + *plenp = plen; + *slenp = bin_to_uint16(src); + return TRUE; +} + +/* + * Single consolidated allocation for irep struct + pool + syms + reps arrays. + * Memory layout: [mrb_irep] [mrb_irep_pool[plen]] [mrb_irep*[rlen]] [mrb_sym[slen]] + * Ordered by descending alignment to minimize inter-array padding. + */ +static mrb_irep* +irep_alloc_consolidated(mrb_state *mrb, uint16_t plen, uint16_t slen, uint16_t rlen) +{ + size_t off = sizeof(mrb_irep); + size_t pool_off = 0, syms_off = 0, reps_off = 0; + uint8_t *block; + mrb_irep *irep; + + /* pool (8-byte aligned: contains int64_t/double in union) */ + if (plen > 0) { + pool_off = ALIGN_UP(off, 8); + off = pool_off + sizeof(mrb_irep_pool) * plen; + } + /* reps (pointer-aligned: naturally follows 8-byte-aligned pool) */ + if (rlen > 0) { + reps_off = ALIGN_UP(off, sizeof(void*)); + off = reps_off + sizeof(mrb_irep*) * rlen; + } + /* syms (4-byte aligned: naturally follows pointer-aligned reps) */ + if (slen > 0) { + syms_off = ALIGN_UP(off, sizeof(mrb_sym)); + off = syms_off + sizeof(mrb_sym) * slen; + } + + block = (uint8_t*)mrb_calloc(mrb, 1, off); + irep = (mrb_irep*)block; + irep->flags = MRB_IREP_CONSOLIDATED; + irep->refcnt = 1; + if (plen > 0) irep->pool = (const mrb_irep_pool*)(block + pool_off); + if (slen > 0) irep->syms = (const mrb_sym*)(block + syms_off); + if (rlen > 0) irep->reps = (const struct mrb_irep *const*)(block + reps_off); + + return irep; +} + static mrb_bool read_irep_record_1(mrb_state *mrb, const uint8_t *bin, const uint8_t *end, size_t *len, uint8_t flags, mrb_irep **irepp) { @@ -66,43 +155,58 @@ read_irep_record_1(mrb_state *mrb, const uint8_t *bin, const uint8_t *end, size_ const uint8_t *src = bin; ptrdiff_t diff; uint16_t tt, pool_data_len, snl; + uint16_t nlocals, nregs, rlen, clen; + uint32_t ilen; int plen; - mrb_pool_value *pool; + mrb_irep_pool *pool; mrb_sym *syms; + mrb_irep *irep; int ai = mrb_gc_arena_save(mrb); - mrb_irep *irep = mrb_add_irep(mrb); - - *irepp = irep; /* skip record size */ src += sizeof(uint32_t); - /* number of local variable */ - irep->nlocals = bin_to_uint16(src); + /* parse header into local variables */ + nlocals = bin_to_uint16(src); src += sizeof(uint16_t); - - /* number of register variable */ - irep->nregs = bin_to_uint16(src); + nregs = bin_to_uint16(src); src += sizeof(uint16_t); - - /* number of child irep */ - irep->rlen = bin_to_uint16(src); + rlen = bin_to_uint16(src); src += sizeof(uint16_t); /* Binary Data Section */ /* ISEQ BLOCK (and CATCH HANDLER TABLE BLOCK) */ - irep->clen = bin_to_uint16(src); /* number of catch handler */ + clen = bin_to_uint16(src); /* number of catch handler */ src += sizeof(uint16_t); - irep->ilen = bin_to_uint32(src); + ilen = bin_to_uint32(src); src += sizeof(uint32_t); + /* prescan pool/syms to learn counts for consolidated allocation */ + { + const uint8_t *pool_start = src; + uint16_t pre_plen, pre_slen; + if (ilen > 0) { + size_t iseq_len; + if (SIZE_ERROR_MUL(ilen, sizeof(mrb_code))) return FALSE; + iseq_len = sizeof(mrb_code) * ilen + + sizeof(struct mrb_irep_catch_handler) * clen; + if (src + iseq_len > end) return FALSE; + pool_start = src + iseq_len; + } + if (!prescan_pool_syms(pool_start, end, &pre_plen, &pre_slen)) return FALSE; + irep = irep_alloc_consolidated(mrb, pre_plen, pre_slen, rlen); + } + *irepp = irep; + irep->nlocals = nlocals; + irep->nregs = nregs; + irep->rlen = rlen; + irep->clen = clen; + irep->ilen = ilen; + if (irep->ilen > 0) { size_t data_len = sizeof(mrb_code) * irep->ilen + sizeof(struct mrb_irep_catch_handler) * irep->clen; mrb_static_assert(sizeof(struct mrb_irep_catch_handler) == 13); - if (SIZE_ERROR_MUL(irep->ilen, sizeof(mrb_code))) { - return FALSE; - } if (src + data_len > end) return FALSE; if ((flags & FLAG_SRC_MALLOC) == 0) { irep->iseq = (mrb_code*)src; @@ -110,7 +214,7 @@ read_irep_record_1(mrb_state *mrb, const uint8_t *bin, const uint8_t *end, size_ } else { void *buf = mrb_malloc(mrb, data_len); - irep->iseq = (mrb_code *)buf; + irep->iseq = (mrb_code*)buf; memcpy(buf, src, data_len); } src += data_len; @@ -121,10 +225,7 @@ read_irep_record_1(mrb_state *mrb, const uint8_t *bin, const uint8_t *end, size_ src += sizeof(uint16_t); if (src > end) return FALSE; if (plen > 0) { - if (SIZE_ERROR_MUL(plen, sizeof(mrb_value))) { - return FALSE; - } - irep->pool = pool = (mrb_pool_value*)mrb_calloc(mrb, sizeof(mrb_pool_value), plen); + pool = (mrb_irep_pool*)irep->pool; /* in consolidated block */ for (i = 0; i < plen; i++) { mrb_bool st = (flags & FLAG_SRC_MALLOC)==0; @@ -189,7 +290,7 @@ read_irep_record_1(mrb_state *mrb, const uint8_t *bin, const uint8_t *end, size_ case IREP_TT_STR: pool_data_len = bin_to_uint16(src); /* pool data length */ src += sizeof(uint16_t); - if (src + pool_data_len > end) return FALSE; + if (src + pool_data_len + 1 > end) return FALSE; if (st) { pool[i].tt = (pool_data_len<<2) | IREP_TT_SSTR; pool[i].u.str = (const char*)src; @@ -217,10 +318,7 @@ read_irep_record_1(mrb_state *mrb, const uint8_t *bin, const uint8_t *end, size_ src += sizeof(uint16_t); if (src > end) return FALSE; if (irep->slen > 0) { - if (SIZE_ERROR_MUL(irep->slen, sizeof(mrb_sym))) { - return FALSE; - } - irep->syms = syms = (mrb_sym *)mrb_malloc(mrb, sizeof(mrb_sym) * irep->slen); + syms = (mrb_sym*)irep->syms; /* in consolidated block */ for (i = 0; i < irep->slen; i++) { snl = bin_to_uint16(src); /* symbol name length */ @@ -231,12 +329,12 @@ read_irep_record_1(mrb_state *mrb, const uint8_t *bin, const uint8_t *end, size_ continue; } - if (src + snl > end) return FALSE; + if (src + snl + 1 > end) return FALSE; if (flags & FLAG_SRC_MALLOC) { - syms[i] = mrb_intern(mrb, (char *)src, snl); + syms[i] = mrb_intern(mrb, (char*)src, snl); } else { - syms[i] = mrb_intern_static(mrb, (char *)src, snl); + syms[i] = mrb_intern_static(mrb, (char*)src, snl); } src += snl + 1; mrb_gc_arena_restore(mrb, ai); @@ -263,8 +361,7 @@ read_irep_record(mrb_state *mrb, const uint8_t *bin, const uint8_t *end, size_t return FALSE; } - reps = (mrb_irep**)mrb_calloc(mrb, (*irepp)->rlen, sizeof(mrb_irep*)); - (*irepp)->reps = (const mrb_irep**)reps; + reps = (mrb_irep**)(*irepp)->reps; /* in consolidated block */ bin += *len; for (i=0; i<(*irepp)->rlen; i++) { @@ -326,12 +423,12 @@ read_debug_record(mrb_state *mrb, const uint8_t *start, const uint8_t *end, mrb_ if (bin > end) return MRB_DUMP_GENERAL_FAILURE; debug->files = (mrb_irep_debug_info_file**)mrb_calloc(mrb, irep->debug_info->flen, sizeof(mrb_irep_debug_info*)); - for (f_idx = 0; f_idx < debug->flen; ++f_idx) { + for (f_idx = 0; f_idx < debug->flen; f_idx++) { mrb_irep_debug_info_file *file; uint16_t filename_idx; if (bin > end) return MRB_DUMP_GENERAL_FAILURE; - file = (mrb_irep_debug_info_file *)mrb_calloc(mrb, 1, sizeof(*file)); + file = (mrb_irep_debug_info_file*)mrb_calloc(mrb, 1, sizeof(*file)); debug->files[f_idx] = file; file->start_pos = bin_to_uint32(bin); @@ -352,8 +449,8 @@ read_debug_record(mrb_state *mrb, const uint8_t *start, const uint8_t *end, mrb_ size_t l = sizeof(uint16_t) * (size_t)file->line_entry_count; if (bin + l > end) return MRB_DUMP_GENERAL_FAILURE; - uint16_t *ary = (uint16_t *)mrb_malloc(mrb, l); - for (l = 0; l < file->line_entry_count; ++l) { + uint16_t *ary = (uint16_t*)mrb_malloc(mrb, l); + for (l = 0; l < file->line_entry_count; l++) { ary[l] = bin_to_uint16(bin); bin += sizeof(uint16_t); } @@ -366,7 +463,7 @@ read_debug_record(mrb_state *mrb, const uint8_t *start, const uint8_t *end, mrb_ if (bin + c*n > end) return MRB_DUMP_GENERAL_FAILURE; mrb_irep_debug_info_line *flat_map = (mrb_irep_debug_info_line*)mrb_calloc(mrb, c, n); - for (size_t l = 0; l < file->line_entry_count; ++l) { + for (size_t l = 0; l < file->line_entry_count; l++) { flat_map[l].start_pos = bin_to_uint32(bin); bin += sizeof(uint32_t); flat_map[l].line = bin_to_uint16(bin); @@ -415,27 +512,21 @@ read_debug_record(mrb_state *mrb, const uint8_t *start, const uint8_t *end, mrb_ static int read_section_debug(mrb_state *mrb, const uint8_t *start, size_t size, mrb_irep *irep, uint8_t flags) { - const uint8_t *bin; const uint8_t *end = start + size; ptrdiff_t diff; - struct rite_section_debug_header *header; - uint16_t i; size_t len = 0; int result; - uint16_t filenames_len; - mrb_sym *filenames; - mrb_value filenames_obj; - bin = start; - header = (struct rite_section_debug_header *)bin; + const uint8_t *bin = start; + struct rite_section_debug_header *header = (struct rite_section_debug_header*)bin; bin += sizeof(struct rite_section_debug_header); - filenames_len = bin_to_uint16(bin); + uint16_t filenames_len = bin_to_uint16(bin); bin += sizeof(uint16_t); if (bin > end) return MRB_DUMP_GENERAL_FAILURE; - filenames_obj = mrb_str_new(mrb, NULL, sizeof(mrb_sym) * (size_t)filenames_len); - filenames = (mrb_sym*)RSTRING_PTR(filenames_obj); - for (i = 0; i < filenames_len; ++i) { + mrb_value filenames_obj = mrb_str_new(mrb, NULL, sizeof(mrb_sym) * (size_t)filenames_len); + mrb_sym *filenames = (mrb_sym*)RSTRING_PTR(filenames_obj); + for (uint16_t i = 0; i < filenames_len; i++) { uint16_t f_len = bin_to_uint16(bin); bin += sizeof(uint16_t); if (bin + f_len > end) { @@ -443,10 +534,10 @@ read_section_debug(mrb_state *mrb, const uint8_t *start, size_t size, mrb_irep * goto debug_exit; } if (flags & FLAG_SRC_MALLOC) { - filenames[i] = mrb_intern(mrb, (const char *)bin, (size_t)f_len); + filenames[i] = mrb_intern(mrb, (const char*)bin, (size_t)f_len); } else { - filenames[i] = mrb_intern_static(mrb, (const char *)bin, (size_t)f_len); + filenames[i] = mrb_intern_static(mrb, (const char*)bin, (size_t)f_len); } bin += f_len; } @@ -470,14 +561,12 @@ static int read_lv_record(mrb_state *mrb, const uint8_t *start, mrb_irep *irep, size_t *record_len, mrb_sym const *syms, uint32_t syms_len) { const uint8_t *bin = start; - mrb_sym *lv; - ptrdiff_t diff; - int i; if (irep->nlocals == 0) return MRB_DUMP_GENERAL_FAILURE; - irep->lv = lv = (mrb_sym*)mrb_malloc(mrb, sizeof(mrb_sym) * (irep->nlocals - 1)); + mrb_sym *lv = (mrb_sym*)mrb_malloc(mrb, sizeof(mrb_sym) * (irep->nlocals - 1)); + irep->lv = lv; - for (i = 0; i + 1 < irep->nlocals; ++i) { + for (int i = 0; i + 1 < irep->nlocals; i++) { uint16_t const sym_idx = bin_to_uint16(bin); bin += sizeof(uint16_t); if (sym_idx == RITE_LV_NULL_MARK) { @@ -491,7 +580,7 @@ read_lv_record(mrb_state *mrb, const uint8_t *start, mrb_irep *irep, size_t *rec } } - for (i = 0; i < irep->rlen; ++i) { + for (int i = 0; i < irep->rlen; i++) { size_t len; int ret; @@ -500,7 +589,7 @@ read_lv_record(mrb_state *mrb, const uint8_t *start, mrb_irep *irep, size_t *rec bin += len; } - diff = bin - start; + ptrdiff_t diff = bin - start; mrb_assert_int_fit(ptrdiff_t, diff, size_t, SIZE_MAX); *record_len = (size_t)diff; @@ -532,7 +621,7 @@ read_section_lv(mrb_state *mrb, const uint8_t *start, size_t size, mrb_irep *ire if (bin > end) return MRB_DUMP_READ_FAULT; syms_obj = mrb_str_new(mrb, NULL, sizeof(mrb_sym) * (size_t)syms_len); syms = (mrb_sym*)RSTRING_PTR(syms_obj); - for (i = 0; i < syms_len; ++i) { + for (i = 0; i < syms_len; i++) { uint16_t const str_len = bin_to_uint16(bin); bin += sizeof(uint16_t); if (bin > end) return MRB_DUMP_READ_FAULT; @@ -558,7 +647,7 @@ read_section_lv(mrb_state *mrb, const uint8_t *start, size_t size, mrb_irep *ire static int read_binary_header(const uint8_t *bin, size_t bufsize, size_t *bin_size, uint8_t *flags) { - const struct rite_binary_header *header = (const struct rite_binary_header *)bin; + const struct rite_binary_header *header = (const struct rite_binary_header*)bin; if (bufsize < sizeof(struct rite_binary_header)) { return MRB_DUMP_READ_FAULT; @@ -607,7 +696,7 @@ read_irep(mrb_state *mrb, const uint8_t *bin, size_t bufsize, uint8_t flags) bin += sizeof(struct rite_binary_header); bin_size -= sizeof(struct rite_binary_header); while (bin_size > sizeof(struct rite_section_header)) { - section_header = (const struct rite_section_header *)bin; + section_header = (const struct rite_section_header*)bin; uint32_t section_size = bin_to_uint32(section_header->section_size); if (bin_size < section_size) return NULL; if (memcmp(section_header->section_ident, RITE_SECTION_IREP_IDENT, sizeof(section_header->section_ident)) == 0) { @@ -654,7 +743,7 @@ DEFINE_READ_IREP_FUNC( static struct RProc* mrb_proc_read_irep_buf(mrb_state *mrb, const void *buf, size_t bufsize) { - return read_irep(mrb, (const uint8_t *)buf, bufsize, FLAG_SRC_MALLOC); + return read_irep(mrb, (const uint8_t*)buf, bufsize, FLAG_SRC_MALLOC); } DEFINE_READ_IREP_FUNC( @@ -669,10 +758,8 @@ irep_error(mrb_state *mrb) mrb_exc_set(mrb, mrb_exc_new_lit(mrb, E_SCRIPT_ERROR, "irep load error")); } -void mrb_codedump_all(mrb_state*, struct RProc*); - static mrb_value -load_irep(mrb_state *mrb, struct RProc *proc, mrbc_context *c) +load_irep(mrb_state *mrb, struct RProc *proc, mrb_ccontext *c) { if (!proc || !proc->body.irep) { irep_error(mrb); @@ -685,7 +772,7 @@ load_irep(mrb_state *mrb, struct RProc *proc, mrbc_context *c) } MRB_API mrb_value -mrb_load_irep_cxt(mrb_state *mrb, const uint8_t *bin, mrbc_context *c) +mrb_load_irep_cxt(mrb_state *mrb, const uint8_t *bin, mrb_ccontext *c) { struct RProc *proc = mrb_proc_read_irep(mrb, bin); if (!proc) return mrb_undef_value(); @@ -693,7 +780,7 @@ mrb_load_irep_cxt(mrb_state *mrb, const uint8_t *bin, mrbc_context *c) } MRB_API mrb_value -mrb_load_irep_buf_cxt(mrb_state *mrb, const void *buf, size_t bufsize, mrbc_context *c) +mrb_load_irep_buf_cxt(mrb_state *mrb, const void *buf, size_t bufsize, mrb_ccontext *c) { return load_irep(mrb, mrb_proc_read_irep_buf(mrb, buf, bufsize), c); } @@ -757,7 +844,7 @@ DEFINE_READ_IREP_FUNC( mrb_proc_read_irep_file(mrb, fp)) MRB_API mrb_value -mrb_load_irep_file_cxt(mrb_state *mrb, FILE* fp, mrbc_context *c) +mrb_load_irep_file_cxt(mrb_state *mrb, FILE* fp, mrb_ccontext *c) { return load_irep(mrb, mrb_proc_read_irep_file(mrb, fp), c); } diff --git a/src/mempool.c b/src/mempool.c new file mode 100644 index 0000000000..651c2c2ed1 --- /dev/null +++ b/src/mempool.c @@ -0,0 +1,225 @@ +/* +** mempool.c - memory pool +** +** See Copyright Notice in mruby.h +*/ + +#include +#include +#include + +/* configuration section */ +/* allocated memory address should be multiple of POOL_ALIGNMENT */ +/* or undef it if alignment does not matter */ +#ifndef POOL_ALIGNMENT +#if INTPTR_MAX == INT64_MAX +#define POOL_ALIGNMENT 8 +#else +#define POOL_ALIGNMENT 4 +#endif +#endif +/* page size of memory pool */ +#ifndef POOL_PAGE_SIZE +#define POOL_PAGE_SIZE 16000 +#endif +/* end of configuration section */ + +/* Disable MSVC warning "C4200: nonstandard extension used: zero-sized array + * in struct/union" when in C++ mode */ +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4200) +#endif + +/* +** Represents a page in the memory pool. +*/ +struct mempool_page { + struct mempool_page *next; /* Pointer to the next page in the pool. */ + size_t offset; /* Current offset in the page for allocations. */ + size_t len; /* Total length of the page. */ + void *last; /* Pointer to the last allocation made from this page. */ + char page[]; /* Flexible array member for the actual page data. */ +}; + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +/* +** Represents a memory pool. +*/ +struct mempool { + struct mempool_page *pages; /* Pointer to the first page in the pool. */ +}; + +#ifndef TEST_POOL + +/* use mruby's memory allocator */ +#define malloc(s) mrb_basic_alloc_func(NULL, (s)) +#define free(p) mrb_basic_alloc_func((p), 0) + +#endif + +/* +** Calculates the padding needed to align a memory address. +** +** @param x The memory address/size to align. +** @return The padding needed. +*/ +#ifdef POOL_ALIGNMENT +# define ALIGN_PADDING(x) ((SIZE_MAX - (x) + 1) & (POOL_ALIGNMENT - 1)) +#else +# define ALIGN_PADDING(x) (0) +#endif + +/* +** Creates a new memory pool. +** +** @return A pointer to the new memory pool, or NULL if allocation fails. +*/ +MRB_API mempool* +mempool_open(void) +{ + mempool *pool = (mempool*)malloc(sizeof(struct mempool)); + + if (pool) { + pool->pages = NULL; + } + return pool; +} + +/* +** Closes a memory pool and frees all associated memory. +** +** @param pool A pointer to the memory pool to close. +*/ +MRB_API void +mempool_close(mempool *pool) +{ + struct mempool_page *page; + + if (!pool) return; + page = pool->pages; + while (page) { + struct mempool_page *tmp = page; + page = page->next; + free(tmp); + } + free(pool); +} + +/* +** Allocates a new page for the memory pool. +** +** @param pool A pointer to the memory pool. +** @param len The minimum size of the page. +** @return A pointer to the new page, or NULL if allocation fails. +*/ +static struct mempool_page* +page_alloc(mempool *pool, size_t len) +{ + if (len < POOL_PAGE_SIZE) + len = POOL_PAGE_SIZE; + + struct mempool_page *page = (struct mempool_page*)malloc(sizeof(struct mempool_page)+len); + if (page) { + page->offset = 0; + page->len = len; + } + + return page; +} + +/* +** Allocates memory from the memory pool. +** +** @param pool A pointer to the memory pool. +** @param len The size of memory to allocate. +** @return A pointer to the allocated memory, or NULL if allocation fails. +*/ +MRB_API void* +mempool_alloc(mempool *pool, size_t len) +{ + struct mempool_page *page; + + if (!pool) return NULL; + len += ALIGN_PADDING(len); + for (page = pool->pages; page; page = page->next) { + if (page->offset + len <= page->len) { + size_t n = page->offset; + page->offset += len; + page->last = (void*)(page->page+n); + return page->last; + } + } + page = page_alloc(pool, len); + if (!page) return NULL; + page->offset = len; + page->next = pool->pages; + pool->pages = page; + + page->last = (void*)page->page; + return page->last; +} + +/* +** Reallocates memory from the memory pool. +** +** @param pool A pointer to the memory pool. +** @param p A pointer to the previously allocated memory. +** @param oldlen The old size of the memory. +** @param newlen The new size of the memory. +** @return A pointer to the reallocated memory, or NULL if reallocation fails. +*/ +MRB_API void* +mempool_realloc(mempool *pool, void *p, size_t oldlen, size_t newlen) +{ + if (!pool) return NULL; + if (newlen < oldlen) return p; + oldlen += ALIGN_PADDING(oldlen); + newlen += ALIGN_PADDING(newlen); + for (struct mempool_page *page = pool->pages; page; page = page->next) { + if (page->last == p) { + /* if p is a last allocation from the page */ + size_t beg = (char*)p - page->page; + /* check beg + oldlen points bottom */ + /* assert(beg + oldlen == page->offset) */ + if (beg + oldlen != page->offset) break; + if (beg + newlen > page->len) { + /* new allocation need more space */ + /* abandon this space */ + page->offset = beg; + break; + } + page->offset = beg + newlen; + return p; + } + } + void *np = mempool_alloc(pool, newlen); + if (np == NULL) { + return NULL; + } + memcpy(np, p, oldlen); + return np; +} + +#ifdef TEST_POOL +int +main(void) +{ + int i, len = 250; + mempool *pool; + void *p; + + pool = mempool_open(); + p = mempool_alloc(pool, len); + for (i=1; i<20; i++) { + printf("%p (len=%d)\n", p, len); + p = mempool_realloc(pool, p, len, len*2); + len *= 2; + } + mempool_close(pool); + return 0; +} +#endif diff --git a/src/numeric.c b/src/numeric.c index 50510fba7d..a6a59e96bd 100644 --- a/src/numeric.c +++ b/src/numeric.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #ifndef MRB_NO_FLOAT @@ -21,12 +20,27 @@ #endif #endif +/** + * This function is called to raise a RangeError when an integer operation + * results in an overflow. It's marked mrb_noreturn as it always raises an + * exception and does not return. + * + * @param mrb The mruby state. + * @param reason A string describing the operation that caused the overflow + * (e.g., "addition", "multiplication"). + */ mrb_noreturn void mrb_int_overflow(mrb_state *mrb, const char *reason) { mrb_raisef(mrb, E_RANGE_ERROR, "integer overflow in %s", reason); } +/** + * This function is called to raise a ZeroDivisionError. It's marked + * mrb_noreturn as it always raises an exception and does not return. + * + * @param mrb The mruby state. + */ mrb_noreturn void mrb_int_zerodiv(mrb_state *mrb) { @@ -39,6 +53,21 @@ mrb_int_noconv(mrb_state *mrb, mrb_value y) mrb_raisef(mrb, E_TYPE_ERROR, "can't convert %Y into Integer", y); } +/** + * Calculates x raised to the power of y, where x is an integer. + * y can be an integer or float. The result type can be Integer, + * Float, or BigInt depending on the inputs and intermediate calculations. + * + * @param mrb The mruby state. + * @param x The base (must be an integer type, possibly BigInt). + * @param y The exponent (can be Integer or Float). + * @return An mrb_value representing the result of the exponentiation. + * This can be an Integer, Float, or BigInt. + * Handles potential overflows by promoting to BigInt if MRB_USE_BIGINT is defined, + * or by raising RangeError if not. + * Handles negative exponents by returning a Float if MRB_NO_FLOAT is not defined, + * or raising RangeError if it is. + */ mrb_value mrb_int_pow(mrb_state *mrb, mrb_value x, mrb_value y) { @@ -66,7 +95,7 @@ mrb_int_pow(mrb_state *mrb, mrb_value x, mrb_value y) else #endif { - mrb_get_args(mrb, "i", &exp); + exp = mrb_as_int(mrb, y); } if (exp < 0) { #ifndef MRB_NO_FLOAT @@ -103,7 +132,7 @@ mrb_int_pow(mrb_state *mrb, mrb_value x, mrb_value y) * * num ** other -> num * - * Raises num the other power. + * Raises `num` the `other` power. * * 2.0**3 #=> 8.0 */ @@ -113,6 +142,17 @@ int_pow(mrb_state *mrb, mrb_value x) return mrb_int_pow(mrb, x, mrb_get_arg1(mrb)); } +/** + * Performs integer division of x by y. This function implements specific + * rounding behavior for negative numbers to match Ruby's / operator for + * integers (floor division). + * + * @param x The dividend. + * @param y The divisor. + * @return The result of the integer division (mrb_int). + * Note: This function does not handle division by zero; the caller is + * expected to check for this. + */ mrb_int mrb_div_int(mrb_int x, mrb_int y) { @@ -124,13 +164,24 @@ mrb_div_int(mrb_int x, mrb_int y) return div; } +/** + * Performs integer division of x by y and returns the result as an mrb_value. + * It uses mrb_div_int for the division logic. + * + * @param mrb The mruby state. + * @param x The dividend. + * @param y The divisor. + * @return An mrb_value (integer) representing the result of the division. + * @raise ZeroDivisionError if y is 0. + * @raise RangeError for overflow conditions (specifically MRB_INT_MIN / -1). + */ mrb_value mrb_div_int_value(mrb_state *mrb, mrb_int x, mrb_int y) { if (y == 0) { mrb_int_zerodiv(mrb); } - else if(x == MRB_INT_MIN && y == -1) { + else if (x == MRB_INT_MIN && y == -1) { #ifdef MRB_USE_BIGINT return mrb_bint_mul_ii(mrb, x, y); #else @@ -140,14 +191,13 @@ mrb_div_int_value(mrb_state *mrb, mrb_int x, mrb_int y) return mrb_int_value(mrb, mrb_div_int(x, y)); } -/* 15.2.8.3.4 */ -/* 15.2.9.3.4 */ +/* 15.2.8.3.6 */ /* * call-seq: - * int / other -> num + * int / num -> num * * Performs division: the class of the resulting object depends on - * the class of num and on the magnitude of the + * the class of `num` and on the magnitude of the * result. */ static mrb_value @@ -156,31 +206,32 @@ int_div(mrb_state *mrb, mrb_value x) mrb_value y = mrb_get_arg1(mrb); #ifdef MRB_USE_BIGINT if (mrb_bigint_p(x)) { - return mrb_bint_div(mrb, x, y); - } + if (mrb_bigint_p(y) || mrb_integer_p(y)) { + return mrb_bint_div(mrb, x, y); + } + } else #endif - mrb_int a = mrb_integer(x); - if (mrb_integer_p(y)) { - return mrb_div_int_value(mrb, a, mrb_integer(y)); + return mrb_div_int_value(mrb, mrb_integer(x), mrb_integer(y)); } switch (mrb_type(y)) { #ifdef MRB_USE_BIGINT + case MRB_TT_INTEGER: case MRB_TT_BIGINT: - return mrb_bint_div(mrb, mrb_bint_new_int(mrb, a), y); + return mrb_bint_div(mrb, mrb_as_bint(mrb, x), y); #endif #ifdef MRB_USE_RATIONAL case MRB_TT_RATIONAL: - return mrb_rational_div(mrb, mrb_rational_new(mrb, a, 1), y); + return mrb_rational_div(mrb, mrb_as_rational(mrb, x), y); #endif #ifdef MRB_USE_COMPLEX case MRB_TT_COMPLEX: - x = mrb_complex_new(mrb, (mrb_float)a, 0); + x = mrb_complex_new(mrb, mrb_as_float(mrb, x), 0); return mrb_complex_div(mrb, x, y); #endif #ifndef MRB_NO_FLOAT case MRB_TT_FLOAT: - return mrb_float_value(mrb, mrb_div_float((mrb_float)a, mrb_as_float(mrb, y))); + return mrb_float_value(mrb, mrb_div_float(mrb_as_float(mrb, x), mrb_as_float(mrb, y))); #endif default: mrb_int_noconv(mrb, y); @@ -209,22 +260,16 @@ int_idiv(mrb_state *mrb, mrb_value x) return mrb_bint_div(mrb, x, mrb_get_arg1(mrb)); } #endif - mrb_int y; - - mrb_get_args(mrb, "i", &y); + mrb_int y = mrb_as_int(mrb, mrb_get_arg1(mrb)); return mrb_div_int_value(mrb, mrb_integer(x), y); } +#ifndef MRB_NO_FLOAT static mrb_value -int_quo(mrb_state *mrb, mrb_value x) +int_fdiv(mrb_state *mrb, mrb_value x) { -#ifndef MRB_USE_RATIONAL -#ifdef MRB_NO_FLOAT - return int_idiv(mrb, x); -#else - mrb_float y; + mrb_float y = mrb_as_float(mrb, mrb_get_arg1(mrb)); - mrb_get_args(mrb, "f", &y); if (y == 0) { mrb_int_zerodiv(mrb); } @@ -234,7 +279,20 @@ int_quo(mrb_state *mrb, mrb_value x) } #endif return mrb_float_value(mrb, mrb_integer(x) / y); +} #endif + +static mrb_value +int_quo(mrb_state *mrb, mrb_value x) +{ +#ifndef MRB_USE_RATIONAL + +#ifdef MRB_NO_FLOAT + return int_idiv(mrb, x); +#else + return int_fdiv(mrb, x); +#endif + #else mrb_int a = mrb_integer(x); mrb_value y = mrb_get_arg1(mrb); @@ -259,17 +317,13 @@ int_quo(mrb_state *mrb, mrb_value x) static mrb_value coerce_step_counter(mrb_state *mrb, mrb_value self) { - mrb_value num, step; - - mrb_get_args(mrb, "oo", &num, &step); - -#ifndef MRB_NO_FLOAT mrb->c->ci->mid = 0; - if (mrb_float_p(num) || mrb_float_p(step)) { +#ifndef MRB_NO_FLOAT + mrb_value step = mrb_get_arg1(mrb); + if (mrb_float_p(step)) { return mrb_ensure_float_type(mrb, self); } #endif - return self; } @@ -278,7 +332,7 @@ coerce_step_counter(mrb_state *mrb, mrb_value self) * * Document-class: Float * - * Float objects represent inexact real numbers using + * `Float` objects represent inexact real numbers using * the native architecture's double-precision floating-point * representation. */ @@ -294,10 +348,18 @@ flo_pow(mrb_state *mrb, mrb_value x) static mrb_value flo_idiv(mrb_state *mrb, mrb_value xv) { - mrb_int y; - - mrb_get_args(mrb, "i", &y); - return mrb_div_int_value(mrb, (mrb_int)mrb_float(xv), y); + mrb_float x = mrb_float(xv); + mrb_check_num_exact(mrb, x); + mrb_int y = mrb_as_int(mrb, mrb_get_arg1(mrb)); + /* (mrb_int)x is UB when x is outside mrb_int range. */ + if (!FIXABLE_FLOAT(x)) { +#ifdef MRB_USE_BIGINT + return mrb_bint_div(mrb, mrb_bint_new_float(mrb, x), mrb_int_value(mrb, y)); +#else + mrb_int_overflow(mrb, "div"); +#endif + } + return mrb_div_int_value(mrb, (mrb_int)x, y); } mrb_float @@ -314,6 +376,13 @@ mrb_div_float(mrb_float x, mrb_float y) } } +/* 15.2.9.3.6 */ +/* + * call-seq: + * float / num -> float + * + * Returns a new Float which is the result of dividing float by num. + */ static mrb_value flo_div(mrb_state *mrb, mrb_value x) { @@ -335,23 +404,36 @@ flo_div(mrb_state *mrb, mrb_value x) return mrb_float_value(mrb, a); } +static mrb_value +num_fdiv(mrb_state *mrb, mrb_value x) +{ + return flo_div(mrb, mrb_ensure_float_type(mrb, x)); +} + +/** + * Converts an mrb_value float to a new mrb_value string. + * It handles formatting to ensure the string representation includes a + * decimal point and fractional part (e.g., ".0" is appended if not present). + * + * @param mrb The mruby state. + * @param flo The float mrb_value to convert. + * @param fmt This argument is noted as no longer used and can be NULL. + * The function uses a default format. + * @return A new mrb_value string representing the float. + */ /* the argument `fmt` is no longer used; you can pass `NULL` */ mrb_value mrb_float_to_str(mrb_state *mrb, mrb_value flo, const char *fmt) { char buf[25]; -#ifdef MRB_USE_FLOAT32 - const int prec = 7; -#else - const int prec = 15; -#endif - mrb_format_float(mrb_float(flo), buf, sizeof(buf), 'g', prec, '\0'); + mrb_format_float(mrb_float(flo), buf, sizeof(buf), 'g', -2, '\0'); for (char *p = buf; *p; p++) { if (*p == '.') goto exit; if (*p == 'e') { memmove(p+2, p, strlen(p)+1); - memcpy(p, ".0", 2); + p[0] = '.'; + p[1] = '0'; goto exit; } } @@ -368,8 +450,8 @@ mrb_float_to_str(mrb_state *mrb, mrb_value flo, const char *fmt) * * Returns a string containing a representation of self. As well as a * fixed or exponential form of the number, the call may return - * "NaN", "Infinity", and - * "-Infinity". + * "`NaN`", "`Infinity`", and + * "`-Infinity`". * * 3.0.to_s #=> 3.0 * 3.25.to_s #=> 3.25 @@ -396,13 +478,13 @@ flo_to_s(mrb_state *mrb, mrb_value flt) return str; } -/* 15.2.9.3.1 */ +/* 15.2.9.3.3 */ /* * call-seq: * float + other -> float * - * Returns a new float which is the sum of float - * and other. + * Returns a new float which is the sum of `float` + * and `other`. */ static mrb_value flo_add(mrb_state *mrb, mrb_value x) @@ -422,13 +504,13 @@ flo_add(mrb_state *mrb, mrb_value x) } } -/* 15.2.9.3.2 */ +/* 15.2.9.3.4 */ /* * call-seq: * float - other -> float * - * Returns a new float which is the difference of float - * and other. + * Returns a new float which is the difference of `float` + * and `other`. */ static mrb_value @@ -449,13 +531,13 @@ flo_sub(mrb_state *mrb, mrb_value x) } } -/* 15.2.9.3.3 */ +/* 15.2.9.3.5 */ /* * call-seq: * float * other -> float * - * Returns a new float which is the product of float - * and other. + * Returns a new float which is the product of `float` + * and `other`. */ static mrb_value @@ -513,13 +595,13 @@ flodivmod(mrb_state *mrb, double x, double y, mrb_float *divp, mrb_float *modp) if (divp) *divp = div; } -/* 15.2.9.3.5 */ +/* 15.2.9.3.5 */ /* * call-seq: * flt % other -> float * flt.modulo(other) -> float * - * Return the modulo after division of flt by other. + * Return the modulo after division of `flt` by `other`. * * 6543.21.modulo(137) #=> 104.21 * 6543.21.modulo(137.24) #=> 92.9299999999996 @@ -531,7 +613,7 @@ flo_mod(mrb_state *mrb, mrb_value x) mrb_value y = mrb_get_arg1(mrb); mrb_float mod; - flodivmod(mrb, mrb_float(x), mrb_as_float(mrb, y), 0, &mod); + flodivmod(mrb, mrb_float(x), mrb_as_float(mrb, y), NULL, &mod); return mrb_float_value(mrb, mod); } #endif @@ -541,7 +623,7 @@ flo_mod(mrb_state *mrb, mrb_value x) * call-seq: * num.eql?(numeric) -> true or false * - * Returns true if num and numeric are the + * Returns `true` if `num` and `numeric` are the * same type and have equal values. * * 1 == 1.0 #=> true @@ -572,14 +654,14 @@ num_eql(mrb_state *mrb, mrb_value x) } #ifndef MRB_NO_FLOAT -/* 15.2.9.3.7 */ +/* 15.2.9.3.7 */ /* * call-seq: * flt == obj -> true or false * - * Returns true only if obj has the same value - * as flt. Contrast this with Float#eql?, which - * requires obj to be a Float. + * Returns `true` only if *obj* has the same value + * as *flt*. Contrast this with `Float#eql?`, which + * requires *obj* to be a `Float`. * * 1.0 == 1 #=> true * @@ -608,159 +690,22 @@ flo_eq(mrb_state *mrb, mrb_value x) } } -static int64_t -value_int64(mrb_state *mrb, mrb_value x) -{ - switch (mrb_type(x)) { - case MRB_TT_INTEGER: - return (int64_t)mrb_integer(x); - case MRB_TT_FLOAT: - { - double f = mrb_float(x); - - if ((mrb_float)INT64_MAX >= f && f >= (mrb_float)INT64_MIN) - return (int64_t)f; - } - default: - mrb_raise(mrb, E_TYPE_ERROR, "cannot convert to Integer"); - break; - } - /* not reached */ - return 0; -} - -static mrb_value -int64_value(mrb_state *mrb, int64_t v) -{ - if (!TYPED_FIXABLE(v,int64_t)) { - mrb_int_overflow(mrb, "bit operation"); - } - return mrb_fixnum_value((mrb_int)v); -} - -static mrb_value -flo_rev(mrb_state *mrb, mrb_value x) -{ - int64_t v1 = value_int64(mrb, x); - return int64_value(mrb, ~v1); -} - -static mrb_value -flo_and(mrb_state *mrb, mrb_value x) -{ - mrb_value y = mrb_get_arg1(mrb); - int64_t v1, v2; - - v1 = value_int64(mrb, x); - v2 = value_int64(mrb, y); - return int64_value(mrb, v1 & v2); -} - -static mrb_value -flo_or(mrb_state *mrb, mrb_value x) -{ - mrb_value y = mrb_get_arg1(mrb); - int64_t v1, v2; - - v1 = value_int64(mrb, x); - v2 = value_int64(mrb, y); - return int64_value(mrb, v1 | v2); -} - -static mrb_value -flo_xor(mrb_state *mrb, mrb_value x) -{ - mrb_value y = mrb_get_arg1(mrb); - int64_t v1, v2; - - v1 = value_int64(mrb, x); - v2 = value_int64(mrb, y); - return int64_value(mrb, v1 ^ v2); -} - -static mrb_value -flo_shift(mrb_state *mrb, mrb_value x, mrb_int width) -{ - mrb_float val; - - if (width == 0) { - return x; - } - val = mrb_float(x); - if (width < -MRB_INT_BIT/2) { - if (val < 0) return mrb_fixnum_value(-1); - return mrb_fixnum_value(0); - } - if (width < 0) { - while (width++) { - val /= 2; - if (val < 1.0) { - val = 0; - break; - } - } -#if defined(_ISOC99_SOURCE) - val = trunc(val); -#else - if (val > 0){ - val = floor(val); - } else { - val = ceil(val); - } -#endif - if (val == 0 && mrb_float(x) < 0) { - return mrb_fixnum_value(-1); - } - } - else { - while (width--) { - val *= 2; - } - } - if (FIXABLE_FLOAT(val)) - return mrb_int_value(mrb, (mrb_int)val); - return mrb_float_value(mrb, val); -} - -static mrb_value -flo_rshift(mrb_state *mrb, mrb_value x) -{ - mrb_int width; - - mrb_get_args(mrb, "i", &width); - if (width == MRB_INT_MIN) return flo_shift(mrb, x, -MRB_INT_BIT); - return flo_shift(mrb, x, -width); -} - -static mrb_value -flo_lshift(mrb_state *mrb, mrb_value x) -{ - mrb_int width; - - mrb_get_args(mrb, "i", &width); - return flo_shift(mrb, x, width); -} - /* 15.2.9.3.13 */ /* + * Document-method: Float#to_f + * * call-seq: * flt.to_f -> self * - * As flt is already a float, returns +self+. + * As `flt` is already a float, returns `self`. */ -static mrb_value -flo_to_f(mrb_state *mrb, mrb_value num) -{ - return num; -} - /* 15.2.9.3.11 */ /* * call-seq: * flt.infinite? -> nil, -1, +1 * - * Returns nil, -1, or +1 depending on whether flt + * Returns `nil`, -1, or +1 depending on whether *flt* * is finite, -infinity, or +infinity. * * (0.0).infinite? #=> nil @@ -779,14 +724,14 @@ flo_infinite_p(mrb_state *mrb, mrb_value num) return mrb_nil_value(); } -/* 15.2.9.3.9 */ +/* 15.2.9.3.9 */ /* * call-seq: * flt.finite? -> true or false * - * Returns true if flt is a valid IEEE floating - * point number (it is not infinite, and nan? is - * false). + * Returns `true` if *flt* is a valid IEEE floating + * point number (it is not infinite, and `nan?` is + * `false`). * */ @@ -810,6 +755,16 @@ flo_finite_p(mrb_state *mrb, mrb_value num) * FloatDomainError: Infinity */ /* ------------------------------------------------------------------------*/ +/** + * Checks if a mrb_float value is Infinity or NaN. If it is, this function + * raises a FloatDomainError. This is used to prevent conversions of these + * special float values to exact number types like Integer. + * + * @param mrb The mruby state. + * @param num The float value to check. + * It does not return a value (void function) but will raise an exception + * if the number is not exact. + */ void mrb_check_num_exact(mrb_state *mrb, mrb_float num) { @@ -872,13 +827,13 @@ flo_rounding(mrb_state *mrb, mrb_value num, double (*func)(double)) * call-seq: * float.floor([ndigits]) -> integer or float * - * Returns the largest number less than or equal to +float+ with - * a precision of +ndigits+ decimal digits (default: 0). + * Returns the largest number less than or equal to `float` with + * a precision of `ndigits` decimal digits (default: 0). * * When the precision is negative, the returned value is an integer - * with at least ndigits.abs trailing zeros. + * with at least `ndigits.abs` trailing zeros. * - * Returns a floating point number when +ndigits+ is positive, + * Returns a floating-point number when `ndigits` is positive, * otherwise returns an integer. * * 1.2.floor #=> 1 @@ -901,7 +856,7 @@ flo_rounding(mrb_state *mrb, mrb_value num, double (*func)(double)) * 34567.89.floor(2) #=> 34567.89 * 34567.89.floor(3) #=> 34567.89 * - * Note that the limited precision of floating point arithmetic + * Note that the limited precision of floating-point arithmetic * might lead to surprising results: * * (0.3 / 0.1).floor #=> 2 (!) @@ -912,18 +867,18 @@ flo_floor(mrb_state *mrb, mrb_value num) return flo_rounding(mrb, num, floor); } -/* 15.2.9.3.8 */ +/* 15.2.9.3.8 */ /* * call-seq: * float.ceil([ndigits]) -> integer or float * - * Returns the smallest number greater than or equal to +float+ with - * a precision of +ndigits+ decimal digits (default: 0). + * Returns the smallest number greater than or equal to `float` with + * a precision of `ndigits` decimal digits (default: 0). * * When the precision is negative, the returned value is an integer - * with at least ndigits.abs trailing zeros. + * with at least `ndigits.abs` trailing zeros. * - * Returns a floating point number when +ndigits+ is positive, + * Returns a floating-point number when `ndigits` is positive, * otherwise returns an integer. * * 1.2.ceil #=> 2 @@ -946,7 +901,7 @@ flo_floor(mrb_state *mrb, mrb_value num) * 34567.89.ceil(2) #=> 34567.89 * 34567.89.ceil(3) #=> 34567.89 * - * Note that the limited precision of floating point arithmetic + * Note that the limited precision of floating-point arithmetic * might lead to surprising results: * * (2.1 / 0.7).ceil #=> 4 (!) @@ -963,7 +918,7 @@ flo_ceil(mrb_state *mrb, mrb_value num) * call-seq: * flt.round([ndigits]) -> integer or float * - * Rounds flt to a given precision in decimal digits (default 0 digits). + * Rounds *flt* to a given precision in decimal digits (default 0 digits). * Precision may be negative. Returns a floating-point number when ndigits * is more than zero. * @@ -994,7 +949,6 @@ flo_round(mrb_state *mrb, mrb_value num) { double number, f; mrb_int ndigits = 0; - mrb_int i; mrb_get_args(mrb, "|i", &ndigits); number = mrb_float(num); @@ -1006,7 +960,8 @@ flo_round(mrb_state *mrb, mrb_value num) f = 1.0; if (ndigits < -DBL_DIG-2) return mrb_fixnum_value(0); - i = ndigits >= 0 ? ndigits : -ndigits; + + mrb_int i = ndigits >= 0 ? ndigits : -ndigits; if (ndigits > DBL_DIG+2) return num; while (--i >= 0) f = f*10.0; @@ -1069,7 +1024,7 @@ flo_to_i(mrb_state *mrb, mrb_value num) * flt.to_i -> integer * flt.truncate -> integer * - * Returns flt truncated to an Integer. + * Returns *flt* truncated to an `Integer`. */ static mrb_value @@ -1098,31 +1053,44 @@ flo_abs(mrb_state *mrb, mrb_value num) /* * Document-class: Integer * - * Integer is hold whole numbers. + * `Integer` is hold whole numbers. * */ - +/* 15.2.9.3.24 */ /* + * Document-method: Integer#to_i + * Document-method: Integer#to_int + * * call-seq: * int.to_i -> integer + * int.to_int -> integer * - * As int is already an Integer, all these + * As *int* is already an `Integer`, all these * methods simply return the receiver. */ -static mrb_value -int_to_i(mrb_state *mrb, mrb_value num) -{ - return num; -} - +/** + * Multiplies two mrb_values, x and y, where x is expected to be an integer. + * y can be an integer, BigInt, Rational, Complex, or Float. The function + * handles type promotion and dispatches to appropriate handlers + * (e.g., mrb_bint_mul for BigInts). + * + * @param mrb The mruby state. + * @param x The first operand (integer). + * @param y The second operand (can be various numeric types). + * @return An mrb_value representing the product. The type of the result + * depends on the types of the inputs and the magnitude of the result + * (e.g., could be Integer, BigInt, Float, Rational, Complex). + * Handles potential integer overflows by promoting to BigInt if MRB_USE_BIGINT + * is defined, or raising RangeError otherwise. + * If y is not a recognized numeric type, it raises E_TYPE_ERROR. + */ mrb_value mrb_int_mul(mrb_state *mrb, mrb_value x, mrb_value y) { - mrb_int a; + mrb_int a = mrb_integer(x); - a = mrb_integer(x); if (mrb_integer_p(y)) { mrb_int b, c; @@ -1169,13 +1137,13 @@ mrb_int_mul(mrb_state *mrb, mrb_value x, mrb_value y) } } -/* 15.2.8.3.3 */ +/* 15.2.8.3.5 */ /* * call-seq: * int * numeric -> numeric_result * * Performs multiplication: the class of the resulting object depends on - * the class of numeric and on the magnitude of the + * the class of `numeric` and on the magnitude of the * result. */ @@ -1198,7 +1166,7 @@ intdivmod(mrb_state *mrb, mrb_int x, mrb_int y, mrb_int *divp, mrb_int *modp) if (y == 0) { mrb_int_zerodiv(mrb); } - else if(x == MRB_INT_MIN && y == -1) { + else if (x == MRB_INT_MIN && y == -1) { mrb_int_overflow(mrb, "division"); } else { @@ -1214,13 +1182,13 @@ intdivmod(mrb_state *mrb, mrb_int x, mrb_int y, mrb_int *divp, mrb_int *modp) } } -/* 15.2.8.3.5 */ +/* 15.2.8.3.7 */ /* * call-seq: - * int % other -> real + * int % num -> num * - * Returns int modulo other. - * See numeric.divmod for more information. + * Returns `int` modulo `other`. + * See `numeric.divmod` for more information. */ static mrb_value @@ -1233,6 +1201,9 @@ int_mod(mrb_state *mrb, mrb_value x) if (mrb_bigint_p(x)) { return mrb_bint_mod(mrb, x, y); } + if (mrb_bigint_p(y)) { + return mrb_bint_mod(mrb, mrb_as_bint(mrb, x), y); + } #endif a = mrb_integer(x); if (a == 0) return x; @@ -1264,7 +1235,7 @@ static mrb_value flo_divmod(mrb_state *mrb, mrb_value x); * call-seq: * int.divmod(numeric) -> array * - * See Numeric#divmod. + * See `Numeric#divmod`. */ static mrb_value int_divmod(mrb_state *mrb, mrb_value x) @@ -1281,6 +1252,9 @@ int_divmod(mrb_state *mrb, mrb_value x) #endif return mrb_bint_divmod(mrb, x, y); } + if (mrb_bigint_p(y)) { + return mrb_bint_divmod(mrb, mrb_as_bint(mrb, x), y); + } #endif if (mrb_integer_p(y)) { mrb_int div, mod; @@ -1291,7 +1265,7 @@ int_divmod(mrb_state *mrb, mrb_value x) #ifdef MRB_NO_FLOAT mrb_raise(mrb, E_TYPE_ERROR, "non integer divmod"); #else - return flo_divmod(mrb, x); + return flo_divmod(mrb, mrb_ensure_float_type(mrb, x)); #endif } @@ -1313,12 +1287,12 @@ flo_divmod(mrb_state *mrb, mrb_value x) } #endif -/* 15.2.8.3.7 */ +/* 15.2.8.3.2 */ /* * call-seq: * int == other -> true or false * - * Return true if int equals other + * Return `true` if `int` equals `other` * numerically. * * 1 == 2 #=> false @@ -1354,7 +1328,7 @@ int_equal(mrb_state *mrb, mrb_value x) } } -/* 15.2.8.3.8 */ +/* 15.2.8.3.8 */ /* * call-seq: * ~int -> integer @@ -1368,31 +1342,21 @@ int_equal(mrb_state *mrb, mrb_value x) static mrb_value int_rev(mrb_state *mrb, mrb_value num) { - mrb_int val = mrb_integer(num); - #ifdef MRB_USE_BIGINT if (mrb_bigint_p(num)) { - mrb_bint_rev(mrb, num); + return mrb_bint_rev(mrb, num); } #endif + mrb_int val = mrb_integer(num); return mrb_int_value(mrb, ~val); } -#ifdef MRB_NO_FLOAT #define bit_op(x,y,op1,op2) do {\ + if (!mrb_integer_p(y)) mrb_int_noconv(mrb, y);\ return mrb_int_value(mrb, (mrb_integer(x) op2 mrb_integer(y)));\ } while(0) -#else -static mrb_value flo_and(mrb_state *mrb, mrb_value x); -static mrb_value flo_or(mrb_state *mrb, mrb_value x); -static mrb_value flo_xor(mrb_state *mrb, mrb_value x); -#define bit_op(x,y,op1,op2) do {\ - if (mrb_integer_p(y)) return mrb_int_value(mrb, (mrb_integer(x) op2 mrb_integer(y))); \ - return flo_ ## op1(mrb, mrb_float_value(mrb, (mrb_float)mrb_integer(x)));\ -} while(0) -#endif -/* 15.2.8.3.9 */ +/* 15.2.8.3.9 */ /* * call-seq: * int & integer -> integer_result @@ -1461,11 +1425,30 @@ int_xor(mrb_state *mrb, mrb_value x) return mrb_bint_xor(mrb, mrb_as_bint(mrb, x), y); } #endif - bit_op(x, y, or, ^); + bit_op(x, y, xor, ^); } #define NUMERIC_SHIFT_WIDTH_MAX (MRB_INT_BIT-1) +/** + * Performs a bitwise shift operation (left or right) on an mrb_int value + * (val) by width positions. + * + * @param mrb The mruby state (though not directly used in the function + * logic, it's often part of MRB_API signatures). + * @param val The integer value to be shifted. + * @param width The number of positions to shift. Positive for left shift, + * negative for right shift. + * @param num A pointer to an mrb_int where the result of the shift will be + * stored. + * @return An mrb_bool indicating whether the shift was successful. + * - TRUE if the shift was performed without overflow. + * - FALSE if the shift would result in an overflow (e.g., shifting + * a large positive number too far left, or a negative number + * too far left). + * Special handling for right shifts of negative numbers (arithmetic shift) + * and large shift widths. + */ mrb_bool mrb_num_shift(mrb_state *mrb, mrb_int val, mrb_int width, mrb_int *num) { @@ -1515,7 +1498,7 @@ int_lshift(mrb_state *mrb, mrb_value x) { mrb_int width, val; - mrb_get_args(mrb, "i", &width); + width = mrb_as_int(mrb, mrb_get_arg1(mrb)); if (width == 0) { return x; } @@ -1550,7 +1533,7 @@ int_rshift(mrb_state *mrb, mrb_value x) { mrb_int width, val; - mrb_get_args(mrb, "i", &width); + width = mrb_as_int(mrb, mrb_get_arg1(mrb)); if (width == 0) { return x; } @@ -1604,7 +1587,7 @@ prepare_int_rounding(mrb_state *mrb, mrb_value x) * Returns self. * * When the precision (ndigits) is negative, the returned value is an integer - * with at least ndigits.abs trailing zeros. + * with at least `ndigits.abs` trailing zeros. */ static mrb_value int_ceil(mrb_state *mrb, mrb_value x) @@ -1614,16 +1597,26 @@ int_ceil(mrb_state *mrb, mrb_value x) if (mrb_nil_p(f)) return x; #ifdef MRB_USE_BIGINT if (mrb_bigint_p(x)) { - return mrb_bint_add(mrb, x, mrb_bint_sub(mrb, x, mrb_bint_mod(mrb, x, f))); + x = mrb_bint_add_n(mrb, x, f); + return mrb_bint_sub(mrb, x, mrb_bint_mod(mrb, x, f)); } #endif mrb_int a = mrb_integer(x); mrb_int b = mrb_integer(f); + mrb_int c = a % b; int neg = a < 0; - if (neg) a = -a; - else a += b - 1; - a = a / b * b; - if (neg) a = -a; + a -= c; + if (!neg) { + if (mrb_int_add_overflow(a, b, &c)) { +#ifdef MRB_USE_BIGINT + x = mrb_bint_new_int(mrb, a); + return mrb_bint_add(mrb, x, f); +#else + mrb_int_overflow(mrb, "ceil"); +#endif + } + a = c; + } return mrb_int_value(mrb, a); } @@ -1636,7 +1629,7 @@ int_ceil(mrb_state *mrb, mrb_value x) * Returns self. * * When the precision (ndigits) is negative, the returned value is an integer - * with at least ndigits.abs trailing zeros. + * with at least `ndigits.abs` trailing zeros. */ static mrb_value int_floor(mrb_state *mrb, mrb_value x) @@ -1651,10 +1644,20 @@ int_floor(mrb_state *mrb, mrb_value x) #endif mrb_int a = mrb_integer(x); mrb_int b = mrb_integer(f); + mrb_int c = a % b; int neg = a < 0; - if (neg) a = -a + b - 1; - a = a / b * b; - if (neg) a = -a; + a -= c; + if (neg) { + if (mrb_int_sub_overflow(a, b, &c)) { +#ifdef MRB_USE_BIGINT + x = mrb_bint_new_int(mrb, a); + return mrb_bint_sub(mrb, x, f); +#else + mrb_int_overflow(mrb, "floor"); +#endif + } + a = c; + } return mrb_int_value(mrb, a); } @@ -1667,7 +1670,7 @@ int_floor(mrb_state *mrb, mrb_value x) * Returns self. * * When the precision (ndigits) is negative, the returned value is an integer - * with at least ndigits.abs trailing zeros. + * with at least `ndigits.abs` trailing zeros. */ static mrb_value int_round(mrb_state *mrb, mrb_value x) @@ -1680,8 +1683,9 @@ int_round(mrb_state *mrb, mrb_value x) mrb_value r = mrb_bint_mod(mrb, x, f); mrb_value n = mrb_bint_sub(mrb, x, r); mrb_value h = mrb_bigint_p(f) ? mrb_bint_rshift(mrb, f, 1) : mrb_int_value(mrb, mrb_integer(f)>>1); - mrb_int cmp = mrb_bigint_p(r) ? mrb_bint_cmp(mrb, r, h) : (mrb_integer(r) - mrb_integer(h)); + mrb_int cmp = mrb_bigint_p(r) ? mrb_bint_cmp(mrb, r, h) : (mrb_bigint_p(h) ? -mrb_bint_cmp(mrb, h, r) : (mrb_integer(r)-mrb_integer(h))); if ((cmp > 0) || (cmp == 0 && mrb_bint_cmp(mrb, x, mrb_fixnum_value(0)) > 0)) { + n = mrb_as_bint(mrb, n); n = mrb_bint_add(mrb, n, f); } return n; @@ -1689,10 +1693,35 @@ int_round(mrb_state *mrb, mrb_value x) #endif mrb_int a = mrb_integer(x); mrb_int b = mrb_integer(f); - int neg = a < 0; - if (neg) a = -a; - a = (a + b / 2) / b * b; - if (neg) a = -a; + mrb_int c = a % b; + a -= c; + if (c < 0) { + c = -c; + if (b/2 < c) { + if (mrb_int_sub_overflow(a, b, &c)) { +#ifdef MRB_USE_BIGINT + x = mrb_bint_new_int(mrb, a); + return mrb_bint_sub(mrb, x, f); +#else + mrb_int_overflow(mrb, "round"); +#endif + } + } + a = c; + } + else { + if (b/2 < c) { + if (mrb_int_add_overflow(a, b, &c)) { +#ifdef MRB_USE_BIGINT + x = mrb_bint_new_int(mrb, a); + return mrb_bint_add(mrb, x, f); +#else + mrb_int_overflow(mrb, "round"); +#endif + } + } + a = c; + } return mrb_int_value(mrb, a); } @@ -1705,7 +1734,7 @@ int_round(mrb_state *mrb, mrb_value x) * Returns self. * * When the precision (ndigits) is negative, the returned value is an integer - * with at least ndigits.abs trailing zeros. + * with at least `ndigits.abs` trailing zeros. */ static mrb_value int_truncate(mrb_state *mrb, mrb_value x) @@ -1716,21 +1745,16 @@ int_truncate(mrb_state *mrb, mrb_value x) #ifdef MRB_USE_BIGINT if (mrb_bigint_p(x)) { mrb_value m = mrb_bint_mod(mrb, x, f); + x = mrb_bint_sub_n(mrb, x, m); if (mrb_bint_cmp(mrb, x, mrb_fixnum_value(0)) < 0) { - return mrb_bint_add(mrb, x, mrb_bint_sub(mrb, x, m)); - } - else { - return mrb_bint_sub(mrb, x, m); + return mrb_bint_add(mrb, x, f); } + return x; } #endif mrb_int a = mrb_integer(x); mrb_int b = mrb_integer(f); - int neg = a < 0; - if (neg) a = -a; - a = a / b * b; - if (neg) a = -a; - return mrb_int_value(mrb, a); + return mrb_int_value(mrb, a - (a % b)); } /* 15.2.8.3.23 */ @@ -1738,7 +1762,7 @@ int_truncate(mrb_state *mrb, mrb_value x) * call-seq: * int.to_f -> float * - * Converts int to a Float. + * Converts *int* to a `Float`. * */ @@ -1754,13 +1778,22 @@ int_to_f(mrb_state *mrb, mrb_value num) return mrb_float_value(mrb, (mrb_float)mrb_integer(num)); } +/** + * Converts an mrb_value float to an mrb_value integer. + * + * @param mrb The mruby state. + * @param x The float mrb_value to convert. + * @return An mrb_value integer if the conversion is successful. + * @raise E_TYPE_ERROR if the input is not a float. + * @raise E_RANGE_ERROR if the float is Infinity or NaN. + */ MRB_API mrb_value mrb_float_to_integer(mrb_state *mrb, mrb_value x) { if (!mrb_float_p(x)) { mrb_raise(mrb, E_TYPE_ERROR, "non float value"); } - float f = mrb_float(x); + mrb_float f = mrb_float(x); if (isinf(f) || isnan(f)) { mrb_raisef(mrb, E_RANGE_ERROR, "float %f out of range", f); } @@ -1768,12 +1801,27 @@ mrb_float_to_integer(mrb_state *mrb, mrb_value x) } #endif +/** + * Adds two mrb_values, x and y, where x is expected to be an integer. + * y can be an integer, BigInt, Rational, Complex, or Float. The function + * handles type promotion and dispatches to appropriate handlers. + * + * @param mrb The mruby state. + * @param x The first operand (integer). + * @param y The second operand (can be various numeric types). + * @return An mrb_value representing the sum. The type of the result depends + * on the types of the inputs and the magnitude of the result. + * Handles potential integer overflows by promoting to BigInt if MRB_USE_BIGINT + * is defined, or raising RangeError otherwise. + * If y is not a recognized numeric type and MRB_NO_FLOAT is defined, it + * raises E_TYPE_ERROR. If MRB_NO_FLOAT is not defined, it attempts to + * convert y to a float. + */ mrb_value mrb_int_add(mrb_state *mrb, mrb_value x, mrb_value y) { - mrb_int a; + mrb_int a = mrb_integer(x); - a = mrb_integer(x); if (mrb_integer_p(y)) { mrb_int b, c; @@ -1812,13 +1860,13 @@ mrb_int_add(mrb_state *mrb, mrb_value x, mrb_value y) } } -/* 15.2.8.3.1 */ +/* 15.2.8.3.3 */ /* * call-seq: * int + numeric -> numeric_result * * Performs addition: the class of the resulting object depends on - * the class of numeric and on the magnitude of the + * the class of `numeric` and on the magnitude of the * result. */ static mrb_value @@ -1834,12 +1882,27 @@ int_add(mrb_state *mrb, mrb_value self) return mrb_int_add(mrb, self, other); } +/** + * Subtracts mrb_value y from mrb_value x, where x is expected to be an + * integer. y can be an integer, BigInt, Rational, Complex, or Float. + * The function handles type promotion and dispatches to appropriate handlers. + * + * @param mrb The mruby state. + * @param x The minuend (integer). + * @param y The subtrahend (can be various numeric types). + * @return An mrb_value representing the difference. The type of the result + * depends on the types of the inputs and the magnitude of the result. + * Handles potential integer overflows by promoting to BigInt if MRB_USE_BIGINT + * is defined, or raising RangeError otherwise. + * If y is not a recognized numeric type and MRB_NO_FLOAT is defined, it + * raises E_TYPE_ERROR. If MRB_NO_FLOAT is not defined, it attempts to + * convert y to a float. + */ mrb_value mrb_int_sub(mrb_state *mrb, mrb_value x, mrb_value y) { - mrb_int a; + mrb_int a = mrb_integer(x); - a = mrb_integer(x); if (mrb_integer_p(y)) { mrb_int b, c; @@ -1876,14 +1939,13 @@ mrb_int_sub(mrb_state *mrb, mrb_value x, mrb_value y) } } -/* 15.2.8.3.2 */ -/* 15.2.8.3.16 */ +/* 15.2.8.3.4 */ /* * call-seq: - * int - numeric -> numeric_result + * int - numeric -> numeric * * Performs subtraction: the class of the resulting object depends on - * the class of numeric and on the magnitude of the + * the class of `numeric` and on the magnitude of the * result. */ static mrb_value @@ -1899,6 +1961,16 @@ int_sub(mrb_state *mrb, mrb_value self) return mrb_int_sub(mrb, self, other); } +/** + * Converts an mrb_int to a C-style string. + * + * @param buf The buffer to write the string to. + * @param len The size of the buffer. + * @param n The integer to convert. + * @param base The radix for conversion (2-36). + * @return A pointer to the beginning of the string in the buffer, + * or NULL if an error occurs (e.g., invalid base, buffer too small). + */ MRB_API char* mrb_int_to_cstr(char *buf, size_t len, mrb_int n, mrb_int base) { @@ -1932,11 +2004,19 @@ mrb_int_to_cstr(char *buf, size_t len, mrb_int n, mrb_int base) return b; } +/** + * Converts an mrb_value representing an integer to a new mrb_value string. + * + * @param mrb The mruby state. + * @param x The integer mrb_value to convert. + * @param base The radix for conversion (2-36). + * @return A new mrb_value string representing the integer, + * or raises an E_ARGUMENT_ERROR if the base is invalid. + */ MRB_API mrb_value mrb_integer_to_str(mrb_state *mrb, mrb_value x, mrb_int base) { char buf[MRB_INT_BIT+1]; - mrb_int val = mrb_integer(x); if (base < 2 || 36 < base) { mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid radix %i", base); @@ -1946,6 +2026,7 @@ mrb_integer_to_str(mrb_state *mrb, mrb_value x, mrb_int base) return mrb_bint_to_s(mrb, x, base); } #endif + mrb_int val = mrb_integer(x); const char *p = mrb_int_to_cstr(buf, sizeof(buf), val, base); mrb_assert(p != NULL); mrb_value str = mrb_str_new_cstr(mrb, p); @@ -1958,8 +2039,8 @@ mrb_integer_to_str(mrb_state *mrb, mrb_value x, mrb_int base) * call-seq: * int.to_s(base=10) -> string * - * Returns a string containing the representation of int radix - * base (between 2 and 36). + * Returns a string containing the representation of *int* radix + * *base* (between 2 and 36). * * 12345.to_s #=> "12345" * 12345.to_s(2) #=> "11000000111001" @@ -1972,9 +2053,14 @@ mrb_integer_to_str(mrb_state *mrb, mrb_value x, mrb_int base) static mrb_value int_to_s(mrb_state *mrb, mrb_value self) { - mrb_int base = 10; + mrb_int base; - mrb_get_args(mrb, "|i", &base); + if (mrb_get_argc(mrb) > 0) { + base = mrb_integer(mrb_get_arg1(mrb)); + } + else { + base = 10; + } return mrb_integer_to_str(mrb, self, base); } @@ -1982,51 +2068,89 @@ int_to_s(mrb_state *mrb, mrb_value self) static mrb_int cmpnum(mrb_state *mrb, mrb_value v1, mrb_value v2) { -#ifdef MRB_USE_BIGINT - if (mrb_bigint_p(v1)) { - return mrb_bint_cmp(mrb, v1, v2); +#ifdef MRB_NO_FLOAT /* integer version */ + + if (!mrb_fixnum_p(v2)) { + if (!mrb_obj_is_kind_of(mrb, v2, mrb_class_get_id(mrb, MRB_SYM(Numeric)))) { + return -2; + } + v1 = mrb_funcall_argv(mrb, v2, MRB_OPSYM(cmp), 1, &v1); + if (mrb_integer_p(v1)) { + return -mrb_integer(v1); + } + return -2; } -#endif + mrb_int x = mrb_as_int(mrb, v1); + mrb_int y = mrb_integer(v2); + +#else /* float version */ -#ifdef MRB_NO_FLOAT - mrb_int x, y; -#else mrb_float x, y; -#endif -#ifdef MRB_NO_FLOAT - x = mrb_integer(v1); -#else - x = mrb_as_float(mrb, v1); + if (mrb_fixnum_p(v1)) { + if (mrb_fixnum_p(v2)) { + mrb_int x = mrb_integer(v1); + mrb_int y = mrb_integer(v2); + + if (x > y) return 1; + else if (x < y) return -1; + return 0; + } +#ifdef MRB_USE_BIGINT + if (mrb_bigint_p(v2)) { + return -mrb_bint_cmp(mrb, v2, v1); + } #endif - switch (mrb_type(v2)) { - case MRB_TT_INTEGER: -#ifdef MRB_NO_FLOAT - y = mrb_integer(v2); -#else - y = (mrb_float)mrb_integer(v2); + x = (mrb_float)mrb_integer(v1); + } +#ifdef MRB_USE_BIGINT + else if (mrb_bigint_p(v1)) { + if (mrb_integer_p(v2) || mrb_bigint_p(v2)) { + return mrb_bint_cmp(mrb, v1, v2); + } + x = mrb_as_float(mrb, v1); + } #endif - break; -#ifndef MRB_NO_FLOAT - case MRB_TT_FLOAT: - y = mrb_float(v2); - break; + else { + x = mrb_as_float(mrb, v1); + } + + switch (mrb_type(v2)) { #ifdef MRB_USE_RATIONAL case MRB_TT_RATIONAL: - y = mrb_as_float(mrb, v2); - break; #endif +#ifdef MRB_USE_BIGINT + case MRB_TT_BIGINT: #endif + case MRB_TT_INTEGER: + if (mrb_fixnum_p(v2)) { + y = (mrb_float)mrb_integer(v2); + break; + } + /* fall through */ + case MRB_TT_FLOAT: + y = mrb_as_float(mrb, v2); + break; default: + if (!mrb_obj_is_kind_of(mrb, v2, mrb_class_get_id(mrb, MRB_SYM(Numeric)))) { + return -2; + } + /* fall through */ +#ifdef MRB_USE_COMPLEX + case MRB_TT_COMPLEX: +#endif + v1 = mrb_funcall_argv(mrb, v2, MRB_OPSYM(cmp), 1, &v1); + if (mrb_fixnum_p(v1)) { + return -mrb_integer(v1); + } return -2; } +#endif if (x > y) return 1; - else { - if (x < y) - return -1; - return 0; - } + else if (x < y) + return -1; + return 0; } static mrb_value @@ -2041,25 +2165,25 @@ int_hash(mrb_state *mrb, mrb_value self) return mrb_int_value(mrb, mrb_byte_hash((uint8_t*)&n, sizeof(n))); } -/* 15.2.9.3.6 */ +/* 15.2.8.3.1 */ +/* 15.2.9.3.1 */ /* * call-seq: * self.f <=> other.f => -1, 0, +1, or nil * < => -1 * = => 0 * > => +1 - * Comparison---Returns -1, 0, or +1 depending on whether int is - * less than, equal to, or greater than numeric. This is the - * basis for the tests in Comparable. When the operands are + * Comparison---Returns -1, 0, or +1 depending on whether *int* is + * less than, equal to, or greater than *numeric*. This is the + * basis for the tests in `Comparable`. When the operands are * not comparable, it returns nil instead of raising an exception. */ static mrb_value num_cmp(mrb_state *mrb, mrb_value self) { mrb_value other = mrb_get_arg1(mrb); - mrb_int n; + mrb_int n = cmpnum(mrb, self, other); - n = cmpnum(mrb, self, other); if (n == -2) return mrb_nil_value(); return mrb_fixnum_value(n); } @@ -2074,9 +2198,8 @@ static mrb_value num_lt(mrb_state *mrb, mrb_value self) { mrb_value other = mrb_get_arg1(mrb); - mrb_int n; + mrb_int n = cmpnum(mrb, self, other); - n = cmpnum(mrb, self, other); if (n == -2) cmperr(mrb, self, other); if (n < 0) return mrb_true_value(); return mrb_false_value(); @@ -2086,9 +2209,8 @@ static mrb_value num_le(mrb_state *mrb, mrb_value self) { mrb_value other = mrb_get_arg1(mrb); - mrb_int n; + mrb_int n = cmpnum(mrb, self, other); - n = cmpnum(mrb, self, other); if (n == -2) cmperr(mrb, self, other); if (n <= 0) return mrb_true_value(); return mrb_false_value(); @@ -2098,9 +2220,8 @@ static mrb_value num_gt(mrb_state *mrb, mrb_value self) { mrb_value other = mrb_get_arg1(mrb); - mrb_int n; + mrb_int n = cmpnum(mrb, self, other); - n = cmpnum(mrb, self, other); if (n == -2) cmperr(mrb, self, other); if (n > 0) return mrb_true_value(); return mrb_false_value(); @@ -2110,19 +2231,36 @@ static mrb_value num_ge(mrb_state *mrb, mrb_value self) { mrb_value other = mrb_get_arg1(mrb); - mrb_int n; + mrb_int n = cmpnum(mrb, self, other); - n = cmpnum(mrb, self, other); if (n == -2) cmperr(mrb, self, other); if (n >= 0) return mrb_true_value(); return mrb_false_value(); } +/** + * Compares two mrb_value objects (obj1 and obj2). + * + * @param mrb The mruby state. + * @param obj1 The first object. + * @param obj2 The second object. + * @return An mrb_int indicating the comparison result: + * - 0 if obj1 is equal to obj2. + * - 1 if obj1 is greater than obj2. + * - -1 if obj1 is less than obj2. + * - -2 if the objects are not comparable (error). + * It handles comparisons for integers, floats, bigints, and strings directly. + * For other types, it attempts to call the <=> (spaceship) operator on obj1 + * with obj2 as an argument. + */ MRB_API mrb_int mrb_cmp(mrb_state *mrb, mrb_value obj1, mrb_value obj2) { mrb_value v; + if (mrb_fixnum_p(obj1) || mrb_float_p(obj1)) { + return cmpnum(mrb, obj1, obj2); + } switch (mrb_type(obj1)) { case MRB_TT_INTEGER: case MRB_TT_FLOAT: @@ -2133,8 +2271,7 @@ mrb_cmp(mrb_state *mrb, mrb_value obj1, mrb_value obj2) return -2; return mrb_str_cmp(mrb, obj1, obj2); default: - if (!mrb_respond_to(mrb, obj1, MRB_OPSYM(cmp))) return -2; - v = mrb_funcall_id(mrb, obj1, MRB_OPSYM(cmp), 1, obj2); + v = mrb_funcall_argv(mrb, obj1, MRB_OPSYM(cmp), 1, &obj2); if (mrb_nil_p(v) || !mrb_integer_p(v)) return -2; return mrb_integer(v); @@ -2165,6 +2302,87 @@ flo_hash(mrb_state *mrb, mrb_value flo) #endif /* ------------------------------------------------------------------------*/ +static const mrb_mt_entry numeric_rom_entries[] = { + MRB_MT_ENTRY(num_finite_p, MRB_SYM_Q(finite), MRB_ARGS_NONE()), + MRB_MT_ENTRY(num_infinite_p, MRB_SYM_Q(infinite), MRB_ARGS_NONE()), + MRB_MT_ENTRY(num_eql, MRB_SYM_Q(eql), MRB_ARGS_REQ(1)), /* 15.2.8.3.16 */ +#ifndef MRB_NO_FLOAT + MRB_MT_ENTRY(num_fdiv, MRB_SYM(fdiv), MRB_ARGS_REQ(1)), +#endif +}; + +static const mrb_mt_entry integer_rom_entries[] = { + MRB_MT_ENTRY(int_pow, MRB_OPSYM(pow), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(num_cmp, MRB_OPSYM(cmp), MRB_ARGS_REQ(1)), /* 15.2.8.3.1 */ + MRB_MT_ENTRY(num_lt, MRB_OPSYM(lt), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(num_le, MRB_OPSYM(le), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(num_gt, MRB_OPSYM(gt), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(num_ge, MRB_OPSYM(ge), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(mrb_obj_itself, MRB_SYM(to_i), MRB_ARGS_NONE()), /* 15.2.8.3.24 */ + MRB_MT_ENTRY(mrb_obj_itself, MRB_SYM(to_int), MRB_ARGS_NONE()), + MRB_MT_ENTRY(int_add, MRB_OPSYM(add), MRB_ARGS_REQ(1)), /* 15.2.8.3.1 */ + MRB_MT_ENTRY(int_sub, MRB_OPSYM(sub), MRB_ARGS_REQ(1)), /* 15.2.8.3.2 */ + MRB_MT_ENTRY(int_mul, MRB_OPSYM(mul), MRB_ARGS_REQ(1)), /* 15.2.8.3.3 */ + MRB_MT_ENTRY(int_mod, MRB_OPSYM(mod), MRB_ARGS_REQ(1)), /* 15.2.8.3.5 */ + MRB_MT_ENTRY(int_div, MRB_OPSYM(div), MRB_ARGS_REQ(1)), /* 15.2.8.3.6 */ + MRB_MT_ENTRY(int_quo, MRB_SYM(quo), MRB_ARGS_REQ(1)), /* 15.2.7.4.5(x) */ + MRB_MT_ENTRY(int_idiv, MRB_SYM(div), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(int_equal, MRB_OPSYM(eq), MRB_ARGS_REQ(1)), /* 15.2.8.3.7 */ + MRB_MT_ENTRY(int_rev, MRB_OPSYM(neg), MRB_ARGS_NONE()), /* 15.2.8.3.8 */ + MRB_MT_ENTRY(int_and, MRB_OPSYM(and), MRB_ARGS_REQ(1)), /* 15.2.8.3.9 */ + MRB_MT_ENTRY(int_or, MRB_OPSYM(or), MRB_ARGS_REQ(1)), /* 15.2.8.3.10 */ + MRB_MT_ENTRY(int_xor, MRB_OPSYM(xor), MRB_ARGS_REQ(1)), /* 15.2.8.3.11 */ + MRB_MT_ENTRY(int_lshift, MRB_OPSYM(lshift), MRB_ARGS_REQ(1)), /* 15.2.8.3.12 */ + MRB_MT_ENTRY(int_rshift, MRB_OPSYM(rshift), MRB_ARGS_REQ(1)), /* 15.2.8.3.13 */ + MRB_MT_ENTRY(int_ceil, MRB_SYM(ceil), MRB_ARGS_OPT(1)), /* 15.2.8.3.14 */ + MRB_MT_ENTRY(int_floor, MRB_SYM(floor), MRB_ARGS_OPT(1)), /* 15.2.8.3.17 */ + MRB_MT_ENTRY(int_round, MRB_SYM(round), MRB_ARGS_OPT(1)), /* 15.2.8.3.20 */ + MRB_MT_ENTRY(int_truncate, MRB_SYM(truncate), MRB_ARGS_OPT(1)), /* 15.2.8.3.26 */ + MRB_MT_ENTRY(int_hash, MRB_SYM(hash), MRB_ARGS_NONE()), /* 15.2.8.3.18 */ + MRB_MT_ENTRY(int_to_s, MRB_SYM(to_s), MRB_ARGS_OPT(1)), /* 15.2.8.3.25 */ + MRB_MT_ENTRY(int_to_s, MRB_SYM(inspect), MRB_ARGS_OPT(1)), + MRB_MT_ENTRY(int_divmod, MRB_SYM(divmod), MRB_ARGS_REQ(1)), /* 15.2.8.3.30(x) */ + MRB_MT_ENTRY(coerce_step_counter, MRB_SYM(__coerce_step_counter), MRB_ARGS_REQ(1)), +#ifndef MRB_NO_FLOAT + MRB_MT_ENTRY(int_fdiv, MRB_SYM(fdiv), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(int_to_f, MRB_SYM(to_f), MRB_ARGS_NONE()), /* 15.2.8.3.23 */ +#endif +}; + +#ifndef MRB_NO_FLOAT +static const mrb_mt_entry float_rom_entries[] = { + MRB_MT_ENTRY(flo_pow, MRB_OPSYM(pow), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(flo_div, MRB_OPSYM(div), MRB_ARGS_REQ(1)), /* 15.2.9.3.6 */ + MRB_MT_ENTRY(flo_div, MRB_SYM(quo), MRB_ARGS_REQ(1)), /* 15.2.7.4.5(x) */ + MRB_MT_ENTRY(flo_div, MRB_SYM(fdiv), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(flo_idiv, MRB_SYM(div), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(flo_add, MRB_OPSYM(add), MRB_ARGS_REQ(1)), /* 15.2.9.3.3 */ + MRB_MT_ENTRY(flo_sub, MRB_OPSYM(sub), MRB_ARGS_REQ(1)), /* 15.2.9.3.4 */ + MRB_MT_ENTRY(flo_mul, MRB_OPSYM(mul), MRB_ARGS_REQ(1)), /* 15.2.9.3.5 */ + MRB_MT_ENTRY(flo_mod, MRB_OPSYM(mod), MRB_ARGS_REQ(1)), /* 15.2.9.3.7 */ + MRB_MT_ENTRY(num_cmp, MRB_OPSYM(cmp), MRB_ARGS_REQ(1)), /* 15.2.8.3.1 */ + MRB_MT_ENTRY(num_lt, MRB_OPSYM(lt), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(num_le, MRB_OPSYM(le), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(num_gt, MRB_OPSYM(gt), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(num_ge, MRB_OPSYM(ge), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(flo_eq, MRB_OPSYM(eq), MRB_ARGS_REQ(1)), /* 15.2.9.3.2 */ + MRB_MT_ENTRY(flo_ceil, MRB_SYM(ceil), MRB_ARGS_OPT(1)), /* 15.2.9.3.8 */ + MRB_MT_ENTRY(flo_finite_p, MRB_SYM_Q(finite), MRB_ARGS_NONE()), /* 15.2.9.3.9 */ + MRB_MT_ENTRY(flo_floor, MRB_SYM(floor), MRB_ARGS_OPT(1)), /* 15.2.9.3.10 */ + MRB_MT_ENTRY(flo_infinite_p, MRB_SYM_Q(infinite), MRB_ARGS_NONE()), /* 15.2.9.3.11 */ + MRB_MT_ENTRY(flo_round, MRB_SYM(round), MRB_ARGS_OPT(1)), /* 15.2.9.3.12 */ + MRB_MT_ENTRY(mrb_obj_itself, MRB_SYM(to_f), MRB_ARGS_NONE()), /* 15.2.9.3.13 */ + MRB_MT_ENTRY(flo_to_i, MRB_SYM(to_i), MRB_ARGS_NONE()), /* 15.2.9.3.14 */ + MRB_MT_ENTRY(flo_truncate, MRB_SYM(truncate), MRB_ARGS_OPT(1)), /* 15.2.9.3.15 */ + MRB_MT_ENTRY(flo_divmod, MRB_SYM(divmod), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(flo_to_s, MRB_SYM(to_s), MRB_ARGS_NONE()), /* 15.2.9.3.16(x) */ + MRB_MT_ENTRY(flo_to_s, MRB_SYM(inspect), MRB_ARGS_NONE()), + MRB_MT_ENTRY(flo_nan_p, MRB_SYM_Q(nan), MRB_ARGS_NONE()), + MRB_MT_ENTRY(flo_abs, MRB_SYM(abs), MRB_ARGS_NONE()), /* 15.2.7.4.3 */ + MRB_MT_ENTRY(flo_hash, MRB_SYM(hash), MRB_ARGS_NONE()), +}; +#endif /* !MRB_NO_FLOAT */ + void mrb_init_numeric(mrb_state *mrb) { @@ -2174,95 +2392,26 @@ mrb_init_numeric(mrb_state *mrb) #endif /* Numeric Class */ - numeric = mrb_define_class(mrb, "Numeric", mrb->object_class); /* 15.2.7 */ - mrb_define_method(mrb, numeric, "finite?", num_finite_p, MRB_ARGS_NONE()); - mrb_define_method(mrb, numeric, "infinite?",num_infinite_p, MRB_ARGS_NONE()); - mrb_define_method(mrb, numeric, "eql?", num_eql, MRB_ARGS_REQ(1)); /* 15.2.8.3.16 */ + numeric = mrb_define_class_id(mrb, MRB_SYM(Numeric), mrb->object_class); /* 15.2.7 */ + MRB_MT_INIT_ROM(mrb, numeric, numeric_rom_entries); /* Integer Class */ - mrb->integer_class = integer = mrb_define_class(mrb, "Integer", numeric); /* 15.2.8 */ + mrb->integer_class = integer = mrb_define_class_id(mrb, MRB_SYM(Integer), numeric); /* 15.2.8 */ MRB_SET_INSTANCE_TT(integer, MRB_TT_INTEGER); - mrb_undef_class_method(mrb, integer, "new"); - mrb_define_method(mrb, integer, "**", int_pow, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, integer, "<=>", num_cmp, MRB_ARGS_REQ(1)); /* 15.2.8.3.1 */ - mrb_define_method(mrb, integer, "<", num_lt, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, integer, "<=", num_le, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, integer, ">", num_gt, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, integer, ">=", num_ge, MRB_ARGS_REQ(1)); - - mrb_define_method(mrb, integer, "to_i", int_to_i, MRB_ARGS_NONE()); /* 15.2.8.3.24 */ - mrb_define_method(mrb, integer, "to_int", int_to_i, MRB_ARGS_NONE()); - - mrb_define_method(mrb, integer, "+", int_add, MRB_ARGS_REQ(1)); /* 15.2.8.3.1 */ - mrb_define_method(mrb, integer, "-", int_sub, MRB_ARGS_REQ(1)); /* 15.2.8.3.2 */ - mrb_define_method(mrb, integer, "*", int_mul, MRB_ARGS_REQ(1)); /* 15.2.8.3.3 */ - mrb_define_method(mrb, integer, "%", int_mod, MRB_ARGS_REQ(1)); /* 15.2.8.3.5 */ - mrb_define_method(mrb, integer, "/", int_div, MRB_ARGS_REQ(1)); /* 15.2.8.3.6 */ - mrb_define_method(mrb, integer, "quo", int_quo, MRB_ARGS_REQ(1)); /* 15.2.7.4.5 (x) */ - mrb_define_method(mrb, integer, "div", int_idiv, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, integer, "==", int_equal, MRB_ARGS_REQ(1)); /* 15.2.8.3.7 */ - mrb_define_method(mrb, integer, "~", int_rev, MRB_ARGS_NONE()); /* 15.2.8.3.8 */ - mrb_define_method(mrb, integer, "&", int_and, MRB_ARGS_REQ(1)); /* 15.2.8.3.9 */ - mrb_define_method(mrb, integer, "|", int_or, MRB_ARGS_REQ(1)); /* 15.2.8.3.10 */ - mrb_define_method(mrb, integer, "^", int_xor, MRB_ARGS_REQ(1)); /* 15.2.8.3.11 */ - mrb_define_method(mrb, integer, "<<", int_lshift, MRB_ARGS_REQ(1)); /* 15.2.8.3.12 */ - mrb_define_method(mrb, integer, ">>", int_rshift, MRB_ARGS_REQ(1)); /* 15.2.8.3.13 */ - mrb_define_method(mrb, integer, "ceil", int_ceil, MRB_ARGS_OPT(1)); /* 15.2.8.3.14 */ - mrb_define_method(mrb, integer, "floor", int_floor, MRB_ARGS_OPT(1)); /* 15.2.8.3.17 */ - mrb_define_method(mrb, integer, "round", int_round, MRB_ARGS_OPT(1)); /* 15.2.8.3.20 */ - mrb_define_method(mrb, integer, "truncate", int_truncate, MRB_ARGS_OPT(1)); /* 15.2.8.3.26 */ - mrb_define_method(mrb, integer, "hash", int_hash, MRB_ARGS_NONE()); /* 15.2.8.3.18 */ -#ifndef MRB_NO_FLOAT - mrb_define_method(mrb, integer, "to_f", int_to_f, MRB_ARGS_NONE()); /* 15.2.8.3.23 */ -#endif - mrb_define_method(mrb, integer, "to_s", int_to_s, MRB_ARGS_OPT(1)); /* 15.2.8.3.25 */ - mrb_define_method(mrb, integer, "inspect", int_to_s, MRB_ARGS_OPT(1)); - mrb_define_method(mrb, integer, "divmod", int_divmod, MRB_ARGS_REQ(1)); /* 15.2.8.3.30 (x) */ - mrb_define_method(mrb, integer, "__coerce_step_counter", coerce_step_counter, MRB_ARGS_REQ(2)); + MRB_UNDEF_ALLOCATOR(integer); + mrb_undef_class_method_id(mrb, integer, MRB_SYM(new)); + MRB_MT_INIT_ROM(mrb, integer, integer_rom_entries); /* Fixnum Class for compatibility */ - mrb_define_const(mrb, mrb->object_class, "Fixnum", mrb_obj_value(integer)); + mrb_define_const_id(mrb, mrb->object_class, MRB_SYM(Fixnum), mrb_obj_value(integer)); #ifndef MRB_NO_FLOAT /* Float Class */ - mrb->float_class = fl = mrb_define_class(mrb, "Float", numeric); /* 15.2.9 */ + mrb->float_class = fl = mrb_define_class_id(mrb, MRB_SYM(Float), numeric); /* 15.2.9 */ MRB_SET_INSTANCE_TT(fl, MRB_TT_FLOAT); + MRB_UNDEF_ALLOCATOR(fl); mrb_undef_class_method(mrb, fl, "new"); - mrb_define_method(mrb, fl, "**", flo_pow, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, fl, "/", flo_div, MRB_ARGS_REQ(1)); /* 15.2.9.3.6 */ - mrb_define_method(mrb, fl, "quo", flo_div, MRB_ARGS_REQ(1)); /* 15.2.7.4.5 (x) */ - mrb_define_method(mrb, fl, "div", flo_idiv, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, fl, "+", flo_add, MRB_ARGS_REQ(1)); /* 15.2.9.3.3 */ - mrb_define_method(mrb, fl, "-", flo_sub, MRB_ARGS_REQ(1)); /* 15.2.9.3.4 */ - mrb_define_method(mrb, fl, "*", flo_mul, MRB_ARGS_REQ(1)); /* 15.2.9.3.5 */ - mrb_define_method(mrb, fl, "%", flo_mod, MRB_ARGS_REQ(1)); /* 15.2.9.3.7 */ - mrb_define_method(mrb, fl, "<=>", num_cmp, MRB_ARGS_REQ(1)); /* 15.2.9.3.1 */ - mrb_define_method(mrb, fl, "<", num_lt, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, fl, "<=", num_le, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, fl, ">", num_gt, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, fl, ">=", num_ge, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, fl, "==", flo_eq, MRB_ARGS_REQ(1)); /* 15.2.9.3.2 */ - mrb_define_method(mrb, fl, "~", flo_rev, MRB_ARGS_NONE()); - mrb_define_method(mrb, fl, "&", flo_and, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, fl, "|", flo_or, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, fl, "^", flo_xor, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, fl, ">>", flo_rshift, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, fl, "<<", flo_lshift, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, fl, "ceil", flo_ceil, MRB_ARGS_OPT(1)); /* 15.2.9.3.8 */ - mrb_define_method(mrb, fl, "finite?", flo_finite_p, MRB_ARGS_NONE()); /* 15.2.9.3.9 */ - mrb_define_method(mrb, fl, "floor", flo_floor, MRB_ARGS_OPT(1)); /* 15.2.9.3.10 */ - mrb_define_method(mrb, fl, "infinite?", flo_infinite_p, MRB_ARGS_NONE()); /* 15.2.9.3.11 */ - mrb_define_method(mrb, fl, "round", flo_round, MRB_ARGS_OPT(1)); /* 15.2.9.3.12 */ - mrb_define_method(mrb, fl, "to_f", flo_to_f, MRB_ARGS_NONE()); /* 15.2.9.3.13 */ - mrb_define_method(mrb, fl, "to_i", flo_to_i, MRB_ARGS_NONE()); /* 15.2.9.3.14 */ - mrb_define_method(mrb, fl, "truncate", flo_truncate, MRB_ARGS_OPT(1)); /* 15.2.9.3.15 */ - mrb_define_method(mrb, fl, "divmod", flo_divmod, MRB_ARGS_REQ(1)); - - mrb_define_method(mrb, fl, "to_s", flo_to_s, MRB_ARGS_NONE()); /* 15.2.9.3.16(x) */ - mrb_define_method(mrb, fl, "inspect", flo_to_s, MRB_ARGS_NONE()); - mrb_define_method(mrb, fl, "nan?", flo_nan_p, MRB_ARGS_NONE()); - mrb_define_method(mrb, fl, "abs", flo_abs, MRB_ARGS_NONE()); /* 15.2.7.4.3 */ - mrb_define_method(mrb, fl, "hash", flo_hash, MRB_ARGS_NONE()); + MRB_MT_INIT_ROM(mrb, fl, float_rom_entries); #ifdef INFINITY mrb_define_const_id(mrb, fl, MRB_SYM(INFINITY), mrb_float_value(mrb, INFINITY)); diff --git a/src/numops.c b/src/numops.c index ff8adc2d60..9da0c3ae4c 100644 --- a/src/numops.c +++ b/src/numops.c @@ -7,7 +7,6 @@ #include #include #include -#include MRB_API mrb_value mrb_num_add(mrb_state *mrb, mrb_value x, mrb_value y) diff --git a/src/object.c b/src/object.c index 0b72c21d7e..da8ddb5e5f 100644 --- a/src/object.c +++ b/src/object.c @@ -10,8 +10,19 @@ #include #include #include -#include +/* + * Checks if two mruby values, `v1` and `v2`, are identical. + * For most types, this is equivalent to pointer equality. + * For immediate values (integers, symbols, true, false, nil), + * it compares their actual values. + * + * Behavior under different boxing configurations: + * - MRB_NAN_BOXING: Compares the raw `uint64_t` values. + * - MRB_WORD_BOXING: Compares the raw `mrb_word` values. + * - MRB_NO_BOXING: Checks if types are equal. If so, performs + * type-specific comparisons (value for immediates, pointer for others). + */ MRB_API mrb_bool mrb_obj_eq(mrb_state *mrb, mrb_value v1, mrb_value v2) { @@ -43,48 +54,60 @@ mrb_obj_eq(mrb_state *mrb, mrb_value v1, mrb_value v2) #endif } +/* + * Checks if two mruby values, `v1` and `v2`, are equal. + * This function currently calls mrb_obj_eq to perform the comparison. + */ MRB_API mrb_bool mrb_obj_equal(mrb_state *mrb, mrb_value v1, mrb_value v2) { - /* temporary definition */ return mrb_obj_eq(mrb, v1, v2); } +/* + * Checks for equality between `obj1` and `obj2`. + * + * It first uses `mrb_obj_eq` for an identity check. If that fails, + * it handles cases like mixed integer/float comparisons. + * If `MRB_USE_BIGINT` is defined, it also considers comparisons + * involving BigInts against Integers, other BigInts, or Floats. + * Finally, if none of the above apply, it attempts to call the + * `==` operator (MRB_OPSYM(eq)) on `obj1` with `obj2` as an argument, + * unless `obj1`'s `==` method is the default `mrb_obj_equal_m`. + */ MRB_API mrb_bool mrb_equal(mrb_state *mrb, mrb_value obj1, mrb_value obj2) { - mrb_value result; - if (mrb_obj_eq(mrb, obj1, obj2)) return TRUE; #ifndef MRB_NO_FLOAT /* value mixing with integer and float */ - if (mrb_integer_p(obj1) && mrb_float_p(obj2)) { + else if (mrb_integer_p(obj1) && mrb_float_p(obj2)) { if ((mrb_float)mrb_integer(obj1) == mrb_float(obj2)) return TRUE; - return FALSE; } else if (mrb_float_p(obj1) && mrb_integer_p(obj2)) { if (mrb_float(obj1) == (mrb_float)mrb_integer(obj2)) return TRUE; - return FALSE; } #endif #ifdef MRB_USE_BIGINT - if (mrb_bigint_p(obj1) && (mrb_integer_p(obj2) || mrb_bigint_p(obj2))) { + else if (mrb_bigint_p(obj1) && + (mrb_integer_p(obj2) || mrb_bigint_p(obj2) || mrb_float_p(obj2))) { if (mrb_bint_cmp(mrb, obj1, obj2) == 0) return TRUE; - return FALSE; } #endif - result = mrb_funcall_id(mrb, obj1, MRB_OPSYM(eq), 1, obj2); - if (mrb_test(result)) return TRUE; + else if (!mrb_func_basic_p(mrb, obj1, MRB_OPSYM(eq), mrb_obj_equal_m)) { + mrb_value result = mrb_funcall_argv(mrb, obj1, MRB_OPSYM(eq), 1, &obj2); + if (mrb_test(result)) return TRUE; + } return FALSE; } /* * Document-class: NilClass * - * The class of the singleton object nil. + * The class of the singleton object `nil`. */ /* 15.2.4.3.4 */ @@ -92,7 +115,7 @@ mrb_equal(mrb_state *mrb, mrb_value obj1, mrb_value obj2) * call_seq: * nil.nil? -> true * - * Only the object nil responds true to nil?. + * Only the object *nil* responds `true` to `nil?`. */ static mrb_value @@ -128,10 +151,10 @@ nil_inspect(mrb_state *mrb, mrb_value obj) /*********************************************************************** * Document-class: TrueClass * - * The global value true is the only instance of class - * TrueClass and represents a logically true value in + * The global value `true` is the only instance of class + * `TrueClass` and represents a logically true value in * boolean expressions. The class provides operators allowing - * true to be used in logical expressions. + * `true` to be used in logical expressions. */ /* 15.2.5.3.1 */ @@ -139,8 +162,8 @@ nil_inspect(mrb_state *mrb, mrb_value obj) * call-seq: * true & obj -> true or false * - * And---Returns false if obj is - * nil or false, true otherwise. + * And---Returns `false` if *obj* is + * `nil` or `false`, `true` otherwise. */ static mrb_value @@ -158,8 +181,8 @@ true_and(mrb_state *mrb, mrb_value obj) * call-seq: * true ^ obj -> !obj * - * Exclusive Or---Returns true if obj is - * nil or false, false + * Exclusive Or---Returns `true` if *obj* is + * `nil` or `false`, `false` * otherwise. */ @@ -177,7 +200,7 @@ true_xor(mrb_state *mrb, mrb_value obj) * call-seq: * true.to_s -> "true" * - * The string representation of true is "true". + * The string representation of `true` is "true". */ static mrb_value @@ -193,7 +216,7 @@ true_to_s(mrb_state *mrb, mrb_value obj) * call-seq: * true | obj -> true * - * Or---Returns true. As anObject is an argument to + * Or---Returns `true`. As *anObject* is an argument to * a method call, it is always evaluated; there is no short-circuit * evaluation in this case. * @@ -214,10 +237,10 @@ true_or(mrb_state *mrb, mrb_value obj) /* * Document-class: FalseClass * - * The global value false is the only instance of class - * FalseClass and represents a logically false value in + * The global value `false` is the only instance of class + * `FalseClass` and represents a logically false value in * boolean expressions. The class provides operators allowing - * false to participate correctly in logical expressions. + * `false` to participate correctly in logical expressions. * */ @@ -228,7 +251,7 @@ true_or(mrb_state *mrb, mrb_value obj) * false & obj -> false * nil & obj -> false * - * And---Returns false. obj is always + * And---Returns `false`. *obj* is always * evaluated as it is the argument to a method call---there is no * short-circuit evaluation in this case. */ @@ -246,9 +269,9 @@ false_and(mrb_state *mrb, mrb_value obj) * false ^ obj -> true or false * nil ^ obj -> true or false * - * Exclusive Or---If obj is nil or - * false, returns false; otherwise, returns - * true. + * Exclusive Or---If *obj* is `nil` or + * `false`, returns `false`; otherwise, returns + * `true`. * */ @@ -268,8 +291,8 @@ false_xor(mrb_state *mrb, mrb_value obj) * false | obj -> true or false * nil | obj -> true or false * - * Or---Returns false if obj is - * nil or false; true otherwise. + * Or---Returns `false` if *obj* is + * `nil` or `false`; `true` otherwise. */ static mrb_value @@ -297,6 +320,32 @@ false_to_s(mrb_state *mrb, mrb_value obj) return str; } +/* ---------------------------*/ +static const mrb_mt_entry nil_rom_entries[] = { + MRB_MT_ENTRY(false_and, MRB_OPSYM(and), MRB_ARGS_REQ(1)), /* 15.2.4.3.1 */ + MRB_MT_ENTRY(false_or, MRB_OPSYM(or), MRB_ARGS_REQ(1)), /* 15.2.4.3.2 */ + MRB_MT_ENTRY(false_xor, MRB_OPSYM(xor), MRB_ARGS_REQ(1)), /* 15.2.4.3.3 */ + MRB_MT_ENTRY(mrb_true, MRB_SYM_Q(nil), MRB_ARGS_NONE()), /* 15.2.4.3.4 */ + MRB_MT_ENTRY(nil_to_s, MRB_SYM(to_s), MRB_ARGS_NONE()), /* 15.2.4.3.5 */ + MRB_MT_ENTRY(nil_inspect, MRB_SYM(inspect), MRB_ARGS_NONE()), +}; + +static const mrb_mt_entry true_rom_entries[] = { + MRB_MT_ENTRY(true_and, MRB_OPSYM(and), MRB_ARGS_REQ(1)), /* 15.2.5.3.1 */ + MRB_MT_ENTRY(true_or, MRB_OPSYM(or), MRB_ARGS_REQ(1)), /* 15.2.5.3.2 */ + MRB_MT_ENTRY(true_xor, MRB_OPSYM(xor), MRB_ARGS_REQ(1)), /* 15.2.5.3.3 */ + MRB_MT_ENTRY(true_to_s, MRB_SYM(to_s), MRB_ARGS_NONE()), /* 15.2.5.3.4 */ + MRB_MT_ENTRY(true_to_s, MRB_SYM(inspect), MRB_ARGS_NONE()), +}; + +static const mrb_mt_entry false_rom_entries[] = { + MRB_MT_ENTRY(false_and, MRB_OPSYM(and), MRB_ARGS_REQ(1)), /* 15.2.4.3.1 */ + MRB_MT_ENTRY(false_or, MRB_OPSYM(or), MRB_ARGS_REQ(1)), /* 15.2.4.3.2 */ + MRB_MT_ENTRY(false_xor, MRB_OPSYM(xor), MRB_ARGS_REQ(1)), /* 15.2.4.3.3 */ + MRB_MT_ENTRY(false_to_s, MRB_SYM(to_s), MRB_ARGS_NONE()), /* 15.2.6.3.4 */ + MRB_MT_ENTRY(false_to_s, MRB_SYM(inspect), MRB_ARGS_NONE()), +}; + void mrb_init_object(mrb_state *mrb) { @@ -304,33 +353,20 @@ mrb_init_object(mrb_state *mrb) struct RClass *t; struct RClass *f; - mrb->nil_class = n = mrb_define_class(mrb, "NilClass", mrb->object_class); + mrb->nil_class = n = mrb_define_class_id(mrb, MRB_SYM(NilClass), mrb->object_class); MRB_SET_INSTANCE_TT(n, MRB_TT_FALSE); - mrb_undef_class_method(mrb, n, "new"); - mrb_define_method(mrb, n, "&", false_and, MRB_ARGS_REQ(1)); /* 15.2.4.3.1 */ - mrb_define_method(mrb, n, "^", false_xor, MRB_ARGS_REQ(1)); /* 15.2.4.3.2 */ - mrb_define_method(mrb, n, "|", false_or, MRB_ARGS_REQ(1)); /* 15.2.4.3.3 */ - mrb_define_method(mrb, n, "nil?", mrb_true, MRB_ARGS_NONE()); /* 15.2.4.3.4 */ - mrb_define_method(mrb, n, "to_s", nil_to_s, MRB_ARGS_NONE()); /* 15.2.4.3.5 */ - mrb_define_method(mrb, n, "inspect", nil_inspect, MRB_ARGS_NONE()); - - mrb->true_class = t = mrb_define_class(mrb, "TrueClass", mrb->object_class); + mrb_undef_class_method_id(mrb, n, MRB_SYM(new)); + MRB_MT_INIT_ROM(mrb, n, nil_rom_entries); + + mrb->true_class = t = mrb_define_class_id(mrb, MRB_SYM(TrueClass), mrb->object_class); MRB_SET_INSTANCE_TT(t, MRB_TT_TRUE); - mrb_undef_class_method(mrb, t, "new"); - mrb_define_method(mrb, t, "&", true_and, MRB_ARGS_REQ(1)); /* 15.2.5.3.1 */ - mrb_define_method(mrb, t, "^", true_xor, MRB_ARGS_REQ(1)); /* 15.2.5.3.2 */ - mrb_define_method(mrb, t, "to_s", true_to_s, MRB_ARGS_NONE()); /* 15.2.5.3.3 */ - mrb_define_method(mrb, t, "|", true_or, MRB_ARGS_REQ(1)); /* 15.2.5.3.4 */ - mrb_define_method(mrb, t, "inspect", true_to_s, MRB_ARGS_NONE()); - - mrb->false_class = f = mrb_define_class(mrb, "FalseClass", mrb->object_class); + mrb_undef_class_method_id(mrb, t, MRB_SYM(new)); + MRB_MT_INIT_ROM(mrb, t, true_rom_entries); + + mrb->false_class = f = mrb_define_class_id(mrb, MRB_SYM(FalseClass), mrb->object_class); MRB_SET_INSTANCE_TT(f, MRB_TT_FALSE); - mrb_undef_class_method(mrb, f, "new"); - mrb_define_method(mrb, f, "&", false_and, MRB_ARGS_REQ(1)); /* 15.2.6.3.1 */ - mrb_define_method(mrb, f, "^", false_xor, MRB_ARGS_REQ(1)); /* 15.2.6.3.2 */ - mrb_define_method(mrb, f, "to_s", false_to_s, MRB_ARGS_NONE()); /* 15.2.6.3.3 */ - mrb_define_method(mrb, f, "|", false_or, MRB_ARGS_REQ(1)); /* 15.2.6.3.4 */ - mrb_define_method(mrb, f, "inspect", false_to_s, MRB_ARGS_NONE()); + mrb_undef_class_method_id(mrb, f, MRB_SYM(new)); + MRB_MT_INIT_ROM(mrb, f, false_rom_entries); } static const char* @@ -357,15 +393,28 @@ convert_type(mrb_state *mrb, mrb_value val, const char *tname, mrb_sym method, m return mrb_funcall_argv(mrb, val, method, 0, 0); } +/* + * Attempts to convert the mruby value `val` to the specified `type` + * by calling the given `method` (a symbol) on `val`. + * + * It first checks if `val` is already of the target `type`. If not, it + * proceeds to call the conversion `method` on `val`. + * + * If the conversion method does not return a value of the target `type`, + * a `TypeError` is raised. However, as a special case, if the target + * `type` is `MRB_TT_STRING`, and the initial conversion fails to produce + * a string, this function will then attempt to call `mrb_any_to_s` on + * the original `val` as a fallback mechanism. + * + * Returns the converted value if successful. + */ MRB_API mrb_value mrb_type_convert(mrb_state *mrb, mrb_value val, enum mrb_vtype type, mrb_sym method) { - mrb_value v; - const char *tname; - if (mrb_type(val) == type) return val; - tname = type_name(type); - v = convert_type(mrb, val, tname, method, TRUE); + + const char *tname = type_name(type); + mrb_value v = convert_type(mrb, val, tname, method, TRUE); if (mrb_type(v) != type) { if (type == MRB_TT_STRING) return mrb_any_to_s(mrb, val); mrb_raisef(mrb, E_TYPE_ERROR, "%v cannot be converted to %s by #%n", val, tname, method); @@ -373,17 +422,41 @@ mrb_type_convert(mrb_state *mrb, mrb_value val, enum mrb_vtype type, mrb_sym met return v; } +/* + * Attempts to convert the mruby value `val` to the specified `type` + * by calling the given `method` (a symbol) on `val`. + * + * This function first checks if `val` is already of the target `type`. + * An exception to this initial check is if the target `type` is + * `MRB_TT_CDATA` or `MRB_TT_ISTRUCT`; in these cases, the conversion + * attempt proceeds regardless of `val`'s current type. + * + * If `val` is not already of the target `type` (or if it's `MRB_TT_CDATA` + * or `MRB_TT_ISTRUCT`), the specified `method` is called on `val` to + * perform the conversion. Unlike `mrb_type_convert`, this function + * does *not* raise an error if the conversion fails or if the returned + * value is not of the target `type`. + * + * Returns the converted value if the conversion was successful and the + * resulting value is of the target `type`. Otherwise, it returns + * `mrb_nil_value()`. + */ MRB_API mrb_value mrb_type_convert_check(mrb_state *mrb, mrb_value val, enum mrb_vtype type, mrb_sym method) { - mrb_value v; - - if (mrb_type(val) == type && type != MRB_TT_DATA && type != MRB_TT_ISTRUCT) return val; - v = convert_type(mrb, val, type_name(type), method, FALSE); + if (mrb_type(val) == type && type != MRB_TT_CDATA && type != MRB_TT_ISTRUCT) return val; + mrb_value v = convert_type(mrb, val, type_name(type), method, FALSE); if (mrb_nil_p(v) || mrb_type(v) != type) return mrb_nil_value(); return v; } +/* + * Checks if the mruby value `x` is of the specified type `t`. + * + * If the type of `x` does not match `t`, this function raises + * a `TypeError`. The error message provides details about the + * actual type of `x` and the expected type `t`. + */ MRB_API void mrb_check_type(mrb_state *mrb, mrb_value x, enum mrb_vtype t) { @@ -420,8 +493,8 @@ mrb_check_type(mrb_state *mrb, mrb_value x, enum mrb_vtype t) * call-seq: * obj.to_s => string * - * Returns a string representing obj. The default - * to_s prints the object's class and an encoding of the + * Returns a string representing *obj*. The default + * `to_s` prints the object's class and an encoding of the * object id. As a special case, the top-level object that is the * initial execution context of Ruby programs returns "main." */ @@ -448,9 +521,15 @@ mrb_any_to_s(mrb_state *mrb, mrb_value obj) * obj.is_a?(class) => true or false * obj.kind_of?(class) => true or false * - * Returns true if class is the class of - * obj, or if class is one of the superclasses of - * obj or modules included in obj. + * Checks if the mruby object `obj` is an instance of class `c`, + * or an instance of a class that inherits from `c`, or an instance + * of a class that includes `c` if `c` is a module. + * + * This function traverses the class hierarchy of `obj` upwards. + * It returns `TRUE` if `c` is found in the ancestry. Otherwise, + * it returns `FALSE`. + * + * If `c` is not a class or module, a `TypeError` is raised. * * module M; end * class A @@ -459,16 +538,13 @@ mrb_any_to_s(mrb_state *mrb, mrb_value obj) * class B < A; end * class C < B; end * b = B.new - * b.instance_of? A #=> false - * b.instance_of? B #=> true - * b.instance_of? C #=> false - * b.instance_of? M #=> false + * b.instance_of? A #=> false (mrb_obj_is_instance_of) + * b.instance_of? B #=> true (mrb_obj_is_instance_of) * b.kind_of? A #=> true * b.kind_of? B #=> true * b.kind_of? C #=> false * b.kind_of? M #=> true */ - MRB_API mrb_bool mrb_obj_is_kind_of(mrb_state *mrb, mrb_value obj, struct RClass *c) { @@ -505,6 +581,28 @@ mrb_value mrb_complex_to_f(mrb_state *mrb, mrb_value comp); mrb_value mrb_complex_to_i(mrb_state *mrb, mrb_value comp); #endif +/* + * Ensures that the given mruby value `val` is an Integer. + * + * If `val` is already an `MRB_TT_INTEGER`, it is returned directly. + * + * If `val` is an `MRB_TT_FLOAT` (and `MRB_NO_FLOAT` is not defined), + * it is converted to an integer using `mrb_float_to_integer`. + * + * The function also handles conversions from other numeric types if + * the respective modules are enabled: + * - `MRB_TT_BIGINT` (if `MRB_USE_BIGINT` is defined): `val` is returned as is, + * as BigInts are considered integers. + * - `MRB_TT_RATIONAL` (if `MRB_USE_RATIONAL` is defined): Converted using + * `mrb_rational_to_i`. + * - `MRB_TT_COMPLEX` (if `MRB_USE_COMPLEX` is defined): Converted using + * `mrb_complex_to_i` (typically if the imaginary part is zero). + * + * If `val` cannot be converted to an Integer (e.g., it's a String or Array, + * or a Complex with a non-zero imaginary part), a `TypeError` is raised. + * + * Returns the (potentially converted) integer value. + */ MRB_API mrb_value mrb_ensure_integer_type(mrb_state *mrb, mrb_value val) { @@ -537,6 +635,25 @@ mrb_ensure_integer_type(mrb_state *mrb, mrb_value val) return val; } +/* + * Ensures that the given mruby value `val` is a C `mrb_int` (fixed-size integer). + * + * This function first calls `mrb_ensure_integer_type` to convert `val` + * to a generic mruby Integer if it's not already. This step might result + * in `val` being a Fixnum or a BigInt (if `MRB_USE_BIGINT` is enabled). + * + * If `mrb_ensure_integer_type` returns a BigInt (and `MRB_USE_BIGINT` + * is defined), this function then attempts to convert the BigInt to a C + * `mrb_int` using `mrb_bint_as_int`. This conversion may involve truncation + * if the BigInt's value is outside the representable range of `mrb_int`, + * or it could raise an error (e.g., RangeError) depending on the + * `mrb_bint_as_int` implementation if the value is too large to truncate. + * + * If `val` is already a standard Integer (Fixnum) after the call to + * `mrb_ensure_integer_type`, it is returned directly as it fits `mrb_int`. + * + * Returns an `mrb_value` that represents a C `mrb_int`. + */ MRB_API mrb_value mrb_ensure_int_type(mrb_state *mrb, mrb_value val) { @@ -550,6 +667,28 @@ mrb_ensure_int_type(mrb_state *mrb, mrb_value val) } #ifndef MRB_NO_FLOAT +/* + * Ensures that the given mruby value `val` is a Float. + * + * If `val` is `nil`, this function raises a `TypeError`. + * + * If `val` is an `MRB_TT_INTEGER`, it is converted to an `mrb_float`. + * If `val` is already an `MRB_TT_FLOAT`, it is returned directly. + * + * The function also handles conversions from other numeric types if the + * respective mruby modules are enabled: + * - `MRB_TT_RATIONAL` (if `MRB_USE_RATIONAL` is defined): Converted to Float + * using `mrb_rational_to_f`. + * - `MRB_TT_COMPLEX` (if `MRB_USE_COMPLEX` is defined): Converted to Float + * using `mrb_complex_to_f` (typically requires the imaginary part to be zero). + * - `MRB_TT_BIGINT` (if `MRB_USE_BIGINT` is defined): Converted to Float + * using `mrb_bint_as_float`. + * + * If `val` cannot be converted to a Float (e.g., it's a String, Array, or + * an incompatible Complex number), a `TypeError` is raised. + * + * Returns an `mrb_value` representing an `mrb_float`. + */ MRB_API mrb_value mrb_ensure_float_type(mrb_state *mrb, mrb_value val) { @@ -586,6 +725,13 @@ mrb_ensure_float_type(mrb_state *mrb, mrb_value val) } #endif +/* + * Ensures that the given mruby value `str` is a String. + * + * If `str` is not of type `MRB_TT_STRING`, this function raises + * a `TypeError`. + * Otherwise, `str` itself is returned. + */ MRB_API mrb_value mrb_ensure_string_type(mrb_state *mrb, mrb_value str) { @@ -595,6 +741,14 @@ mrb_ensure_string_type(mrb_state *mrb, mrb_value str) return str; } +/* + * Checks if the given mruby value `str` is a String. + * + * If `str` is of type `MRB_TT_STRING`, this function returns `str`. + * Otherwise (if `str` is not a String), it returns `mrb_nil_value()` + * without raising an error. This allows for type checking without + * forcing error handling. + */ MRB_API mrb_value mrb_check_string_type(mrb_state *mrb, mrb_value str) { @@ -602,6 +756,13 @@ mrb_check_string_type(mrb_state *mrb, mrb_value str) return str; } +/* + * Ensures that the given mruby value `ary` is an Array. + * + * If `ary` is not of type `MRB_TT_ARRAY`, this function raises + * a `TypeError`. + * Otherwise, `ary` itself is returned. + */ MRB_API mrb_value mrb_ensure_array_type(mrb_state *mrb, mrb_value ary) { @@ -611,6 +772,14 @@ mrb_ensure_array_type(mrb_state *mrb, mrb_value ary) return ary; } +/* + * Checks if the given mruby value `ary` is an Array. + * + * If `ary` is of type `MRB_TT_ARRAY`, this function returns `ary`. + * Otherwise (if `ary` is not an Array), it returns `mrb_nil_value()` + * without raising an error. This allows for type checking without + * forcing error handling. + */ MRB_API mrb_value mrb_check_array_type(mrb_state *mrb, mrb_value ary) { @@ -618,6 +787,13 @@ mrb_check_array_type(mrb_state *mrb, mrb_value ary) return ary; } +/* + * Ensures that the given mruby value `hash` is a Hash. + * + * If `hash` is not of type `MRB_TT_HASH`, this function raises + * a `TypeError`. + * Otherwise, `hash` itself is returned. + */ MRB_API mrb_value mrb_ensure_hash_type(mrb_state *mrb, mrb_value hash) { @@ -627,6 +803,14 @@ mrb_ensure_hash_type(mrb_state *mrb, mrb_value hash) return hash; } +/* + * Checks if the given mruby value `hash` is a Hash. + * + * If `hash` is of type `MRB_TT_HASH`, this function returns `hash`. + * Otherwise (if `hash` is not a Hash), it returns `mrb_nil_value()` + * without raising an error. This allows for type checking without + * forcing error handling. + */ MRB_API mrb_value mrb_check_hash_type(mrb_state *mrb, mrb_value hash) { @@ -634,19 +818,65 @@ mrb_check_hash_type(mrb_state *mrb, mrb_value hash) return hash; } +/* + * Returns a human-readable string representation of the mruby object `obj`. + * + * This function calls the `inspect` method (identified by `MRB_SYM(inspect)`) + * on the given `obj`. The `inspect` method is expected to return a string + * that is suitable for debugging and inspection. + * + * If the object's `inspect` method does not return a String value (e.g., it + * returns `nil` or another type, or if the class doesn't define `inspect` + * appropriately), this function falls back to calling `mrb_obj_as_string`. + * `mrb_obj_as_string` typically provides a basic string representation, + * such as "#" if `inspect` is unavailable or + * misbehaves by not returning a string. + * + * The function ultimately returns the resulting string `mrb_value`. + */ MRB_API mrb_value mrb_inspect(mrb_state *mrb, mrb_value obj) { - mrb_value v = mrb_funcall_id(mrb, obj, MRB_SYM(inspect), 0); + mrb_value v = mrb_funcall_argv(mrb, obj, MRB_SYM(inspect), 0, NULL); if (!mrb_string_p(v)) { v = mrb_obj_as_string(mrb, obj); } return v; } +/* + * Checks if two mruby values, `obj1` and `obj2`, are equal using + * `eql?` semantics. + * + * This function first performs an identity check on `obj1` and `obj2` + * using `mrb_obj_eq`. If they are identical (i.e., the same object), + * it returns `TRUE` immediately. + * + * Otherwise, it calls the `eql?` method on `obj1`, passing `obj2` as + * an argument. The symbol for the `eql?` method is `MRB_SYM_Q(eql)`. + * + * The function returns `TRUE` if the `eql?` method call returns a truthy + * value (any value other than `false` or `nil`). Otherwise, it returns + * `FALSE`. This is determined by `mrb_test` on the result of the + * method call. + */ MRB_API mrb_bool mrb_eql(mrb_state *mrb, mrb_value obj1, mrb_value obj2) { if (mrb_obj_eq(mrb, obj1, obj2)) return TRUE; - return mrb_test(mrb_funcall_id(mrb, obj1, MRB_SYM_Q(eql), 1, obj2)); + return mrb_test(mrb_funcall_argv(mrb, obj1, MRB_SYM_Q(eql), 1, &obj2)); +} + +/* + * Returns the receiver object itself. + * + * This function simply returns the mruby value `self` that it was passed. + * It corresponds to the `Object#itself` method in Ruby, which is useful + * in some functional programming patterns or for obtaining the object + * itself in a chain of method calls. + */ +MRB_API mrb_value +mrb_obj_itself(mrb_state *mrb, mrb_value self) +{ + return self; } diff --git a/src/opcode.h b/src/opcode.h deleted file mode 100644 index fe4d17a218..0000000000 --- a/src/opcode.h +++ /dev/null @@ -1,2 +0,0 @@ -/* this header file is to be removed soon. */ -#include diff --git a/src/pool.c b/src/pool.c deleted file mode 100644 index ab30be1d86..0000000000 --- a/src/pool.c +++ /dev/null @@ -1,207 +0,0 @@ -/* -** pool.c - memory pool -** -** See Copyright Notice in mruby.h -*/ - -#include -#include - -/* configuration section */ -/* allocated memory address should be multiple of POOL_ALIGNMENT */ -/* or undef it if alignment does not matter */ -#ifndef POOL_ALIGNMENT -#if INTPTR_MAX == INT64_MAX -#define POOL_ALIGNMENT 8 -#else -#define POOL_ALIGNMENT 4 -#endif -#endif -/* page size of memory pool */ -#ifndef POOL_PAGE_SIZE -#define POOL_PAGE_SIZE 16000 -#endif -/* end of configuration section */ - -/* Disable MSVC warning "C4200: nonstandard extension used: zero-sized array - * in struct/union" when in C++ mode */ -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4200) -#endif - -struct mrb_pool_page { - struct mrb_pool_page *next; - size_t offset; - size_t len; - void *last; - char page[]; -}; - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -struct mrb_pool { - mrb_state *mrb; - struct mrb_pool_page *pages; -}; - -#undef TEST_POOL -#ifdef TEST_POOL - -#define mrb_malloc_simple(m,s) malloc(s) -#define mrb_free(m,p) free(p) -#endif - -#ifdef POOL_ALIGNMENT -# define ALIGN_PADDING(x) ((SIZE_MAX - (x) + 1) & (POOL_ALIGNMENT - 1)) -#else -# define ALIGN_PADDING(x) (0) -#endif - -MRB_API mrb_pool* -mrb_pool_open(mrb_state *mrb) -{ - mrb_pool *pool = (mrb_pool *)mrb_malloc_simple(mrb, sizeof(mrb_pool)); - - if (pool) { - pool->mrb = mrb; - pool->pages = NULL; - } - - return pool; -} - -MRB_API void -mrb_pool_close(mrb_pool *pool) -{ - struct mrb_pool_page *page, *tmp; - - if (!pool) return; - page = pool->pages; - while (page) { - tmp = page; - page = page->next; - mrb_free(pool->mrb, tmp); - } - mrb_free(pool->mrb, pool); -} - -static struct mrb_pool_page* -page_alloc(mrb_pool *pool, size_t len) -{ - struct mrb_pool_page *page; - - if (len < POOL_PAGE_SIZE) - len = POOL_PAGE_SIZE; - page = (struct mrb_pool_page *)mrb_malloc_simple(pool->mrb, sizeof(struct mrb_pool_page)+len); - if (page) { - page->offset = 0; - page->len = len; - } - - return page; -} - -MRB_API void* -mrb_pool_alloc(mrb_pool *pool, size_t len) -{ - struct mrb_pool_page *page; - size_t n; - - if (!pool) return NULL; - len += ALIGN_PADDING(len); - page = pool->pages; - while (page) { - if (page->offset + len <= page->len) { - n = page->offset; - page->offset += len; - page->last = (char*)page->page+n; - return page->last; - } - page = page->next; - } - page = page_alloc(pool, len); - if (!page) return NULL; - page->offset = len; - page->next = pool->pages; - pool->pages = page; - - page->last = (void*)page->page; - return page->last; -} - -MRB_API mrb_bool -mrb_pool_can_realloc(mrb_pool *pool, void *p, size_t len) -{ - struct mrb_pool_page *page; - - if (!pool) return FALSE; - len += ALIGN_PADDING(len); - page = pool->pages; - while (page) { - if (page->last == p) { - size_t beg; - - beg = (char*)p - page->page; - if (beg + len > page->len) return FALSE; - return TRUE; - } - page = page->next; - } - return FALSE; -} - -MRB_API void* -mrb_pool_realloc(mrb_pool *pool, void *p, size_t oldlen, size_t newlen) -{ - struct mrb_pool_page *page; - void *np; - - if (!pool) return NULL; - oldlen += ALIGN_PADDING(oldlen); - newlen += ALIGN_PADDING(newlen); - page = pool->pages; - while (page) { - if (page->last == p) { - size_t beg; - - beg = (char*)p - page->page; - if (beg + oldlen != page->offset) break; - if (beg + newlen > page->len) { - page->offset = beg; - break; - } - page->offset = beg + newlen; - return p; - } - page = page->next; - } - np = mrb_pool_alloc(pool, newlen); - if (np == NULL) { - return NULL; - } - memcpy(np, p, oldlen); - return np; -} - -#ifdef TEST_POOL -int -main(void) -{ - int i, len = 250; - mrb_pool *pool; - void *p; - - pool = mrb_pool_open(NULL); - p = mrb_pool_alloc(pool, len); - for (i=1; i<20; i++) { - printf("%p (len=%d) %ud\n", p, len, mrb_pool_can_realloc(pool, p, len*2)); - p = mrb_pool_realloc(pool, p, len, len*2); - len *= 2; - } - mrb_pool_close(pool); - return 0; -} -#endif diff --git a/src/print.c b/src/print.c index c6cb6464e5..c54fa20499 100644 --- a/src/print.c +++ b/src/print.c @@ -1,5 +1,5 @@ /* -** print.c - Kernel.#p +** print.c - Kernel#p, Kernel#print ** ** See Copyright Notice in mruby.h */ @@ -8,63 +8,114 @@ #include #include #include -#include #include +#if defined(_WIN32) +# include +# include +#ifdef _MSC_VER +# define isatty(x) _isatty(x) +# define fileno(x) _fileno(x) +#endif +#else +# include +#endif #ifndef MRB_NO_STDIO static void -printcstr(const char *str, size_t len, FILE *stream) +printcstr(mrb_state *mrb, const char *str, size_t len, FILE *stream) { - if (str) { - fwrite(str, len, 1, stream); - putc('\n', stream); +#if defined(_WIN32) + if (isatty(fileno(stream))) { + DWORD written; + int wlen = MultiByteToWideChar(CP_UTF8, 0, str, (int)len, NULL, 0); + wchar_t* utf16 = (wchar_t*)mrb_malloc(mrb, (wlen+1) * sizeof(wchar_t)); + if (MultiByteToWideChar(CP_UTF8, 0, str, (int)len, utf16, wlen) > 0) { + utf16[wlen] = 0; + WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE), + utf16, (DWORD)wlen, &written, NULL); + } + mrb_free(mrb, utf16); + return; } +#endif + fwrite(str, (size_t)len, 1, stream); } static void -printstr(mrb_value obj, FILE *stream) +printstr(mrb_state *mrb, mrb_value obj, FILE *stream) { - if (mrb_string_p(obj)) { - printcstr(RSTRING_PTR(obj), RSTRING_LEN(obj), stream); + if (!mrb_string_p(obj)) { + obj = mrb_obj_as_string(mrb, obj); } + printcstr(mrb, RSTRING_PTR(obj), RSTRING_LEN(obj), stream); +} + +static void +printstrln(mrb_state *mrb, mrb_value obj, FILE *stream) +{ + printstr(mrb, obj, stream); + printcstr(mrb, "\n", 1, stdout); } void -mrb_core_init_printabort(void) +mrb_core_init_printabort(mrb_state *mrb) { static const char *str = "Failed mruby core initialization"; - printcstr(str, strlen(str), stdout); + printcstr(mrb, str, strlen(str), stdout); } +#ifndef HAVE_MRUBY_IO_GEM +mrb_value +mrb_print_m(mrb_state *mrb, mrb_value self) +{ + mrb_int argc; + mrb_value *argv; + + mrb_get_args(mrb, "*", &argv, &argc); + for (mrb_int i=0; inomem_err) { - static const char *str = "Out of memory"; - printcstr(str, strlen(str), stdout); + static const char *str = "Out of memory\n"; + printcstr(mrb, str, strlen(str), stdout); } else { - printstr(mrb_inspect(mrb, obj), stdout); + printstrln(mrb, mrb_inspect(mrb, obj), stdout); } + if (isatty(fileno(stdout))) fflush(stdout); } MRB_API void mrb_show_version(mrb_state *mrb) { - printstr(mrb_const_get(mrb, mrb_obj_value(mrb->object_class), MRB_SYM(MRUBY_DESCRIPTION)), stdout); + printstrln(mrb, mrb_const_get(mrb, mrb_obj_value(mrb->object_class), MRB_SYM(MRUBY_DESCRIPTION)), stdout); } MRB_API void mrb_show_copyright(mrb_state *mrb) { - printstr(mrb_const_get(mrb, mrb_obj_value(mrb->object_class), MRB_SYM(MRUBY_COPYRIGHT)), stdout); + printstrln(mrb, mrb_const_get(mrb, mrb_obj_value(mrb->object_class), MRB_SYM(MRUBY_COPYRIGHT)), stdout); } #else void -mrb_core_init_printabort(void) +mrb_core_init_printabort(mrb_state *mrb) +{ +} + +mrb_value +mrb_print_m(mrb_state *mrb, mrb_value self) { + return mrb_nil_value(); } MRB_API void diff --git a/src/proc.c b/src/proc.c index bbc4a2b141..93f070b9be 100644 --- a/src/proc.c +++ b/src/proc.c @@ -12,7 +12,6 @@ #include #include #include -#include static const mrb_code call_iseq[] = { OP_CALL, @@ -36,8 +35,9 @@ static const mrb_irep call_irep = { 0, /* refcnt */ }; +mrb_alignas(8) static const struct RProc call_proc = { - NULL, NULL, MRB_TT_PROC, MRB_GC_RED, MRB_FL_OBJ_IS_FROZEN | MRB_PROC_SCOPE | MRB_PROC_STRICT, + NULL, MRB_TT_PROC, MRB_GC_RED, MRB_OBJ_IS_FROZEN, MRB_PROC_SCOPE | MRB_PROC_STRICT, { &call_irep }, NULL, { NULL } }; @@ -47,7 +47,7 @@ mrb_proc_new(mrb_state *mrb, const mrb_irep *irep) struct RProc *p; mrb_callinfo *ci = mrb->c->ci; - p = MRB_OBJ_ALLOC(mrb, MRB_TT_PROC, mrb->proc_class); + p = (struct RProc*)mrb_obj_alloc_core(mrb, MRB_TT_PROC, mrb->proc_class); if (ci) { struct RClass *tc = NULL; @@ -76,7 +76,7 @@ mrb_env_new(mrb_state *mrb, struct mrb_context *c, mrb_callinfo *ci, int nstacks int n = ci->n; int nk = ci->nk; - e = MRB_OBJ_ALLOC(mrb, MRB_TT_ENV, NULL); + e = (struct REnv*)mrb_obj_alloc_core(mrb, MRB_TT_ENV, NULL); e->c = tc; MRB_ENV_SET_LEN(e, nstacks); bidx += (n == 15) ? 1 : n; @@ -85,6 +85,7 @@ mrb_env_new(mrb_state *mrb, struct mrb_context *c, mrb_callinfo *ci, int nstacks e->mid = ci->mid; e->stack = stack; e->cxt = c; + MRB_ENV_COPY_FLAGS_FROM_CI(e, ci); return e; } @@ -96,12 +97,16 @@ closure_setup(mrb_state *mrb, struct RProc *p) const struct RProc *up = p->upper; struct REnv *e = NULL; - if (ci && (e = mrb_vm_ci_env(ci)) != NULL) { + mrb_assert(ci != NULL); + if ((e = mrb_vm_ci_env(ci)) != NULL) { /* do nothing, because e is assigned already */ } else if (up) { struct RClass *tc = ci->u.target_class; + if (MRB_PROC_ALIAS_P(up)) { /* alias */ + up = up->upper; + } e = mrb_env_new(mrb, mrb->c, ci, up->body.irep->nlocals, ci->stack, tc); ci->u.env = e; if (MRB_PROC_ENV_P(up) && MRB_PROC_ENV(up)->cxt == NULL) { @@ -124,6 +129,14 @@ mrb_closure_new(mrb_state *mrb, const mrb_irep *irep) return p; } +/* + * Creates a new Proc object from a C function. + * + * mrb: The mruby state. + * func: The C function to create the Proc from. + * + * Returns a pointer to the new Proc object. + */ MRB_API struct RProc* mrb_proc_new_cfunc(mrb_state *mrb, mrb_func_t func) { @@ -138,6 +151,17 @@ mrb_proc_new_cfunc(mrb_state *mrb, mrb_func_t func) return p; } +/* + * Creates a new Proc object from a C function with a new environment. + * + * mrb: The mruby state. + * func: The C function to create the Proc from. + * argc: The number of arguments to be stored in the environment. + * argv: The array of mrb_value to be stored in the environment. + * These values are copied into the environment. + * + * Returns a pointer to the new Proc object. + */ MRB_API struct RProc* mrb_proc_new_cfunc_with_env(mrb_state *mrb, mrb_func_t func, mrb_int argc, const mrb_value *argv) { @@ -154,24 +178,56 @@ mrb_proc_new_cfunc_with_env(mrb_state *mrb, mrb_func_t func, mrb_int argc, const MRB_ENV_SET_LEN(e, argc); if (argv) { - for (i = 0; i < argc; ++i) { + for (i = 0; i < argc; i++) { e->stack[i] = argv[i]; } } else { - for (i = 0; i < argc; ++i) { + for (i = 0; i < argc; i++) { SET_NIL_VALUE(e->stack[i]); } } return p; } +/* + * Creates a new closure from a C function. + * + * This function creates a new Proc object that represents a C function + * along with an environment for a specified number of local variables. + * The local variables in the environment are initialized to nil. + * + * mrb: The mruby state. + * func: The C function to create the closure from. + * nlocals: The number of local variables to allocate space for in the + * closure's environment. + * + * Returns a pointer to the new Proc object representing the closure. + */ MRB_API struct RProc* mrb_closure_new_cfunc(mrb_state *mrb, mrb_func_t func, int nlocals) { return mrb_proc_new_cfunc_with_env(mrb, func, nlocals, NULL); } +/* + * Retrieves a value from the environment of the currently executing C function Proc. + * + * This function is used within a C function that was wrapped into a Proc + * (e.g., using mrb_closure_new_cfunc or mrb_proc_new_cfunc_with_env) + * to access values stored in its associated environment. + * + * mrb: The mruby state. + * idx: The index of the value to retrieve from the C function's environment. + * The index must be non-negative and less than the number of + * environment entries (argc passed during creation). + * + * Returns the mrb_value stored at the specified index in the environment. + * + * Raises E_TYPE_ERROR if the currently executing Proc is not a C function + * or if it does not have an associated environment. + * Raises E_INDEX_ERROR if the provided index is out of bounds. + */ MRB_API mrb_value mrb_proc_cfunc_env_get(mrb_state *mrb, mrb_int idx) { @@ -193,8 +249,31 @@ mrb_proc_cfunc_env_get(mrb_state *mrb, mrb_int idx) return e->stack[idx]; } +mrb_value +mrb_proc_get_self(mrb_state *mrb, const struct RProc *p, struct RClass **target_class_p) +{ + if (MRB_PROC_CFUNC_P(p)) { + *target_class_p = mrb->object_class; + return mrb_nil_value(); + } + else { + struct REnv *e = p->e.env; + + if (!e || e->tt != MRB_TT_ENV) { + *target_class_p = mrb->object_class; + return mrb_top_self(mrb); + } + else if (MRB_ENV_LEN(e) < 1) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "self is lost (probably ran out of memory when the block became independent)"); + } + + *target_class_p = e->c; + return e->stack[0]; + } +} + void -mrb_proc_copy(mrb_state *mrb, struct RProc *a, struct RProc *b) +mrb_proc_copy(mrb_state *mrb, struct RProc *a, const struct RProc *b) { if (a->body.irep) { /* already initialized proc */ @@ -214,14 +293,12 @@ static mrb_value mrb_proc_s_new(mrb_state *mrb, mrb_value proc_class) { mrb_value blk; - mrb_value proc; - struct RProc *p; /* Calling Proc.new without a block is not implemented yet */ mrb_get_args(mrb, "&!", &blk); - p = MRB_OBJ_ALLOC(mrb, MRB_TT_PROC, mrb_class_ptr(proc_class)); + struct RProc *p = MRB_OBJ_ALLOC(mrb, MRB_TT_PROC, mrb_class_ptr(proc_class)); mrb_proc_copy(mrb, p, mrb_proc_ptr(blk)); - proc = mrb_obj_value(p); + mrb_value proc = mrb_obj_value(p); mrb_funcall_with_block(mrb, proc, MRB_SYM(initialize), 0, NULL, proc); if (!MRB_PROC_STRICT_P(p) && mrb->c->ci > mrb->c->cibase && MRB_PROC_ENV(p) == mrb->c->ci[-1].u.env) { @@ -245,37 +322,86 @@ mrb_proc_init_copy(mrb_state *mrb, mrb_value self) check_proc(mrb, proc); mrb_proc_copy(mrb, mrb_proc_ptr(self), mrb_proc_ptr(proc)); + /* A copied Proc is always treated as an orphan block: it cannot + `break` / `return` from the original yielding method. This is + stricter than CRuby (which only marks the copy orphan once the + original becomes orphan), but matches mruby's memory-first + design — tracking the original via a back pointer would grow + RProc and the GC mark set. See limitations.md for the spec + divergence note. */ + mrb_proc_ptr(self)->flags |= MRB_PROC_ORPHAN; return self; } -/* 15.2.17.4.2 */ static mrb_value proc_arity(mrb_state *mrb, mrb_value self) { return mrb_int_value(mrb, mrb_proc_arity(mrb_proc_ptr(self))); } +mrb_bool +mrb_proc_eql(mrb_state *mrb, mrb_value self, mrb_value other) +{ + if (mrb_type(self) != MRB_TT_PROC) return FALSE; + if (mrb_type(other) != MRB_TT_PROC) return FALSE; + + const struct RProc *p1 = mrb_proc_ptr(self); + const struct RProc *p2 = mrb_proc_ptr(other); + + /* Follow alias chains to get the real procs */ + while (p1 && MRB_PROC_ALIAS_P(p1)) { + p1 = p1->upper; + } + while (p2 && MRB_PROC_ALIAS_P(p2)) { + p2 = p2->upper; + } + + /* If either pointer is NULL after following aliases, they can't be equal */ + if (!p1 || !p2) return FALSE; + + if (MRB_PROC_CFUNC_P(p1)) { + if (!MRB_PROC_CFUNC_P(p2)) return FALSE; + if (p1->body.func != p2->body.func) return FALSE; + } + else if (MRB_PROC_CFUNC_P(p2)) return FALSE; + else if (p1->body.irep != p2->body.irep) return FALSE; + else if (MRB_PROC_ENV(p1) != MRB_PROC_ENV(p2)) return FALSE; + return TRUE; +} + +static mrb_value +proc_eql(mrb_state *mrb, mrb_value self) +{ + return mrb_bool_value(mrb_proc_eql(mrb, self, mrb_get_arg1(mrb))); +} + +static mrb_value +proc_hash(mrb_state *mrb, mrb_value self) +{ + const struct RProc *p = mrb_proc_ptr(self); + return mrb_int_value(mrb, (mrb_int)((intptr_t)p->body.irep^((intptr_t)MRB_PROC_ENV(p)>>2)^MRB_TT_PROC)); +} + /* 15.3.1.2.6 */ /* 15.3.1.3.27 */ /* * call-seq: * lambda { |...| block } -> a_proc * - * Equivalent to Proc.new, except the resulting Proc objects + * Equivalent to `Proc.new`, except the resulting Proc objects * check the number of parameters passed when called. */ static mrb_value proc_lambda(mrb_state *mrb, mrb_value self) { mrb_value blk; - struct RProc *p; mrb_get_args(mrb, "&", &blk); if (mrb_nil_p(blk)) { mrb_raise(mrb, E_ARGUMENT_ERROR, "tried to create Proc object without a block"); } check_proc(mrb, blk); - p = mrb_proc_ptr(blk); + const struct RProc *p = mrb_proc_ptr(blk); if (!MRB_PROC_STRICT_P(p)) { struct RProc *p2 = MRB_OBJ_ALLOC(mrb, MRB_TT_PROC, p->c); mrb_proc_copy(mrb, p2, p); @@ -293,8 +419,28 @@ mrb_proc_arity(const struct RProc *p) mrb_aspec aspec; int ma, op, ra, pa, arity; + /* Resolve alias procs first: an alias carries body.mid (a symbol), not an + irep, with `upper` pointing at the original proc. Without this the irep + branch below reads body.mid as an mrb_irep* and dereferences it -> SEGV. + Mirrors the guard already present in mrb_proc_source_location / mrb_proc_eql. */ + while (p && MRB_PROC_ALIAS_P(p)) { + p = p->upper; + } + if (!p) return 0; + if (MRB_PROC_CFUNC_P(p)) { - /* TODO cfunc aspec not implemented yet */ + uint32_t caspec_bits = p->flags & MRB_PROC_CASPEC_MASK; + if (caspec_bits != 0) { + aspec = mrb_proc_decompress_caspec(caspec_bits); + ma = MRB_ASPEC_REQ(aspec); + op = MRB_ASPEC_OPT(aspec); + ra = MRB_ASPEC_REST(aspec); + pa = MRB_ASPEC_POST(aspec); + return ra || op ? -(ma + pa + 1) : ma + pa; + } + if (MRB_PROC_NOARG_P(p)) { + return 0; + } return -1; } @@ -322,19 +468,15 @@ mrb_proc_arity(const struct RProc *p) mrb_value mrb_proc_local_variables(mrb_state *mrb, const struct RProc *proc) { - const mrb_irep *irep; - mrb_value vars; - size_t i; - if (proc == NULL || MRB_PROC_CFUNC_P(proc)) { return mrb_ary_new(mrb); } - vars = mrb_hash_new(mrb); + mrb_value vars = mrb_hash_new(mrb); while (proc) { if (MRB_PROC_CFUNC_P(proc)) break; - irep = proc->body.irep; + const mrb_irep *irep = proc->body.irep; if (irep->lv) { - for (i = 0; i + 1 < irep->nlocals; ++i) { + for (size_t i = 0; i + 1 < irep->nlocals; i++) { if (irep->lv[i]) { mrb_sym sym = irep->lv[i]; const char *name = mrb_sym_name(mrb, sym); @@ -366,18 +508,13 @@ mrb_proc_get_caller(mrb_state *mrb, struct REnv **envp) if (envp) *envp = NULL; } else { - struct RClass *tc = MRB_PROC_TARGET_CLASS(proc); struct REnv *e = mrb_vm_ci_env(ci); if (e == NULL) { int nstacks = proc->body.irep->nlocals; - e = mrb_env_new(mrb, c, ci, nstacks, ci->stack, tc); + e = mrb_env_new(mrb, c, ci, nstacks, ci->stack, mrb_vm_ci_target_class(ci)); ci->u.env = e; } - else if (tc) { - e->c = tc; - mrb_field_write_barrier(mrb, (struct RBasic*)e, (struct RBasic*)tc); - } if (envp) *envp = e; } @@ -410,8 +547,8 @@ mrb_proc_merge_lvar(mrb_state *mrb, mrb_irep *irep, struct REnv *env, int num, c mrb_raise(mrb, E_RUNTIME_ERROR, "unavailable local variable names"); } - irep->lv = (mrb_sym*)mrb_realloc(mrb, (mrb_sym*)irep->lv, sizeof(mrb_sym) * (irep->nlocals + num)); - env->stack = (mrb_value*)mrb_realloc(mrb, env->stack, sizeof(mrb_value) * (irep->nlocals + 1 /* self */ + num)); + irep->lv = (mrb_sym*)mrb_realloc(mrb, (mrb_sym*)irep->lv, sizeof(mrb_sym) * (irep->nlocals - 1 /* self */ + num)); + env->stack = (mrb_value*)mrb_realloc(mrb, env->stack, sizeof(mrb_value) * (irep->nlocals + num)); mrb_sym *destlv = (mrb_sym*)irep->lv + irep->nlocals - 1 /* self */; mrb_value *destst = env->stack + irep->nlocals; @@ -434,19 +571,29 @@ mrb_proc_merge_lvar(mrb_state *mrb, mrb_irep *irep, struct REnv *env, int num, c MRB_ENV_SET_LEN(env, irep->nlocals); } +/* ---------------------------*/ +static const mrb_mt_entry proc_rom_entries[] = { + MRB_MT_ENTRY(mrb_proc_init_copy, MRB_SYM(initialize_copy), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), + MRB_MT_ENTRY(proc_arity, MRB_SYM(arity), MRB_ARGS_NONE()), /* 15.2.17.4.2 */ + MRB_MT_ENTRY(proc_eql, MRB_OPSYM(eq), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(proc_eql, MRB_SYM_Q(eql), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(proc_hash, MRB_SYM(hash), MRB_ARGS_NONE()), /* 15.2.17.4.2 */ +}; + void mrb_init_proc(mrb_state *mrb) { mrb_method_t m; + struct RClass *pc = mrb->proc_class = mrb_define_class_id(mrb, MRB_SYM(Proc), mrb->object_class); /* 15.2.17 */ - mrb_define_class_method(mrb, mrb->proc_class, "new", mrb_proc_s_new, MRB_ARGS_NONE()|MRB_ARGS_BLOCK()); - mrb_define_method(mrb, mrb->proc_class, "initialize_copy", mrb_proc_init_copy, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, mrb->proc_class, "arity", proc_arity, MRB_ARGS_NONE()); + MRB_SET_INSTANCE_TT(pc, MRB_TT_PROC); + MRB_UNDEF_ALLOCATOR(pc); + mrb_define_class_method_id(mrb, pc, MRB_SYM(new), mrb_proc_s_new, MRB_ARGS_NONE()|MRB_ARGS_BLOCK()); + MRB_MT_INIT_ROM(mrb, pc, proc_rom_entries); MRB_METHOD_FROM_PROC(m, &call_proc); - mrb_define_method_raw(mrb, mrb->proc_class, MRB_SYM(call), m); - mrb_define_method_raw(mrb, mrb->proc_class, MRB_OPSYM(aref), m); + mrb_define_method_raw(mrb, pc, MRB_SYM(call), m); /* 15.2.17.4.3 */ + mrb_define_method_raw(mrb, pc, MRB_OPSYM(aref), m); /* 15.2.17.4.1 */ - mrb_define_class_method(mrb, mrb->kernel_module, "lambda", proc_lambda, MRB_ARGS_NONE()|MRB_ARGS_BLOCK()); /* 15.3.1.2.6 */ - mrb_define_method(mrb, mrb->kernel_module, "lambda", proc_lambda, MRB_ARGS_NONE()|MRB_ARGS_BLOCK()); /* 15.3.1.3.27 */ + mrb_define_private_method_id(mrb, mrb->kernel_module, MRB_SYM(lambda), proc_lambda, MRB_ARGS_NONE()|MRB_ARGS_BLOCK()); /* 15.3.1.3.27 */ } diff --git a/src/range.c b/src/range.c index 34ae7e4510..0ff506b5ba 100644 --- a/src/range.c +++ b/src/range.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #define RANGE_INITIALIZED_FLAG 1 #define RANGE_INITIALIZED(p) ((p)->flags |= RANGE_INITIALIZED_FLAG) @@ -19,12 +19,9 @@ static void r_check(mrb_state *mrb, mrb_value a, mrb_value b) { - enum mrb_vtype ta; - enum mrb_vtype tb; - mrb_int n; + enum mrb_vtype ta = mrb_type(a); + enum mrb_vtype tb = mrb_type(b); - ta = mrb_type(a); - tb = mrb_type(b); #ifdef MRB_NO_FLOAT if (ta == MRB_TT_INTEGER && tb == MRB_TT_INTEGER ) return; #else @@ -36,7 +33,7 @@ r_check(mrb_state *mrb, mrb_value a, mrb_value b) if (mrb_nil_p(a) || mrb_nil_p(b)) return; - n = mrb_cmp(mrb, a, b); + mrb_int n = mrb_cmp(mrb, a, b); if (n == -2) { /* can not be compared */ mrb_raise(mrb, E_ARGUMENT_ERROR, "bad value for range"); } @@ -70,7 +67,7 @@ static void range_ptr_alloc_edges(mrb_state *mrb, struct RRange *r) { #ifndef MRB_RANGE_EMBED - r->edges = (mrb_range_edges *)mrb_malloc(mrb, sizeof(mrb_range_edges)); + r->edges = (mrb_range_edges*)mrb_malloc(mrb, sizeof(mrb_range_edges)); #endif } @@ -113,7 +110,7 @@ range_ptr_replace(mrb_state *mrb, struct RRange *r, mrb_value beg, mrb_value end * rng.first => obj * rng.begin => obj * - * Returns the first object in rng. + * Returns the first object in `rng`. */ static mrb_value range_beg(mrb_state *mrb, mrb_value range) @@ -126,7 +123,7 @@ range_beg(mrb_state *mrb, mrb_value range) * rng.end => obj * rng.last => obj * - * Returns the object that defines the end of rng. + * Returns the object that defines the end of `rng`. * * (1..10).end #=> 10 * (1...10).end #=> 10 @@ -141,7 +138,7 @@ range_end(mrb_state *mrb, mrb_value range) * call-seq: * range.exclude_end? => true or false * - * Returns true if range excludes its end value. + * Returns `true` if `range` excludes its end value. */ static mrb_value range_excl(mrb_state *mrb, mrb_value range) @@ -153,8 +150,8 @@ range_excl(mrb_state *mrb, mrb_value range) * call-seq: * Range.new(start, end, exclusive=false) => range * - * Constructs a range using the given start and end. If the third - * parameter is omitted or is false, the range will include + * Constructs a range using the given `start` and `end`. If the third + * parameter is omitted or is `false`, the `range` will include * the end object; otherwise, it will be excluded. */ static mrb_value @@ -173,10 +170,10 @@ range_initialize(mrb_state *mrb, mrb_value range) * call-seq: * range == obj => true or false * - * Returns true only if - * 1) obj is a Range, - * 2) obj has equivalent beginning and end items (by comparing them with ==), - * 3) obj has the same #exclude_end? setting as rng. + * Returns `true` only if + * 1) `obj` is a Range, + * 2) `obj` has equivalent beginning and end items (by comparing them with `==`), + * 3) `obj` has the same #exclude_end? setting as `rng`. * * (0..2) == (0..2) #=> true * (0..2) == Range.new(0,2) #=> true @@ -266,7 +263,7 @@ range_to_s(mrb_state *mrb, mrb_value range) * rng.inspect -> string * * Convert this range object to a printable form (using - * inspect to convert the start and end + * `inspect` to convert the start and end * objects). */ static mrb_value @@ -296,9 +293,9 @@ range_inspect(mrb_state *mrb, mrb_value range) * call-seq: * rng.eql?(obj) -> true or false * - * Returns true only if obj is a Range, has equivalent + * Returns `true` only if `obj` is a Range, has equivalent * beginning and end items (by comparing them with #eql?), and has the same - * #exclude_end? setting as rng. + * #exclude_end? setting as `rng`. * * (0..2).eql?(0..2) #=> true * (0..2).eql?(Range.new(0,2)) #=> true @@ -311,7 +308,6 @@ range_eql(mrb_state *mrb, mrb_value range) struct RRange *r, *o; if (mrb_obj_equal(mrb, range, obj)) return mrb_true_value(); - if (!mrb_obj_is_kind_of(mrb, obj, mrb->range_class)) return mrb_false_value(); if (!mrb_range_p(obj)) return mrb_false_value(); r = mrb_range_ptr(mrb, range); @@ -349,7 +345,6 @@ range_num_to_a(mrb_state *mrb, mrb_value range) struct RRange *r = mrb_range_ptr(mrb, range); mrb_value beg = RANGE_BEG(r); mrb_value end = RANGE_END(r); - mrb_value ary; mrb->c->ci->mid = 0; if (mrb_nil_p(end)) { @@ -359,6 +354,10 @@ range_num_to_a(mrb_state *mrb, mrb_value range) if (mrb_integer_p(end)) { mrb_int a = mrb_integer(beg); mrb_int b = mrb_integer(end); + + if (a > b) { + return mrb_ary_new_capa(mrb, 0); + } mrb_int len; if (mrb_int_sub_overflow(b, a, &len)) { @@ -369,75 +368,113 @@ range_num_to_a(mrb_state *mrb, mrb_value range) if (len == MRB_INT_MAX) goto too_long; len++; } - ary = mrb_ary_new_capa(mrb, len); + mrb_value ary = mrb_ary_new_capa(mrb, len); + mrb_value *ptr = RARRAY_PTR(ary); for (mrb_int i=0; i b) { + return mrb_ary_new_capa(mrb, 0); } - else { - while (a <= b) { - mrb_ary_push(mrb, ary, mrb_int_value(mrb, (mrb_int)a)); - a += 1.0; - } + mrb_int alen = (mrb_int)(b - a) + RANGE_EXCL(r); + mrb_value ary = mrb_ary_new_capa(mrb, alen); + mrb_value *ptr = RARRAY_PTR(ary); + for (mrb_int i=0; iobject_class); /* 15.2.14 */ + r = mrb_define_class_id(mrb, MRB_SYM(Range), mrb->object_class); /* 15.2.14 */ mrb->range_class = r; MRB_SET_INSTANCE_TT(r, MRB_TT_RANGE); - mrb_define_method(mrb, r, "begin", range_beg, MRB_ARGS_NONE()); /* 15.2.14.4.3 */ - mrb_define_method(mrb, r, "end", range_end, MRB_ARGS_NONE()); /* 15.2.14.4.5 */ - mrb_define_method(mrb, r, "==", range_eq, MRB_ARGS_REQ(1)); /* 15.2.14.4.1 */ - mrb_define_method(mrb, r, "===", range_include, MRB_ARGS_REQ(1)); /* 15.2.14.4.2 */ - mrb_define_method(mrb, r, "exclude_end?", range_excl, MRB_ARGS_NONE()); /* 15.2.14.4.6 */ - mrb_define_method(mrb, r, "first", range_beg, MRB_ARGS_NONE()); /* 15.2.14.4.7 */ - mrb_define_method(mrb, r, "include?", range_include, MRB_ARGS_REQ(1)); /* 15.2.14.4.8 */ - mrb_define_method(mrb, r, "initialize", range_initialize, MRB_ARGS_ANY()); /* 15.2.14.4.9 */ - mrb_define_method(mrb, r, "last", range_end, MRB_ARGS_NONE()); /* 15.2.14.4.10 */ - mrb_define_method(mrb, r, "member?", range_include, MRB_ARGS_REQ(1)); /* 15.2.14.4.11 */ - mrb_define_method(mrb, r, "to_s", range_to_s, MRB_ARGS_NONE()); /* 15.2.14.4.12(x) */ - mrb_define_method(mrb, r, "inspect", range_inspect, MRB_ARGS_NONE()); /* 15.2.14.4.13(x) */ - mrb_define_method(mrb, r, "eql?", range_eql, MRB_ARGS_REQ(1)); /* 15.2.14.4.14(x) */ - mrb_define_method(mrb, r, "initialize_copy", range_initialize_copy, MRB_ARGS_REQ(1)); /* 15.2.14.4.15(x) */ - mrb_define_method(mrb, r, "__num_to_a", range_num_to_a, MRB_ARGS_NONE()); + MRB_MT_INIT_ROM(mrb, r, range_rom_entries); } diff --git a/src/readfloat.c b/src/readfloat.c deleted file mode 100644 index db5899edfc..0000000000 --- a/src/readfloat.c +++ /dev/null @@ -1,120 +0,0 @@ -#include - -#ifndef MRB_NO_FLOAT -/* - * strtod implementation. - * author: Yasuhiro Matsumoto (@mattn) - * license: public domain - */ - -/* -The original code can be found in https://github.com/mattn/strtod - -I modified the routine for mruby: - - * renamed the function `vim_strtod` -> `mrb_float_read` - * simplified the code - -My modifications in this file are also placed in the public domain. - -Matz (Yukihiro Matsumoto) -*/ - -#include -#include -#include - -MRB_API double -mrb_float_read(const char *str, char **end) -{ - double d = 0.0; - int sign; - int n = 0; - const char *p, *a; - - a = p = str; - while (ISSPACE(*p)) - ++p; - - /* decimal part */ - sign = 1; - if (*p == '-') { - sign = -1; - ++p; - } else if (*p == '+') - ++p; - if (ISDIGIT(*p)) { - d = (double)(*p++ - '0'); - while (*p && ISDIGIT(*p)) { - d = d * 10.0 + (double)(*p - '0'); - ++p; - ++n; - } - a = p; - } - else if (*p != '.') - goto done; - d *= sign; - - /* fraction part */ - if (*p == '.') { - double f = 0.0; - double base = 0.1; - ++p; - - if (ISDIGIT(*p)) { - while (*p && ISDIGIT(*p)) { - f += base * (*p - '0') ; - base /= 10.0; - ++p; - ++n; - } - } - d += f * sign; - a = p; - } - - /* exponential part */ - if ((*p == 'E') || (*p == 'e')) { - int e = 0; - ++p; - - sign = 1; - if (*p == '-') { - sign = -1; - ++p; - } - else if (*p == '+') - ++p; - - if (ISDIGIT(*p)) { - while (*p == '0') - ++p; - if (*p == '\0') --p; - e = (int)(*p++ - '0'); - for (; *p && ISDIGIT(*p); p++) { - if (e < 10000) - e = e * 10 + (*p - '0'); - } - e *= sign; - } - else if (!ISDIGIT(*(a-1))) { - a = str; - goto done; - } - else if (*p == 0) - goto done; - d *= pow(10.0, (double) e); - a = p; - } - else if (p > str && !ISDIGIT(*(p-1))) { - a = str; - goto done; - } - -done: - if (end) - *end = (char*)a; - return d; -} -#endif diff --git a/src/readint.c b/src/readint.c index 5fae222c2a..584bdf41b8 100644 --- a/src/readint.c +++ b/src/readint.c @@ -1,30 +1,27 @@ #include #include -#include -/* mrb_int_read(): read mrb_int from a string (base 10 only) */ +/* mrb_read_int(): read mrb_int from a string (base 10 only) */ /* const char *p - string to read */ /* const char *e - end of string */ /* char **endp - end of parsed integer */ - -/* if integer overflows, errno will be set to ERANGE */ -/* also endp will be set to NULL on overflow */ -MRB_API mrb_int -mrb_int_read(const char *p, const char *e, char **endp) +/* mrb_int *np - variable to save the result */ +/* returns TRUE if read succeeded */ +/* if integer overflows, returns FALSE */ +MRB_API mrb_bool +mrb_read_int(const char *p, const char *e, char **endp, mrb_int *np) { mrb_int n = 0; - int ch; while ((e == NULL || p < e) && ISDIGIT(*p)) { - ch = *p - '0'; + int ch = *p - '0'; if (mrb_int_mul_overflow(n, 10, &n) || mrb_int_add_overflow(n, ch, &n)) { - if (endp) *endp = NULL; - errno = ERANGE; - return MRB_INT_MAX; + return FALSE; } p++; } if (endp) *endp = (char*)p; - return n; + *np = n; + return TRUE; } diff --git a/src/readnum.c b/src/readnum.c new file mode 100644 index 0000000000..3c459d068d --- /dev/null +++ b/src/readnum.c @@ -0,0 +1,43 @@ +/* this file defines obsolete functions: mrb_int_read() and mrb_float_read() */ +/* use mrb_read_int() and mrb_read_float() instead */ + +#include +#include +#include + +/* mrb_int_read(): read mrb_int from a string (base 10 only) */ +/* const char *p - string to read */ +/* const char *e - end of string */ +/* char **endp - end of parsed integer */ + +/* if integer overflows, errno will be set to ERANGE */ +/* also endp will be set to NULL on overflow */ +MRB_API mrb_int +mrb_int_read(const char *p, const char *e, char **endp) +{ + mrb_int n; + + if (!mrb_read_int(p, e, endp, &n)) { + if (endp) *endp = NULL; + errno = ERANGE; + return MRB_INT_MAX; + } + if (endp) *endp = (char*)p; + return n; +} + +#ifndef MRB_NO_FLOAT +//#include +//#include + +MRB_API double +mrb_float_read(const char *str, char **endp) +{ + double d; + + if (!mrb_read_float(str, endp, &d)) { + errno = ERANGE; + } + return d; +} +#endif diff --git a/src/state.c b/src/state.c index 4632e81d7e..642a1b9634 100644 --- a/src/state.c +++ b/src/state.c @@ -20,7 +20,10 @@ void mrb_init_mrbgems(mrb_state*); void mrb_gc_init(mrb_state*, mrb_gc *gc); void mrb_gc_destroy(mrb_state*, mrb_gc *gc); -int mrb_core_init_protect(mrb_state *mrb, void (*body)(mrb_state *, void *), void *opaque); +int mrb_core_init_protect(mrb_state *mrb, void (*body)(mrb_state*, void*), void *opaque); + +void mrb_init_shape(mrb_state*); +void mrb_free_shape(mrb_state*); static void init_gc_and_core(mrb_state *mrb, void *opaque) @@ -28,6 +31,7 @@ init_gc_and_core(mrb_state *mrb, void *opaque) static const struct mrb_context mrb_context_zero = { 0 }; mrb_gc_init(mrb, &mrb->gc); + mrb_init_shape(mrb); mrb->c = (struct mrb_context*)mrb_malloc(mrb, sizeof(struct mrb_context)); *mrb->c = mrb_context_zero; mrb->root_c = mrb->c; @@ -35,45 +39,27 @@ init_gc_and_core(mrb_state *mrb, void *opaque) mrb_init_core(mrb); } +/* Initializes the core of mruby, without loading gems. */ MRB_API mrb_state* -mrb_open_core(mrb_allocf f, void *ud) +mrb_open_core(void) { static const mrb_state mrb_state_zero = { 0 }; mrb_state *mrb; - if (f == NULL) f = mrb_default_allocf; - mrb = (mrb_state *)(f)(NULL, NULL, sizeof(mrb_state), ud); + mrb = (mrb_state*)mrb_basic_alloc_func(NULL, sizeof(mrb_state)); if (mrb == NULL) return NULL; *mrb = mrb_state_zero; - mrb->allocf_ud = ud; - mrb->allocf = f; mrb->atexit_stack_len = 0; + mrb->bootstrapping = TRUE; if (mrb_core_init_protect(mrb, init_gc_and_core, NULL)) { - mrb_close(mrb); - return NULL; + /* Return mrb with mrb->exc set for caller to inspect */ + return mrb; } - return mrb; -} - -void* -mrb_default_allocf(mrb_state *mrb, void *p, size_t size, void *ud) -{ - if (size == 0) { - free(p); - return NULL; - } - else { - return realloc(p, size); - } -} - -MRB_API mrb_state* -mrb_open(void) -{ - mrb_state *mrb = mrb_open_allocf(mrb_default_allocf, NULL); + mrb_method_cache_clear(mrb); + mrb->bootstrapping = FALSE; return mrb; } @@ -86,19 +72,21 @@ init_mrbgems(mrb_state *mrb, void *opaque) } #endif +/* Initializes mruby, including loading gems. */ MRB_API mrb_state* -mrb_open_allocf(mrb_allocf f, void *ud) +mrb_open(void) { - mrb_state *mrb = mrb_open_core(f, ud); + mrb_state *mrb = mrb_open_core(); - if (mrb == NULL) { - return NULL; + if (mrb == NULL || mrb->exc) { + /* Either allocation failed or core init failed */ + return mrb; } #ifndef MRB_NO_GEMS if (mrb_core_init_protect(mrb, init_mrbgems, NULL)) { - mrb_close(mrb); - return NULL; + /* Gem init failed - return mrb with mrb->exc set */ + return mrb; } mrb_gc_arena_restore(mrb, 0); #endif @@ -150,8 +138,10 @@ void mrb_irep_free(mrb_state *mrb, mrb_irep *irep) { int i; + mrb_bool consolidated; if (irep->flags & MRB_IREP_NO_FREE) return; + consolidated = (irep->flags & MRB_IREP_CONSOLIDATED) != 0; if (!(irep->flags & MRB_ISEQ_NO_FREE)) mrb_free(mrb, (void*)irep->iseq); if (irep->pool) { @@ -161,21 +151,25 @@ mrb_irep_free(mrb_state *mrb, mrb_irep *irep) mrb_free(mrb, (void*)irep->pool[i].u.str); } } - mrb_free(mrb, (void*)irep->pool); + if (!consolidated) mrb_free(mrb, (void*)irep->pool); } - mrb_free(mrb, (void*)irep->syms); + if (!consolidated) mrb_free(mrb, (void*)irep->syms); if (irep->reps) { for (i=0; irlen; i++) { if (irep->reps[i]) mrb_irep_decref(mrb, (mrb_irep*)irep->reps[i]); } - mrb_free(mrb, (void*)irep->reps); + if (!consolidated) mrb_free(mrb, (void*)irep->reps); } mrb_free(mrb, (void*)irep->lv); mrb_debug_info_free(mrb, irep->debug_info); +#ifdef MRB_DEBUG + memset(irep, -1, sizeof(*irep)); +#endif mrb_free(mrb, irep); } +/* Frees a mruby context. */ MRB_API void mrb_free_context(mrb_state *mrb, struct mrb_context *c) { @@ -187,39 +181,56 @@ mrb_free_context(mrb_state *mrb, struct mrb_context *c) void mrb_protect_atexit(mrb_state *mrb); - MRB_API void +/* Closes and finalizes a mruby state. */ +MRB_API void mrb_close(mrb_state *mrb) { if (!mrb) return; mrb_protect_atexit(mrb); /* free */ + mrb_gc_free_gv(mrb); mrb_gc_destroy(mrb, &mrb->gc); + mrb_free_shape(mrb); mrb_free_context(mrb, mrb->root_c); - mrb_gc_free_gv(mrb); mrb_free_symtbl(mrb); + + /* free heap-allocated ROM method table wrappers */ + { + struct mrb_mt_rom_list *node = mrb->rom_mt; + while (node) { + struct mrb_mt_rom_list *next = node->next; + mrb_free(mrb, node->tbl); + mrb_free(mrb, node); + node = next; + } + } + mrb_free(mrb, mrb); } +/* Adds an instruction sequence (irep) to the mruby state. */ MRB_API mrb_irep* mrb_add_irep(mrb_state *mrb) { static const mrb_irep mrb_irep_zero = { 0 }; mrb_irep *irep; - irep = (mrb_irep *)mrb_malloc(mrb, sizeof(mrb_irep)); + irep = (mrb_irep*)mrb_malloc(mrb, sizeof(mrb_irep)); *irep = mrb_irep_zero; irep->refcnt = 1; return irep; } +/* Returns the top-level self object. */ MRB_API mrb_value mrb_top_self(mrb_state *mrb) { return mrb_obj_value(mrb->top_self); } +/* Registers a function to be called when the mruby state is closed. */ MRB_API void mrb_state_atexit(mrb_state *mrb, mrb_atexit_func f) { diff --git a/src/string.c b/src/string.c index 0e204118a9..900ce079aa 100644 --- a/src/string.c +++ b/src/string.c @@ -6,6 +6,7 @@ #ifdef _MSC_VER # define _CRT_NONSTDC_NO_DEPRECATE +# define WIN32_LEAN_AND_MEAN #endif #include @@ -15,7 +16,6 @@ #include #include #include -#include #include typedef struct mrb_shared_string { @@ -29,14 +29,18 @@ const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz"; #define mrb_obj_alloc_string(mrb) MRB_OBJ_ALLOC((mrb), MRB_TT_STRING, (mrb)->string_class) #ifndef MRB_STR_LENGTH_MAX +#if defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) +#define MRB_STR_LENGTH_MAX 0 +#else #define MRB_STR_LENGTH_MAX 1048576 #endif +#endif static void -str_check_too_big(mrb_state *mrb, mrb_int len) +str_check_length(mrb_state *mrb, mrb_int len) { - if (len < 0) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "[BUG] negative string length"); + if (len < 0 || len == MRB_INT_MAX) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "negative (or overflowed) string size"); } #if MRB_STR_LENGTH_MAX != 0 if (len > MRB_STR_LENGTH_MAX-1) { @@ -45,18 +49,32 @@ str_check_too_big(mrb_state *mrb, mrb_int len) #endif } +mrb_bool +mrb_strcasecmp_p(const char *s1, mrb_int len1, const char *s2, mrb_int len2) +{ + if (len1 != len2) return FALSE; + + const char *e1 = s1 + len1; + while (s1 < e1) { + if (*s1 != *s2 && TOUPPER(*s1) != TOUPPER(*s2)) return FALSE; + s1++; + s2++; + } + return TRUE; +} + static struct RString* str_init_normal_capa(mrb_state *mrb, struct RString *s, const char *p, mrb_int len, mrb_int capa) { - str_check_too_big(mrb, capa); - char *dst = (char *)mrb_malloc(mrb, capa + 1); + str_check_length(mrb, capa); + char *dst = (char*)mrb_malloc(mrb, capa + 1); if (p) memcpy(dst, p, len); dst[len] = '\0'; s->as.heap.ptr = dst; s->as.heap.len = len; s->as.heap.aux.capa = capa; - RSTR_UNSET_TYPE_FLAG(s); + RSTR_SET_TYPE(s, NORMAL); return s; } @@ -69,9 +87,10 @@ str_init_normal(mrb_state *mrb, struct RString *s, const char *p, mrb_int len) static struct RString* str_init_embed(struct RString *s, const char *p, mrb_int len) { + mrb_assert(len >= 0); if (p) memcpy(RSTR_EMBED_PTR(s), p, len); RSTR_EMBED_PTR(s)[len] = '\0'; - RSTR_SET_TYPE_FLAG(s, EMBED); + RSTR_SET_TYPE(s, EMBED); RSTR_SET_EMBED_LEN(s, len); return s; } @@ -79,10 +98,10 @@ str_init_embed(struct RString *s, const char *p, mrb_int len) static struct RString* str_init_nofree(struct RString *s, const char *p, mrb_int len) { - s->as.heap.ptr = (char *)p; + s->as.heap.ptr = (char*)p; s->as.heap.len = len; s->as.heap.aux.capa = 0; /* nofree */ - RSTR_SET_TYPE_FLAG(s, NOFREE); + RSTR_SET_TYPE(s, NOFREE); return s; } @@ -93,7 +112,7 @@ str_init_shared(mrb_state *mrb, const struct RString *orig, struct RString *s, m shared->refcnt++; } else { - shared = (mrb_shared_string *)mrb_malloc(mrb, sizeof(mrb_shared_string)); + shared = (mrb_shared_string*)mrb_malloc(mrb, sizeof(mrb_shared_string)); shared->refcnt = 1; shared->ptr = orig->as.heap.ptr; shared->capa = orig->as.heap.aux.capa; @@ -101,7 +120,7 @@ str_init_shared(mrb_state *mrb, const struct RString *orig, struct RString *s, m s->as.heap.ptr = orig->as.heap.ptr; s->as.heap.len = orig->as.heap.len; s->as.heap.aux.shared = shared; - RSTR_SET_TYPE_FLAG(s, SHARED); + RSTR_SET_TYPE(s, SHARED); return s; } @@ -111,7 +130,7 @@ str_init_fshared(const struct RString *orig, struct RString *s, struct RString * s->as.heap.ptr = orig->as.heap.ptr; s->as.heap.len = orig->as.heap.len; s->as.heap.aux.fshared = fshared; - RSTR_SET_TYPE_FLAG(s, FSHARED); + RSTR_SET_TYPE(s, FSHARED); return s; } @@ -136,6 +155,7 @@ str_new_static(mrb_state *mrb, const char *p, mrb_int len) static struct RString* str_new(mrb_state *mrb, const char *p, mrb_int len) { + str_check_length(mrb, len); if (RSTR_EMBEDDABLE_P(len)) { return str_init_embed(mrb_obj_alloc_string(mrb), p, len); } @@ -145,6 +165,14 @@ str_new(mrb_state *mrb, const char *p, mrb_int len) return str_init_normal(mrb, mrb_obj_alloc_string(mrb), p, len); } +/* + * @param mrb The mruby state. + * @param capa The desired capacity of the new string. + * @return A new mruby string with the specified capacity. + * + * Creates a new mruby string with a given initial capacity. + * The string is initially empty. + */ MRB_API mrb_value mrb_str_new_capa(mrb_state *mrb, mrb_int capa) { @@ -168,18 +196,35 @@ resize_capa(mrb_state *mrb, struct RString *s, mrb_int capacity) } } else { - str_check_too_big(mrb, capacity); + str_check_length(mrb, capacity); s->as.heap.ptr = (char*)mrb_realloc(mrb, RSTR_PTR(s), capacity+1); s->as.heap.aux.capa = (mrb_ssize)capacity; } } +/* + * @param mrb The mruby state. + * @param p A pointer to the C string to copy. + * @param len The length of the C string. + * @return A new mruby string containing the copied C string. + * + * Creates a new mruby string from a C string and a specified length. + * If `p` is NULL, an empty string is created. + */ MRB_API mrb_value mrb_str_new(mrb_state *mrb, const char *p, mrb_int len) { return mrb_obj_value(str_new(mrb, p, len)); } +/* + * @param mrb The mruby state. + * @param p A pointer to the null-terminated C string to copy. + * @return A new mruby string containing the copied C string. + * + * Creates a new mruby string from a null-terminated C string. + * If `p` is NULL, an empty string is created. + */ MRB_API mrb_value mrb_str_new_cstr(mrb_state *mrb, const char *p) { @@ -198,6 +243,16 @@ mrb_str_new_cstr(mrb_state *mrb, const char *p) return mrb_obj_value(s); } +/* + * @param mrb The mruby state. + * @param p A pointer to the static C string. + * @param len The length of the static C string. + * @return A new mruby string referencing the static C string. + * + * Creates a new mruby string that directly references a static C string. + * The C string is not copied and must remain valid for the lifetime of the mruby string. + * This is typically used for string literals. + */ MRB_API mrb_value mrb_str_new_static(mrb_state *mrb, const char *p, mrb_int len) { @@ -216,7 +271,7 @@ str_decref(mrb_state *mrb, mrb_shared_string *shared) } static void -str_modify_keep_ascii(mrb_state *mrb, struct RString *s) +str_unshare_buffer(mrb_state *mrb, struct RString *s) { if (RSTR_SHARED_P(s)) { mrb_shared_string *shared = s->as.heap.aux.shared; @@ -224,7 +279,7 @@ str_modify_keep_ascii(mrb_state *mrb, struct RString *s) if (shared->refcnt == 1 && s->as.heap.ptr == shared->ptr) { s->as.heap.aux.capa = shared->capa; s->as.heap.ptr[s->as.heap.len] = '\0'; - RSTR_UNSET_SHARED_FLAG(s); + RSTR_SET_TYPE(s, NORMAL); mrb_free(mrb, shared); } else { @@ -240,7 +295,8 @@ str_modify_keep_ascii(mrb_state *mrb, struct RString *s) static void check_null_byte(mrb_state *mrb, struct RString *str) { - if (memchr(RSTR_PTR(str), '\0', RSTR_LEN(str))) { + const char *p = RSTR_PTR(str); + if (p && memchr(p, '\0', RSTR_LEN(str))) { mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte"); } } @@ -256,7 +312,149 @@ mrb_gc_free_str(mrb_state *mrb, struct RString *str) mrb_free(mrb, str->as.heap.ptr); } +#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ + defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \ + defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \ + defined(__mc68020__) +# define ALIGNED_WORD_ACCESS 0 +#else +# define ALIGNED_WORD_ACCESS 1 +#endif + +#ifdef MRB_64BIT +#define bitint uint64_t +#define MASK01 0x0101010101010101ull +#else +#define bitint uint32_t +#define MASK01 0x01010101ul +#endif + +/* + * Encode a Unicode codepoint to UTF-8 bytes. + * buf must have at least 4 bytes of space. + * Returns the number of bytes written (1-4), or 0 for invalid codepoint. + */ +mrb_int +mrb_utf8_to_buf(char *buf, uint32_t cp) +{ + if (cp < 0x80) { + buf[0] = (char)cp; + return 1; + } + else if (cp < 0x800) { + buf[0] = (char)(0xC0 | (cp >> 6)); + buf[1] = (char)(0x80 | (cp & 0x3F)); + return 2; + } + else if (cp < 0x10000) { + buf[0] = (char)(0xE0 | (cp >> 12)); + buf[1] = (char)(0x80 | ((cp >> 6) & 0x3F)); + buf[2] = (char)(0x80 | (cp & 0x3F)); + return 3; + } + else if (cp <= 0x10FFFF) { + buf[0] = (char)(0xF0 | (cp >> 18)); + buf[1] = (char)(0x80 | ((cp >> 12) & 0x3F)); + buf[2] = (char)(0x80 | ((cp >> 6) & 0x3F)); + buf[3] = (char)(0x80 | (cp & 0x3F)); + return 4; + } + return 0; /* invalid codepoint */ +} + #ifdef MRB_UTF8_STRING + +#define NOASCII(c) ((c) & 0x80) + +#ifdef SIMPLE_SEARCH_NONASCII +/* the naive implementation. define SIMPLE_SEARCH_NONASCII, */ +/* if you need it for any constraint (e.g. code size). */ +static const char* +search_nonascii(const char* p, const char *e) +{ + for (; p < e; ++p) { + if (NOASCII(*p)) return p; + } + return e; +} + +#elif defined(__SSE2__) +# include + +static inline const char * +search_nonascii(const char *p, const char *e) +{ + if (sizeof(__m128i) < (size_t)(e - p)) { + if (!_mm_movemask_epi8(_mm_loadu_si128((__m128i const*)p))) { + const intptr_t lowbits = sizeof(__m128i) - 1; + const __m128i *s, *t; + s = (const __m128i*)(~lowbits & ((intptr_t)p + lowbits)); + t = (const __m128i*)(~lowbits & (intptr_t)e); + for (; s < t; ++s) { + if (_mm_movemask_epi8(_mm_load_si128(s))) break; + } + p = (const char *)s; + } + } + switch (e - p) { + default: + case 15: if (NOASCII(*p)) return p; ++p; + case 14: if (NOASCII(*p)) return p; ++p; + case 13: if (NOASCII(*p)) return p; ++p; + case 12: if (NOASCII(*p)) return p; ++p; + case 11: if (NOASCII(*p)) return p; ++p; + case 10: if (NOASCII(*p)) return p; ++p; + case 9: if (NOASCII(*p)) return p; ++p; + case 8: if (NOASCII(*p)) return p; ++p; + case 7: if (NOASCII(*p)) return p; ++p; + case 6: if (NOASCII(*p)) return p; ++p; + case 5: if (NOASCII(*p)) return p; ++p; + case 4: if (NOASCII(*p)) return p; ++p; + case 3: if (NOASCII(*p)) return p; ++p; + case 2: if (NOASCII(*p)) return p; ++p; + case 1: if (NOASCII(*p)) return p; ++p; + if (NOASCII(*p)) return p; + case 0: break; + } + return e; +} + +#else + +static const char* +search_nonascii(const char *p, const char *e) +{ + ptrdiff_t byte_len = e - p; + + const char *be = p + sizeof(bitint) * (byte_len / sizeof(bitint)); + for (; p < be; p+=sizeof(bitint)) { + bitint t0; + + memcpy(&t0, p, sizeof(bitint)); + const bitint t1 = t0 & (MASK01*0x80); + if (t1) { + e = p + sizeof(bitint)-1; + byte_len = sizeof(bitint)-1; + break; + } + } + + switch (byte_len % sizeof(bitint)) { +#ifdef MRB_64BIT + case 7: if (e[-7]&0x80) return e-7; + case 6: if (e[-6]&0x80) return e-6; + case 5: if (e[-5]&0x80) return e-5; + case 4: if (e[-4]&0x80) return e-4; +#endif + case 3: if (e[-3]&0x80) return e-3; + case 2: if (e[-2]&0x80) return e-2; + case 1: if (e[-1]&0x80) return e-1; + } + return e; +} + +#endif /* SIMPLE_SEARCH_NONASCII */ + #define utf8_islead(c) ((unsigned char)((c)&0xc0) != 0x80) extern const char mrb_utf8len_table[]; @@ -271,7 +469,7 @@ mrb_utf8len(const char* p, const char* e) mrb_int len = mrb_utf8len_table[(unsigned char)p[0] >> 3]; if (len > e - p) return 1; switch (len) { - case 1: + case 0: return 1; case 4: if (utf8_islead(p[3])) return 1; @@ -283,15 +481,40 @@ mrb_utf8len(const char* p, const char* e) return len; } +#if defined(__GNUC__) || __has_builtin(__builtin_popcount) +# ifdef MRB_64BIT +# define popcount(x) __builtin_popcountll(x) +# else +# define popcount(x) __builtin_popcountl(x) +# endif +#else +#define POPC_SHIFT (8 * sizeof(bitint) - 8) +static inline uint32_t popcount(bitint x) +{ + x = (x & (MASK01*0x55)) + ((x >> 1) & (MASK01*0x55)); + x = (x & (MASK01*0x33)) + ((x >> 2) & (MASK01*0x33)); + x = (x & (MASK01*0x0F)) + ((x >> 4) & (MASK01*0x0F)); + return (uint32_t)((x * MASK01) >> POPC_SHIFT); +} +#endif + mrb_int mrb_utf8_strlen(const char *str, mrb_int byte_len) { - mrb_int len = 0; const char *p = str; - const char *e = p + byte_len; + const char *e = str + byte_len; + mrb_int len = 0; + while (p < e) { - if (utf8_islead(*p)) len++; - p++; + const char *np = search_nonascii(p, e); + + len += np - p; + if (np == e) break; + p = np; + while (NOASCII(*p)) { + p += mrb_utf8len(p, e); + len++; + } } return len; } @@ -302,12 +525,13 @@ utf8_strlen(mrb_value str) struct RString *s = mrb_str_ptr(str); mrb_int byte_len = RSTR_LEN(s); - if (RSTR_ASCII_P(s)) { + if (RSTR_SINGLE_BYTE_P(s)) { return byte_len; } else { mrb_int utf8_len = mrb_utf8_strlen(RSTR_PTR(s), byte_len); - if (byte_len == utf8_len) RSTR_SET_ASCII_FLAG(s); + mrb_assert(utf8_len <= byte_len); + if (byte_len == utf8_len) RSTR_SET_SINGLE_BYTE_FLAG(s); return utf8_len; } } @@ -316,42 +540,75 @@ utf8_strlen(mrb_value str) /* map character index to byte offset index */ static mrb_int -chars2bytes(mrb_value s, mrb_int off, mrb_int idx) +chars2bytes(mrb_value str, mrb_int off, mrb_int idx) { - if (RSTR_ASCII_P(mrb_str_ptr(s))) { + struct RString *s = mrb_str_ptr(str); + if (RSTR_SINGLE_BYTE_P(s) || RSTR_BINARY_P(s)) { return idx; } - else { - mrb_int i, b, n; - const char *p = RSTRING_PTR(s) + off; - const char *e = RSTRING_END(s); - for (b=i=0; p= slen) { - const char *pivot; - - if (memcmp(p, s, slen) == 0) { - return off; - } - - pivot = p + qstable[(unsigned char)p[slen - 1]]; - if (pivot >= pend || pivot < p /* overflowed */) { return -1; } - - do { - p += mrb_utf8len(p, pend); - off ++; - } while (p < pivot); - } - - return -1; -} - static mrb_int str_index_str_by_char(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos) { - const char *p = RSTRING_PTR(str); - const char *pend = p + RSTRING_LEN(str); - const char *s = RSTRING_PTR(sub); - const mrb_int slen = RSTRING_LEN(sub); - mrb_int off = pos; + const char *ptr = RSTRING_PTR(sub); + mrb_int len = RSTRING_LEN(sub); - for (; pos > 0; pos --) { - if (pend - p < 1) { return -1; } - p += mrb_utf8len(p, pend); + if (pos > 0) { + pos = chars2bytes(str, 0, pos); } - if (slen < 1) { return off; } + pos = mrb_str_index(mrb, str, ptr, len, pos); - return str_index_str_by_char_search(mrb, p, pend, s, slen, off); + if (pos > 0) { + pos = bytes2chars(str, pos); + } + return pos; } -#define BYTES_ALIGN_CHECK(pos) if (pos < 0) return mrb_nil_value(); #else #define RSTRING_CHAR_LEN(s) RSTRING_LEN(s) -#define chars2bytes(p, off, ci) (ci) -#define bytes2chars(p, end, bi) (bi) -#define char_adjust(beg, end, ptr) (ptr) +#define chars2bytes(s, off, ci) (ci) +#define bytes2chars(s, bi) (bi) +#define char_adjust(ptr, end) (ptr) #define char_backtrack(ptr, end) ((end) - 1) -#define BYTES_ALIGN_CHECK(pos) -#define str_index_str_by_char(mrb, str, sub, pos) str_index_str(mrb, str, sub, pos) -#endif - -#ifndef MRB_QS_SHORT_STRING_LENGTH -#define MRB_QS_SHORT_STRING_LENGTH 2048 +#define str_index_str_by_char(mrb, str, sub, pos) str_index_str((mrb), (str), (sub), (pos)) #endif +/* memsearch_swar (SWAR stands for SIMD within a register) */ +/* See https://en.wikipedia.org/wiki/SWAR */ +/* The function is taken from http://0x80.pl/articles/simd-strfind.html */ +/* The original source code is under 2-clause BSD license; see LEGAL file. */ +/* The modifications: + * port from C++ to C + * returns mrb_int + * remove alignment issue + * support bigendian CPU + * fixed potential buffer overflow +*/ static inline mrb_int -mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n) +memsearch_swar(const char *xs, mrb_int m, const char *ys, mrb_int n) { - if (n + m < MRB_QS_SHORT_STRING_LENGTH) { - const unsigned char *y = ys; - const unsigned char *ye = ys+n-m+1; +#define MASK7f (MASK01*0x7f) +#define MASK80 (MASK01*0x80) +#if defined(MRB_ENDIAN_BIG) +#ifdef MRB_64BIT +#define MASKtop 0x8000000000000000ull +#else +#define MASKtop 0x80000000ul +#endif +#else +#define MASKtop 0x80 +#endif + + const bitint first = MASK01 * (uint8_t)xs[0]; + const bitint last = MASK01 * (uint8_t)xs[m-1]; + + const char *s0 = ys; + const char *s1 = ys+m-1; + + const mrb_int lim = n - m - (mrb_int)sizeof(bitint); + mrb_int i; - for (;;) { - y = (const unsigned char*)memchr(y, xs[0], (size_t)(ye-y)); - if (y == NULL) return -1; - if (memcmp(xs, y, m) == 0) { - return (mrb_int)(y - ys); + for (i=0; i < lim; i+=sizeof(bitint)) { + bitint t0, t1; + + memcpy(&t0, s0+i, sizeof(bitint)); + memcpy(&t1, s1+i, sizeof(bitint)); + + const bitint eq = (t0 ^ first) | (t1 ^ last); + bitint zeros = ((~eq & MASK7f) + MASK01) & (~eq & MASK80); + + for (size_t j = 0; zeros; j++) { + if (zeros & MASKtop) { + const mrb_int idx = i + j; + const char* p = s0 + idx + 1; + if (memcmp(p, xs + 1, m - 2) == 0) { + return idx; + } } - y++; + +#if defined(MRB_ENDIAN_BIG) + zeros <<= 8; +#else + zeros >>= 8; +#endif } - return -1; } - else { - const unsigned char *x = xs, *xe = xs + m; - const unsigned char *y = ys; - int i; - ptrdiff_t qstable[256]; - - /* Preprocessing */ - for (i = 0; i < 256; ++i) - qstable[i] = m + 1; - for (; x < xe; ++x) - qstable[*x] = xe - x; - /* Searching */ - for (; y + m <= ys + n; y += *(qstable + y[m])) { - if (*xs == *y && memcmp(xs, y, m) == 0) - return (mrb_int)(y - ys); + + if (i+m < n) { + const char *p = s0; + const char *e = ys + n; + while (p n) return -1; else if (m == n) { - return memcmp(x0, y0, m) == 0 ? 0 : -1; + return memcmp(x, y, m) == 0 ? 0 : -1; } else if (m < 1) { return 0; } else if (m == 1) { - const unsigned char *ys = (const unsigned char *)memchr(y, *x, n); + const char *p = (const char*)memchr(y, *x, n); - if (ys) - return (mrb_int)(ys - y); - else - return -1; + if (p) return (mrb_int)(p - y); + return -1; } - return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n); + return memsearch_swar(x, m, y, n); } static void @@ -522,7 +767,7 @@ str_share(mrb_state *mrb, struct RString *orig, struct RString *s) } else { if (orig->as.heap.aux.capa > orig->as.heap.len) { - orig->as.heap.ptr = (char *)mrb_realloc(mrb, orig->as.heap.ptr, len+1); + orig->as.heap.ptr = (char*)mrb_realloc(mrb, orig->as.heap.ptr, len+1); orig->as.heap.aux.capa = (mrb_ssize)len; } str_init_shared(mrb, orig, s, NULL); @@ -530,13 +775,23 @@ str_share(mrb_state *mrb, struct RString *orig, struct RString *s) } } +/* + * @param mrb The mruby state. + * @param str The original mruby string. + * @param beg The starting byte offset of the substring. + * @param len The length in bytes of the substring. + * @return A new mruby string representing the byte subsequence. + * + * Creates a new mruby string that is a subsequence of an existing string, + * based on byte offsets and length. This function may share the underlying + * buffer with the original string if possible. + */ mrb_value mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) { - struct RString *orig, *s; + struct RString *orig = mrb_str_ptr(str); + struct RString *s = mrb_obj_alloc_string(mrb); - orig = mrb_str_ptr(str); - s = mrb_obj_alloc_string(mrb); if (RSTR_EMBEDDABLE_P(len)) { str_init_embed(s, RSTR_PTR(orig)+beg, len); } @@ -545,21 +800,16 @@ mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) s->as.heap.ptr += (mrb_ssize)beg; s->as.heap.len = (mrb_ssize)len; } - RSTR_COPY_ASCII_FLAG(s, orig); + RSTR_COPY_SINGLE_BYTE_FLAG(s, orig); return mrb_obj_value(s); } -static void -str_range_to_bytes(mrb_value str, mrb_int *pos, mrb_int *len) -{ - *pos = chars2bytes(str, 0, *pos); - *len = chars2bytes(str, *pos, *len); -} #ifdef MRB_UTF8_STRING static inline mrb_value str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) { - str_range_to_bytes(str, &beg, &len); + beg = chars2bytes(str, 0, beg); + len = chars2bytes(str, beg, len); return mrb_str_byte_subseq(mrb, str, beg, len); } #else @@ -589,27 +839,37 @@ str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) str_subseq(mrb, str, beg, len) : mrb_nil_value(); } +/* + * @param mrb The mruby state. + * @param str The mruby string to search in. + * @param sptr A pointer to the C string to search for. + * @param slen The length of the C string to search for. + * @param offset The byte offset at which to start the search. + * @return The byte offset of the first occurrence of the substring, or -1 if not found. + * + * Finds the first occurrence of a C string within an mruby string, starting from a given offset. + * The search is performed on a byte-by-byte basis. + */ MRB_API mrb_int mrb_str_index(mrb_state *mrb, mrb_value str, const char *sptr, mrb_int slen, mrb_int offset) { - mrb_int pos; - char *s; - mrb_int len; + mrb_int len = RSTRING_LEN(str); - len = RSTRING_LEN(str); if (offset < 0) { offset += len; if (offset < 0) return -1; } if (len - offset < slen) return -1; - s = RSTRING_PTR(str); + + char *s = RSTRING_PTR(str); if (offset) { s += offset; } if (slen == 0) return offset; /* need proceed one character at a time */ len = RSTRING_LEN(str) - offset; - pos = mrb_memsearch(sptr, slen, s, len); + + mrb_int pos = mrb_memsearch(sptr, slen, s, len); if (pos < 0) return pos; return pos + offset; } @@ -617,11 +877,8 @@ mrb_str_index(mrb_state *mrb, mrb_value str, const char *sptr, mrb_int slen, mrb static mrb_int str_index_str(mrb_state *mrb, mrb_value str, mrb_value str2, mrb_int offset) { - const char *ptr; - mrb_int len; - - ptr = RSTRING_PTR(str2); - len = RSTRING_LEN(str2); + const char *ptr = RSTRING_PTR(str2); + mrb_int len = RSTRING_LEN(str2); return mrb_str_index(mrb, str, ptr, len, offset); } @@ -629,20 +886,17 @@ str_index_str(mrb_state *mrb, mrb_value str, mrb_value str2, mrb_int offset) static mrb_value str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2) { - size_t len; - mrb_check_frozen(mrb, s1); if (s1 == s2) return mrb_obj_value(s1); - RSTR_COPY_ASCII_FLAG(s1, s2); + RSTR_COPY_SINGLE_BYTE_FLAG(s1, s2); if (RSTR_SHARED_P(s1)) { str_decref(mrb, s1->as.heap.aux.shared); } - else if (!RSTR_EMBED_P(s1) && !RSTR_NOFREE_P(s1) && !RSTR_FSHARED_P(s1) - && s1->as.heap.ptr) { + else if (!RSTR_EMBED_P(s1) && !RSTR_NOFREE_P(s1) && !RSTR_FSHARED_P(s1)) { mrb_free(mrb, s1->as.heap.ptr); } - len = (size_t)RSTR_LEN(s2); + size_t len = (size_t)RSTR_LEN(s2); if (RSTR_EMBEDDABLE_P(len)) { str_init_embed(s1, RSTR_PTR(s2), len); } @@ -671,7 +925,7 @@ str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos) s = sbeg + pos; t = RSTRING_PTR(sub); if (len) { - s = char_adjust(sbeg, send, s); + s = char_adjust(s, send); while (sbeg <= s) { if ((mrb_int)(send - s) >= len && memcmp(s, t, len) == 0) { return (mrb_int)(s - sbeg); @@ -686,6 +940,8 @@ str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos) } #ifdef _WIN32 +#include +#include #include char* @@ -748,31 +1004,58 @@ mrb_locale_from_utf8(const char *utf8, int len) } #endif +/* + * @param mrb The mruby state. + * @param s The RString structure to modify. + * + * Prepares a string for modification. If the string is shared or not extensible, + * it will be unshared or converted to a normal string. This version preserves + * the ASCII/single-byte nature of the string if it was already set. + * Raises an error if the string is frozen. + */ MRB_API void mrb_str_modify_keep_ascii(mrb_state *mrb, struct RString *s) { mrb_check_frozen(mrb, s); - str_modify_keep_ascii(mrb, s); + str_unshare_buffer(mrb, s); } +/* + * @param mrb The mruby state. + * @param s The RString structure to modify. + * + * Prepares a string for modification. Similar to `mrb_str_modify_keep_ascii`, + * but also unsets the single-byte flag, assuming the modification might + * introduce multi-byte characters. + * Raises an error if the string is frozen. + */ MRB_API void mrb_str_modify(mrb_state *mrb, struct RString *s) { mrb_str_modify_keep_ascii(mrb, s); - RSTR_UNSET_ASCII_FLAG(s); + RSTR_UNSET_SINGLE_BYTE_FLAG(s); } +/* + * @param mrb The mruby state. + * @param str The mruby string to resize. + * @param len The new desired length of the string. + * @return The resized mruby string. + * + * Resizes an mruby string to a new length. + * If the new length is shorter, the string is truncated. + * If the new length is longer, the string is extended, and the new portion's + * content is undefined (it might be null bytes or garbage). + * The string is modified in place. + */ MRB_API mrb_value mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len) { - mrb_int slen; struct RString *s = mrb_str_ptr(str); - if (len < 0) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "negative (or overflowed) string size"); - } + str_check_length(mrb, len); mrb_str_modify(mrb, s); - slen = RSTR_LEN(s); + mrb_int slen = RSTR_LEN(s); if (len != slen) { if (slen < len || slen - len > 256) { resize_capa(mrb, s, len); @@ -783,6 +1066,21 @@ mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len) return str; } +/* + * @param mrb The mruby state. + * @param str0 The mruby string to convert. + * @return A pointer to a null-terminated C string. + * + * Converts an mruby string to a null-terminated C string. + * This function may allocate a new C string if the mruby string + * contains null bytes or is not already null-terminated. + * The caller is responsible for managing the memory of the returned C string + * if it's different from the string's internal buffer. + * Raises E_ARGUMENT_ERROR if the string contains a null byte. + * Note: This function creates a *new* RString object to hold the C-string version if modification is needed. + * It's generally recommended to use RSTRING_PTR and RSTRING_LEN for direct access + * and ensure null termination manually if needed, or use mrb_string_cstr for a (potentially new) null-terminated string. + */ MRB_API char* mrb_str_to_cstr(mrb_state *mrb, mrb_value str0) { @@ -795,6 +1093,14 @@ mrb_str_to_cstr(mrb_state *mrb, mrb_value str0) return RSTR_PTR(s); } +/* + * @param mrb The mruby state. + * @param self The mruby string to append to (modified in place). + * @param other The mruby value to append (will be converted to a string). + * + * Concatenates the string representation of `other` to `self`. + * `self` is modified in place. + */ MRB_API void mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other) { @@ -802,6 +1108,14 @@ mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other) mrb_str_cat_str(mrb, self, other); } +/* + * @param mrb The mruby state. + * @param a The first mruby string. + * @param b The second mruby string. + * @return A new mruby string that is the concatenation of `a` and `b`. + * + * Creates a new mruby string by concatenating two existing mruby strings. + */ MRB_API mrb_value mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b) { @@ -827,8 +1141,8 @@ mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b) * call-seq: * str + other_str -> new_str * - * Concatenation---Returns a new String containing - * other_str concatenated to str. + * Concatenation---Returns a new `String` containing + * `other_str` concatenated to `str`. * * "Hello from " + self.to_s #=> "Hello from main" */ @@ -859,8 +1173,7 @@ mrb_str_size(mrb_state *mrb, mrb_value self) static mrb_value mrb_str_bytesize(mrb_state *mrb, mrb_value self) { - mrb_int len = RSTRING_LEN(self); - return mrb_int_value(mrb, len); + return mrb_int_value(mrb, RSTRING_LEN(self)); } /* 15.2.10.5.1 */ @@ -868,7 +1181,7 @@ mrb_str_bytesize(mrb_state *mrb, mrb_value self) * call-seq: * str * integer => new_str * - * Copy---Returns a new String containing integer copies of + * Copy---Returns a new `String` containing `integer` copies of * the receiver. * * "Ho! " * 3 #=> "Ho! Ho! Ho! " @@ -876,9 +1189,7 @@ mrb_str_bytesize(mrb_state *mrb, mrb_value self) static mrb_value mrb_str_times(mrb_state *mrb, mrb_value self) { - mrb_int n, len, times; - struct RString *str2; - char *p; + mrb_int len, times; mrb_get_args(mrb, "i", ×); if (times < 0) { @@ -887,10 +1198,11 @@ mrb_str_times(mrb_state *mrb, mrb_value self) if (mrb_int_mul_overflow(RSTRING_LEN(self), times, &len)) { mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big"); } - str2 = str_new(mrb, 0, len); - p = RSTR_PTR(str2); + + struct RString *str2 = str_new(mrb, 0, len); + char *p = RSTR_PTR(str2); if (len > 0) { - n = RSTRING_LEN(self); + mrb_int n = RSTRING_LEN(self); memcpy(p, RSTRING_PTR(self), n); while (n <= len/2) { memcpy(p + n, p, n); @@ -899,7 +1211,7 @@ mrb_str_times(mrb_state *mrb, mrb_value self) memcpy(p + n, p, len-n); } p[RSTR_LEN(str2)] = '\0'; - RSTR_COPY_ASCII_FLAG(str2, mrb_str_ptr(self)); + RSTR_COPY_SINGLE_BYTE_FLAG(str2, mrb_str_ptr(self)); return mrb_obj_value(str2); } @@ -915,18 +1227,27 @@ mrb_str_times(mrb_state *mrb, mrb_value self) * = 0 * < -1 */ +/* + * @param mrb The mruby state. + * @param str1 The first mruby string for comparison. + * @param str2 The second mruby string for comparison (must be a string). + * @return An integer less than, equal to, or greater than zero if `str1` is less than, + * equal to, or greater than `str2`, respectively. + * + * Compares two mruby strings lexicographically. + * Assumes `str2` is already a string. For a version that checks and converts, see `mrb_str_cmp_m`. + */ MRB_API int mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2) { - mrb_int len, len1, len2; - mrb_int retval; struct RString *s1 = mrb_str_ptr(str1); struct RString *s2 = mrb_str_ptr(str2); - len1 = RSTR_LEN(s1); - len2 = RSTR_LEN(s2); - len = lesser(len1, len2); - retval = memcmp(RSTR_PTR(s1), RSTR_PTR(s2), len); + mrb_int len1 = RSTR_LEN(s1); + mrb_int len2 = RSTR_LEN(s2); + mrb_int len = lesser(len1, len2); + mrb_int retval = (len == 0) ? 0 : memcmp(RSTR_PTR(s1), RSTR_PTR(s2), len); + if (retval == 0) { if (len1 == len2) return 0; if (len1 > len2) return 1; @@ -935,6 +1256,7 @@ mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2) if (retval > 0) return 1; return -1; } +#undef lesser /* 15.2.10.5.3 */ @@ -942,20 +1264,19 @@ mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2) * call-seq: * str <=> other_str => -1, 0, +1 * - * Comparison---Returns -1 if other_str is less than, 0 if - * other_str is equal to, and +1 if other_str is greater than - * str. If the strings are of different lengths, and the strings are + * Comparison---Returns -1 if `other_str` is less than, 0 if + * `other_str` is equal to, and +1 if `other_str` is greater than + * `str`. If the strings are of different lengths, and the strings are * equal when compared up to the shortest length, then the longer string is - * considered greater than the shorter one. If the variable $= is - * false, the comparison is based on comparing the binary values + * considered greater than the shorter one. If the variable `$=` is + * `false`, the comparison is based on comparing the binary values * of each character in the string. In older versions of Ruby, setting - * $= allowed case-insensitive comparisons; this is now deprecated - * in favor of using String#casecmp. + * `$=` allowed case-insensitive comparisons; this is now deprecated + * in favor of using `String#casecmp`. * - * <=> is the basis for the methods <, - * <=, >, >=, and between?, - * included from module Comparable. The method - * String#== does not use Comparable#==. + * `<=>` is the basis for the methods `<`, `<=`, `>`, `>=`, and `between?`, + * included from module `Comparable`. The method `String#==` does not use + * `Comparable#==`. * * "abcdef" <=> "abcde" #=> 1 * "abcdef" <=> "abcdef" #=> 0 @@ -966,14 +1287,12 @@ static mrb_value mrb_str_cmp_m(mrb_state *mrb, mrb_value str1) { mrb_value str2 = mrb_get_arg1(mrb); - mrb_int result; if (!mrb_string_p(str2)) { return mrb_nil_value(); } - else { - result = mrb_str_cmp(mrb, str1, str2); - } + + mrb_int result = mrb_str_cmp(mrb, str1, str2); return mrb_int_value(mrb, result); } @@ -983,11 +1302,18 @@ str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2) const mrb_int len = RSTRING_LEN(str1); if (len != RSTRING_LEN(str2)) return FALSE; - if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), (size_t)len) == 0) - return TRUE; - return FALSE; + return (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), (size_t)len) == 0); } +/* + * @param mrb The mruby state. + * @param str1 The first mruby string. + * @param str2 The second mruby value to compare with. + * @return `TRUE` if `str1` and `str2` are equal strings, `FALSE` otherwise. + * + * Checks if two mruby strings are equal. + * Returns `FALSE` if `str2` is not a string. + */ MRB_API mrb_bool mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2) { @@ -1001,10 +1327,10 @@ mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2) * str == obj => true or false * * Equality--- - * If obj is not a String, returns false. - * Otherwise, returns false or true + * If `obj` is not a `String`, returns `false`. + * Otherwise, returns `false` or `true` * - * caution:if str <=> obj returns zero. + * caution:if `str` `<=>` `obj` returns zero. */ static mrb_value mrb_str_equal_m(mrb_state *mrb, mrb_value str1) @@ -1015,6 +1341,14 @@ mrb_str_equal_m(mrb_state *mrb, mrb_value str1) } /* ---------------------------------- */ +/* + * @param mrb The mruby state. + * @param str The mruby string to duplicate. + * @return A new mruby string that is a copy of the original. + * + * Creates a new mruby string that is a duplicate of the given string. + * The new string will have its own buffer. + */ MRB_API mrb_value mrb_str_dup(mrb_state *mrb, mrb_value str) { @@ -1024,6 +1358,16 @@ mrb_str_dup(mrb_state *mrb, mrb_value str) return str_replace(mrb, dup, s); } +MRB_API mrb_value +mrb_str_dup_frozen(mrb_state *mrb, mrb_value str) +{ + if (!mrb_frozen_p(mrb_basic_ptr(str))) { + str = mrb_str_dup(mrb, str); + mrb_basic_ptr(str)->frozen = TRUE; + } + return str; +} + enum str_convert_range { /* `beg` and `len` are byte unit in `0 ... str.bytesize` */ STR_BYTE_RANGE_CORRECTED = 1, @@ -1039,32 +1383,32 @@ enum str_convert_range { }; static enum str_convert_range -str_convert_range(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_int *beg, mrb_int *len) +str_convert_range(mrb_state *mrb, mrb_value str, mrb_value idx, mrb_value alen, mrb_int *beg, mrb_int *len) { if (!mrb_undef_p(alen)) { - *beg = mrb_as_int(mrb, indx); + *beg = mrb_as_int(mrb, idx); *len = mrb_as_int(mrb, alen); return STR_CHAR_RANGE; } else { - switch (mrb_type(indx)) { + switch (mrb_type(idx)) { default: - indx = mrb_ensure_int_type(mrb, indx); + idx = mrb_ensure_int_type(mrb, idx); /* fall through */ case MRB_TT_INTEGER: - *beg = mrb_integer(indx); + *beg = mrb_integer(idx); *len = 1; return STR_CHAR_RANGE; case MRB_TT_STRING: - *beg = str_index_str(mrb, str, indx, 0); + *beg = str_index_str(mrb, str, idx, 0); if (*beg < 0) { break; } - *len = RSTRING_LEN(indx); + *len = RSTRING_LEN(idx); return STR_BYTE_RANGE_CORRECTED; case MRB_TT_RANGE: *len = RSTRING_CHAR_LEN(str); - switch (mrb_range_beg_len(mrb, indx, beg, len, *len, TRUE)) { + switch (mrb_range_beg_len(mrb, idx, beg, len, *len, TRUE)) { case MRB_RANGE_OK: return STR_CHAR_RANGE_CORRECTED; case MRB_RANGE_OUT: @@ -1077,12 +1421,25 @@ str_convert_range(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, return STR_OUT_OF_RANGE; } +/* + * @param mrb The mruby state. + * @param str The mruby string. + * @param idx The index or range. Can be an integer, a string, or a range. + * @param alen An optional length (if `idx` is an integer). + * @return A new mruby string (substring), or nil if out of bounds or not found. + * + * Implements string element reference (e.g., `str[idx]`, `str[idx, len]`). + * - If `idx` is an Integer, returns a substring of 1 character at that index (or `len` characters if `alen` is provided). + * - If `idx` is a String, returns that string if it's a substring of `str`. + * - If `idx` is a Range, returns the substring specified by the range. + * Character indexing is used if UTF-8 is enabled, otherwise byte indexing. + */ mrb_value -mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen) +mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value idx, mrb_value alen) { mrb_int beg, len; - switch (str_convert_range(mrb, str, indx, alen, &beg, &len)) { + switch (str_convert_range(mrb, str, idx, alen, &beg, &len)) { case STR_CHAR_RANGE_CORRECTED: return str_subseq(mrb, str, beg, len); case STR_CHAR_RANGE: @@ -1090,8 +1447,8 @@ mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen) if (mrb_undef_p(alen) && !mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value(); return str; case STR_BYTE_RANGE_CORRECTED: - if (mrb_string_p(indx)) { - return mrb_str_dup(mrb, indx); + if (mrb_string_p(idx)) { + return mrb_str_dup(mrb, idx); } else { return mrb_str_byte_subseq(mrb, str, beg, len); @@ -1115,17 +1472,17 @@ mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen) * str.slice(range) => new_str or nil * str.slice(other_str) => new_str or nil * - * Element Reference---If passed a single Integer, returns the code - * of the character at that position. If passed two Integer + * Element Reference---If passed a single `Integer`, returns the code + * of the character at that position. If passed two `Integer` * objects, returns a substring starting at the offset given by the first, and * a length given by the second. If given a range, a substring containing * characters at offsets given by the range is returned. In all three cases, if - * an offset is negative, it is counted from the end of str. Returns - * nil if the initial offset falls outside the string, the length + * an offset is negative, it is counted from the end of *str*. Returns + * `nil` if the initial offset falls outside the string, the length * is negative, or the beginning of the range is greater than the end. * - * If a String is given, that string is returned if it occurs in - * str. In both cases, nil is returned if there is no + * If a `String` is given, that string is returned if it occurs in + * *str*. In both cases, `nil` is returned if there is no * match. * * a = "hello there" @@ -1209,7 +1566,7 @@ str_escape(mrb_state *mrb, mrb_value str, mrb_bool inspect) char buf[4]; /* `\x??` or UTF-8 character */ mrb_value result = mrb_str_new_lit(mrb, "\""); #ifdef MRB_UTF8_STRING - uint32_t ascii_flag = MRB_STR_ASCII; + uint32_t sb_flag = MRB_STR_SINGLE_BYTE; #endif p = RSTRING_PTR(str); pend = RSTRING_END(str); @@ -1219,14 +1576,9 @@ str_escape(mrb_state *mrb, mrb_value str, mrb_bool inspect) if (inspect) { mrb_int clen = mrb_utf8len(p, pend); if (clen > 1) { - mrb_int i; - - for (i=0; iflags |= ascii_flag; - mrb_str_ptr(result)->flags |= ascii_flag; + mrb_str_ptr(str)->flags |= sb_flag; + mrb_str_ptr(result)->flags |= sb_flag; } else { - RSTR_SET_ASCII_FLAG(mrb_str_ptr(result)); + RSTR_SET_SINGLE_BYTE_FLAG(mrb_str_ptr(result)); } #endif @@ -1283,12 +1632,12 @@ str_escape(mrb_state *mrb, mrb_value str, mrb_bool inspect) } static void -mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_value replace) +mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value idx, mrb_value alen, mrb_value replace) { mrb_int beg, len, charlen; mrb_ensure_string_type(mrb, replace); - switch (str_convert_range(mrb, str, indx, alen, &beg, &len)) { + switch (str_convert_range(mrb, str, idx, alen, &beg, &len)) { case STR_OUT_OF_RANGE: default: mrb_raise(mrb, E_INDEX_ERROR, "string not matched"); @@ -1298,10 +1647,11 @@ mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_ } charlen = RSTRING_CHAR_LEN(str); if (beg < 0) { beg += charlen; } - if (beg < 0 || beg > charlen) { str_out_of_index(mrb, indx); } + if (beg < 0 || beg > charlen) { str_out_of_index(mrb, idx); } /* fall through */ case STR_CHAR_RANGE_CORRECTED: - str_range_to_bytes(str, &beg, &len); + beg = chars2bytes(str, 0, beg); + len = chars2bytes(str, beg, len); /* fall through */ case STR_BYTE_RANGE_CORRECTED: if (mrb_int_add_overflow(beg, len, &len)) { @@ -1318,15 +1668,16 @@ mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_ * str[range] = replace * str[other_str] = replace * - * Modify +self+ by replacing the content of +self+. + * Modify `self` by replacing the content of `self`. * The portion of the string affected is determined using the same criteria as +String#[]+. + * The return value of this expression is `replace`. */ static mrb_value mrb_str_aset_m(mrb_state *mrb, mrb_value str) { - mrb_value indx, alen, replace; + mrb_value idx, alen, replace; - switch (mrb_get_args(mrb, "oo|S!", &indx, &alen, &replace)) { + switch (mrb_get_args(mrb, "oo|S!", &idx, &alen, &replace)) { case 2: replace = alen; alen = mrb_undef_value(); @@ -1334,8 +1685,8 @@ mrb_str_aset_m(mrb_state *mrb, mrb_value str) case 3: break; } - mrb_str_aset(mrb, str, indx, alen, replace); - return str; + mrb_str_aset(mrb, str, idx, alen, replace); + return replace; } /* 15.2.10.5.8 */ @@ -1343,8 +1694,8 @@ mrb_str_aset_m(mrb_state *mrb, mrb_value str) * call-seq: * str.capitalize! => str or nil * - * Modifies str by converting the first character to uppercase and the - * remainder to lowercase. Returns nil if no changes are made. + * Modifies *str* by converting the first character to uppercase and the + * remainder to lowercase. Returns `nil` if no changes are made. * * a = "hello" * a.capitalize! #=> "Hello" @@ -1381,7 +1732,7 @@ mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str) * call-seq: * str.capitalize => new_str * - * Returns a copy of str with the first character converted to uppercase + * Returns a copy of *str* with the first character converted to uppercase * and the remainder to lowercase. * * "hello".capitalize #=> "Hello" @@ -1391,9 +1742,7 @@ mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str) static mrb_value mrb_str_capitalize(mrb_state *mrb, mrb_value self) { - mrb_value str; - - str = mrb_str_dup(mrb, self); + mrb_value str = mrb_str_dup(mrb, self); mrb_str_capitalize_bang(mrb, str); return str; } @@ -1403,23 +1752,18 @@ mrb_str_capitalize(mrb_state *mrb, mrb_value self) * call-seq: * str.chomp!(separator="\n") => str or nil * - * Modifies str in place as described for String#chomp, - * returning str, or nil if no modifications were made. + * Modifies *str* in place as described for `String#chomp`, + * returning *str*, or `nil` if no modifications were made. */ static mrb_value mrb_str_chomp_bang(mrb_state *mrb, mrb_value str) { mrb_value rs; - mrb_int newline; - char *p, *pp; - mrb_int rslen; - mrb_int len; - mrb_int argc; + mrb_int argc = mrb_get_args(mrb, "|S", &rs); struct RString *s = mrb_str_ptr(str); - argc = mrb_get_args(mrb, "|S", &rs); mrb_str_modify_keep_ascii(mrb, s); - len = RSTR_LEN(s); + mrb_int len = RSTR_LEN(s); if (argc == 0) { if (len == 0) return mrb_nil_value(); smart_chomp: @@ -1441,8 +1785,8 @@ mrb_str_chomp_bang(mrb_state *mrb, mrb_value str) } if (len == 0 || mrb_nil_p(rs)) return mrb_nil_value(); - p = RSTR_PTR(s); - rslen = RSTRING_LEN(rs); + char *p = RSTR_PTR(s); + mrb_int rslen = RSTRING_LEN(rs); if (rslen == 0) { while (len>0 && p[len-1] == '\n') { len--; @@ -1457,13 +1801,13 @@ mrb_str_chomp_bang(mrb_state *mrb, mrb_value str) return mrb_nil_value(); } if (rslen > len) return mrb_nil_value(); - newline = RSTRING_PTR(rs)[rslen-1]; + mrb_int newline = RSTRING_PTR(rs)[rslen-1]; if (rslen == 1 && newline == '\n') newline = RSTRING_PTR(rs)[rslen-1]; if (rslen == 1 && newline == '\n') goto smart_chomp; - pp = p + len - rslen; + char *pp = p + len - rslen; if (p[len-1] == newline && (rslen <= 1 || memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) { @@ -1479,10 +1823,10 @@ mrb_str_chomp_bang(mrb_state *mrb, mrb_value str) * call-seq: * str.chomp(separator="\n") => new_str * - * Returns a new String with the given record separator removed - * from the end of str (if present). chomp also removes - * carriage return characters (that is it will remove \n, - * \r, and \r\n). + * Returns a new `String` with the given record separator removed + * from the end of *str* (if present). `chomp` also removes + * carriage return characters (that is it will remove `\n`, + * `\r`, and `\r\n`). * * "hello".chomp #=> "hello" * "hello\n".chomp #=> "hello" @@ -1495,9 +1839,7 @@ mrb_str_chomp_bang(mrb_state *mrb, mrb_value str) static mrb_value mrb_str_chomp(mrb_state *mrb, mrb_value self) { - mrb_value str; - - str = mrb_str_dup(mrb, self); + mrb_value str = mrb_str_dup(mrb, self); mrb_str_chomp_bang(mrb, str); return str; } @@ -1507,9 +1849,9 @@ mrb_str_chomp(mrb_state *mrb, mrb_value self) * call-seq: * str.chop! => str or nil * - * Processes str as for String#chop, returning str, - * or nil if str is the empty string. See also - * String#chomp!. + * Processes *str* as for `String#chop`, returning *str*, + * or `nil` if *str* is the empty string. See also + * `String#chomp!`. */ static mrb_value mrb_str_chop_bang(mrb_state *mrb, mrb_value str) @@ -1549,10 +1891,10 @@ mrb_str_chop_bang(mrb_state *mrb, mrb_value str) * call-seq: * str.chop => new_str * - * Returns a new String with the last character removed. If the - * string ends with \r\n, both characters are removed. Applying - * chop to an empty string returns an empty - * string. String#chomp is often a safer alternative, as it leaves + * Returns a new `String` with the last character removed. If the + * string ends with `\r\n`, both characters are removed. Applying + * `chop` to an empty string returns an empty + * string. `String#chomp` is often a safer alternative, as it leaves * the string unchanged if it doesn't end in a record separator. * * "string\r\n".chop #=> "string" @@ -1564,8 +1906,7 @@ mrb_str_chop_bang(mrb_state *mrb, mrb_value str) static mrb_value mrb_str_chop(mrb_state *mrb, mrb_value self) { - mrb_value str; - str = mrb_str_dup(mrb, self); + mrb_value str = mrb_str_dup(mrb, self); mrb_str_chop_bang(mrb, str); return str; } @@ -1575,7 +1916,7 @@ mrb_str_chop(mrb_state *mrb, mrb_value self) * call-seq: * str.downcase! => str or nil * - * Downcases the contents of str, returning nil if no + * Downcases the contents of *str*, returning `nil` if no * changes were made. */ static mrb_value @@ -1605,7 +1946,7 @@ mrb_str_downcase_bang(mrb_state *mrb, mrb_value str) * call-seq: * str.downcase => new_str * - * Returns a copy of str with all uppercase letters replaced with their + * Returns a copy of *str* with all uppercase letters replaced with their * lowercase counterparts. The operation is locale insensitive---only * characters 'A' to 'Z' are affected. * @@ -1614,9 +1955,7 @@ mrb_str_downcase_bang(mrb_state *mrb, mrb_value str) static mrb_value mrb_str_downcase(mrb_state *mrb, mrb_value self) { - mrb_value str; - - str = mrb_str_dup(mrb, self); + mrb_value str = mrb_str_dup(mrb, self); mrb_str_downcase_bang(mrb, str); return str; } @@ -1626,7 +1965,7 @@ mrb_str_downcase(mrb_state *mrb, mrb_value self) * call-seq: * str.empty? => true or false * - * Returns true if str has a length of zero. + * Returns `true` if *str* has a length of zero. * * "hello".empty? #=> false * "".empty? #=> true @@ -1650,13 +1989,23 @@ static mrb_value mrb_str_eql(mrb_state *mrb, mrb_value self) { mrb_value str2 = mrb_get_arg1(mrb); - mrb_bool eql_p; - - eql_p = (mrb_string_p(str2)) && str_eql(mrb, self, str2); + mrb_bool eql_p = (mrb_string_p(str2)) && str_eql(mrb, self, str2); return mrb_bool_value(eql_p); } +/* + * @param mrb The mruby state. + * @param str The mruby string from which to take a substring. + * @param beg The starting character index of the substring. + * @param len The length in characters of the substring. + * @return A new mruby string representing the substring, or nil if out of bounds. + * + * Creates a new mruby string that is a substring of an existing string. + * This function considers character indices (which might differ from byte indices + * if UTF-8 is enabled) and length. + * Handles negative indices and adjusts length to fit within string boundaries. + */ MRB_API mrb_value mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) { @@ -1664,7 +2013,7 @@ mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) } /* - * 32 bit magic FNV-0 and FNV-1 prime + * 32-bit magic FNV-0 and FNV-1 prime b */ #define FNV_32_PRIME ((uint32_t)0x01000193) #define FNV1_32_INIT ((uint32_t)0x811c9dc5) @@ -1675,18 +2024,18 @@ mrb_byte_hash_step(const uint8_t *s, mrb_int len, uint32_t hval) const uint8_t *send = s + len; /* - * FNV-1 hash each octet in the buffer + * FNV-1a hash each octet in the buffer */ while (s < send) { - /* multiply by the 32 bit FNV magic prime mod 2^32 */ + /* xor the bottom with the current octet */ + hval ^= (uint32_t)*s++; + + /* multiply by the 32-bit FNV magic prime mod 2^32 */ #if defined(NO_FNV_GCC_OPTIMIZATION) hval *= FNV_32_PRIME; #else hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24); #endif - - /* xor the bottom with the current octet */ - hval ^= (uint32_t)*s++; } /* return our new hash value */ @@ -1726,7 +2075,7 @@ mrb_str_hash_m(mrb_state *mrb, mrb_value self) * str.include? other_str => true or false * str.include? int => true or false * - * Returns true if str contains the given string or + * Returns `true` if *str* contains the given string or * character. * * "hello".include? "lo" #=> true @@ -1748,8 +2097,8 @@ mrb_str_include(mrb_state *mrb, mrb_value self) * call-seq: * str.byteindex(substring, offset = 0) -> integer or nil * - * Returns the \Integer byte-based index of the first occurrence of the given +substring+, - * or +nil+ if none found: + * Returns the \Integer byte-based index of the first occurrence of the given `substring`, + * or `nil` if none found: * * 'foo'.byteindex('f') # => 0 * 'foo'.byteindex('oo') # => 1 @@ -1782,7 +2131,7 @@ mrb_str_byteindex_m(mrb_state *mrb, mrb_value str) * str.index(substring [, offset]) => int or nil * * Returns the index of the first occurrence of the given - * substring. Returns nil if not found. + * *substring*. Returns `nil` if not found. * If the second parameter is present, it * specifies the position in the string to begin the search. * @@ -1795,7 +2144,7 @@ mrb_str_byteindex_m(mrb_state *mrb, mrb_value str) static mrb_value mrb_str_index_m(mrb_state *mrb, mrb_value str) { - if (RSTR_ASCII_P(mrb_str_ptr(str))) { + if (RSTR_SINGLE_BYTE_P(mrb_str_ptr(str))) { return mrb_str_byteindex_m(mrb, str); } @@ -1844,7 +2193,7 @@ mrb_str_replace(mrb_state *mrb, mrb_value str) * call-seq: * String.new(str="") => new_str * - * Returns a new string object containing a copy of str. + * Returns a new string object containing a copy of *str*. */ static mrb_value mrb_str_init(mrb_state *mrb, mrb_value self) @@ -1852,8 +2201,7 @@ mrb_str_init(mrb_state *mrb, mrb_value self) mrb_value str2; if (mrb_get_args(mrb, "|S", &str2) == 0) { - struct RString *s = str_new(mrb, 0, 0); - str2 = mrb_obj_value(s); + str2 = mrb_str_new(mrb, 0, 0); } str_replace(mrb, mrb_str_ptr(self), mrb_str_ptr(str2)); return self; @@ -1866,7 +2214,7 @@ mrb_str_init(mrb_state *mrb, mrb_value self) * str.intern => symbol * str.to_sym => symbol * - * Returns the Symbol corresponding to str, creating the + * Returns the `Symbol` corresponding to *str*, creating the * symbol if it did not previously exist. * * "Koala".intern #=> :Koala @@ -1876,16 +2224,35 @@ mrb_str_init(mrb_state *mrb, mrb_value self) * s == :@cat #=> true * * This can also be used to create symbols that cannot be represented using the - * :xxx notation. + * `:xxx` notation. * * 'cat and dog'.to_sym #=> :"cat and dog" */ +/* + * @param mrb The mruby state. + * @param self The mruby string to convert to a symbol. + * @return The mruby symbol corresponding to the string. + * + * Converts a mruby string to a symbol. If the symbol does not exist, it is created. + */ MRB_API mrb_value mrb_str_intern(mrb_state *mrb, mrb_value self) { return mrb_symbol_value(mrb_intern_str(mrb, self)); } /* ---------------------------------- */ +/* + * @param mrb The mruby state. + * @param obj The mruby value to convert to a string. + * @return The string representation of the mruby value. + * + * Converts any mruby object to its string representation. + * For strings, it returns the object itself. + * For symbols, it returns the symbol's name as a string. + * For integers, it converts the integer to a string (base 10). + * For classes/modules, it returns their name. + * For other types, it calls the `to_s` method on the object. + */ MRB_API mrb_value mrb_obj_as_string(mrb_state *mrb, mrb_value obj) { @@ -1905,6 +2272,13 @@ mrb_obj_as_string(mrb_state *mrb, mrb_value obj) } } +/* + * @param mrb The mruby state. + * @param p The pointer to convert. + * @return A new mruby string representing the pointer address. + * + * Converts a C pointer to a mruby string representation (e.g., "0x..."). + */ MRB_API mrb_value mrb_ptr_to_str(mrb_state *mrb, void *p) { @@ -1952,7 +2326,7 @@ str_reverse(char *p, char *e) * call-seq: * str.reverse! => str * - * Reverses str in place. + * Reverses *str* in place. */ static mrb_value mrb_str_reverse_bang(mrb_state *mrb, mrb_value str) @@ -1997,7 +2371,7 @@ mrb_str_reverse_bang(mrb_state *mrb, mrb_value str) * call-seq: * str.reverse => new_str * - * Returns a new string with the characters from str in reverse order. + * Returns a new string with the characters from *str* in reverse order. * * "stressed".reverse #=> "desserts" */ @@ -2013,8 +2387,8 @@ mrb_str_reverse(mrb_state *mrb, mrb_value str) * call-seq: * byterindex(substring, offset = self.bytesize) -> integer or nil * - * Returns the \Integer byte-based index of the _last_ occurrence of the given +substring+, - * or +nil+ if none found: + * Returns the \Integer byte-based index of the _last_ occurrence of the given `substring`, + * or `nil` if none found: * * 'foo'.byterindex('f') # => 0 * 'foo'.byterindex('o') # => 2 @@ -2024,9 +2398,9 @@ mrb_str_reverse(mrb_state *mrb, mrb_value str) static mrb_value mrb_str_byterindex_m(mrb_state *mrb, mrb_value str) { + mrb_int len = RSTRING_LEN(str); mrb_value sub; mrb_int pos; - mrb_int len = RSTRING_LEN(str); if (mrb_get_args(mrb, "S|i", &sub, &pos) == 1) { pos = len; @@ -2052,8 +2426,8 @@ mrb_str_byterindex_m(mrb_state *mrb, mrb_value str) * call-seq: * str.rindex(substring [, offset]) => int or nil * - * Returns the index of the last occurrence of the given substring. - * Returns nil if not found. If the second parameter is + * Returns the index of the last occurrence of the given *substring*. + * Returns `nil` if not found. If the second parameter is * present, it specifies the position in the string to end the * search---characters beyond this point will not be considered. * @@ -2066,7 +2440,7 @@ mrb_str_byterindex_m(mrb_state *mrb, mrb_value str) static mrb_value mrb_str_rindex_m(mrb_state *mrb, mrb_value str) { - if (RSTR_ASCII_P(mrb_str_ptr(str))) { + if (RSTR_SINGLE_BYTE_P(mrb_str_ptr(str))) { return mrb_str_byterindex_m(mrb, str); } @@ -2074,8 +2448,7 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str) mrb_int pos; if (mrb_get_args(mrb, "S|i", &sub, &pos) == 1) { - pos = RSTRING_CHAR_LEN(str); - pos = chars2bytes(str, 0, pos); + pos = RSTRING_LEN(str); } else if (pos >= 0) { pos = chars2bytes(str, 0, pos); @@ -2091,8 +2464,8 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str) } pos = str_rindex(mrb, str, sub, pos); if (pos >= 0) { - pos = bytes2chars(RSTRING_PTR(str), RSTRING_LEN(str), pos); - BYTES_ALIGN_CHECK(pos); + pos = bytes2chars(str, pos); + if (pos < 0) return mrb_nil_value(); return mrb_int_value(mrb, pos); } return mrb_nil_value(); @@ -2107,20 +2480,20 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str) * call-seq: * str.split(separator=nil, [limit]) => anArray * - * Divides str into substrings based on a delimiter, returning an array + * Divides *str* into substrings based on a delimiter, returning an array * of these substrings. * - * If separator is a String, then its contents are used as - * the delimiter when splitting str. If separator is a single - * space, str is split on whitespace, with leading whitespace and runs + * If *separator* is a `String`, then its contents are used as + * the delimiter when splitting *str*. If *separator* is a single + * space, *str* is split on whitespace, with leading whitespace and runs * of contiguous whitespace characters ignored. * - * If separator is omitted or nil (which is the default), - * str is split on whitespace as if ' ' were specified. + * If *separator* is omitted or `nil` (which is the default), + * *str* is split on whitespace as if ' ' were specified. * - * If the limit parameter is omitted, trailing null fields are - * suppressed. If limit is a positive number, at most that number of - * fields will be returned (if limit is 1, the entire + * If the *limit* parameter is omitted, trailing null fields are + * suppressed. If *limit* is a positive number, at most that number of + * fields will be returned (if *limit* is `1`, the entire * string is returned as the only entry in an array). If negative, there is no * limit to the number of fields returned, and trailing null fields are not * suppressed. @@ -2137,18 +2510,14 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str) static mrb_value mrb_str_split_m(mrb_state *mrb, mrb_value str) { - mrb_int argc; mrb_value spat = mrb_nil_value(); enum {awk, string} split_type = string; mrb_int i = 0; - mrb_int beg; - mrb_int end; mrb_int lim = 0; - mrb_bool lim_p; - mrb_value result, tmp; + mrb_value tmp; - argc = mrb_get_args(mrb, "|oi", &spat, &lim); - lim_p = (lim > 0 && argc == 2); + mrb_int argc = mrb_get_args(mrb, "|oi", &spat, &lim); + mrb_bool lim_p = (lim > 0 && argc == 2); if (argc == 2) { if (lim == 1) { if (RSTRING_LEN(str) == 0) @@ -2168,16 +2537,16 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) split_type = awk; } - result = mrb_ary_new(mrb); - beg = 0; + mrb_value result = mrb_ary_new(mrb); + mrb_int beg = 0; if (split_type == awk) { mrb_bool skip = TRUE; - mrb_int idx = 0; mrb_int str_len = RSTRING_LEN(str); - unsigned int c; + mrb_int idx = beg; + mrb_int end = beg; int ai = mrb_gc_arena_save(mrb); + unsigned int c; - idx = end = beg; while (idx < str_len) { c = (unsigned char)RSTRING_PTR(str)[idx++]; if (skip) { @@ -2195,7 +2564,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) mrb_gc_arena_restore(mrb, ai); skip = TRUE; beg = idx; - if (lim_p) ++i; + if (lim_p) i++; } else { end = idx; @@ -2209,6 +2578,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) int ai = mrb_gc_arena_save(mrb); while (idx < str_len) { + mrb_int end; if (pat_len > 0) { end = mrb_memsearch(RSTRING_PTR(spat), pat_len, RSTRING_PTR(str)+idx, str_len - idx); if (end < 0) break; @@ -2373,7 +2743,7 @@ mrb_str_len_to_integer(mrb_state *mrb, const char *str, size_t len, mrb_int base #ifdef MRB_USE_BIGINT p2 = p; #endif - for ( ;p integer * - * Returns the result of interpreting leading characters in str as an - * integer base base (between 2 and 36). Extraneous characters past the + * Returns the result of interpreting leading characters in *str* as an + * integer base *base* (between 2 and 36). Extraneous characters past the * end of a valid number are ignored. If there is not a valid number at the - * start of str, 0 is returned. This method never raises an + * start of *str*, `0` is returned. This method never raises an * exception. * * "12345".to_i #=> 12345 @@ -2505,6 +2909,7 @@ mrb_str_to_i(mrb_state *mrb, mrb_value self) } #ifndef MRB_NO_FLOAT +/* Internal helper for mrb_str_to_dbl */ static double mrb_str_len_to_dbl(mrb_state *mrb, const char *s, size_t len, mrb_bool badcheck) { @@ -2577,14 +2982,13 @@ mrb_str_len_to_dbl(mrb_state *mrb, const char *s, size_t len, mrb_bool badcheck) p = buf; pend = n; nocopy: - d = mrb_float_read(p, &end); - if (p == end) { + if (mrb_read_float(p, &end, &d) == FALSE) { if (badcheck) { bad: mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for float(%!s)", s); /* not reached */ } - return d; + return 0.0; } if (badcheck) { if (!end || p == end) goto bad; @@ -2594,6 +2998,17 @@ mrb_str_len_to_dbl(mrb_state *mrb, const char *s, size_t len, mrb_bool badcheck) return d; } +/* + * @param mrb The mruby state. + * @param str The mruby string to convert. + * @param badcheck If `TRUE`, raise an error on invalid input; otherwise, return 0.0. + * @return A C double value. + * + * Converts an mruby string to a C double. + * Interprets leading characters in `str` as a floating-point number. + * If `badcheck` is true, invalid characters will raise an `E_ARGUMENT_ERROR`. + * Otherwise, extraneous characters are ignored, and 0.0 is returned for invalid numbers. + */ MRB_API double mrb_str_to_dbl(mrb_state *mrb, mrb_value str, mrb_bool badcheck) { @@ -2605,10 +3020,10 @@ mrb_str_to_dbl(mrb_state *mrb, mrb_value str, mrb_bool badcheck) * call-seq: * str.to_f => float * - * Returns the result of interpreting leading characters in str as a + * Returns the result of interpreting leading characters in *str* as a * floating-point number. Extraneous characters past the end of a valid number - * are ignored. If there is not a valid number at the start of str, - * 0.0 is returned. This method never raises an exception. + * are ignored. If there is not a valid number at the start of *str*, + * `0.0` is returned. This method never raises an exception. * * "123.45e1".to_f #=> 1234.5 * "45.67 degrees".to_f #=> 45.67 @@ -2642,7 +3057,7 @@ mrb_str_to_s(mrb_state *mrb, mrb_value self) * call-seq: * str.upcase! => str or nil * - * Upcases the contents of str, returning nil if no changes + * Upcases the contents of *str*, returning `nil` if no changes * were made. */ static mrb_value @@ -2672,7 +3087,7 @@ mrb_str_upcase_bang(mrb_state *mrb, mrb_value str) * call-seq: * str.upcase => new_str * - * Returns a copy of str with all lowercase letters replaced with their + * Returns a copy of *str* with all lowercase letters replaced with their * uppercase counterparts. The operation is locale insensitive---only * characters 'a' to 'z' are affected. * @@ -2681,9 +3096,7 @@ mrb_str_upcase_bang(mrb_state *mrb, mrb_value str) static mrb_value mrb_str_upcase(mrb_state *mrb, mrb_value self) { - mrb_value str; - - str = mrb_str_dup(mrb, self); + mrb_value str = mrb_str_dup(mrb, self); mrb_str_upcase_bang(mrb, str); return str; } @@ -2692,8 +3105,8 @@ mrb_str_upcase(mrb_state *mrb, mrb_value self) * call-seq: * str.dump -> new_str * - * Produces a version of str with all nonprinting characters replaced by - * \nnn notation and all special characters escaped. + * Produces a version of *str* with all nonprinting characters replaced by + * `\nnn` notation and all special characters escaped. */ mrb_value mrb_str_dump(mrb_state *mrb, mrb_value str) @@ -2701,12 +3114,21 @@ mrb_str_dump(mrb_state *mrb, mrb_value str) return str_escape(mrb, str, FALSE); } +/* + * @param mrb The mruby state. + * @param str The mruby string to append to (modified in place). + * @param ptr A pointer to the C string to append. + * @param len The length of the C string to append. + * @return The modified mruby string `str`. + * + * Appends a C string of a given length to an mruby string. + * The mruby string `str` is modified in place. Handles resizing and + * potential overlap if `ptr` is within `str`'s buffer. + */ MRB_API mrb_value mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len) { struct RString *s = mrb_str_ptr(str); - mrb_int capa; - mrb_int total; ptrdiff_t off = -1; if (len == 0) return str; @@ -2715,7 +3137,8 @@ mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len) off = ptr - RSTR_PTR(s); } - capa = RSTR_CAPA(s); + mrb_int capa = RSTR_CAPA(s); + mrb_int total; if (mrb_int_add_overflow(RSTR_LEN(s), len, &total)) { size_error: mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big"); @@ -2736,12 +3159,30 @@ mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len) return str; } +/* + * @param mrb The mruby state. + * @param str The mruby string to append to (modified in place). + * @param ptr A pointer to the null-terminated C string to append. + * @return The modified mruby string `str`. + * + * Appends a null-terminated C string to an mruby string. + * The mruby string `str` is modified in place. + */ MRB_API mrb_value mrb_str_cat_cstr(mrb_state *mrb, mrb_value str, const char *ptr) { return mrb_str_cat(mrb, str, ptr, ptr ? strlen(ptr) : 0); } +/* + * @param mrb The mruby state. + * @param str The mruby string to append to (modified in place). + * @param str2 The mruby string to append. + * @return The modified mruby string `str`. + * + * Appends an mruby string (`str2`) to another mruby string (`str`). + * The mruby string `str` is modified in place. Handles self-appendage. + */ MRB_API mrb_value mrb_str_cat_str(mrb_state *mrb, mrb_value str, mrb_value str2) { @@ -2751,6 +3192,17 @@ mrb_str_cat_str(mrb_state *mrb, mrb_value str, mrb_value str2) return mrb_str_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2)); } +/* + * @param mrb The mruby state. + * @param str1 The mruby string to append to (modified in place). + * @param str2 The mruby value to append (will be converted to a string if not already one). + * @return The modified mruby string `str1`. + * + * Appends an mruby value (`str2`) to an mruby string (`str1`). + * `str2` is first ensured to be a string (converted if necessary). + * Then, `str1` is modified in place. This is similar to `mrb_str_concat` + * but `mrb_str_concat` takes `self` and `other` as parameters. + */ MRB_API mrb_value mrb_str_append(mrb_state *mrb, mrb_value str1, mrb_value str2) { @@ -2769,6 +3221,15 @@ mrb_str_append(mrb_state *mrb, mrb_value str1, mrb_value str2) * str[3] = "\b" * str.inspect #=> "\"hel\\bo\"" */ +/* + * @param mrb The mruby state. + * @param str The mruby string to inspect. + * @return A new mruby string that is the inspect-representation of `str`. + * + * Returns a human-readable, printable version of the string, typically + * surrounded by quotes and with special characters escaped. + * UTF-8 characters are preserved if `MRB_UTF8_STRING` is defined and `inspect` is true. + */ mrb_value mrb_str_inspect(mrb_state *mrb, mrb_value str) { @@ -2789,7 +3250,7 @@ mrb_str_bytes(mrb_state *mrb, mrb_value str) { struct RString *s = mrb_str_ptr(str); mrb_value a = mrb_ary_new_capa(mrb, RSTR_LEN(s)); - unsigned char *p = (unsigned char *)(RSTR_PTR(s)), *pend = p + RSTR_LEN(s); + unsigned char *p = (unsigned char*)(RSTR_PTR(s)), *pend = p + RSTR_LEN(s); while (p < pend) { mrb_ary_push(mrb, a, mrb_fixnum_value(p[0])); @@ -2802,7 +3263,7 @@ mrb_str_bytes(mrb_state *mrb, mrb_value str) * call-seq: * str.getbyte(index) -> 0 .. 255 * - * returns the indexth byte as an integer. + * returns the *index*th byte as an integer. */ static mrb_value mrb_str_getbyte(mrb_state *mrb, mrb_value str) @@ -2822,17 +3283,16 @@ mrb_str_getbyte(mrb_state *mrb, mrb_value str) * call-seq: * str.setbyte(index, integer) -> integer * - * modifies the indexth byte as integer. + * modifies the *index*th byte as *integer*. */ static mrb_value mrb_str_setbyte(mrb_state *mrb, mrb_value str) { mrb_int pos, byte; - mrb_int len; mrb_get_args(mrb, "ii", &pos, &byte); - len = RSTRING_LEN(str); + mrb_int len = RSTRING_LEN(str); if (pos < -len || len <= pos) mrb_raisef(mrb, E_INDEX_ERROR, "index %i out of string", pos); if (pos < 0) @@ -2855,8 +3315,8 @@ mrb_str_setbyte(mrb_state *mrb, mrb_value str) * objects, returns a substring starting at the offset given by the first, and * a length given by the second. If given a Range, a substring containing * bytes at offsets given by the range is returned. In all three cases, if - * an offset is negative, it is counted from the end of str. Returns - * nil if the initial offset falls outside the string, the length + * an offset is negative, it is counted from the end of *str*. Returns + * `nil` if the initial offset falls outside the string, the length * is negative, or the beginning of the range is greater than the end. * The encoding of the resulted string keeps original encoding. * @@ -2911,10 +3371,12 @@ sub_replace(mrb_state *mrb, mrb_value self) char *p, *match; mrb_int plen, mlen; mrb_int found, offset; - mrb_value result; mrb_get_args(mrb, "ssi", &p, &plen, &match, &mlen, &found); - result = mrb_str_new(mrb, 0, 0); + if (found < 0 || RSTRING_LEN(self) < found) { + mrb_raise(mrb, E_RUNTIME_ERROR, "argument out of range"); + } + mrb_value result = mrb_str_new(mrb, 0, 0); for (mrb_int i=0; i= len2) { + memmove(RSTR_PTR(s)+idx1, RSTRING_PTR(replace)+idx2, len2); + if (len1 > len2) { + memmove(RSTR_PTR(s)+idx1+len2, RSTR_PTR(s)+idx1+len1, RSTR_LEN(s)-(idx1+len1)); + RSTR_SET_LEN(s, RSTR_LEN(s)-(len1-len2)); + } + } + else { /* len1 < len2 */ + mrb_int slen = RSTR_LEN(s); + mrb_str_resize(mrb, str, slen+len2-len1); + memmove(RSTR_PTR(s)+idx1+len2, RSTR_PTR(s)+idx1+len1, slen-(idx1+len1)); + memmove(RSTR_PTR(s)+idx1, RSTRING_PTR(replace)+idx2, len2); + } + return str; +} + +/* + * call-seq: + * bytesplice(index, length, str) -> string + * bytesplice(index, length, str, str_index, str_length) -> string + * bytesplice(range, str) -> string + * bytesplice(range, str, str_range) -> string + * + * Replaces some or all of the content of `self` with `str`, and returns `self`. + * The portion of the string affected is determined using + * the same criteria as String#byteslice, except that `length` cannot be omitted. + * If the replacement string is not the same length as the text it is replacing, + * the string will be adjusted accordingly. + * + * If `str_index` and `str_length`, or `str_range` are given, the content of `self` + * is replaced by str.byteslice(str_index, str_length) or str.byteslice(str_range); + * however the substring of `str` is not allocated as a new string. + * + * The form that take an Integer will raise an IndexError if the value is out + * of range; the Range form will raise a RangeError. + * If the beginning or ending offset does not land on character (codepoint) + * boundary, an IndexError will be raised. + */ +static mrb_value +mrb_str_bytesplice(mrb_state *mrb, mrb_value str) +{ + mrb_int idx1, len1, idx2, len2; + mrb_value range1, range2, replace; + switch (mrb_get_argc(mrb)) { + case 3: + mrb_get_args(mrb, "ooo", &range1, &replace, &range2); + if (mrb_integer_p(range1)) { + mrb_get_args(mrb, "iiS", &idx1, &len1, &replace); + return str_bytesplice(mrb, str, idx1, len1, replace, 0, RSTRING_LEN(replace)); + } + mrb_ensure_string_type(mrb, replace); + if (mrb_range_beg_len(mrb, range1, &idx1, &len1, RSTRING_LEN(str), FALSE) != MRB_RANGE_OK) break; + if (mrb_range_beg_len(mrb, range2, &idx2, &len2, RSTRING_LEN(replace), FALSE) != MRB_RANGE_OK) break; + return str_bytesplice(mrb, str, idx1, len1, replace, idx2, len2); + case 5: + mrb_get_args(mrb, "iiSii", &idx1, &len1, &replace, &idx2, &len2); + return str_bytesplice(mrb, str, idx1, len1, replace, idx2, len2); + case 2: + mrb_get_args(mrb, "oS", &range1, &replace); + if (mrb_range_beg_len(mrb, range1, &idx1, &len1, RSTRING_LEN(str), FALSE) == MRB_RANGE_OK) { + return str_bytesplice(mrb, str, idx1, len1, replace, 0, RSTRING_LEN(replace)); + } + default: + break; + } + mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arumgnts"); +} + +static mrb_value +mrb_encoding(mrb_state *mrb, mrb_value self) +{ + mrb_get_args(mrb, ""); +#ifdef MRB_UTF8_STRING + return mrb_str_new_lit(mrb, "UTF-8"); +#else + return mrb_str_new_lit(mrb, "ASCII-8BIT"); +#endif +} + /* ---------------------------*/ +static const mrb_mt_entry string_rom_entries[] = { + MRB_MT_ENTRY(mrb_str_bytesize, MRB_SYM(bytesize), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_str_cmp_m, MRB_OPSYM(cmp), MRB_ARGS_REQ(1)), /* 15.2.10.5.1 */ + MRB_MT_ENTRY(mrb_str_equal_m, MRB_OPSYM(eq), MRB_ARGS_REQ(1)), /* 15.2.10.5.2 */ + MRB_MT_ENTRY(mrb_str_plus_m, MRB_OPSYM(add), MRB_ARGS_REQ(1)), /* 15.2.10.5.4 */ + MRB_MT_ENTRY(mrb_str_times, MRB_OPSYM(mul), MRB_ARGS_REQ(1)), /* 15.2.10.5.5 */ + MRB_MT_ENTRY(mrb_str_aref_m, MRB_OPSYM(aref), MRB_ARGS_ANY()), /* 15.2.10.5.6 */ + MRB_MT_ENTRY(mrb_str_aset_m, MRB_OPSYM(aset), MRB_ARGS_ANY()), + MRB_MT_ENTRY(mrb_str_capitalize, MRB_SYM(capitalize), MRB_ARGS_NONE()), /* 15.2.10.5.7 */ + MRB_MT_ENTRY(mrb_str_capitalize_bang, MRB_SYM_B(capitalize), MRB_ARGS_NONE()), /* 15.2.10.5.8 */ + MRB_MT_ENTRY(mrb_str_chomp, MRB_SYM(chomp), MRB_ARGS_ANY()), /* 15.2.10.5.9 */ + MRB_MT_ENTRY(mrb_str_chomp_bang, MRB_SYM_B(chomp), MRB_ARGS_ANY()), /* 15.2.10.5.10 */ + MRB_MT_ENTRY(mrb_str_chop, MRB_SYM(chop), MRB_ARGS_NONE()), /* 15.2.10.5.11 */ + MRB_MT_ENTRY(mrb_str_chop_bang, MRB_SYM_B(chop), MRB_ARGS_NONE()), /* 15.2.10.5.12 */ + MRB_MT_ENTRY(mrb_str_downcase, MRB_SYM(downcase), MRB_ARGS_NONE()), /* 15.2.10.5.13 */ + MRB_MT_ENTRY(mrb_str_downcase_bang, MRB_SYM_B(downcase), MRB_ARGS_NONE()), /* 15.2.10.5.14 */ + MRB_MT_ENTRY(mrb_str_empty_p, MRB_SYM_Q(empty), MRB_ARGS_NONE()), /* 15.2.10.5.16 */ + MRB_MT_ENTRY(mrb_str_eql, MRB_SYM_Q(eql), MRB_ARGS_REQ(1)), /* 15.2.10.5.17 */ + MRB_MT_ENTRY(mrb_str_hash_m, MRB_SYM(hash), MRB_ARGS_NONE()), /* 15.2.10.5.20 */ + MRB_MT_ENTRY(mrb_str_include, MRB_SYM_Q(include), MRB_ARGS_REQ(1)), /* 15.2.10.5.21 */ + MRB_MT_ENTRY(mrb_str_index_m, MRB_SYM(index), MRB_ARGS_ARG(1,1)), /* 15.2.10.5.22 */ + MRB_MT_ENTRY(mrb_str_init, MRB_SYM(initialize), MRB_ARGS_OPT(1) | MRB_MT_PRIVATE), /* 15.2.10.5.23 */ + MRB_MT_ENTRY(mrb_str_replace, MRB_SYM(initialize_copy), MRB_ARGS_REQ(1) | MRB_MT_PRIVATE), /* 15.2.10.5.24 */ + MRB_MT_ENTRY(mrb_str_intern, MRB_SYM(intern), MRB_ARGS_NONE()), /* 15.2.10.5.25 */ + MRB_MT_ENTRY(mrb_str_size, MRB_SYM(length), MRB_ARGS_NONE()), /* 15.2.10.5.26 */ + MRB_MT_ENTRY(mrb_str_replace, MRB_SYM(replace), MRB_ARGS_REQ(1)), /* 15.2.10.5.28 */ + MRB_MT_ENTRY(mrb_str_reverse, MRB_SYM(reverse), MRB_ARGS_NONE()), /* 15.2.10.5.29 */ + MRB_MT_ENTRY(mrb_str_reverse_bang, MRB_SYM_B(reverse), MRB_ARGS_NONE()), /* 15.2.10.5.30 */ + MRB_MT_ENTRY(mrb_str_rindex_m, MRB_SYM(rindex), MRB_ARGS_ANY()), /* 15.2.10.5.31 */ + MRB_MT_ENTRY(mrb_str_size, MRB_SYM(size), MRB_ARGS_NONE()), /* 15.2.10.5.33 */ + MRB_MT_ENTRY(mrb_str_aref_m, MRB_SYM(slice), MRB_ARGS_ANY()), /* 15.2.10.5.34 */ + MRB_MT_ENTRY(mrb_str_split_m, MRB_SYM(split), MRB_ARGS_ANY()), /* 15.2.10.5.35 */ + MRB_MT_ENTRY(mrb_str_to_i, MRB_SYM(to_i), MRB_ARGS_ANY()), /* 15.2.10.5.39 */ + MRB_MT_ENTRY(mrb_str_to_s, MRB_SYM(to_s), MRB_ARGS_NONE()), /* 15.2.10.5.40 */ + MRB_MT_ENTRY(mrb_str_to_s, MRB_SYM(to_str), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_str_intern, MRB_SYM(to_sym), MRB_ARGS_NONE()), /* 15.2.10.5.41 */ + MRB_MT_ENTRY(mrb_str_upcase, MRB_SYM(upcase), MRB_ARGS_NONE()), /* 15.2.10.5.42 */ + MRB_MT_ENTRY(mrb_str_upcase_bang, MRB_SYM_B(upcase), MRB_ARGS_NONE()), /* 15.2.10.5.43 */ + MRB_MT_ENTRY(mrb_str_inspect, MRB_SYM(inspect), MRB_ARGS_NONE()), /* 15.2.10.5.46(x) */ + MRB_MT_ENTRY(mrb_str_bytes, MRB_SYM(bytes), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_str_getbyte, MRB_SYM(getbyte), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(mrb_str_setbyte, MRB_SYM(setbyte), MRB_ARGS_REQ(2)), + MRB_MT_ENTRY(mrb_str_byteindex_m, MRB_SYM(byteindex), MRB_ARGS_ARG(1,1)), + MRB_MT_ENTRY(mrb_str_byterindex_m, MRB_SYM(byterindex), MRB_ARGS_ARG(1,1)), + MRB_MT_ENTRY(mrb_str_byteslice, MRB_SYM(byteslice), MRB_ARGS_ARG(1,1)), + MRB_MT_ENTRY(mrb_str_bytesplice, MRB_SYM(bytesplice), MRB_ARGS_ANY()), + MRB_MT_ENTRY(sub_replace, MRB_SYM(__sub_replace), MRB_ARGS_REQ(3)), /* internal */ +#ifndef MRB_NO_FLOAT + MRB_MT_ENTRY(mrb_str_to_f, MRB_SYM(to_f), MRB_ARGS_NONE()), /* 15.2.10.5.38 */ +#endif +}; + void mrb_init_string(mrb_state *mrb) { struct RClass *s; - mrb_static_assert(RSTRING_EMBED_LEN_MAX < (1 << MRB_STR_EMBED_LEN_BIT), + mrb_static_assert(RSTRING_EMBED_LEN_MAX < (1 << MRB_STR_EMBED_LEN_BITS), "pointer size too big for embedded string"); - mrb->string_class = s = mrb_define_class(mrb, "String", mrb->object_class); /* 15.2.10 */ + mrb->string_class = s = mrb_define_class_id(mrb, MRB_SYM(String), mrb->object_class); /* 15.2.10 */ MRB_SET_INSTANCE_TT(s, MRB_TT_STRING); - mrb_define_method(mrb, s, "bytesize", mrb_str_bytesize, MRB_ARGS_NONE()); - - mrb_define_method(mrb, s, "<=>", mrb_str_cmp_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.1 */ - mrb_define_method(mrb, s, "==", mrb_str_equal_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.2 */ - mrb_define_method(mrb, s, "+", mrb_str_plus_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */ - mrb_define_method(mrb, s, "*", mrb_str_times, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */ - mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.6 */ - mrb_define_method(mrb, s, "[]=", mrb_str_aset_m, MRB_ARGS_ANY()); - mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, MRB_ARGS_NONE()); /* 15.2.10.5.7 */ - mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_NONE()); /* 15.2.10.5.8 */ - mrb_define_method(mrb, s, "chomp", mrb_str_chomp, MRB_ARGS_ANY()); /* 15.2.10.5.9 */ - mrb_define_method(mrb, s, "chomp!", mrb_str_chomp_bang, MRB_ARGS_ANY()); /* 15.2.10.5.10 */ - mrb_define_method(mrb, s, "chop", mrb_str_chop, MRB_ARGS_NONE()); /* 15.2.10.5.11 */ - mrb_define_method(mrb, s, "chop!", mrb_str_chop_bang, MRB_ARGS_NONE()); /* 15.2.10.5.12 */ - mrb_define_method(mrb, s, "downcase", mrb_str_downcase, MRB_ARGS_NONE()); /* 15.2.10.5.13 */ - mrb_define_method(mrb, s, "downcase!", mrb_str_downcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.14 */ - mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, MRB_ARGS_NONE()); /* 15.2.10.5.16 */ - mrb_define_method(mrb, s, "eql?", mrb_str_eql, MRB_ARGS_REQ(1)); /* 15.2.10.5.17 */ - - mrb_define_method(mrb, s, "hash", mrb_str_hash_m, MRB_ARGS_NONE()); /* 15.2.10.5.20 */ - mrb_define_method(mrb, s, "include?", mrb_str_include, MRB_ARGS_REQ(1)); /* 15.2.10.5.21 */ - mrb_define_method(mrb, s, "index", mrb_str_index_m, MRB_ARGS_ARG(1,1)); /* 15.2.10.5.22 */ - mrb_define_method(mrb, s, "initialize", mrb_str_init, MRB_ARGS_REQ(1)); /* 15.2.10.5.23 */ - mrb_define_method(mrb, s, "initialize_copy", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.24 */ - mrb_define_method(mrb, s, "intern", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.25 */ - mrb_define_method(mrb, s, "length", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.26 */ - mrb_define_method(mrb, s, "replace", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.28 */ - mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE()); /* 15.2.10.5.29 */ - mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, MRB_ARGS_NONE()); /* 15.2.10.5.30 */ - mrb_define_method(mrb, s, "rindex", mrb_str_rindex_m, MRB_ARGS_ANY()); /* 15.2.10.5.31 */ - mrb_define_method(mrb, s, "size", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.33 */ - mrb_define_method(mrb, s, "slice", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.34 */ - mrb_define_method(mrb, s, "split", mrb_str_split_m, MRB_ARGS_ANY()); /* 15.2.10.5.35 */ + MRB_MT_INIT_ROM(mrb, s, string_rom_entries); -#ifndef MRB_NO_FLOAT - mrb_define_method(mrb, s, "to_f", mrb_str_to_f, MRB_ARGS_NONE()); /* 15.2.10.5.38 */ -#endif - mrb_define_method(mrb, s, "to_i", mrb_str_to_i, MRB_ARGS_ANY()); /* 15.2.10.5.39 */ - mrb_define_method(mrb, s, "to_s", mrb_str_to_s, MRB_ARGS_NONE()); /* 15.2.10.5.40 */ - mrb_define_method(mrb, s, "to_str", mrb_str_to_s, MRB_ARGS_NONE()); - mrb_define_method(mrb, s, "to_sym", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.41 */ - mrb_define_method(mrb, s, "upcase", mrb_str_upcase, MRB_ARGS_NONE()); /* 15.2.10.5.42 */ - mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.43 */ - mrb_define_method(mrb, s, "inspect", mrb_str_inspect, MRB_ARGS_NONE()); /* 15.2.10.5.46(x) */ - mrb_define_method(mrb, s, "bytes", mrb_str_bytes, MRB_ARGS_NONE()); - - mrb_define_method(mrb, s, "getbyte", mrb_str_getbyte, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, s, "setbyte", mrb_str_setbyte, MRB_ARGS_REQ(2)); - mrb_define_method(mrb, s, "byteindex", mrb_str_byteindex_m, MRB_ARGS_ARG(1,1)); - mrb_define_method(mrb, s, "byterindex", mrb_str_byterindex_m, MRB_ARGS_ARG(1,1)); - mrb_define_method(mrb, s, "byteslice", mrb_str_byteslice, MRB_ARGS_ARG(1,1)); - - mrb_define_method(mrb, s, "__sub_replace", sub_replace, MRB_ARGS_REQ(3)); /* internal */ + mrb_define_method_id(mrb, mrb->kernel_module, MRB_SYM(__ENCODING__), mrb_encoding, MRB_ARGS_NONE()); } diff --git a/src/symbol.c b/src/symbol.c index 35e489b04f..d769e33783 100644 --- a/src/symbol.c +++ b/src/symbol.c @@ -6,14 +6,15 @@ #include #include -#include +#include #include #include #include +#include +#include +#include +#include #include -#include - -#ifndef MRB_NO_PRESYM #ifndef MRB_PRESYM_SCANNING /* const uint16_t presym_length_table[] */ @@ -26,11 +27,10 @@ presym_find(const char *name, size_t len) { if (presym_length_table[MRB_PRESYM_MAX-1] < len) return 0; - mrb_sym start, idx, presym_size = MRB_PRESYM_MAX; - int cmp; - for (start = 0; presym_size != 0; presym_size/=2) { - idx = start+presym_size/2; - cmp = (int)len-(int)presym_length_table[idx]; + mrb_sym presym_size = MRB_PRESYM_MAX; + for (mrb_sym start = 0; presym_size != 0; presym_size/=2) { + mrb_sym idx = start+presym_size/2; + int cmp = (int)len-(int)presym_length_table[idx]; if (cmp == 0) { cmp = memcmp(name, presym_name_table[idx], len); if (cmp == 0) return idx+1; @@ -51,9 +51,41 @@ presym_sym2name(mrb_sym sym, mrb_int *lenp) return presym_name_table[sym-1]; } -#endif /* MRB_NO_PRESYM */ - /* ------------------------------------------------------ */ + +/* Per-symbol flags (stored in mrb->sym_flags[]) */ +#define SYM_FL_DYNAMIC 0x01 /* created at runtime (to_sym, send, etc.) */ +#define SYM_FL_MARK 0x02 /* marked during symbol GC */ + +#if MRB_SYMBOL_MAX > 0 +static void mrb_symbol_gc(mrb_state *mrb); +#endif + +/* LSB pointer tagging for literal flags */ +#define SYMTBL_LITERAL_FLAG ((uintptr_t)1) + +/* Extract clean pointer for memory operations */ +static inline const char* +symtbl_get_ptr(const char *tagged_ptr) +{ + return (const char*)((uintptr_t)tagged_ptr & ~SYMTBL_LITERAL_FLAG); +} + +/* Check if symbol is literal by testing LSB */ +static inline mrb_bool +symtbl_is_literal(const char *tagged_ptr) +{ + return ((uintptr_t)tagged_ptr & SYMTBL_LITERAL_FLAG) != 0; +} + +/* Create tagged pointer for literal string - with alignment verification */ +static inline const char* +symtbl_tag_literal(const char *ptr) +{ + mrb_assert(((uintptr_t)ptr & 1) == 0); /* Assert alignment */ + return (const char*)((uintptr_t)ptr | SYMTBL_LITERAL_FLAG); +} + static void sym_validate_len(mrb_state *mrb, size_t len) { @@ -62,40 +94,66 @@ sym_validate_len(mrb_state *mrb, size_t len) } } +/* Chunk-based string pool for heap-allocated symbol names */ +#define MRB_SYM_POOL_CHUNK_SIZE 4096 + +struct sym_pool_chunk { + struct sym_pool_chunk *next; + size_t used; + char buf[]; /* flexible array */ +}; + +static char* +sym_pool_alloc(mrb_state *mrb, size_t size) +{ + /* round up to even size to keep pointers even-aligned (LSB tagging) */ + size_t asize = (size + 1) & ~(size_t)1; + struct sym_pool_chunk *chunk = (struct sym_pool_chunk*)mrb->sym_pool; + if (chunk && chunk->used + asize <= MRB_SYM_POOL_CHUNK_SIZE) { + char *p = chunk->buf + chunk->used; + chunk->used += asize; + return p; + } + size_t csize = asize > MRB_SYM_POOL_CHUNK_SIZE ? asize : MRB_SYM_POOL_CHUNK_SIZE; + chunk = (struct sym_pool_chunk*)mrb_malloc(mrb, + offsetof(struct sym_pool_chunk, buf) + csize); + chunk->next = (struct sym_pool_chunk*)mrb->sym_pool; + chunk->used = asize; + mrb->sym_pool = (void*)chunk; + return chunk->buf; +} + +/* Hash table for symbols (allocated on demand when symbols exceed threshold) */ +struct mrb_sym_hash_table { + uint8_t *symlink; /* collision resolution chains */ + mrb_sym buckets[256]; /* hash buckets */ +}; + #ifdef MRB_USE_ALL_SYMBOLS # define SYMBOL_INLINE_P(sym) FALSE # define sym_inline_pack(name, len) 0 # define sym_inline_unpack(sym, buf, lenp) NULL #else -# define SYMBOL_INLINE_P(sym) ((sym) >= (1<<24)) +# define SYMBOL_INLINE_P(sym) ((sym) >= (1<<20)) static const char pack_table[] = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; static mrb_sym sym_inline_pack(const char *name, size_t len) { -#if defined(MRB_WORD_BOXING) && defined(MRB_32BIT) && !defined(MRB_WORDBOX_NO_FLOAT_TRUNCATE) const size_t pack_length_max = 4; -#else - const size_t pack_length_max = 5; -#endif - - char c; - const char *p; - size_t i; mrb_sym sym = 0; if (len > pack_length_max) return 0; /* too long */ if (len == 0) return 0; /* empty string */ - for (i=0; i>(24-i*6) & 0x3f; + for (i=0; i>(20-i*6) & 0x3f; if (bits == 0) break; - buf[i] = pack_table[bits-1];; + buf[i] = pack_table[bits-1]; } buf[i] = '\0'; if (lenp) *lenp = i; @@ -119,18 +178,22 @@ sym_inline_unpack(mrb_sym sym, char *buf, mrb_int *lenp) } #endif -#define sym_lit_p(mrb, i) (mrb->symflags[i>>3]&(1<<(i&7))) -#define sym_lit_set(mrb, i) mrb->symflags[i>>3]|=(1<<(i&7)) -#define sym_flags_clear(mrb, i) mrb->symflags[i>>3]&=~(1<<(i&7)) -#define sym_len(mrb, i) (size_t)(sym_lit_p(mrb, i)?strlen(mrb->symtbl[i]):mrb_packed_int_decode(mrb->symtbl[i],NULL)) +/* Check if using hash table mode */ +static inline mrb_bool +using_hash_table(mrb_state *mrb) +{ + return mrb->symhash != NULL; +} static mrb_bool sym_check(mrb_state *mrb, const char *name, size_t len, mrb_sym i) { - const char *symname = mrb->symtbl[i]; + const char *tagged_ptr = mrb->symtbl[i]; + if (tagged_ptr == NULL) return FALSE; /* tombstone (freed by symbol GC) */ + const char *symname = symtbl_get_ptr(tagged_ptr); /* Untag for access */ size_t symlen; - if (sym_lit_p(mrb, i)) { + if (symtbl_is_literal(tagged_ptr)) { symlen = strlen(symname); } else { @@ -144,31 +207,36 @@ sym_check(mrb_state *mrb, const char *name, size_t len, mrb_sym i) } static mrb_sym -find_symbol(mrb_state *mrb, const char *name, size_t len, uint8_t *hashp) +find_symbol_linear(mrb_state *mrb, const char *name, size_t len) { mrb_sym i; - uint8_t hash; -#ifndef MRB_NO_PRESYM - /* presym */ - i = presym_find(name, len); - if (i > 0) return i; -#endif + for (i = 1; i <= mrb->symidx; i++) { + if (mrb->symtbl[i] == NULL) continue; /* skip tombstones */ + if (sym_check(mrb, name, len, i)) { + return (i + MRB_PRESYM_MAX); + } + } + return 0; +} - /* inline symbol */ - i = sym_inline_pack(name, len); - if (i > 0) return i; +static mrb_sym +find_symbol_hash(mrb_state *mrb, const char *name, size_t len, uint8_t *hashp) +{ + mrb_sym i; + uint8_t hash; + struct mrb_sym_hash_table *ht = mrb->symhash; hash = mrb_byte_hash((const uint8_t*)name, len); if (hashp) *hashp = hash; - i = mrb->symhash[hash]; + i = ht->buckets[hash]; if (i == 0) return 0; for (;;) { if (sym_check(mrb, name, len, i)) { return (i+MRB_PRESYM_MAX); } - uint8_t diff = mrb->symlink[i]; + uint8_t diff = ht->symlink[i]; if (diff == 0xff) { i -= 0xff; while (i > 0) { @@ -186,80 +254,268 @@ find_symbol(mrb_state *mrb, const char *name, size_t len, uint8_t *hashp) } static mrb_sym -sym_intern(mrb_state *mrb, const char *name, size_t len, mrb_bool lit) +find_symbol(mrb_state *mrb, const char *name, size_t len, uint8_t *hashp) { - mrb_sym sym; - uint8_t hash; + mrb_sym i; - sym_validate_len(mrb, len); - sym = find_symbol(mrb, name, len, &hash); - if (sym > 0) return sym; + /* presym */ + i = presym_find(name, len); + if (i > 0) return i; + + /* inline symbol */ + i = sym_inline_pack(name, len); + if (i > 0) return i; + + if (using_hash_table(mrb)) { + /* Hash table mode - O(1) average case */ + return find_symbol_hash(mrb, name, len, hashp); + } + else { + /* Linear mode - O(n) but fast for small n */ + if (hashp) *hashp = mrb_byte_hash((const uint8_t*)name, len); + return find_symbol_linear(mrb, name, len); + } +} + +static void +migrate_to_hash_table(mrb_state *mrb) +{ + struct mrb_sym_hash_table *ht; + mrb_sym i; + + mrb_assert(mrb->symhash == NULL); + mrb_assert(mrb->symidx >= MRB_SYMBOL_LINEAR_THRESHOLD); + + /* Allocate hash table structure */ + ht = (struct mrb_sym_hash_table*)mrb_calloc(mrb, 1, sizeof(struct mrb_sym_hash_table)); + ht->symlink = (uint8_t*)mrb_calloc(mrb, mrb->symcapa, sizeof(uint8_t)); + + /* Rebuild hash table from existing linear data */ + for (i = 1; i <= mrb->symidx; i++) { + const char *tagged_ptr = mrb->symtbl[i]; + const char *name = symtbl_get_ptr(tagged_ptr); + size_t len; + uint8_t hash; + + /* Get name and length from tagged pointer */ + if (symtbl_is_literal(tagged_ptr)) { + len = strlen(name); + } + else { + /* This is a packed length string */ + len = mrb_packed_int_decode((const uint8_t*)name, (const uint8_t**)&name); + } + + hash = mrb_byte_hash((const uint8_t*)name, len); + + /* Build collision chain */ + if (ht->buckets[hash] != 0) { + mrb_sym diff = i - ht->buckets[hash]; + ht->symlink[i] = (diff > 0xff) ? 0xff : (uint8_t)diff; + } + else { + ht->symlink[i] = 0; + } + ht->buckets[hash] = i; + } + + mrb->symhash = ht; +} + +static mrb_sym +sym_intern_common(mrb_state *mrb, const char *name, size_t len, mrb_bool lit) +{ + mrb_sym sym; - /* registering a new symbol */ sym = mrb->symidx + 1; if (mrb->symcapa <= sym) { size_t symcapa = mrb->symcapa; if (symcapa == 0) symcapa = 100; else symcapa = (size_t)(symcapa * 6 / 5); mrb->symtbl = (const char**)mrb_realloc(mrb, (void*)mrb->symtbl, sizeof(char*)*symcapa); - mrb->symflags = (uint8_t*)mrb_realloc(mrb, mrb->symflags, symcapa/8+1); - memset(mrb->symflags+mrb->symcapa/8+1, 0, (symcapa-mrb->symcapa)/8); - mrb->symlink = (uint8_t*)mrb_realloc(mrb, mrb->symlink, symcapa); + mrb->sym_flags = (uint8_t*)mrb_realloc(mrb, mrb->sym_flags, symcapa); + memset(mrb->sym_flags + mrb->symcapa, 0, symcapa - mrb->symcapa); + if (using_hash_table(mrb)) { + struct mrb_sym_hash_table *ht = mrb->symhash; + ht->symlink = (uint8_t*)mrb_realloc(mrb, ht->symlink, symcapa); + } mrb->symcapa = symcapa; } - sym_flags_clear(mrb, sym); - if ((lit || mrb_ro_data_p(name)) && strlen(name) == len) { - sym_lit_set(mrb, sym); - mrb->symtbl[sym] = name; + + lit = lit || mrb_ro_data_p(name); + if (lit && name[len] == 0 && strlen(name) == len) { + if (((uintptr_t)name & 1) != 0) { + /* Fallback: unaligned literal, allocate heap copy */ + goto heap_allocation; + } + mrb->symtbl[sym] = symtbl_tag_literal(name); } else { + heap_allocation:; uint32_t ulen = (uint32_t)len; size_t ilen = mrb_packed_int_len(ulen); - char *p = (char *)mrb_malloc(mrb, len+ilen+1); - mrb_packed_int_encode(ulen, (uint8_t*)p, (uint8_t*)p+ilen); + char *p; + if (lit) { + /* Static symbol from unaligned literal: use pool (not individually freeable) */ + p = sym_pool_alloc(mrb, len+ilen+1); + } + else { + /* Dynamic symbol: use individual malloc (freeable by symbol GC) */ + p = (char*)mrb_malloc(mrb, len+ilen+1); + } + mrb_packed_int_encode(ulen, (uint8_t*)p); memcpy(p+ilen, name, len); p[ilen+len] = 0; - mrb->symtbl[sym] = p; + mrb->symtbl[sym] = p; /* Untagged = heap */ + } + + mrb->symidx = sym; + if (!lit) { + mrb->sym_flags[sym] = SYM_FL_DYNAMIC; + mrb->dynamic_sym_count++; + } + else { + mrb->sym_flags[sym] = 0; } - if (mrb->symhash[hash]) { - mrb_sym i = sym - mrb->symhash[hash]; + return sym; +} + +static mrb_sym +sym_intern_linear_mode(mrb_state *mrb, const char *name, size_t len, mrb_bool lit) +{ + mrb_sym sym = sym_intern_common(mrb, name, len, lit); + return (sym+MRB_PRESYM_MAX); +} + +static mrb_sym +sym_intern_hash_mode(mrb_state *mrb, const char *name, size_t len, mrb_bool lit) +{ + mrb_sym sym = sym_intern_common(mrb, name, len, lit); + struct mrb_sym_hash_table *ht = mrb->symhash; + uint8_t hash = mrb_byte_hash((const uint8_t*)name, len); + + if (ht->buckets[hash]) { + mrb_sym i = sym - ht->buckets[hash]; if (i > 0xff) - mrb->symlink[sym] = 0xff; + ht->symlink[sym] = 0xff; else - mrb->symlink[sym] = i; + ht->symlink[sym] = i; } else { - mrb->symlink[sym] = 0; + ht->symlink[sym] = 0; } - mrb->symhash[hash] = mrb->symidx = sym; + ht->buckets[hash] = sym; return (sym+MRB_PRESYM_MAX); } +static mrb_sym +sym_intern(mrb_state *mrb, const char *name, size_t len, mrb_bool lit) +{ + mrb_sym sym; + + sym_validate_len(mrb, len); + sym = find_symbol(mrb, name, len, NULL); + if (sym > 0) return sym; + +#if MRB_SYMBOL_MAX > 0 + if (!lit && mrb->dynamic_sym_count >= MRB_SYMBOL_MAX) { + mrb_symbol_gc(mrb); + /* re-check: the symbol might have been reclaimed and re-interned */ + sym = find_symbol(mrb, name, len, NULL); + if (sym > 0) return sym; + if (mrb->dynamic_sym_count >= MRB_SYMBOL_MAX) { + mrb_raise(mrb, E_RUNTIME_ERROR, "symbol table overflow"); + } + } +#endif + + /* Check if we need to migrate to hash table */ + if (!using_hash_table(mrb) && mrb->symidx >= MRB_SYMBOL_LINEAR_THRESHOLD) { + migrate_to_hash_table(mrb); + } + + /* Add new symbol using current mode */ + if (using_hash_table(mrb)) { + return sym_intern_hash_mode(mrb, name, len, lit); + } + else { + return sym_intern_linear_mode(mrb, name, len, lit); + } +} + +/* + * Interns a string, creating a symbol from it if it doesn't already exist, + * or returning the existing symbol if it does. + * + * mrb: The mruby state. + * name: The string to intern. + * len: The length of the string. + * + * Returns the interned symbol. + */ MRB_API mrb_sym mrb_intern(mrb_state *mrb, const char *name, size_t len) { return sym_intern(mrb, name, len, FALSE); } +/* + * Interns a static string, creating a symbol from it. + * This function is similar to mrb_intern, but it assumes that the given + * string is static and will not be freed. + * + * mrb: The mruby state. + * name: The static string to intern. + * len: The length of the string. + * + * Returns the interned symbol. + */ MRB_API mrb_sym mrb_intern_static(mrb_state *mrb, const char *name, size_t len) { return sym_intern(mrb, name, len, TRUE); } +/* + * Interns a C string (null-terminated), creating a symbol from it. + * This function is a convenience wrapper around mrb_intern that + * automatically calculates the length of the string. + * + * mrb: The mruby state. + * name: The C string to intern. + * + * Returns the interned symbol. + */ MRB_API mrb_sym mrb_intern_cstr(mrb_state *mrb, const char *name) { return mrb_intern(mrb, name, strlen(name)); } +/* + * Interns an mruby string value, creating a symbol from it. + * + * mrb: The mruby state. + * str: The mruby string value to intern. + * + * Returns the interned symbol. + */ MRB_API mrb_sym mrb_intern_str(mrb_state *mrb, mrb_value str) { return mrb_intern(mrb, RSTRING_PTR(str), RSTRING_LEN(str)); } +/* + * Checks if a symbol already exists for the given string. + * + * mrb: The mruby state. + * name: The string to check. + * len: The length of the string. + * + * Returns the symbol if it exists, otherwise 0. + */ MRB_API mrb_sym mrb_intern_check(mrb_state *mrb, const char *name, size_t len) { @@ -271,6 +527,15 @@ mrb_intern_check(mrb_state *mrb, const char *name, size_t len) return 0; } +/* + * Checks if a symbol already exists for the given string. + * + * mrb: The mruby state. + * name: The string to check. + * len: The length of the string. + * + * Returns the symbol as an mrb_value if it exists, otherwise a nil value. + */ MRB_API mrb_value mrb_check_intern(mrb_state *mrb, const char *name, size_t len) { @@ -279,12 +544,32 @@ mrb_check_intern(mrb_state *mrb, const char *name, size_t len) return mrb_symbol_value(sym); } +/* + * Checks if a symbol already exists for the given C string (null-terminated). + * This function is a convenience wrapper around mrb_intern_check that + * automatically calculates the length of the string. + * + * mrb: The mruby state. + * name: The C string to check. + * + * Returns the symbol if it exists, otherwise 0. + */ MRB_API mrb_sym mrb_intern_check_cstr(mrb_state *mrb, const char *name) { return mrb_intern_check(mrb, name, strlen(name)); } +/* + * Checks if a symbol already exists for the given C string (null-terminated). + * This function is similar to mrb_intern_check_cstr, but returns the result + * as an mrb_value (either the symbol or nil). + * + * mrb: The mruby state. + * name: The C string to check. + * + * Returns the symbol as an mrb_value if it exists, otherwise a nil value. + */ MRB_API mrb_value mrb_check_intern_cstr(mrb_state *mrb, const char *name) { @@ -293,12 +578,30 @@ mrb_check_intern_cstr(mrb_state *mrb, const char *name) return mrb_symbol_value(sym); } +/* + * Checks if a symbol already exists for the given mruby string value. + * + * mrb: The mruby state. + * str: The mruby string value to check. + * + * Returns the symbol if it exists, otherwise 0. + */ MRB_API mrb_sym mrb_intern_check_str(mrb_state *mrb, mrb_value str) { return mrb_intern_check(mrb, RSTRING_PTR(str), RSTRING_LEN(str)); } +/* + * Checks if a symbol already exists for the given mruby string value. + * This function is similar to mrb_intern_check_str, but returns the result + * as an mrb_value (either the symbol or nil). + * + * mrb: The mruby state. + * str: The mruby string value to check. + * + * Returns the symbol as an mrb_value if it exists, otherwise a nil value. + */ MRB_API mrb_value mrb_check_intern_str(mrb_state *mrb, mrb_value str) { @@ -313,12 +616,10 @@ sym2name_len(mrb_state *mrb, mrb_sym sym, char *buf, mrb_int *lenp) if (sym == 0) goto outofsym; if (SYMBOL_INLINE_P(sym)) return sym_inline_unpack(sym, buf, lenp); -#ifndef MRB_NO_PRESYM { const char *name = presym_sym2name(sym, lenp); if (name) return name; } -#endif sym -= MRB_PRESYM_MAX; if (mrb->symidx < sym) { @@ -327,8 +628,11 @@ sym2name_len(mrb_state *mrb, mrb_sym sym, char *buf, mrb_int *lenp) return NULL; } - const char *symname = mrb->symtbl[sym]; - if (!sym_lit_p(mrb, sym)) { + const char *tagged_ptr = mrb->symtbl[sym]; + if (tagged_ptr == NULL) goto outofsym; /* tombstone (freed by symbol GC) */ + const char *symname = symtbl_get_ptr(tagged_ptr); /* Untag for access */ + + if (!symtbl_is_literal(tagged_ptr)) { uint32_t len = mrb_packed_int_decode((const uint8_t*)symname, (const uint8_t**)&symname); if (lenp) *lenp = (mrb_int)len; } @@ -338,6 +642,19 @@ sym2name_len(mrb_state *mrb, mrb_sym sym, char *buf, mrb_int *lenp) return symname; } +/* + * Retrieves the name and length of a symbol. + * + * mrb: The mruby state. + * sym: The symbol to retrieve the name and length for. + * lenp: A pointer to an mrb_int where the length of the symbol name will be stored. + * This can be NULL if the length is not needed. + * + * Returns a pointer to the C string representing the symbol's name, + * or NULL if the symbol is invalid. + * For inline symbols, the name is copied to an internal buffer (mrb->symbuf) + * unless MRB_USE_ALL_SYMBOLS is defined. + */ MRB_API const char* mrb_sym_name_len(mrb_state *mrb, mrb_sym sym, mrb_int *lenp) { @@ -348,39 +665,257 @@ mrb_sym_name_len(mrb_state *mrb, mrb_sym sym, mrb_int *lenp) #endif } +/* + * Symbol GC: mark and sweep unreferenced dynamic symbols. + * Called lazily when dynamic symbol count reaches MRB_SYMBOL_MAX. + */ +#if MRB_SYMBOL_MAX > 0 + +/* Mark a runtime symbol as live (skip presym/inline) */ +static void +sym_gc_mark(mrb_state *mrb, mrb_sym sym) +{ + if (sym == 0) return; + if (SYMBOL_INLINE_P(sym)) return; + if (sym <= MRB_PRESYM_MAX) return; + mrb_sym idx = sym - MRB_PRESYM_MAX; + if (idx > mrb->symidx) return; + mrb->sym_flags[idx] |= SYM_FL_MARK; +} + +/* Callback: mark symbols in method table keys */ +static int +sym_gc_mark_mt(mrb_state *mrb, mrb_sym sym, mrb_method_t m, void *p) +{ + sym_gc_mark(mrb, sym); + return 0; +} + +/* Callback: mark symbols in IV table keys */ +static int +sym_gc_mark_iv(mrb_state *mrb, mrb_sym sym, mrb_value v, void *p) +{ + sym_gc_mark(mrb, sym); + /* also mark symbol values stored in IV tables */ + if (mrb_symbol_p(v)) { + sym_gc_mark(mrb, mrb_symbol(v)); + } + return 0; +} + +/* Callback: mark symbols in hash keys and values */ +static int +sym_gc_mark_hash_entry(mrb_state *mrb, mrb_value key, mrb_value val, void *p) +{ + if (mrb_symbol_p(key)) sym_gc_mark(mrb, mrb_symbol(key)); + if (mrb_symbol_p(val)) sym_gc_mark(mrb, mrb_symbol(val)); + return 0; +} + +/* Mark symbols from a single object */ +static int +sym_gc_mark_object(mrb_state *mrb, struct RBasic *obj, void *data) +{ + if (mrb_object_dead_p(mrb, obj)) return MRB_EACH_OBJ_OK; + + switch (obj->tt) { + case MRB_TT_CLASS: + case MRB_TT_MODULE: + case MRB_TT_SCLASS: + mrb_mt_foreach(mrb, (struct RClass*)obj, sym_gc_mark_mt, NULL); + /* fall through for IV */ + case MRB_TT_OBJECT: + case MRB_TT_EXCEPTION: + case MRB_TT_CDATA: + mrb_iv_foreach(mrb, mrb_obj_value(obj), sym_gc_mark_iv, NULL); + break; + case MRB_TT_ICLASS: + if (MRB_FLAG_TEST(obj, MRB_FL_CLASS_IS_ORIGIN)) { + mrb_mt_foreach(mrb, (struct RClass*)obj, sym_gc_mark_mt, NULL); + } + break; + case MRB_TT_ENV: + { + struct REnv *e = (struct REnv*)obj; + sym_gc_mark(mrb, e->mid); + mrb_int len = MRB_ENV_LEN(e); + for (mrb_int i = 0; i < len; i++) { + if (mrb_symbol_p(e->stack[i])) { + sym_gc_mark(mrb, mrb_symbol(e->stack[i])); + } + } + } + break; + case MRB_TT_STRUCT: + case MRB_TT_ARRAY: + { + struct RArray *a = (struct RArray*)obj; + mrb_int len = ARY_LEN(a); + const mrb_value *p = ARY_PTR(a); + for (mrb_int i = 0; i < len; i++) { + if (mrb_symbol_p(p[i])) { + sym_gc_mark(mrb, mrb_symbol(p[i])); + } + } + } + break; + case MRB_TT_HASH: + mrb_iv_foreach(mrb, mrb_obj_value(obj), sym_gc_mark_iv, NULL); + mrb_hash_foreach(mrb, (struct RHash*)obj, sym_gc_mark_hash_entry, NULL); + break; + default: + break; + } + return MRB_EACH_OBJ_OK; +} + +/* Mark symbols from VM stack and callinfo */ +static void +sym_gc_mark_context(mrb_state *mrb, struct mrb_context *c) +{ + if (!c || !c->stbase) return; + + /* Mark symbols on value stack */ + mrb_value *stend = c->ci ? c->ci->stack + mrb_ci_nregs(c->ci) : c->stbase; + if (stend > c->stend) stend = c->stend; + for (mrb_value *v = c->stbase; v < stend; v++) { + if (mrb_symbol_p(*v)) { + sym_gc_mark(mrb, mrb_symbol(*v)); + } + } + + /* Mark method IDs in call stack */ + if (c->cibase) { + for (mrb_callinfo *ci = c->cibase; ci <= c->ci; ci++) { + sym_gc_mark(mrb, ci->mid); + } + } +} + +static void +mrb_symbol_gc(mrb_state *mrb) +{ + static mrb_bool in_symbol_gc = FALSE; + mrb_sym i; + + if (mrb->symidx == 0) return; + if (in_symbol_gc) return; /* prevent recursive invocation */ + in_symbol_gc = TRUE; + + /* Phase 1: clear marks on all dynamic symbols */ + for (i = 1; i <= mrb->symidx; i++) { + mrb->sym_flags[i] &= ~SYM_FL_MARK; + } + + /* Phase 2: mark symbols referenced from all objects */ + /* Note: mrb_objspace_each_objects runs full GC first, then iterates */ + mrb_objspace_each_objects(mrb, sym_gc_mark_object, NULL); + + /* Mark symbols from root context */ + sym_gc_mark_context(mrb, mrb->root_c); + if (mrb->c != mrb->root_c) { + sym_gc_mark_context(mrb, mrb->c); + } + + /* Mark symbols from global variable table */ + if (mrb->globals) { + mrb_iv_foreach(mrb, mrb_obj_value(mrb->object_class), sym_gc_mark_iv, NULL); + } + + /* Phase 3: sweep unmarked dynamic symbols */ + mrb_sym freed = 0; + for (i = 1; i <= mrb->symidx; i++) { + if ((mrb->sym_flags[i] & SYM_FL_DYNAMIC) && + !(mrb->sym_flags[i] & SYM_FL_MARK)) { + /* Free individually-allocated string */ + mrb_free(mrb, (void*)mrb->symtbl[i]); + mrb->symtbl[i] = NULL; /* tombstone */ + mrb->sym_flags[i] = 0; + freed++; + } + } + mrb->dynamic_sym_count -= freed; + + /* Phase 4: rebuild hash table if in hash mode (chains may be broken) */ + if (freed > 0 && using_hash_table(mrb)) { + struct mrb_sym_hash_table *ht = mrb->symhash; + memset(ht->buckets, 0, sizeof(ht->buckets)); + memset(ht->symlink, 0, mrb->symcapa); + for (i = 1; i <= mrb->symidx; i++) { + if (mrb->symtbl[i] == NULL) continue; /* skip tombstones */ + const char *name = symtbl_get_ptr(mrb->symtbl[i]); + size_t len; + if (symtbl_is_literal(mrb->symtbl[i])) { + len = strlen(name); + } + else { + len = mrb_packed_int_decode((const uint8_t*)name, (const uint8_t**)&name); + } + uint8_t hash = mrb_byte_hash((const uint8_t*)name, len); + if (ht->buckets[hash] != 0) { + mrb_sym diff = i - ht->buckets[hash]; + ht->symlink[i] = (diff > 0xff) ? 0xff : (uint8_t)diff; + } + ht->buckets[hash] = i; + } + } + in_symbol_gc = FALSE; +} +#endif /* MRB_SYMBOL_MAX > 0 */ + void mrb_free_symtbl(mrb_state *mrb) { - mrb_sym i, lim; - - for (i=1, lim=mrb->symidx+1; isymtbl[i]); + /* Free individually-allocated dynamic symbol strings */ + if (mrb->sym_flags) { + for (mrb_sym i = 1; i <= mrb->symidx; i++) { + if ((mrb->sym_flags[i] & SYM_FL_DYNAMIC) && mrb->symtbl[i] != NULL) { + mrb_free(mrb, (void*)mrb->symtbl[i]); + } } } + + /* Free symbol string pool chunks (static symbols) */ + struct sym_pool_chunk *chunk = (struct sym_pool_chunk*)mrb->sym_pool; + while (chunk) { + struct sym_pool_chunk *next = chunk->next; + mrb_free(mrb, chunk); + chunk = next; + } + mrb->sym_pool = NULL; + mrb_free(mrb, (void*)mrb->symtbl); - mrb_free(mrb, (void*)mrb->symlink); - mrb_free(mrb, (void*)mrb->symflags); + mrb_free(mrb, mrb->sym_flags); + + /* Free hash table if allocated */ + if (mrb->symhash) { + mrb_free(mrb, mrb->symhash->symlink); + mrb_free(mrb, mrb->symhash); + mrb->symhash = NULL; + } } void mrb_init_symtbl(mrb_state *mrb) { + /* Initialize in linear mode - hash table allocated on demand */ + mrb->symhash = NULL; + mrb->sym_pool = NULL; } /********************************************************************** * Document-class: Symbol * - * Symbol objects represent names and some strings + * `Symbol` objects represent names and some strings * inside the Ruby - * interpreter. They are generated using the :name and - * :"string" literals - * syntax, and by the various to_sym methods. The same - * Symbol object will be created for a given name or string + * interpreter. They are generated using the `:name` and + * `:"string"` literals + * syntax, and by the various `to_sym` methods. The same + * `Symbol` object will be created for a given name or string * for the duration of a program's execution, regardless of the context - * or meaning of that name. Thus if Fred is a constant in + * or meaning of that name. Thus if `Fred` is a constant in * one context, a method in another, and a class in a third, the - * Symbol :Fred will be the same object in + * `Symbol` `:Fred` will be the same object in * all three contexts. * * module One @@ -407,7 +942,7 @@ mrb_init_symtbl(mrb_state *mrb) * call-seq: * sym.to_s -> string * - * Returns the name or string corresponding to sym. + * Returns the name or string corresponding to *sym*. * * :fred.to_s #=> "fred" */ @@ -421,7 +956,7 @@ sym_to_s(mrb_state *mrb, mrb_value sym) * call-seq: * sym.name -> string * - * Returns the name or string corresponding to sym. Unlike #to_s, the + * Returns the name or string corresponding to *sym*. Unlike #to_s, the * returned string is frozen. * * :fred.name #=> "fred" @@ -443,27 +978,23 @@ sym_name(mrb_state *mrb, mrb_value vsym) /* 15.2.11.3.4 */ /* + * Document-method: Symbol#to_sym + * * call-seq: * sym.to_sym -> sym * sym.intern -> sym * - * In general, to_sym returns the Symbol corresponding - * to an object. As sym is already a symbol, self is returned + * In general, `to_sym` returns the `Symbol` corresponding + * to an object. As *sym* is already a symbol, `self` is returned * in this case. */ -static mrb_value -sym_to_sym(mrb_state *mrb, mrb_value sym) -{ - return sym; -} - /* 15.2.11.3.5(x) */ /* * call-seq: * sym.inspect -> string * - * Returns the representation of sym as a symbol literal. + * Returns the representation of *sym* as a symbol literal. * * :fred.inspect #=> ":fred" */ @@ -485,15 +1016,15 @@ is_special_global_name(const char* m) case ':': case '<': case '>': case '\"': case '&': case '`': case '\'': case '+': case '0': - ++m; + m++; break; case '-': - ++m; + m++; if (is_identchar(*m)) m += 1; break; default: if (!ISDIGIT(*m)) return FALSE; - do ++m; while (ISDIGIT(*m)); + do m++; while (ISDIGIT(*m)); break; } return !*m; @@ -521,7 +1052,7 @@ symname_p(const char *name) case '<': switch (*++m) { case '<': ++m; break; - case '=': if (*++m == '>') ++m; break; + case '=': if (*++m == '>') m++; break; default: break; } break; @@ -535,37 +1066,37 @@ symname_p(const char *name) case '=': switch (*++m) { - case '~': ++m; break; - case '=': if (*++m == '=') ++m; break; + case '~': m++; break; + case '=': if (*++m == '=') m++; break; default: return FALSE; } break; case '*': - if (*++m == '*') ++m; + if (*++m == '*') m++; break; case '!': switch (*++m) { - case '=': case '~': ++m; + case '=': case '~': m++; } break; case '+': case '-': - if (*++m == '@') ++m; + if (*++m == '@') m++; break; case '|': - if (*++m == '|') ++m; + if (*++m == '|') m++; break; case '&': - if (*++m == '&') ++m; + if (*++m == '&') m++; break; case '^': case '/': case '%': case '~': case '`': - ++m; + m++; break; case '[': if (*++m != ']') return FALSE; - if (*++m == '=') ++m; + if (*++m == '=') m++; break; default: @@ -575,7 +1106,7 @@ symname_p(const char *name) while (is_identchar(*m)) m += 1; if (localid) { switch (*m) { - case '!': case '?': case '=': ++m; + case '!': case '?': case '=': m++; default: break; } } @@ -587,15 +1118,12 @@ symname_p(const char *name) static mrb_value sym_inspect(mrb_state *mrb, mrb_value sym) { - mrb_value str; - const char *name; - mrb_int len; mrb_sym id = mrb_symbol(sym); - char *sp; + mrb_int len; + const char *name = mrb_sym_name_len(mrb, id, &len); + mrb_value str = mrb_str_new(mrb, NULL, len+1); + char *sp = RSTRING_PTR(str); - name = mrb_sym_name_len(mrb, id, &len); - str = mrb_str_new(mrb, NULL, len+1); - sp = RSTRING_PTR(str); sp[0] = ':'; memcpy(sp+1, name, len); mrb_assert_int_fit(mrb_int, len, size_t, SIZE_MAX); @@ -611,6 +1139,17 @@ sym_inspect(mrb_state *mrb, mrb_value sym) return str; } +/* + * Converts a symbol to an mruby string value. + * + * mrb: The mruby state. + * sym: The symbol to convert. + * + * Returns the mruby string value corresponding to the symbol. + * If the symbol is an inline symbol, a new string is created. + * Otherwise, a static string (sharing the symbol's name buffer) is returned. + * Returns an undefined value if the symbol is invalid (though this should not happen). + */ MRB_API mrb_value mrb_sym_str(mrb_state *mrb, mrb_sym sym) { @@ -643,12 +1182,36 @@ sym_cstr(mrb_state *mrb, mrb_sym sym, mrb_bool dump) } } +/* + * Retrieves the C string representation of a symbol's name. + * + * mrb: The mruby state. + * sym: The symbol to retrieve the name for. + * + * Returns a pointer to the C string representing the symbol's name. + * Returns NULL if the symbol is invalid. + * If the symbol's name contains null bytes or is not a valid identifier + * for direct use (based on internal checks in sym_cstr), this function + * might return a "dumped" (quoted and escaped) version of the name. + */ MRB_API const char* mrb_sym_name(mrb_state *mrb, mrb_sym sym) { return sym_cstr(mrb, sym, FALSE); } +/* + * Retrieves the C string representation of a symbol's name, suitable for dumping. + * This version is intended for producing a string that can be safely outputted, + * for example, in debugging or serialization contexts. It may quote or escape + * the symbol name if it's not a simple identifier. + * + * mrb: The mruby state. + * sym: The symbol to retrieve the dump name for. + * + * Returns a pointer to the C string representing the symbol's name for dumping. + * Returns NULL if the symbol is invalid. + */ MRB_API const char* mrb_sym_dump(mrb_state *mrb, mrb_sym sym) { @@ -686,19 +1249,26 @@ sym_cmp(mrb_state *mrb, mrb_value s1) return mrb_fixnum_value(-1); } } +#undef lesser + +/* ---------------------------*/ +static const mrb_mt_entry symbol_rom_entries[] = { + MRB_MT_ENTRY(sym_to_s, MRB_SYM(to_s), MRB_ARGS_NONE()), /* 15.2.11.3.3 */ + MRB_MT_ENTRY(sym_name, MRB_SYM(name), MRB_ARGS_NONE()), + MRB_MT_ENTRY(mrb_obj_itself, MRB_SYM(to_sym), MRB_ARGS_NONE()), /* 15.2.11.3.4 */ + MRB_MT_ENTRY(sym_inspect, MRB_SYM(inspect), MRB_ARGS_NONE()), /* 15.2.11.3.5(x) */ + MRB_MT_ENTRY(sym_cmp, MRB_OPSYM(cmp), MRB_ARGS_REQ(1)), + MRB_MT_ENTRY(mrb_obj_equal_m, MRB_OPSYM(eq), MRB_ARGS_REQ(1)), +}; void mrb_init_symbol(mrb_state *mrb) { struct RClass *sym; - mrb->symbol_class = sym = mrb_define_class(mrb, "Symbol", mrb->object_class); /* 15.2.11 */ + mrb->symbol_class = sym = mrb_define_class_id(mrb, MRB_SYM(Symbol), mrb->object_class); /* 15.2.11 */ MRB_SET_INSTANCE_TT(sym, MRB_TT_SYMBOL); - mrb_undef_class_method(mrb, sym, "new"); + mrb_undef_class_method_id(mrb, sym, MRB_SYM(new)); - mrb_define_method(mrb, sym, "to_s", sym_to_s, MRB_ARGS_NONE()); /* 15.2.11.3.3 */ - mrb_define_method(mrb, sym, "name", sym_name, MRB_ARGS_NONE()); - mrb_define_method(mrb, sym, "to_sym", sym_to_sym, MRB_ARGS_NONE()); /* 15.2.11.3.4 */ - mrb_define_method(mrb, sym, "inspect", sym_inspect, MRB_ARGS_NONE()); /* 15.2.11.3.5(x) */ - mrb_define_method(mrb, sym, "<=>", sym_cmp, MRB_ARGS_REQ(1)); + MRB_MT_INIT_ROM(mrb, sym, symbol_rom_entries); } diff --git a/src/variable.c b/src/variable.c index ea73017702..401692b19c 100644 --- a/src/variable.c +++ b/src/variable.c @@ -11,7 +11,6 @@ #include #include #include -#include /* Instance variable table structure */ typedef struct iv_tbl { @@ -19,9 +18,7 @@ typedef struct iv_tbl { mrb_value *ptr; } iv_tbl; -#define IV_EMPTY 0 -#define IV_DELETED (1UL<<31) -#define IV_KEY_P(k) (((k)&~((uint32_t)IV_DELETED))!=0) + /* Creates the instance variable table. */ static iv_tbl* @@ -39,190 +36,197 @@ iv_new(mrb_state *mrb) static void iv_put(mrb_state *mrb, iv_tbl *t, mrb_sym sym, mrb_value val); +#define IV_INITIAL_SIZE 2 + static void iv_rehash(mrb_state *mrb, iv_tbl *t) { int old_alloc = t->alloc; - int new_alloc = old_alloc+4; - mrb_value *old_ptr = t->ptr; + int new_alloc = old_alloc > 0 ? old_alloc << 1 : IV_INITIAL_SIZE; - khash_power2(new_alloc); - if (old_alloc == new_alloc) return; + if (old_alloc == 0) { + /* first-time init */ + t->ptr = (mrb_value*)mrb_calloc(mrb, new_alloc, sizeof(mrb_value)+sizeof(mrb_sym)); + t->alloc = new_alloc; + return; + } + + /* realloc may extend in place, avoiding malloc+memcpy+free */ + size_t new_size = (size_t)new_alloc * (sizeof(mrb_value) + sizeof(mrb_sym)); + t->ptr = (mrb_value*)mrb_realloc(mrb, t->ptr, new_size); + + /* move keys from old position to new position */ + mrb_sym *old_keys = (mrb_sym*)&t->ptr[old_alloc]; + mrb_sym *new_keys = (mrb_sym*)&t->ptr[new_alloc]; + memmove(new_keys, old_keys, sizeof(mrb_sym) * t->size); + + /* clear extended value region (where old keys were + new slots) */ + memset(&t->ptr[old_alloc], 0, sizeof(mrb_value) * (new_alloc - old_alloc)); + + /* clear extended key region */ + memset(&new_keys[t->size], 0, sizeof(mrb_sym) * (new_alloc - t->size)); - t->ptr = (mrb_value*)mrb_calloc(mrb, sizeof(mrb_value)+sizeof(mrb_sym), new_alloc); - t->size = 0; t->alloc = new_alloc; - if (old_alloc == 0) return; +} - mrb_sym *keys = (mrb_sym*)&old_ptr[old_alloc]; - mrb_value *vals = old_ptr; - for (int i = 0; i < old_alloc; i++) { - if (IV_KEY_P(keys[i])) { - iv_put(mrb, t, keys[i], vals[i]); - } +/* Branch-free binary search helper: returns the index where `target` should be inserted/found. */ +static inline int +iv_bsearch_idx(mrb_sym *keys, int size, mrb_sym target) { + if (size == 0) return 0; + int n = size; + mrb_sym *p = keys; + /* While more than one element remains, halve the range each iteration */ + while (n > 1) { + int half = n >> 1; + MRB_MEM_PREFETCH(p + (half >> 1)); + MRB_MEM_PREFETCH(p + half + (half >> 1)); + mrb_sym mid_sym = p[half]; + /* + * Update pointer p without a branch: + * If mid_sym < target, move p forward by half; otherwise keep p unchanged. + * Compiler will emit a CMOV or equivalent. + */ + p = (mid_sym < target) ? p + half : p; + n -= half; } - mrb_free(mrb, old_ptr); + /* Final adjustment: if the remaining element is still less than target, advance by one */ + return (int)(p - keys) + (p[0] < target); } -/* Set the value for the symbol in the instance variable table. */ +/* Set (insert or update) the value for `sym` in the instance variable table using branch-free search. */ static void iv_put(mrb_state *mrb, iv_tbl *t, mrb_sym sym, mrb_value val) { - int hash, pos, start, dpos = -1; - - if (t == NULL) return; + /* If table is uninitialized, allocate and initialize */ if (t->alloc == 0) { iv_rehash(mrb, t); } - mrb_sym *keys = (mrb_sym*)&t->ptr[t->alloc]; - mrb_value *vals = t->ptr; - hash = kh_int_hash_func(mrb, sym); - start = pos = hash & (t->alloc-1); - for (;;) { - mrb_sym key = keys[pos]; - if (key == sym) { - vals[pos] = val; - return; - } - else if (key == IV_EMPTY) { - t->size++; - keys[pos] = sym; - vals[pos] = val; - return; - } - else if (key == IV_DELETED && dpos < 0) { - dpos = pos; - } - pos = (pos+1) & (t->alloc-1); - if (pos == start) { /* not found */ - if (dpos >= 0) { - t->size++; - keys[dpos] = sym; - vals[dpos] = val; - return; - } - /* no room */ - iv_rehash(mrb, t); - keys = (mrb_sym*)&t->ptr[t->alloc]; - vals = t->ptr; - start = pos = hash & (t->alloc-1); - } + /* Obtain pointers to keys and values arrays */ + mrb_sym *keys = (mrb_sym*)&t->ptr[t->alloc]; + mrb_value *vals = t->ptr; + + /* Determine insertion/update index: + * If table has entries, use branch-free search; otherwise index = 0. + */ + int lo = iv_bsearch_idx(keys, t->size, sym); + + /* If the key already exists, update its value and return */ + if (lo < t->size && keys[lo] == sym) { + vals[lo] = val; + return; + } + + /* Grow table if full, then recompute position */ + if (t->size == t->alloc) { + iv_rehash(mrb, t); + keys = (mrb_sym*)&t->ptr[t->alloc]; + vals = t->ptr; + lo = iv_bsearch_idx(keys, t->size, sym); + } + + /* Shift existing entries right to make room at index lo */ + int move_count = t->size - lo; + if (move_count > 0) { + memmove(&keys[lo + 1], &keys[lo], move_count * sizeof(mrb_sym)); + memmove(&vals[lo + 1], &vals[lo], move_count * sizeof(mrb_value)); } + + /* Insert the new key and value */ + keys[lo] = sym; + vals[lo] = val; + t->size++; } -/* Get a value for a symbol from the instance variable table. */ +/* Get a value for `sym` from the instance variable table using branch-free search. */ static int iv_get(mrb_state *mrb, iv_tbl *t, mrb_sym sym, mrb_value *vp) { - int hash, pos, start; + /* Return 0 if table is null, uninitialized, or empty */ + if (t == NULL || t->alloc == 0 || t->size == 0) return 0; - if (t == NULL) return FALSE; - if (t->alloc == 0) return FALSE; - if (t->size == 0) return FALSE; + mrb_sym *keys = (mrb_sym*)&t->ptr[t->alloc]; + mrb_value *vals = t->ptr; - mrb_sym *keys = (mrb_sym*)&t->ptr[t->alloc]; - mrb_value *vals = t->ptr; - hash = kh_int_hash_func(mrb, sym); - start = pos = hash & (t->alloc-1); - for (;;) { - mrb_sym key = keys[pos]; - if (key == sym) { - if (vp) *vp = vals[pos]; - return pos+1; - } - else if (key == IV_EMPTY) { - return 0; - } - pos = (pos+1) & (t->alloc-1); - if (pos == start) { /* not found */ - return 0; - } + /* Find index in a branch-free manner */ + int lo = iv_bsearch_idx(keys, t->size, sym); + + /* If found, store value (if vp provided) and return 1-based position */ + if (lo < t->size && keys[lo] == sym) { + if (vp) *vp = vals[lo]; + return lo + 1; } + + /* Not found */ + return 0; } -/* Deletes the value for the symbol from the instance variable table. */ +/* Delete the entry for `sym` from the instance variable table using branch-free search. */ static mrb_bool iv_del(mrb_state *mrb, iv_tbl *t, mrb_sym sym, mrb_value *vp) { - int hash, pos, start; + /* Return FALSE if table is null, uninitialized, or empty */ + if (t == NULL || t->alloc == 0 || t->size == 0) return FALSE; - if (t == NULL) return FALSE; - if (t->alloc == 0) return FALSE; - if (t->size == 0) return FALSE; + mrb_sym *keys = (mrb_sym*)&t->ptr[t->alloc]; + mrb_value *vals = t->ptr; - mrb_sym *keys = (mrb_sym*)&t->ptr[t->alloc]; - mrb_value *vals = t->ptr; - hash = kh_int_hash_func(mrb, sym); - start = pos = hash & (t->alloc-1); - for (;;) { - mrb_sym key = keys[pos]; - if (key == sym) { - if (vp) *vp = vals[pos]; - t->size--; - keys[pos] = IV_DELETED; - return TRUE; - } - else if (key == IV_EMPTY) { - return FALSE; - } - pos = (pos+1) & (t->alloc-1); - if (pos == start) { /* not found */ - return FALSE; + /* Find index in a branch-free manner */ + int lo = iv_bsearch_idx(keys, t->size, sym); + + /* If found, optionally return value and shift entries left to delete */ + if (lo < t->size && keys[lo] == sym) { + if (vp) *vp = vals[lo]; + int move_count = t->size - lo - 1; + if (move_count > 0) { + memmove(&keys[lo], &keys[lo + 1], move_count * sizeof(mrb_sym)); + memmove(&vals[lo], &vals[lo + 1], move_count * sizeof(mrb_value)); } + t->size--; + return TRUE; } + + /* Not found */ + return FALSE; } /* Iterates over the instance variable table. */ static void iv_foreach(mrb_state *mrb, iv_tbl *t, mrb_iv_foreach_func *func, void *p) { - int i; - - if (t == NULL) return; - if (t->alloc == 0) return; - if (t->size == 0) return; - - mrb_sym *keys = (mrb_sym*)&t->ptr[t->alloc]; - mrb_value *vals = t->ptr; - for (i=0; ialloc; i++) { - if (IV_KEY_P(keys[i])) { - if ((*func)(mrb, keys[i], vals[i], p) != 0) { - return; - } - } + if (t == NULL || t->alloc == 0 || t->size == 0) return; + for (int i = 0; i < t->size; i++) { + mrb_sym *keys = (mrb_sym*)&t->ptr[t->alloc]; + mrb_value *vals = t->ptr; + if ((*func)(mrb, keys[i], vals[i], p) != 0) return; } - return; } /* Get the size of the instance variable table. */ -/* Size is approximated by the allocated table size. */ static size_t iv_size(mrb_state *mrb, iv_tbl *t) { - if (t == NULL) return 0; - return (size_t)t->size; + return t ? t->size : 0; } -/* Copy the instance variable table. */ +/* Copy the sorted table */ static iv_tbl* iv_copy(mrb_state *mrb, iv_tbl *t) { - iv_tbl *t2; - int i; + if (t == NULL || t->alloc == 0 || t->size == 0) return NULL; - if (t == NULL) return NULL; - if (t->alloc == 0) return NULL; - if (t->size == 0) return NULL; + /* create new table and mirror alloc/size */ + iv_tbl *t2 = iv_new(mrb); + t2->alloc = t->alloc; + t2->size = t->size; + + /* allocate the same block shape */ + t2->ptr = (mrb_value*)mrb_calloc(mrb, t2->alloc, sizeof(mrb_value)+sizeof(mrb_sym)); + + /* copy values[0...size] and keys[0...size] */ + memcpy(t2->ptr, t->ptr, sizeof(mrb_value)*t2->size); + memcpy(&t2->ptr[t2->alloc], &t->ptr[t->alloc], sizeof(mrb_sym)*t2->size); - mrb_sym *keys = (mrb_sym*)&t->ptr[t->alloc]; - mrb_value *vals = t->ptr; - t2 = iv_new(mrb); - for (i=0; ialloc; i++) { - if (IV_KEY_P(keys[i])) { - iv_put(mrb, t2, keys[i], vals[i]); - } - } return t2; } @@ -234,6 +238,287 @@ iv_free(mrb_state *mrb, iv_tbl *t) mrb_free(mrb, t); } +/* + * Object Shape (Hidden Class) structures. + * + * A shape describes the IV layout of an object: which syms are stored + * at which indices. Shapes form a tree rooted at the empty root shape. + * Each child adds one IV (its "edge" sym). Objects sharing the same + * set of IVs (assigned in the same order) share the same shape, + * eliminating per-object key storage. + * + * Only MRB_TT_OBJECT instances are shaped. RClass, RHash, etc. keep + * traditional iv_tbl. + */ + +/* Maximum IV count before de-shaping to iv_tbl */ +#define MRB_SHAPE_MAX_IVS 16 + +/* Shape descriptor -- shared across objects with same IV layout */ +typedef struct mrb_iv_shape { + struct mrb_iv_shape *parent; /* parent shape (one fewer IV) */ + struct mrb_iv_shape *children; /* linked list of child shapes */ + struct mrb_iv_shape *sibling; /* next child of same parent */ + mrb_sym edge; /* IV sym added from parent */ + uint16_t count; /* number of IV slots */ +} mrb_iv_shape; + +/* Per-object shaped IV storage (allocated via struct hack) */ +typedef struct mrb_shaped_iv { + mrb_iv_shape *shape; + mrb_value values[1]; /* shape->count elements */ +} mrb_shaped_iv; + +/* Create the empty root shape */ +static mrb_iv_shape* +shape_root(mrb_state *mrb) +{ + mrb_iv_shape *s = (mrb_iv_shape*)mrb_calloc(mrb, 1, sizeof(mrb_iv_shape)); + return s; +} + +/* Find a child shape with the given edge sym */ +static mrb_iv_shape* +shape_find_child(mrb_iv_shape *shape, mrb_sym sym) +{ + mrb_iv_shape *c = shape->children; + while (c) { + if (c->edge == sym) return c; + c = c->sibling; + } + return NULL; +} + +/* Find or create a child shape for adding sym */ +static mrb_iv_shape* +shape_transition(mrb_state *mrb, mrb_iv_shape *shape, mrb_sym sym) +{ + mrb_iv_shape *child = shape_find_child(shape, sym); + if (child) return child; + + /* create new child shape */ + child = (mrb_iv_shape*)mrb_malloc(mrb, sizeof(mrb_iv_shape)); + child->parent = shape; + child->children = NULL; + child->sibling = shape->children; + child->edge = sym; + child->count = shape->count + 1; + shape->children = child; + return child; +} + +/* + * Look up sym in shape by walking the parent chain. + * Returns the value index (0-based), or -1 if not found. + */ +static int +shape_lookup(mrb_iv_shape *shape, mrb_sym sym) +{ + mrb_iv_shape *s = shape; + while (s->count > 0) { + if (s->edge == sym) return s->count - 1; + s = s->parent; + } + return -1; +} + +/* Recursively free all shapes in the tree */ +static void +shape_free_tree(mrb_state *mrb, mrb_iv_shape *shape) +{ + mrb_iv_shape *c = shape->children; + while (c) { + mrb_iv_shape *next = c->sibling; + shape_free_tree(mrb, c); + c = next; + } + mrb_free(mrb, shape); +} + +/* Allocate a mrb_shaped_iv with room for count values */ +static mrb_shaped_iv* +shaped_iv_alloc(mrb_state *mrb, mrb_iv_shape *shape) +{ + size_t sz = offsetof(mrb_shaped_iv, values) + + sizeof(mrb_value) * shape->count; + mrb_shaped_iv *siv = (mrb_shaped_iv*)mrb_malloc(mrb, sz); + siv->shape = shape; + return siv; +} + +/* Convert a shaped object back to traditional iv_tbl (de-shape) */ +static void +shaped_to_iv_tbl(mrb_state *mrb, struct RObject *obj) +{ + mrb_shaped_iv *siv = (mrb_shaped_iv*)obj->iv; + iv_tbl *t = NULL; + + if (siv) { + mrb_iv_shape *shape = siv->shape; + + if (shape->count > 0) { + /* reconstruct keys from parent chain */ + mrb_sym keys[MRB_SHAPE_MAX_IVS]; + mrb_iv_shape *s = shape; + while (s->count > 0) { + keys[s->count - 1] = s->edge; + s = s->parent; + } + + t = iv_new(mrb); + for (int i = 0; i < shape->count; i++) { + if (!mrb_undef_p(siv->values[i])) { + iv_put(mrb, t, keys[i], siv->values[i]); + } + } + } + mrb_free(mrb, siv); + } + obj->iv = t; + obj->flags &= ~MRB_FL_OBJ_SHAPED; +} + +/* --- Shaped IV operations --- */ + +static void +shaped_iv_set(mrb_state *mrb, struct RObject *obj, mrb_sym sym, mrb_value v) +{ + mrb_shaped_iv *siv = (mrb_shaped_iv*)obj->iv; + mrb_iv_shape *shape = siv ? siv->shape : mrb->root_shape; + + /* check if sym already exists in current shape */ + int idx = shape_lookup(shape, sym); + if (idx >= 0) { + siv->values[idx] = v; + return; + } + + /* transition to new shape */ + mrb_iv_shape *new_shape = shape_transition(mrb, shape, sym); + + /* de-shape if too many IVs */ + if (new_shape->count > MRB_SHAPE_MAX_IVS) { + shaped_to_iv_tbl(mrb, obj); + if (!obj->iv) { + obj->iv = iv_new(mrb); + } + iv_put(mrb, obj->iv, sym, v); + return; + } + + /* allocate new shaped_iv with room for new shape */ + mrb_shaped_iv *new_siv = shaped_iv_alloc(mrb, new_shape); + + /* copy old values (they are a prefix) */ + if (siv) { + memcpy(new_siv->values, siv->values, + sizeof(mrb_value) * shape->count); + mrb_free(mrb, siv); + } + /* new IV goes in the last slot */ + new_siv->values[new_shape->count - 1] = v; + + obj->iv = (iv_tbl*)new_siv; +} + +static mrb_value +shaped_iv_get(struct RObject *obj, mrb_sym sym) +{ + mrb_shaped_iv *siv = (mrb_shaped_iv*)obj->iv; + if (!siv) return mrb_nil_value(); + int idx = shape_lookup(siv->shape, sym); + if (idx >= 0 && !mrb_undef_p(siv->values[idx])) + return siv->values[idx]; + return mrb_nil_value(); +} + +static mrb_bool +shaped_iv_defined(struct RObject *obj, mrb_sym sym) +{ + mrb_shaped_iv *siv = (mrb_shaped_iv*)obj->iv; + if (!siv) return FALSE; + int idx = shape_lookup(siv->shape, sym); + if (idx >= 0 && !mrb_undef_p(siv->values[idx])) + return TRUE; + return FALSE; +} + +static void +shaped_iv_foreach(mrb_state *mrb, struct RObject *obj, + mrb_iv_foreach_func *func, void *p) +{ + mrb_shaped_iv *siv = (mrb_shaped_iv*)obj->iv; + if (!siv) return; + mrb_iv_shape *shape = siv->shape; + if (shape->count == 0) return; + + /* reconstruct keys from parent chain */ + mrb_sym keys[MRB_SHAPE_MAX_IVS]; + mrb_iv_shape *s = shape; + while (s->count > 0) { + keys[s->count - 1] = s->edge; + s = s->parent; + } + + for (int i = 0; i < shape->count; i++) { + if (!mrb_undef_p(siv->values[i])) { + if ((*func)(mrb, keys[i], siv->values[i], p) != 0) return; + } + } +} + +static size_t +shaped_iv_mark(mrb_state *mrb, struct RObject *obj) +{ + mrb_shaped_iv *siv = (mrb_shaped_iv*)obj->iv; + if (!siv) return 0; + mrb_iv_shape *shape = siv->shape; + for (int i = 0; i < shape->count; i++) { + if (!mrb_undef_p(siv->values[i])) { + mrb_gc_mark_value(mrb, siv->values[i]); + } + } + return shape->count; +} + +static void +shaped_iv_free(mrb_state *mrb, struct RObject *obj) +{ + if (obj->iv) { + mrb_free(mrb, obj->iv); + } +} + +static void +shaped_iv_copy(mrb_state *mrb, struct RObject *dst, struct RObject *src) +{ + mrb_shaped_iv *ssiv = (mrb_shaped_iv*)src->iv; + if (!ssiv) { + dst->iv = NULL; + return; + } + mrb_iv_shape *shape = ssiv->shape; + mrb_shaped_iv *dsiv = shaped_iv_alloc(mrb, shape); + memcpy(dsiv->values, ssiv->values, sizeof(mrb_value) * shape->count); + dst->iv = (iv_tbl*)dsiv; +} + +/* Public init/free for shape tree (called from state.c) */ +void +mrb_init_shape(mrb_state *mrb) +{ + mrb->root_shape = shape_root(mrb); +} + +void +mrb_free_shape(mrb_state *mrb) +{ + if (mrb->root_shape) { + shape_free_tree(mrb, mrb->root_shape); + mrb->root_shape = NULL; + } +} + static int iv_mark_i(mrb_state *mrb, mrb_sym sym, mrb_value v, void *p) { @@ -256,25 +541,29 @@ mrb_gc_mark_gv(mrb_state *mrb) void mrb_gc_free_gv(mrb_state *mrb) { - if (mrb->globals) + if (mrb->globals) { iv_free(mrb, mrb->globals); + mrb->globals = NULL; + } } -void +size_t mrb_gc_mark_iv(mrb_state *mrb, struct RObject *obj) { + if (MRB_OBJ_SHAPED_P(obj)) { + return shaped_iv_mark(mrb, obj); + } mark_tbl(mrb, obj->iv); -} - -size_t -mrb_gc_mark_iv_size(mrb_state *mrb, struct RObject *obj) -{ return iv_size(mrb, obj->iv); } void mrb_gc_free_iv(mrb_state *mrb, struct RObject *obj) { + if (MRB_OBJ_SHAPED_P(obj)) { + shaped_iv_free(mrb, obj); + return; + } if (obj->iv) { iv_free(mrb, obj->iv); } @@ -294,13 +583,13 @@ mrb_vm_special_set(mrb_state *mrb, mrb_sym i, mrb_value v) static mrb_bool obj_iv_p(mrb_value obj) { - switch (mrb_type(obj)) { + switch (mrb_unboxed_type(obj)) { case MRB_TT_OBJECT: case MRB_TT_CLASS: case MRB_TT_MODULE: case MRB_TT_SCLASS: case MRB_TT_HASH: - case MRB_TT_DATA: + case MRB_TT_CDATA: case MRB_TT_EXCEPTION: return TRUE; default: @@ -308,16 +597,54 @@ obj_iv_p(mrb_value obj) } } +static iv_tbl* +class_iv_ptr(struct RClass *c) +{ + return c->tt == MRB_TT_ICLASS ? c->c->iv : c->iv; +} + +/* + * Retrieves an instance variable from an object. + * + * Args: + * mrb: The mruby state. + * obj: The object from which to retrieve the instance variable. + * sym: The symbol representing the name of the instance variable. + * + * Returns: + * The value of the instance variable, or mrb_nil_value() if the + * instance variable is not defined. + */ MRB_API mrb_value mrb_obj_iv_get(mrb_state *mrb, struct RObject *obj, mrb_sym sym) { - mrb_value v; + if (MRB_OBJ_SHAPED_P(obj)) { + return shaped_iv_get(obj, sym); + } + mrb_value v; if (obj->iv && iv_get(mrb, obj->iv, sym, &v)) return v; return mrb_nil_value(); } +/* + * Retrieves an instance variable from an mrb_value. + * + * This function is a wrapper around mrb_obj_iv_get. It checks if the + * given mrb_value is an object that can have instance variables before + * attempting to retrieve the variable. + * + * Args: + * mrb: The mruby state. + * obj: The mrb_value from which to retrieve the instance variable. + * sym: The symbol representing the name of the instance variable. + * + * Returns: + * The value of the instance variable, or mrb_nil_value() if the + * instance variable is not defined or if the object cannot have + * instance variables. + */ MRB_API mrb_value mrb_iv_get(mrb_state *mrb, mrb_value obj, mrb_sym sym) { @@ -327,34 +654,6 @@ mrb_iv_get(mrb_state *mrb, mrb_value obj, mrb_sym sym) return mrb_nil_value(); } -static inline void assign_class_name(mrb_state *mrb, struct RObject *obj, mrb_sym sym, mrb_value v); - -void -mrb_obj_iv_set_force(mrb_state *mrb, struct RObject *obj, mrb_sym sym, mrb_value v) -{ - assign_class_name(mrb, obj, sym, v); - if (!obj->iv) { - obj->iv = iv_new(mrb); - } - iv_put(mrb, obj->iv, sym, v); - mrb_field_write_barrier_value(mrb, (struct RBasic*)obj, v); -} - -MRB_API void -mrb_obj_iv_set(mrb_state *mrb, struct RObject *obj, mrb_sym sym, mrb_value v) -{ - mrb_check_frozen(mrb, obj); - mrb_obj_iv_set_force(mrb, obj, sym, v); -} - -/* Iterates over the instance variable table. */ -MRB_API void -mrb_iv_foreach(mrb_state *mrb, mrb_value obj, mrb_iv_foreach_func *func, void *p) -{ - if (!obj_iv_p(obj)) return; - iv_foreach(mrb, mrb_obj_ptr(obj)->iv, func, p); -} - static inline mrb_bool namespace_p(enum mrb_vtype tt) { @@ -364,9 +663,10 @@ namespace_p(enum mrb_vtype tt) static inline void assign_class_name(mrb_state *mrb, struct RObject *obj, mrb_sym sym, mrb_value v) { - if (namespace_p(obj->tt) && namespace_p(mrb_type(v))) { + if (namespace_p(mrb_type(v))) { struct RObject *c = mrb_obj_ptr(v); - if (obj != c && ISUPPER(mrb_sym_name_len(mrb, sym, NULL)[0])) { + const char *name = mrb_sym_name_len(mrb, sym, NULL); + if (obj != c && name && ISUPPER(name[0])) { mrb_sym id_classname = MRB_SYM(__classname__); mrb_value o = mrb_obj_iv_get(mrb, c, id_classname); @@ -375,7 +675,7 @@ assign_class_name(mrb_state *mrb, struct RObject *obj, mrb_sym sym, mrb_value v) o = mrb_obj_iv_get(mrb, c, id_outer); if (mrb_nil_p(o)) { - if ((struct RClass *)obj == mrb->object_class) { + if ((struct RClass*)obj == mrb->object_class) { mrb_obj_iv_set_force(mrb, c, id_classname, mrb_symbol_value(sym)); } else { @@ -387,6 +687,86 @@ assign_class_name(mrb_state *mrb, struct RObject *obj, mrb_sym sym, mrb_value v) } } +void +mrb_obj_iv_set_force(mrb_state *mrb, struct RObject *obj, mrb_sym sym, mrb_value v) +{ + if (namespace_p(obj->tt)) { + assign_class_name(mrb, obj, sym, v); + } + if (MRB_OBJ_SHAPED_P(obj)) { + shaped_iv_set(mrb, obj, sym, v); + mrb_field_write_barrier_value(mrb, (struct RBasic*)obj, v); + return; + } + if (!obj->iv) { + obj->iv = iv_new(mrb); + } + iv_put(mrb, obj->iv, sym, v); + mrb_field_write_barrier_value(mrb, (struct RBasic*)obj, v); +} + +/* + * Sets an instance variable on an object. + * + * This function checks if the object is frozen before setting the variable. + * It then calls mrb_obj_iv_set_force to actually set the variable. + * + * Args: + * mrb: The mruby state. + * obj: The object on which to set the instance variable. + * sym: The symbol representing the name of the instance variable. + * v: The value to set for the instance variable. + */ +MRB_API void +mrb_obj_iv_set(mrb_state *mrb, struct RObject *obj, mrb_sym sym, mrb_value v) +{ + mrb_check_frozen(mrb, obj); + mrb_obj_iv_set_force(mrb, obj, sym, v); +} + +/* + * Iterates over the instance variables of an object. + * + * This function calls the provided callback function for each instance + * variable in the object. + * + * Args: + * mrb: The mruby state. + * obj: The mrb_value whose instance variables to iterate over. + * func: The callback function to call for each instance variable. + * The function should take mrb_state*, mrb_sym, mrb_value, and void* + * as arguments and return an int. If the callback returns a non-zero + * value, iteration stops. + * p: A pointer to user data that will be passed to the callback function. + */ +MRB_API void +mrb_iv_foreach(mrb_state *mrb, mrb_value obj, mrb_iv_foreach_func *func, void *p) +{ + if (!obj_iv_p(obj)) return; + if (MRB_OBJ_SHAPED_P(mrb_obj_ptr(obj))) { + shaped_iv_foreach(mrb, mrb_obj_ptr(obj), func, p); + return; + } + iv_foreach(mrb, mrb_obj_ptr(obj)->iv, func, p); +} + +/* + * Sets an instance variable on an mrb_value. + * + * This function is a wrapper around mrb_obj_iv_set. It checks if the + * given mrb_value is an object that can have instance variables before + * attempting to set the variable. If the object cannot have instance + * variables, it raises an E_ARGUMENT_ERROR. + * + * Args: + * mrb: The mruby state. + * obj: The mrb_value on which to set the instance variable. + * sym: The symbol representing the name of the instance variable. + * v: The value to set for the instance variable. + * + * Raises: + * E_ARGUMENT_ERROR: If the object cannot have instance variables. + */ MRB_API void mrb_iv_set(mrb_state *mrb, mrb_value obj, mrb_sym sym, mrb_value v) { @@ -398,16 +778,45 @@ mrb_iv_set(mrb_state *mrb, mrb_value obj, mrb_sym sym, mrb_value v) } } +/* + * Checks if an instance variable is defined on an object. + * + * Args: + * mrb: The mruby state. + * obj: The object to check. + * sym: The symbol representing the name of the instance variable. + * + * Returns: + * TRUE if the instance variable is defined, FALSE otherwise. + */ MRB_API mrb_bool mrb_obj_iv_defined(mrb_state *mrb, struct RObject *obj, mrb_sym sym) { - iv_tbl *t; + if (MRB_OBJ_SHAPED_P(obj)) { + return shaped_iv_defined(obj, sym); + } - t = obj->iv; + iv_tbl *t = obj->iv; if (t && iv_get(mrb, t, sym, NULL)) return TRUE; return FALSE; } +/* + * Checks if an instance variable is defined on an mrb_value. + * + * This function is a wrapper around mrb_obj_iv_defined. It checks if the + * given mrb_value is an object that can have instance variables before + * attempting to check for the variable. + * + * Args: + * mrb: The mruby state. + * obj: The mrb_value to check. + * sym: The symbol representing the name of the instance variable. + * + * Returns: + * TRUE if the instance variable is defined and the object can have + * instance variables, FALSE otherwise. + */ MRB_API mrb_bool mrb_iv_defined(mrb_state *mrb, mrb_value obj, mrb_sym sym) { @@ -415,19 +824,45 @@ mrb_iv_defined(mrb_state *mrb, mrb_value obj, mrb_sym sym) return mrb_obj_iv_defined(mrb, mrb_obj_ptr(obj), sym); } +/* + * Checks if a symbol is a valid instance variable name. + * + * A valid instance variable name must: + * - Be at least 2 characters long. + * - Start with '@'. + * - Not have a digit as the second character. + * - The rest of the name must be a valid identifier. + * + * Args: + * mrb: The mruby state. + * iv_name: The symbol to check. + * + * Returns: + * TRUE if the symbol is a valid instance variable name, FALSE otherwise. + */ MRB_API mrb_bool mrb_iv_name_sym_p(mrb_state *mrb, mrb_sym iv_name) { - const char *s; mrb_int len; + const char *s = mrb_sym_name_len(mrb, iv_name, &len); - s = mrb_sym_name_len(mrb, iv_name, &len); if (len < 2) return FALSE; if (s[0] != '@') return FALSE; if (ISDIGIT(s[1])) return FALSE; return mrb_ident_p(s+1, len-1); } +/* + * Checks if a symbol is a valid instance variable name and raises a + * name error if it's not. + * + * Args: + * mrb: The mruby state. + * iv_name: The symbol to check. + * + * Raises: + * E_NAME_ERROR: If the symbol is not a valid instance variable name. + */ MRB_API void mrb_iv_name_sym_check(mrb_state *mrb, mrb_sym iv_name) { @@ -436,82 +871,112 @@ mrb_iv_name_sym_check(mrb_state *mrb, mrb_sym iv_name) } } +/* + * Copies instance variables from one object to another. + * + * If the destination object already has instance variables, they are freed + * before copying. + * + * Args: + * mrb: The mruby state. + * dest: The destination object (mrb_value). + * src: The source object (mrb_value). + */ MRB_API void mrb_iv_copy(mrb_state *mrb, mrb_value dest, mrb_value src) { struct RObject *d = mrb_obj_ptr(dest); struct RObject *s = mrb_obj_ptr(src); - if (d->iv) { - iv_free(mrb, d->iv); - d->iv = 0; + /* free dest's existing IVs */ + if (MRB_OBJ_SHAPED_P(d)) { + shaped_iv_free(mrb, d); + d->iv = NULL; } - if (s->iv) { - mrb_write_barrier(mrb, (struct RBasic*)d); - d->iv = iv_copy(mrb, s->iv); + else if (d->iv) { + iv_free(mrb, d->iv); + d->iv = NULL; } -} - -static int -inspect_i(mrb_state *mrb, mrb_sym sym, mrb_value v, void *p) -{ - mrb_value str = *(mrb_value*)p; - const char *s; - mrb_int len; - mrb_value ins; - char *sp = RSTRING_PTR(str); - /* need not to show internal data */ - if (sp[0] == '-') { /* first element */ - sp[0] = '#'; - mrb_str_cat_lit(mrb, str, " "); - } - else { - mrb_str_cat_lit(mrb, str, ", "); + if (MRB_OBJ_SHAPED_P(s) && MRB_OBJ_SHAPED_P(d)) { + /* both shaped: share shape, memcpy values */ + if (s->iv) { + mrb_write_barrier(mrb, (struct RBasic*)d); + shaped_iv_copy(mrb, d, s); + } } - s = mrb_sym_name_len(mrb, sym, &len); - mrb_str_cat(mrb, str, s, len); - mrb_str_cat_lit(mrb, str, "="); - if (mrb_object_p(v)) { - ins = mrb_any_to_s(mrb, v); + else if (MRB_OBJ_SHAPED_P(s)) { + /* src shaped, dest unshaped: convert src to iv_tbl copy */ + mrb_shaped_iv *ssiv = (mrb_shaped_iv*)s->iv; + if (ssiv) { + mrb_iv_shape *shape = ssiv->shape; + if (shape->count > 0) { + mrb_sym keys[MRB_SHAPE_MAX_IVS]; + mrb_iv_shape *sh = shape; + while (sh->count > 0) { + keys[sh->count - 1] = sh->edge; + sh = sh->parent; + } + iv_tbl *t = iv_new(mrb); + for (int i = 0; i < shape->count; i++) { + if (!mrb_undef_p(ssiv->values[i])) { + iv_put(mrb, t, keys[i], ssiv->values[i]); + } + } + mrb_write_barrier(mrb, (struct RBasic*)d); + d->iv = t; + } + } } else { - ins = mrb_inspect(mrb, v); - } - mrb_str_cat_str(mrb, str, ins); - return 0; -} - -mrb_value -mrb_obj_iv_inspect(mrb_state *mrb, struct RObject *obj) -{ - iv_tbl *t = obj->iv; - size_t len = iv_size(mrb, t); - - if (len > 0) { - const char *cn = mrb_obj_classname(mrb, mrb_obj_value(obj)); - mrb_value str = mrb_str_new_capa(mrb, 30); - - mrb_str_cat_lit(mrb, str, "-<"); - mrb_str_cat_cstr(mrb, str, cn); - mrb_str_cat_lit(mrb, str, ":"); - mrb_str_cat_str(mrb, str, mrb_ptr_to_str(mrb, obj)); - - iv_foreach(mrb, t, inspect_i, &str); - mrb_str_cat_lit(mrb, str, ">"); - return str; + /* both unshaped or dest shaped but src unshaped */ + if (MRB_OBJ_SHAPED_P(d)) { + d->flags &= ~MRB_FL_OBJ_SHAPED; + } + if (s->iv) { + mrb_write_barrier(mrb, (struct RBasic*)d); + d->iv = iv_copy(mrb, s->iv); + } } - return mrb_any_to_s(mrb, mrb_obj_value(obj)); } +/* + * Removes an instance variable from an object. + * + * Args: + * mrb: The mruby state. + * obj: The object (mrb_value) from which to remove the instance variable. + * sym: The symbol representing the name of the instance variable. + * + * Returns: + * The value of the removed instance variable, or mrb_undef_value() if + * the instance variable was not defined or if the object cannot have + * instance variables. + */ MRB_API mrb_value mrb_iv_remove(mrb_state *mrb, mrb_value obj, mrb_sym sym) { if (obj_iv_p(obj)) { - iv_tbl *t = mrb_obj_ptr(obj)->iv; - mrb_value val; + struct RObject *o = mrb_obj_ptr(obj); + mrb_check_frozen(mrb, o); + + if (MRB_OBJ_SHAPED_P(o)) { + mrb_shaped_iv *siv = (mrb_shaped_iv*)o->iv; + if (siv) { + int idx = shape_lookup(siv->shape, sym); + if (idx >= 0 && !mrb_undef_p(siv->values[idx])) { + mrb_value val = siv->values[idx]; + /* de-shape, then remove the key */ + shaped_to_iv_tbl(mrb, o); + iv_del(mrb, o->iv, sym, NULL); + return val; + } + } + return mrb_undef_value(); + } - mrb_check_frozen(mrb, mrb_obj_ptr(obj)); + iv_tbl *t = o->iv; + mrb_value val; if (iv_del(mrb, t, sym, &val)) { return val; } @@ -522,12 +987,10 @@ mrb_iv_remove(mrb_state *mrb, mrb_value obj, mrb_sym sym) static int iv_i(mrb_state *mrb, mrb_sym sym, mrb_value v, void *p) { - mrb_value ary; - const char* s; + mrb_value ary = *(mrb_value*)p; mrb_int len; + const char* s = mrb_sym_name_len(mrb, sym, &len); - ary = *(mrb_value*)p; - s = mrb_sym_name_len(mrb, sym, &len); if (len > 1 && s[0] == '@' && s[1] != '@') { mrb_ary_push(mrb, ary, mrb_symbol_value(sym)); } @@ -554,11 +1017,16 @@ iv_i(mrb_state *mrb, mrb_sym sym, mrb_value v, void *p) mrb_value mrb_obj_instance_variables(mrb_state *mrb, mrb_value self) { - mrb_value ary; + mrb_value ary = mrb_ary_new(mrb); - ary = mrb_ary_new(mrb); if (obj_iv_p(self)) { - iv_foreach(mrb, mrb_obj_ptr(self)->iv, iv_i, &ary); + struct RObject *obj = mrb_obj_ptr(self); + if (MRB_OBJ_SHAPED_P(obj)) { + shaped_iv_foreach(mrb, obj, iv_i, &ary); + } + else { + iv_foreach(mrb, obj->iv, iv_i, &ary); + } } return ary; } @@ -566,12 +1034,10 @@ mrb_obj_instance_variables(mrb_state *mrb, mrb_value self) static int cv_i(mrb_state *mrb, mrb_sym sym, mrb_value v, void *p) { - mrb_value ary; - const char* s; + mrb_value ary = *(mrb_value*)p; mrb_int len; + const char* s = mrb_sym_name_len(mrb, sym, &len); - ary = *(mrb_value*)p; - s = mrb_sym_name_len(mrb, sym, &len); if (len > 2 && s[0] == '@' && s[1] == '@') { mrb_ary_push(mrb, ary, mrb_symbol_value(sym)); } @@ -583,7 +1049,7 @@ cv_i(mrb_state *mrb, mrb_sym sym, mrb_value v, void *p) * call-seq: * mod.class_variables(inherit=true) -> array * - * Returns an array of the names of class variables in mod. + * Returns an array of the names of class variables in *mod*. * * class One * @@var1 = 1 @@ -597,15 +1063,13 @@ cv_i(mrb_state *mrb, mrb_sym sym, mrb_value v, void *p) mrb_value mrb_mod_class_variables(mrb_state *mrb, mrb_value mod) { - mrb_value ary; - struct RClass *c; mrb_bool inherit = TRUE; mrb_get_args(mrb, "|b", &inherit); - ary = mrb_ary_new(mrb); - c = mrb_class_ptr(mod); + mrb_value ary = mrb_ary_new(mrb); + struct RClass *c = mrb_class_ptr(mod); while (c) { - iv_foreach(mrb, c->iv, cv_i, &ary); + iv_foreach(mrb, class_iv_ptr(c), cv_i, &ary); if (!inherit) break; c = c->super; } @@ -616,25 +1080,24 @@ mrb_value mrb_mod_cv_get(mrb_state *mrb, struct RClass *c, mrb_sym sym) { struct RClass * cls = c; - mrb_value v; - int given = FALSE; + mrb_value v = mrb_nil_value(); + mrb_bool given = FALSE; while (c) { - if (c->iv && iv_get(mrb, c->iv, sym, &v)) { + if (iv_get(mrb, class_iv_ptr(c), sym, &v)) { given = TRUE; } c = c->super; } if (given) return v; - if (cls && cls->tt == MRB_TT_SCLASS) { - mrb_value klass; + if (cls->tt == MRB_TT_SCLASS) { + mrb_value klass = mrb_obj_iv_get(mrb, (struct RObject*)cls, MRB_SYM(__attached__)); - klass = mrb_obj_iv_get(mrb, (struct RObject *)cls, MRB_SYM(__attached__)); c = mrb_class_ptr(klass); if (c->tt == MRB_TT_CLASS || c->tt == MRB_TT_MODULE) { given = FALSE; while (c) { - if (c->iv && iv_get(mrb, c->iv, sym, &v)) { + if (iv_get(mrb, class_iv_ptr(c), sym, &v)) { given = TRUE; } c = c->super; @@ -647,19 +1110,48 @@ mrb_mod_cv_get(mrb_state *mrb, struct RClass *c, mrb_sym sym) return mrb_nil_value(); } +/* + * Retrieves a class variable from a module or class. + * + * This function is a wrapper around mrb_mod_cv_get. + * + * Args: + * mrb: The mruby state. + * mod: The module or class (mrb_value) from which to retrieve the class variable. + * sym: The symbol representing the name of the class variable. + * + * Returns: + * The value of the class variable. + * + * Raises: + * E_NAME_ERROR: If the class variable is not defined. + */ MRB_API mrb_value mrb_cv_get(mrb_state *mrb, mrb_value mod, mrb_sym sym) { return mrb_mod_cv_get(mrb, mrb_class_ptr(mod), sym); } +/* + * Sets a class variable in a module or class. + * + * This function searches for the class variable in the superclass chain. + * If found, it updates the value in the class where it's defined. + * Otherwise, it sets the variable in the given class `c`. + * + * Args: + * mrb: The mruby state. + * c: The class or module (struct RClass*) in which to set the class variable. + * sym: The symbol representing the name of the class variable. + * v: The value to set for the class variable. + */ MRB_API void mrb_mod_cv_set(mrb_state *mrb, struct RClass *c, mrb_sym sym, mrb_value v) { struct RClass * cls = c; while (c) { - iv_tbl *t = c->iv; + iv_tbl *t = class_iv_ptr(c); int pos = iv_get(mrb, t, sym, NULL); if (pos) { @@ -671,10 +1163,9 @@ mrb_mod_cv_set(mrb_state *mrb, struct RClass *c, mrb_sym sym, mrb_value v) c = c->super; } - if (cls && cls->tt == MRB_TT_SCLASS) { - mrb_value klass; + if (cls->tt == MRB_TT_SCLASS) { + mrb_value klass = mrb_obj_iv_get(mrb, (struct RObject*)cls, MRB_SYM(__attached__)); - klass = mrb_obj_iv_get(mrb, (struct RObject*)cls, MRB_SYM(__attached__)); switch (mrb_type(klass)) { case MRB_TT_CLASS: case MRB_TT_MODULE: @@ -686,7 +1177,10 @@ mrb_mod_cv_set(mrb_state *mrb, struct RClass *c, mrb_sym sym, mrb_value v) break; } } - else{ + else if (cls->tt == MRB_TT_ICLASS) { + c = cls->c; + } + else { c = cls; } @@ -699,17 +1193,39 @@ mrb_mod_cv_set(mrb_state *mrb, struct RClass *c, mrb_sym sym, mrb_value v) mrb_field_write_barrier_value(mrb, (struct RBasic*)c, v); } +/* + * Sets a class variable in a module or class. + * + * This function is a wrapper around mrb_mod_cv_set. + * + * Args: + * mrb: The mruby state. + * mod: The module or class (mrb_value) in which to set the class variable. + * sym: The symbol representing the name of the class variable. + * v: The value to set for the class variable. + */ MRB_API void mrb_cv_set(mrb_state *mrb, mrb_value mod, mrb_sym sym, mrb_value v) { mrb_mod_cv_set(mrb, mrb_class_ptr(mod), sym, v); } +/* + * Checks if a class variable is defined in a module or class or its ancestors. + * + * Args: + * mrb: The mruby state. + * c: The class or module (struct RClass*) to check. + * sym: The symbol representing the name of the class variable. + * + * Returns: + * TRUE if the class variable is defined, FALSE otherwise. + */ mrb_bool mrb_mod_cv_defined(mrb_state *mrb, struct RClass * c, mrb_sym sym) { while (c) { - iv_tbl *t = c->iv; + iv_tbl *t = class_iv_ptr(c); if (iv_get(mrb, t, sym, NULL)) return TRUE; c = c->super; } @@ -717,6 +1233,19 @@ mrb_mod_cv_defined(mrb_state *mrb, struct RClass * c, mrb_sym sym) return FALSE; } +/* + * Checks if a class variable is defined in a module or class or its ancestors. + * + * This function is a wrapper around mrb_mod_cv_defined. + * + * Args: + * mrb: The mruby state. + * mod: The module or class (mrb_value) to check. + * sym: The symbol representing the name of the class variable. + * + * Returns: + * TRUE if the class variable is defined, FALSE otherwise. + */ MRB_API mrb_bool mrb_cv_defined(mrb_state *mrb, mrb_value mod, mrb_sym sym) { @@ -767,33 +1296,73 @@ mod_const_check(mrb_state *mrb, mrb_value mod) } static mrb_value -const_get(mrb_state *mrb, struct RClass *base, mrb_sym sym, mrb_bool skip) +const_get_nohook(mrb_state *mrb, struct RClass *base, mrb_sym sym, mrb_bool skip) { struct RClass *c = base; mrb_value v; mrb_bool retry = FALSE; - mrb_value name; /* if skip then skip the current class (already searched) */ if (skip) c = c->super; L_RETRY: while (c) { - if (!MRB_FLAG_TEST(c, MRB_FL_CLASS_IS_PREPENDED) && c->iv) { - if (iv_get(mrb, c->iv, sym, &v)) - return v; + if (!MRB_FLAG_TEST(c, MRB_FL_CLASS_IS_PREPENDED) && iv_get(mrb, class_iv_ptr(c), sym, &v)) { + return v; } c = c->super; if (!skip && c == mrb->object_class) break; } - if (!retry && base->tt == MRB_TT_MODULE) { + if (!retry && base->tt == MRB_TT_MODULE && skip) { c = mrb->object_class; retry = TRUE; goto L_RETRY; } - name = mrb_symbol_value(sym); - return mrb_funcall_argv(mrb, mrb_obj_value(base), MRB_SYM(const_missing), 1, &name); + return mrb_undef_value(); } +static mrb_value +const_get(mrb_state *mrb, struct RClass *base, mrb_sym sym, mrb_bool skip) +{ + mrb_value v = const_get_nohook(mrb, base, sym, skip); + + /* call const_missing hook */ + if (mrb_undef_p(v)) { + mrb_value mod = mrb_obj_value(base); + if (mrb_func_basic_p(mrb, mod, MRB_SYM(const_missing), mrb_mod_const_missing)) { + return mrb_const_missing(mrb, mod, sym); + } + mrb_value name = mrb_symbol_value(sym); + return mrb_funcall_argv(mrb, mod, MRB_SYM(const_missing), 1, &name); + } + return v; +} + +mrb_value +mrb_exc_const_get(mrb_state *mrb, mrb_sym sym) +{ + return const_get_nohook(mrb, mrb->object_class, sym, FALSE); +} + +/* + * Retrieves a constant from a module or class. + * + * It first checks if `mod` is a class or module, then calls the + * internal `const_get` function to retrieve the constant. + * This function will also call the `const_missing` hook if the + * constant is not found. + * + * Args: + * mrb: The mruby state. + * mod: The module or class (mrb_value) from which to retrieve the constant. + * sym: The symbol representing the name of the constant. + * + * Returns: + * The value of the constant. + * + * Raises: + * E_TYPE_ERROR: If `mod` is not a class or module. + * E_NAME_ERROR: If the constant is not defined and `const_missing` is not defined or also raises an error. + */ MRB_API mrb_value mrb_const_get(mrb_state *mrb, mrb_value mod, mrb_sym sym) { @@ -804,39 +1373,69 @@ mrb_const_get(mrb_state *mrb, mrb_value mod, mrb_sym sym) mrb_value mrb_vm_const_get(mrb_state *mrb, mrb_sym sym) { - struct RClass *c; - struct RClass *c2; - mrb_value v; const struct RProc *proc = mrb->c->ci->proc; + struct RClass *c = MRB_PROC_TARGET_CLASS(proc), *c2; + mrb_value v; - c = MRB_PROC_TARGET_CLASS(proc); if (!c) c = mrb->object_class; - if (iv_get(mrb, c->iv, sym, &v)) { + if (iv_get(mrb, class_iv_ptr(c), sym, &v)) { return v; } - c2 = c; - while (c2 && c2->tt == MRB_TT_SCLASS) { - mrb_value klass; - - if (!iv_get(mrb, c2->iv, MRB_SYM(__attached__), &klass)) { - c2 = NULL; - break; - } - c2 = mrb_class_ptr(klass); - } - if (c2 && (c2->tt == MRB_TT_CLASS || c2->tt == MRB_TT_MODULE)) c = c2; - proc = proc->upper; - while (proc) { + for (proc = proc->upper; proc; proc = proc->upper) { c2 = MRB_PROC_TARGET_CLASS(proc); if (!c2) c2 = mrb->object_class; - if (c2 && iv_get(mrb, c2->iv, sym, &v)) { + if (iv_get(mrb, class_iv_ptr(c2), sym, &v)) { + return v; + } + } + if (c->tt == MRB_TT_SCLASS) { + v = const_get_nohook(mrb, c, sym, TRUE); + if (!mrb_undef_p(v)) { return v; } - proc = proc->upper; + + mrb_value klass; + for (c2 = c; c2 && c2->tt == MRB_TT_SCLASS; c2 = mrb_class_ptr(klass)) { + if (!iv_get(mrb, class_iv_ptr(c2), MRB_SYM(__attached__), &klass)) { + c2 = NULL; + break; + } + } + if (c2 && (c2->tt == MRB_TT_CLASS || c2->tt == MRB_TT_MODULE)) c = c2; } return const_get(mrb, c, sym, TRUE); } +/* + * Sets a constant in a module or class. + * + * It first checks if `mod` is a class or module. + * If the value `v` being set is a class or module, it calls + * `mrb_class_name_class` to set up the class/module name. + * Then, it sets the constant using `mrb_obj_iv_set` and calls + * the `const_added` hook. + * + * Args: + * mrb: The mruby state. + * mod: The module or class (mrb_value) in which to set the constant. + * sym: The symbol representing the name of the constant. + * v: The value to set for the constant. + * + * Raises: + * E_TYPE_ERROR: If `mod` is not a class or module. + */ +#ifndef MRB_NO_CONST_CACHE +void +mrb_const_cache_clear(mrb_state *mrb) +{ + struct mrb_const_cache_entry *cc = mrb->const_cache; + + for (int i=0; iirep = NULL; + } +} +#endif + MRB_API void mrb_const_set(mrb_state *mrb, mrb_value mod, mrb_sym sym, mrb_value v) { @@ -844,38 +1443,84 @@ mrb_const_set(mrb_state *mrb, mrb_value mod, mrb_sym sym, mrb_value v) if (mrb_type(v) == MRB_TT_CLASS || mrb_type(v) == MRB_TT_MODULE) { mrb_class_name_class(mrb, mrb_class_ptr(mod), mrb_class_ptr(v), sym); } - mrb_iv_set(mrb, mod, sym, v); -} + mrb_obj_iv_set(mrb, mrb_obj_ptr(mod), sym, v); + mrb_const_cache_clear(mrb); -void -mrb_vm_const_set(mrb_state *mrb, mrb_sym sym, mrb_value v) -{ - struct RClass *c; - - c = MRB_PROC_TARGET_CLASS(mrb->c->ci->proc); - if (!c) c = mrb->object_class; - mrb_obj_iv_set(mrb, (struct RObject*)c, sym, v); + if (!mrb->bootstrapping) { + mrb_value name = mrb_symbol_value(sym); + mrb_funcall_argv(mrb, mod, MRB_SYM(const_added), 1, &name); + } } +/* + * Removes a constant from a module or class. + * + * It first checks if `mod` is a class or module, then removes the + * constant using `mrb_iv_remove`. + * + * Args: + * mrb: The mruby state. + * mod: The module or class (mrb_value) from which to remove the constant. + * sym: The symbol representing the name of the constant. + * + * Raises: + * E_TYPE_ERROR: If `mod` is not a class or module. + */ MRB_API void mrb_const_remove(mrb_state *mrb, mrb_value mod, mrb_sym sym) { mod_const_check(mrb, mod); mrb_iv_remove(mrb, mod, sym); + mrb_const_cache_clear(mrb); } +/* + * Defines a constant in a module or class using a symbol for the name. + * + * This is a direct way to set a constant without triggering `const_added` hook. + * + * Args: + * mrb: The mruby state. + * mod: The module or class (struct RClass*) in which to define the constant. + * name: The symbol representing the name of the constant. + * v: The value to set for the constant. + */ MRB_API void mrb_define_const_id(mrb_state *mrb, struct RClass *mod, mrb_sym name, mrb_value v) { mrb_obj_iv_set(mrb, (struct RObject*)mod, name, v); + mrb_const_cache_clear(mrb); } +/* + * Defines a constant in a module or class using a C string for the name. + * + * This is a direct way to set a constant without triggering `const_added` hook. + * The C string name is interned into a symbol. + * + * Args: + * mrb: The mruby state. + * mod: The module or class (struct RClass*) in which to define the constant. + * name: The C string representing the name of the constant. + * v: The value to set for the constant. + */ MRB_API void mrb_define_const(mrb_state *mrb, struct RClass *mod, const char *name, mrb_value v) { mrb_obj_iv_set(mrb, (struct RObject*)mod, mrb_intern_cstr(mrb, name), v); } +/* + * Defines a global constant. + * + * Global constants are defined in the `Object` class. + * This function is a convenience wrapper around `mrb_define_const`. + * + * Args: + * mrb: The mruby state. + * name: The C string representing the name of the global constant. + * val: The value to set for the global constant. + */ MRB_API void mrb_define_global_const(mrb_state *mrb, const char *name, mrb_value val) { @@ -885,12 +1530,10 @@ mrb_define_global_const(mrb_state *mrb, const char *name, mrb_value val) static int const_i(mrb_state *mrb, mrb_sym sym, mrb_value v, void *p) { - mrb_value ary; - const char* s; + mrb_value ary = *(mrb_value*)p; mrb_int len; + const char* s = mrb_sym_name_len(mrb, sym, &len); - ary = *(mrb_value*)p; - s = mrb_sym_name_len(mrb, sym, &len); if (len >= 1 && ISUPPER(s[0])) { mrb_int i, alen = RARRAY_LEN(ary); @@ -905,6 +1548,13 @@ const_i(mrb_state *mrb, mrb_sym sym, mrb_value v, void *p) return 0; } +mrb_value +mrb_mod_const_at(mrb_state *mrb, struct RClass *c, mrb_value ary) +{ + iv_foreach(mrb, class_iv_ptr(c), const_i, &ary); + return ary; +} + /* 15.2.2.4.24 */ /* * call-seq: @@ -915,14 +1565,13 @@ const_i(mrb_state *mrb, mrb_sym sym, mrb_value v, void *p) mrb_value mrb_mod_constants(mrb_state *mrb, mrb_value mod) { - mrb_value ary; mrb_bool inherit = TRUE; struct RClass *c = mrb_class_ptr(mod); mrb_get_args(mrb, "|b", &inherit); - ary = mrb_ary_new(mrb); + mrb_value ary = mrb_ary_new(mrb); while (c) { - iv_foreach(mrb, c->iv, const_i, &ary); + mrb_mod_const_at(mrb, c, ary); if (!inherit) break; c = c->super; if (c == mrb->object_class) break; @@ -930,6 +1579,17 @@ mrb_mod_constants(mrb_state *mrb, mrb_value mod) return ary; } +/* + * Retrieves a global variable. + * + * Args: + * mrb: The mruby state. + * sym: The symbol representing the name of the global variable. + * + * Returns: + * The value of the global variable, or mrb_nil_value() if the + * global variable is not defined. + */ MRB_API mrb_value mrb_gv_get(mrb_state *mrb, mrb_sym sym) { @@ -940,6 +1600,16 @@ mrb_gv_get(mrb_state *mrb, mrb_sym sym) return mrb_nil_value(); } +/* + * Sets a global variable. + * + * If the global variable table (`mrb->globals`) does not exist, it is created. + * + * Args: + * mrb: The mruby state. + * sym: The symbol representing the name of the global variable. + * v: The value to set for the global variable. + */ MRB_API void mrb_gv_set(mrb_state *mrb, mrb_sym sym, mrb_value v) { @@ -952,6 +1622,13 @@ mrb_gv_set(mrb_state *mrb, mrb_sym sym, mrb_value v) iv_put(mrb, t, sym, v); } +/* + * Removes a global variable. + * + * Args: + * mrb: The mruby state. + * sym: The symbol representing the name of the global variable to remove. + */ MRB_API void mrb_gv_remove(mrb_state *mrb, mrb_sym sym) { @@ -961,9 +1638,7 @@ mrb_gv_remove(mrb_state *mrb, mrb_sym sym) static int gv_i(mrb_state *mrb, mrb_sym sym, mrb_value v, void *p) { - mrb_value ary; - - ary = *(mrb_value*)p; + mrb_value ary = *(mrb_value*)p; mrb_ary_push(mrb, ary, mrb_symbol_value(sym)); return 0; } @@ -998,7 +1673,7 @@ const_defined_0(mrb_state *mrb, mrb_value mod, mrb_sym id, mrb_bool exclude, mrb tmp = klass; retry: while (tmp) { - if (iv_get(mrb, tmp->iv, id, NULL)) { + if (iv_get(mrb, class_iv_ptr(tmp), id, NULL)) { return TRUE; } if (!recurse && (klass != mrb->object_class)) break; @@ -1012,18 +1687,59 @@ const_defined_0(mrb_state *mrb, mrb_value mod, mrb_sym id, mrb_bool exclude, mrb return FALSE; } +/* + * Checks if a constant is defined in a module or class or its ancestors. + * + * This function calls `const_defined_0` with `recurse = TRUE`, meaning + * it will search the superclass chain. + * + * Args: + * mrb: The mruby state. + * mod: The module or class (mrb_value) to check. + * id: The symbol representing the name of the constant. + * + * Returns: + * TRUE if the constant is defined, FALSE otherwise. + */ MRB_API mrb_bool mrb_const_defined(mrb_state *mrb, mrb_value mod, mrb_sym id) { return const_defined_0(mrb, mod, id, TRUE, TRUE); } +/* + * Checks if a constant is defined directly in a module or class. + * + * This function calls `const_defined_0` with `recurse = FALSE`, meaning + * it will only search the given module/class, not its ancestors. + * + * Args: + * mrb: The mruby state. + * mod: The module or class (mrb_value) to check. + * id: The symbol representing the name of the constant. + * + * Returns: + * TRUE if the constant is defined directly in the module/class, FALSE otherwise. + */ MRB_API mrb_bool mrb_const_defined_at(mrb_state *mrb, mrb_value mod, mrb_sym id) { return const_defined_0(mrb, mod, id, TRUE, FALSE); } +/* + * Retrieves an attribute (instance variable) from an object. + * + * This function is a simple wrapper around `mrb_iv_get`. + * + * Args: + * mrb: The mruby state. + * obj: The object (mrb_value) from which to retrieve the attribute. + * id: The symbol representing the name of the attribute (instance variable). + * + * Returns: + * The value of the attribute, or mrb_nil_value() if not defined. + */ MRB_API mrb_value mrb_attr_get(mrb_state *mrb, mrb_value obj, mrb_sym id) { @@ -1057,16 +1773,14 @@ find_class_sym(mrb_state *mrb, struct RClass *outer, struct RClass *c) if (outer == c) return 0; arg.c = c; arg.sym = 0; - iv_foreach(mrb, outer->iv, csym_i, &arg); + iv_foreach(mrb, class_iv_ptr(outer), csym_i, &arg); return arg.sym; } -static struct RClass* -outer_class(mrb_state *mrb, struct RClass *c) +MRB_API struct RClass* +mrb_class_outer(mrb_state *mrb, struct RClass *c) { - mrb_value ov; - - ov = mrb_obj_iv_get(mrb, (struct RObject*)c, MRB_SYM(__outer__)); + mrb_value ov = mrb_obj_iv_get(mrb, (struct RObject*)c, MRB_SYM(__outer__)); if (mrb_nil_p(ov)) return NULL; switch (mrb_type(ov)) { case MRB_TT_CLASS: @@ -1086,10 +1800,10 @@ detect_outer_loop(mrb_state *mrb, struct RClass *c) for (;;) { if (h == NULL) return FALSE; - h = outer_class(mrb, h); + h = mrb_class_outer(mrb, h); if (h == NULL) return FALSE; - h = outer_class(mrb, h); - t = outer_class(mrb, t); + h = mrb_class_outer(mrb, h); + t = mrb_class_outer(mrb, t); if (t == h) return TRUE; } } @@ -1097,23 +1811,20 @@ detect_outer_loop(mrb_state *mrb, struct RClass *c) mrb_value mrb_class_find_path(mrb_state *mrb, struct RClass *c) { - struct RClass *outer; - mrb_value path; - mrb_sym name; - const char *str; - mrb_int len; - if (detect_outer_loop(mrb, c)) return mrb_nil_value(); - outer = outer_class(mrb, c); + struct RClass *outer = mrb_class_outer(mrb, c); if (outer == NULL) return mrb_nil_value(); - name = find_class_sym(mrb, outer, c); + + mrb_sym name = find_class_sym(mrb, outer, c); if (name == 0) return mrb_nil_value(); - path = mrb_str_new_capa(mrb, 40); - str = mrb_class_name(mrb, outer); - mrb_str_cat_cstr(mrb, path, str); + + mrb_value path = mrb_str_new_capa(mrb, 40); + const char *cname = mrb_class_name(mrb, outer); + mrb_str_cat_cstr(mrb, path, cname); mrb_str_cat_cstr(mrb, path, "::"); - str = mrb_sym_name_len(mrb, name, &len); + mrb_int len; + const char *str = mrb_sym_name_len(mrb, name, &len); mrb_str_cat(mrb, path, str, len); if (RSTRING_PTR(path)[0] != '#') { iv_del(mrb, c->iv, MRB_SYM(__outer__), NULL); @@ -1127,7 +1838,14 @@ mrb_class_find_path(mrb_state *mrb, struct RClass *c) size_t mrb_obj_iv_tbl_memsize(mrb_value obj) { - iv_tbl *t = mrb_obj_ptr(obj)->iv; + struct RObject *o = mrb_obj_ptr(obj); + if (MRB_OBJ_SHAPED_P(o)) { + mrb_shaped_iv *siv = (mrb_shaped_iv*)o->iv; + if (!siv) return 0; + return offsetof(mrb_shaped_iv, values) + + sizeof(mrb_value) * siv->shape->count; + } + iv_tbl *t = o->iv; if (t == NULL) return 0; return sizeof(iv_tbl) + t->alloc*(sizeof(mrb_value)+sizeof(mrb_sym)); } @@ -1137,9 +1855,7 @@ mrb_obj_iv_tbl_memsize(mrb_value obj) mrb_bool mrb_ident_p(const char *s, mrb_int len) { - mrb_int i; - - for (i = 0; i < len; i++) { + for (mrb_int i = 0; i < len; i++) { if (!identchar(s[i])) return FALSE; } return TRUE; diff --git a/src/vm.c b/src/vm.c index 4b81878a62..0a84798b0d 100644 --- a/src/vm.c +++ b/src/vm.c @@ -20,7 +20,6 @@ #include #include #include -#include #ifdef MRB_NO_STDIO #if defined(__cplusplus) @@ -28,16 +27,10 @@ extern "C" { #endif void abort(void); #if defined(__cplusplus) -} /* extern "C" { */ +} /* extern "C" */ #endif #endif -#if defined(MRB_USE_CXX_EXCEPTION) && defined(__cplusplus) -# if !defined(MRB_USE_CXX_ABI) -extern "C" { -# endif -#endif - #define STACK_INIT_SIZE 128 #define CALLINFO_INIT_SIZE 32 @@ -47,9 +40,19 @@ extern "C" { #endif /* Maximum recursive depth. Should be set lower on memory constrained systems. */ +#ifdef __clang__ +#if __has_feature(address_sanitizer) && !defined(__SANITIZE_ADDRESS__) +#define __SANITIZE_ADDRESS__ +#endif +#endif + #ifndef MRB_CALL_LEVEL_MAX +#if defined(__SANITIZE_ADDRESS__) +#define MRB_CALL_LEVEL_MAX 128 +#else #define MRB_CALL_LEVEL_MAX 512 #endif +#endif /* Maximum stack depth. Should be set lower on memory constrained systems. The value below allows about 60000 recursive calls in the simplest case. */ @@ -71,7 +74,7 @@ mrb_gc_arena_shrink(mrb_state *mrb, int idx) mrb_gc *gc = &mrb->gc; int capa = gc->arena_capa; - mrb->gc.arena_idx = idx; + gc->arena_idx = idx; if (idx < capa / 4) { capa >>= 2; if (capa < MRB_GC_ARENA_SIZE) { @@ -84,7 +87,7 @@ mrb_gc_arena_shrink(mrb_state *mrb, int idx) } } #else -#define mrb_gc_arena_shrink(mrb,idx) mrb_gc_arena_restore(mrb,idx) +#define mrb_gc_arena_shrink(mrb, idx) mrb_gc_arena_restore(mrb, idx) #endif #define CALL_MAXARGS 15 @@ -112,57 +115,58 @@ stack_init(mrb_state *mrb) struct mrb_context *c = mrb->c; /* mrb_assert(mrb->stack == NULL); */ - c->stbase = (mrb_value *)mrb_calloc(mrb, STACK_INIT_SIZE, sizeof(mrb_value)); + c->stbase = (mrb_value*)mrb_malloc(mrb, STACK_INIT_SIZE * sizeof(mrb_value)); c->stend = c->stbase + STACK_INIT_SIZE; + stack_clear(c->stbase, STACK_INIT_SIZE); /* mrb_assert(ci == NULL); */ - c->cibase = (mrb_callinfo *)mrb_calloc(mrb, CALLINFO_INIT_SIZE, sizeof(mrb_callinfo)); + static const mrb_callinfo ci_zero = { 0 }; + c->cibase = (mrb_callinfo*)mrb_malloc(mrb, CALLINFO_INIT_SIZE * sizeof(mrb_callinfo)); c->ciend = c->cibase + CALLINFO_INIT_SIZE; + c->cibase[0] = ci_zero; c->ci = c->cibase; c->ci->u.target_class = mrb->object_class; c->ci->stack = c->stbase; + c->ci->vis = 1; /* private (2-bit packed) */ } static inline void -envadjust(mrb_state *mrb, mrb_value *oldbase, mrb_value *newbase, size_t oldsize) +envadjust(mrb_state *mrb, mrb_value *oldbase, mrb_value *newbase) { mrb_callinfo *ci = mrb->c->cibase; - - if (newbase == oldbase) return; + /* + * Byte-level calculation to avoid truncation when allocator alignment is + * smaller than sizeof(mrb_value). + * eg: MRB_NO_BOXING + MRB_INT64 with MRB_32BIT => sizeof(mrb_value)=16 + * And when memory allocator's alignment is 8 bytes + * Pointer subtraction on mrb_value* would truncate (8/16 -> 0). + * So, we use char* for pointer calculation to get the correct offset in bytes, + * then apply that offset to mrb_value* pointers. + */ + ptrdiff_t off = (char *)newbase - (char *)oldbase; + + if (off == 0) return; while (ci <= mrb->c->ci) { struct REnv *e = mrb_vm_ci_env(ci); - mrb_value *st; - - if (e && MRB_ENV_ONSTACK_P(e) && - (st = e->stack) && oldbase <= st && st < oldbase+oldsize) { - ptrdiff_t off = e->stack - oldbase; - - e->stack = newbase + off; - } - if (ci->proc && MRB_PROC_ENV_P(ci->proc) && e != MRB_PROC_ENV(ci->proc)) { - e = MRB_PROC_ENV(ci->proc); + mrb_value *new_stack = (mrb_value *)((char *)ci->stack + off); - if (e && MRB_ENV_ONSTACK_P(e) && - (st = e->stack) && oldbase <= st && st < oldbase+oldsize) { - ptrdiff_t off = e->stack - oldbase; - - e->stack = newbase + off; - } + if (e) { + mrb_assert(e->cxt == mrb->c && MRB_ENV_ONSTACK_P(e)); + mrb_assert(e->stack == ci->stack); + e->stack = new_stack; } - - ci->stack = newbase + (ci->stack - oldbase); + ci->stack = new_stack; ci++; } } -/** def rec ; $deep =+ 1 ; if $deep > 1000 ; return 0 ; end ; rec ; end */ +/** def rec; $deep =+ 1; if $deep > 1000; return 0; end; rec; end **/ static void stack_extend_alloc(mrb_state *mrb, mrb_int room) { mrb_value *oldbase = mrb->c->stbase; - mrb_value *newstack; size_t oldsize = mrb->c->stend - mrb->c->stbase; size_t size = oldsize; size_t off = mrb->c->ci->stack ? mrb->c->stend - mrb->c->ci->stack : 0; @@ -174,21 +178,22 @@ stack_extend_alloc(mrb_state *mrb, mrb_int room) else size += room; #else - /* Use linear stack growth. + /* Use 1.5x stack growth. It is slightly slower than doubling the stack space, but it saves memory on small devices. */ - if (room <= MRB_STACK_GROWTH) - size += MRB_STACK_GROWTH; - else - size += room; + { + size_t newsize = size + (size >> 1); /* 1.5x growth */ + if (newsize < size + MRB_STACK_GROWTH) + newsize = size + MRB_STACK_GROWTH; + if (newsize < size + (size_t)room) + newsize = size + room; + size = newsize; + } #endif - newstack = (mrb_value *)mrb_realloc_simple(mrb, mrb->c->stbase, sizeof(mrb_value) * size); - if (newstack == NULL) { - mrb_exc_raise(mrb, mrb_obj_value(mrb->stack_err)); - } + mrb_value *newstack = (mrb_value*)mrb_realloc(mrb, mrb->c->stbase, sizeof(mrb_value) * size); stack_clear(&(newstack[oldsize]), size - oldsize); - envadjust(mrb, oldbase, newstack, oldsize); + envadjust(mrb, oldbase, newstack); mrb->c->stbase = newstack; mrb->c->stend = mrb->c->stbase + size; @@ -199,14 +204,30 @@ stack_extend_alloc(mrb_state *mrb, mrb_int room) } } -MRB_API void -mrb_stack_extend(mrb_state *mrb, mrb_int room) +static inline void +stack_extend(mrb_state *mrb, mrb_int room) { - if (!mrb->c->ci->stack || mrb->c->ci->stack + room >= mrb->c->stend) { + if (mrb_unlikely(!mrb->c->ci->stack || mrb->c->ci->stack + room >= mrb->c->stend)) { stack_extend_alloc(mrb, room); } } +/** + * @brief Extends the VM stack. + * + * This function extends the virtual machine stack to accommodate more values. + * If the current stack size is insufficient, it reallocates the stack + * with a larger size. + * + * @param mrb The mruby state. + * @param room The additional number of mrb_value slots required. + */ +MRB_API void +mrb_stack_extend(mrb_state *mrb, mrb_int room) +{ + stack_extend(mrb, room); +} + static void stack_extend_adjust(mrb_state *mrb, mrb_int room, const mrb_value **argp) { @@ -214,10 +235,10 @@ stack_extend_adjust(mrb_state *mrb, mrb_int room, const mrb_value **argp) ptrdiff_t voff = *argp - c->stbase; if (voff < 0 || voff >= c->stend - c->stbase) { - mrb_stack_extend(mrb, room); + stack_extend(mrb, room); } else { - mrb_stack_extend(mrb, room); + stack_extend(mrb, room); *argp = c->stbase + voff; } } @@ -226,34 +247,23 @@ static inline struct REnv* uvenv(mrb_state *mrb, mrb_int up) { const struct RProc *proc = mrb->c->ci->proc; - struct REnv *e; while (up--) { proc = proc->upper; if (!proc) return NULL; } - e = MRB_PROC_ENV(proc); + struct REnv *e = MRB_PROC_ENV(proc); if (e) return e; /* proc has enclosed env */ - else { - mrb_callinfo *ci = mrb->c->ci; - mrb_callinfo *cb = mrb->c->cibase; - - while (cb <= ci) { - if (ci->proc == proc) { - return mrb_vm_ci_env(ci); - } - ci--; - } - } return NULL; } static inline const struct RProc* -top_proc(mrb_state *mrb, const struct RProc *proc) +top_proc(mrb_state *mrb, const struct RProc *proc, const struct REnv **envp) { while (proc->upper) { if (MRB_PROC_SCOPE_P(proc) || MRB_PROC_STRICT_P(proc)) return proc; + *envp = proc->e.env; proc = proc->upper; } return proc; @@ -261,7 +271,13 @@ top_proc(mrb_state *mrb, const struct RProc *proc) #define CI_PROC_SET(ci, p) do {\ ci->proc = p;\ - ci->pc = (p && !MRB_PROC_CFUNC_P(p)) ? p->body.irep->iseq : NULL;\ + if (p) {\ + mrb_assert(!MRB_PROC_ALIAS_P(p));\ + ci->pc = (!MRB_PROC_CFUNC_P(p) && p->body.irep) ? p->body.irep->iseq : NULL;\ + }\ + else {\ + ci->pc = NULL;\ + }\ } while (0) void @@ -270,6 +286,13 @@ mrb_vm_ci_proc_set(mrb_callinfo *ci, const struct RProc *p) CI_PROC_SET(ci, p); } +#define MRB_PROC_RESOLVE_ALIAS(ci, p) do {\ + if (MRB_PROC_ALIAS_P(p)) {\ + (ci)->mid = (p)->body.mid;\ + (p) = (p)->upper;\ + }\ +} while (0) + #define CI_TARGET_CLASS(ci) (((ci)->u.env && (ci)->u.env->tt == MRB_TT_ENV)? (ci)->u.env->c : (ci)->u.target_class) struct RClass* @@ -298,8 +321,8 @@ mrb_vm_ci_env(const mrb_callinfo *ci) return CI_ENV(ci); } -void -mrb_vm_ci_env_set(mrb_callinfo *ci, struct REnv *e) +static inline void +ci_env_set(mrb_callinfo *ci, struct REnv *e) { if (ci->u.env) { if (ci->u.env->tt == MRB_TT_ENV) { @@ -311,11 +334,9 @@ mrb_vm_ci_env_set(mrb_callinfo *ci, struct REnv *e) ci->u.target_class = ci->u.env->c; } } - else { - if (e) { - e->c = ci->u.target_class; - ci->u.env = e; - } + else if (e) { + e->c = ci->u.target_class; + ci->u.env = e; } } else { @@ -323,10 +344,26 @@ mrb_vm_ci_env_set(mrb_callinfo *ci, struct REnv *e) } } -#define CINFO_NONE 0 -#define CINFO_SKIP 1 -#define CINFO_DIRECT 2 -#define CINFO_RESUMED 3 +void +mrb_vm_ci_env_set(mrb_callinfo *ci, struct REnv *e) +{ + ci_env_set(ci, e); +} + +MRB_API void +mrb_vm_ci_env_clear(mrb_state *mrb, mrb_callinfo *ci) +{ + struct REnv *e = ci->u.env; + if (e && e->tt == MRB_TT_ENV) { + ci->u.target_class = e->c; + mrb_env_unshare(mrb, e, FALSE); + } +} + +#define CINFO_NONE 0 // called method from mruby VM (without C functions) +#define CINFO_SKIP 1 // ignited mruby VM from C +#define CINFO_DIRECT 2 // called method from C +#define CINFO_RESUMED 3 // resumed by `Fiber.yield` (probably the main call is `mrb_fiber_resume()`) #define BLK_PTR(b) ((mrb_proc_p(b)) ? mrb_proc_ptr(b) : NULL) @@ -335,19 +372,21 @@ cipush(mrb_state *mrb, mrb_int push_stacks, uint8_t cci, struct RClass *target_c const struct RProc *proc, struct RProc *blk, mrb_sym mid, uint16_t argc) { struct mrb_context *c = mrb->c; - mrb_callinfo *ci = c->ci; + mrb_callinfo *ci = c->ci + 1; - if (ci + 1 == c->ciend) { + if (ci < c->ciend) { + c->ci = ci; + } + else { ptrdiff_t size = ci - c->cibase; - if (size > MRB_CALL_LEVEL_MAX) { + if (size >= MRB_CALL_LEVEL_MAX) { mrb_exc_raise(mrb, mrb_obj_value(mrb->stack_err)); } - c->cibase = (mrb_callinfo *)mrb_realloc(mrb, c->cibase, sizeof(mrb_callinfo)*size*2); - c->ci = c->cibase + size; + c->cibase = (mrb_callinfo*)mrb_realloc(mrb, c->cibase, sizeof(mrb_callinfo)*size*2); + c->ci = ci = c->cibase + size; c->ciend = c->cibase + size * 2; } - ci = ++c->ci; ci->mid = mid; CI_PROC_SET(ci, proc); ci->blk = blk; @@ -355,18 +394,67 @@ cipush(mrb_state *mrb, mrb_int push_stacks, uint8_t cci, struct RClass *target_c ci->n = argc & 0xf; ci->nk = (argc>>4) & 0xf; ci->cci = cci; + ci->vis = MRB_METHOD_PUBLIC_FL; ci->u.target_class = target_class; return ci; } +static void +fiber_terminate(mrb_state *mrb, struct mrb_context *c, mrb_callinfo *ci) +{ + mrb_assert(c != mrb->root_c); + + struct REnv *env = CI_ENV(ci); + mrb_assert(env == NULL || MRB_ENV_LEN(env) <= c->stend - ci->stack); + + c->status = MRB_FIBER_TERMINATED; + mrb_free(mrb, c->cibase); + c->cibase = c->ciend = c->ci = NULL; + mrb_value *stack = c->stbase; + c->stbase = c->stend = NULL; + + if (!env) { + mrb_free(mrb, stack); + } + else { + size_t len = (size_t)MRB_ENV_LEN(env); + if (len == 0) { + env->stack = NULL; + MRB_ENV_CLOSE(env); + mrb_free(mrb, stack); + } + else { + mrb_assert(stack == env->stack); + mrb_write_barrier(mrb, (struct RBasic*)env); + + // don't call MRB_ENV_CLOSE() before mrb_realloc(). + // the reason is that env->stack may be freed by mrb_realloc() if MRB_DEBUG + MRB_GC_STRESS are enabled. + // realloc() on a freed heap will cause double-free. + + stack = (mrb_value*)mrb_realloc(mrb, stack, len * sizeof(mrb_value)); + if (mrb_object_dead_p(mrb, (struct RBasic*)env)) { + mrb_free(mrb, stack); + } + else { + env->stack = stack; + MRB_ENV_CLOSE(env); + } + } + } + + /* fiber termination should automatic yield or transfer to root */ + mrb->c = c->prev; + if (!mrb->c) mrb->c = mrb->root_c; + else c->prev = NULL; + mrb->c->status = MRB_FIBER_RUNNING; +} + mrb_bool mrb_env_unshare(mrb_state *mrb, struct REnv *e, mrb_bool noraise) { - if (e == NULL) return TRUE; - if (!MRB_ENV_ONSTACK_P(e)) return TRUE; - if (e->cxt != mrb->c) return TRUE; - if (e == CI_ENV(mrb->c->cibase)) return TRUE; /* for mirb */ + mrb_assert(e != NULL); + mrb_assert(MRB_ENV_ONSTACK_P(e)); size_t len = (size_t)MRB_ENV_LEN(e); if (len == 0) { @@ -376,8 +464,8 @@ mrb_env_unshare(mrb_state *mrb, struct REnv *e, mrb_bool noraise) } size_t live = mrb->gc.live; - mrb_value *p = (mrb_value *)mrb_malloc_simple(mrb, sizeof(mrb_value)*len); - if (live != mrb->gc.live && mrb_object_dead_p(mrb, (struct RBasic *)e)) { + mrb_value *p = (mrb_value*)mrb_malloc_simple(mrb, sizeof(mrb_value)*len); + if (live != mrb->gc.live && mrb_object_dead_p(mrb, (struct RBasic*)e)) { // The e object is now subject to GC inside mrb_malloc_simple(). // Moreover, if NULL is returned due to mrb_malloc_simple() failure, simply ignore it. mrb_free(mrb, p); @@ -387,7 +475,7 @@ mrb_env_unshare(mrb_state *mrb, struct REnv *e, mrb_bool noraise) stack_copy(p, e->stack, len); e->stack = p; MRB_ENV_CLOSE(e); - mrb_write_barrier(mrb, (struct RBasic *)e); + mrb_write_barrier(mrb, (struct RBasic*)e); return TRUE; } else { @@ -406,11 +494,18 @@ static inline mrb_callinfo* cipop(mrb_state *mrb) { struct mrb_context *c = mrb->c; - struct REnv *env = CI_ENV(c->ci); + mrb_callinfo *ci = c->ci; + + /* Fast path: no env and no blk (most common for simple method calls) */ + if (mrb_likely((!ci->u.env || ci->u.env->tt != MRB_TT_ENV) && !ci->blk)) { + c->ci--; + return c->ci; + } - mrb_vm_ci_env_set(c->ci, NULL); // make possible to free by GC if env is not needed - struct RProc *b = c->ci->blk; - if (b && b->tt == MRB_TT_PROC && !MRB_PROC_STRICT_P(b) && MRB_PROC_ENV(b) == CI_ENV(&c->ci[-1])) { + struct REnv *env = CI_ENV(ci); + ci_env_set(ci, NULL); // make possible to free env by GC if not needed + struct RProc *b = ci->blk; + if (b && !MRB_PROC_STRICT_P(b) && MRB_PROC_ENV(b) == CI_ENV(&ci[-1])) { b->flags |= MRB_PROC_ORPHAN; } if (env && !mrb_env_unshare(mrb, env, TRUE)) { @@ -421,12 +516,32 @@ cipop(mrb_state *mrb) return c->ci; } +/** + * @brief Protects a C function call from mruby exceptions. + * + * This function executes a C function (`body`) within a protected environment. + * If an mruby exception occurs during the execution of `body`, this function + * catches the exception, sets the `error` flag, and returns the exception object. + * Otherwise, it returns the result of the `body` function and `error` remains FALSE. + * + * This is crucial for calling mruby-related C functions from within C code + * that needs to handle potential mruby exceptions gracefully. + * + * @param mrb The mruby state. + * @param body A pointer to the C function to be executed. + * The function should have the signature: `mrb_value func(mrb_state *mrb, void *userdata)` + * @param userdata A pointer to arbitrary data that will be passed to the `body` function. + * @param error A pointer to an mrb_bool that will be set to TRUE if an exception + * occurred, and FALSE otherwise. Can be NULL if not needed. + * @return The value returned by the `body` function if no exception occurred, + * or the exception object if an exception occurred. + */ MRB_API mrb_value mrb_protect_error(mrb_state *mrb, mrb_protect_error_func *body, void *userdata, mrb_bool *error) { struct mrb_jmpbuf *prev_jmp = mrb->jmp; struct mrb_jmpbuf c_jmp; - mrb_value result = mrb_nil_value(); + mrb_value result; int ai = mrb_gc_arena_save(mrb); const struct mrb_context *c = mrb->c; ptrdiff_t ci_index = c->ci - c->cibase; @@ -471,39 +586,69 @@ static mrb_value mrb_run(mrb_state *mrb, const struct RProc* proc, mrb_value sel #define MRB_FUNCALL_ARGC_MAX 16 #endif +/** + * @brief Calls a method on an object. + * + * This function invokes a method identified by its name on the `self` object, + * passing the given arguments. + * + * @param mrb The mruby state. + * @param self The receiver object of the method call. + * @param name The name of the method to call (C string). + * @param argc The number of arguments to pass to the method. + * @param ... The variable arguments to pass to the method. + * Each argument must be of type `mrb_value`. + * @return The result of the method call. + * @raise E_ARGUMENT_ERROR if `argc` is greater than `MRB_FUNCALL_ARGC_MAX`. + */ MRB_API mrb_value mrb_funcall(mrb_state *mrb, mrb_value self, const char *name, mrb_int argc, ...) { mrb_value argv[MRB_FUNCALL_ARGC_MAX]; - va_list ap; - mrb_int i; mrb_sym mid = mrb_intern_cstr(mrb, name); if (argc > MRB_FUNCALL_ARGC_MAX) { mrb_raise(mrb, E_ARGUMENT_ERROR, "Too long arguments. (limit=" MRB_STRINGIZE(MRB_FUNCALL_ARGC_MAX) ")"); } + va_list ap; va_start(ap, argc); - for (i = 0; i < argc; i++) { + for (mrb_int i = 0; i < argc; i++) { argv[i] = va_arg(ap, mrb_value); } va_end(ap); return mrb_funcall_argv(mrb, self, mid, argc, argv); } +/** + * @brief Calls a method on an object using a method ID. + * + * This function invokes a method identified by its symbol ID (`mid`) on + * the `self` object, passing the given arguments. Using a method ID + * can be more efficient than using a string name if the method is called + * frequently, as it avoids repeated string-to-symbol lookups. + * + * @param mrb The mruby state. + * @param self The receiver object of the method call. + * @param mid The symbol ID of the method to call. + * @param argc The number of arguments to pass to the method. + * @param ... The variable arguments to pass to the method. + * Each argument must be of type `mrb_value`. + * @return The result of the method call. + * @raise E_ARGUMENT_ERROR if `argc` is greater than `MRB_FUNCALL_ARGC_MAX`. + */ MRB_API mrb_value mrb_funcall_id(mrb_state *mrb, mrb_value self, mrb_sym mid, mrb_int argc, ...) { mrb_value argv[MRB_FUNCALL_ARGC_MAX]; - va_list ap; - mrb_int i; if (argc > MRB_FUNCALL_ARGC_MAX) { mrb_raise(mrb, E_ARGUMENT_ERROR, "Too long arguments. (limit=" MRB_STRINGIZE(MRB_FUNCALL_ARGC_MAX) ")"); } + va_list ap; va_start(ap, argc); - for (i = 0; i < argc; i++) { + for (mrb_int i = 0; i < argc; i++) { argv[i] = va_arg(ap, mrb_value); } va_end(ap); @@ -526,20 +671,24 @@ mrb_bidx(uint8_t n, uint8_t k) return n + 1; /* self + args + kargs */ } +static inline mrb_int +ci_bidx(mrb_callinfo *ci) +{ + return mrb_bidx(ci->n, ci->nk); +} + mrb_int mrb_ci_bidx(mrb_callinfo *ci) { - return mrb_bidx(ci->n, ci->nk); + return ci_bidx(ci); } mrb_int mrb_ci_nregs(mrb_callinfo *ci) { - const struct RProc *p; - if (!ci) return 4; - mrb_int nregs = mrb_ci_bidx(ci) + 1; /* self + args + kargs + blk */ - p = ci->proc; + mrb_int nregs = ci_bidx(ci) + 1; /* self + args + kargs + blk */ + const struct RProc *p = ci->proc; if (p && !MRB_PROC_CFUNC_P(p) && p->body.irep && p->body.irep->nregs > nregs) { return p->body.irep->nregs; } @@ -549,46 +698,46 @@ mrb_ci_nregs(mrb_callinfo *ci) mrb_value mrb_obj_missing(mrb_state *mrb, mrb_value mod); static mrb_method_t -prepare_missing(mrb_state *mrb, mrb_value recv, mrb_sym mid, struct RClass **clsp, uint32_t a, uint16_t *c, mrb_value blk, int super) +prepare_missing(mrb_state *mrb, mrb_callinfo *ci, mrb_value recv, mrb_sym mid, mrb_value blk, mrb_bool super) { mrb_sym missing = MRB_SYM(method_missing); - mrb_callinfo *ci = mrb->c->ci; - uint16_t b = *c; - mrb_int n = b & 0xf; - mrb_int nk = (b>>4) & 0xf; - mrb_value *argv = &ci->stack[a+1]; + mrb_value *argv = &ci->stack[1]; mrb_value args; mrb_method_t m; /* pack positional arguments */ - if (n == 15) args = argv[0]; - else args = mrb_ary_new_from_values(mrb, n, argv); + if (ci->n == 15) args = argv[0]; + else args = mrb_ary_new_from_values(mrb, ci->n, argv); if (mrb_func_basic_p(mrb, recv, missing, mrb_obj_missing)) { method_missing: - if (super) mrb_no_method_error(mrb, mid, args, "no superclass method '%n'", mid); + if (super) mrb_no_method_error(mrb, mid, args, "no superclass method '%n' for %T", mid, recv); else mrb_method_missing(mrb, mid, recv, args); /* not reached */ } if (mid != missing) { - *clsp = mrb_class(mrb, recv); + ci->u.target_class = mrb_class(mrb, recv); } - m = mrb_method_search_vm(mrb, clsp, missing); + m = mrb_vm_find_method(mrb, ci->u.target_class, &ci->u.target_class, missing); if (MRB_METHOD_UNDEF_P(m)) goto method_missing; /* just in case */ - mrb_stack_extend(mrb, a+4); + stack_extend(mrb, 4); - argv = &ci->stack[a+1]; /* maybe reallocated */ - argv[0] = args; - if (nk == 0) { + argv = &ci->stack[1]; /* maybe reallocated */ + if (ci->nk == 0) { argv[1] = blk; } else { - mrb_assert(nk == 15); - argv[1] = argv[n]; + mrb_assert(ci->nk == 15); + if (ci->n != CALL_MAXARGS) { + argv[1] = argv[ci->n]; /* keyword arguments */ + } argv[2] = blk; } - *c = 15 | (uint16_t)(nk<<4); + argv[0] = args; /* must be replaced after saving argv[0] as it may be a keyword argument */ + ci->n = CALL_MAXARGS; + /* ci->nk is already set to zero or CALL_MAXARGS */ mrb_ary_unshift(mrb, args, mrb_symbol_value(mid)); + ci->mid = missing; return m; } @@ -601,13 +750,13 @@ funcall_args_capture(mrb_state *mrb, int stoff, mrb_int argc, const mrb_value *a ci->nk = 0; /* funcall does not support keyword arguments */ if (argc < CALL_MAXARGS) { - int extends = stoff + argc + 2 /* self + block */; + mrb_int extends = stoff + argc + 2 /* self + block */; stack_extend_adjust(mrb, extends, &argv); mrb_value *args = mrb->c->ci->stack + stoff + 1 /* self */; stack_copy(args, argv, argc); args[argc] = block; - ci->n = argc; + ci->n = (uint8_t)argc; } else { int extends = stoff + 3 /* self + splat + block */; @@ -620,7 +769,7 @@ funcall_args_capture(mrb_state *mrb, int stoff, mrb_int argc, const mrb_value *a } } -static mrb_value +static inline mrb_value ensure_block(mrb_state *mrb, mrb_value blk) { if (!mrb_nil_p(blk) && !mrb_proc_p(blk)) { @@ -630,6 +779,24 @@ ensure_block(mrb_state *mrb, mrb_value blk) return blk; } +/** + * @brief Calls a method on an object with a block. + * + * This function invokes a method identified by its symbol ID (`mid`) on + * the `self` object, passing the given arguments (`argv`) and a block (`blk`). + * + * @param mrb The mruby state. + * @param self The receiver object of the method call. + * @param mid The symbol ID of the method to call. + * @param argc The number of arguments in `argv`. + * @param argv A pointer to an array of `mrb_value` arguments. + * @param blk The block to pass to the method. If no block is to be passed, + * use `mrb_nil_value()`. If `blk` is not nil and not a proc, + * it will be converted to a proc using `to_proc`. + * @return The result of the method call. + * @raise E_ARGUMENT_ERROR if `argc` is negative or too large. + * @raise E_STACK_ERROR if the call level exceeds `MRB_CALL_LEVEL_MAX`. + */ MRB_API mrb_value mrb_funcall_with_block(mrb_state *mrb, mrb_value self, mrb_sym mid, mrb_int argc, const mrb_value *argv, mrb_value blk) { @@ -644,7 +811,7 @@ mrb_funcall_with_block(mrb_state *mrb, mrb_value self, mrb_sym mid, mrb_int argc mrb->jmp = &c_jmp; /* recursive call */ val = mrb_funcall_with_block(mrb, self, mid, argc, argv, blk); - mrb->jmp = 0; + mrb->jmp = NULL; } MRB_CATCH(&c_jmp) { /* error */ while (nth_ci < (mrb->c->ci - mrb->c->cibase)) { @@ -654,7 +821,7 @@ mrb_funcall_with_block(mrb_state *mrb, mrb_value self, mrb_sym mid, mrb_int argc val = mrb_obj_value(mrb->exc); } MRB_END_EXC(&c_jmp); - mrb->jmp = 0; + mrb->jmp = NULL; } else { mrb_method_t m; @@ -671,23 +838,27 @@ mrb_funcall_with_block(mrb_state *mrb, mrb_value self, mrb_sym mid, mrb_int argc ci = cipush(mrb, n, CINFO_DIRECT, NULL, NULL, BLK_PTR(blk), 0, 0); funcall_args_capture(mrb, 0, argc, argv, blk, ci); ci->u.target_class = mrb_class(mrb, self); - m = mrb_method_search_vm(mrb, &ci->u.target_class, mid); + m = mrb_vm_find_method(mrb, ci->u.target_class, &ci->u.target_class, mid); if (MRB_METHOD_UNDEF_P(m)) { - uint16_t arginfo = ci->n; - m = prepare_missing(mrb, self, mid, &ci->u.target_class, 0, &arginfo, mrb_nil_value(), 0); - mid = MRB_SYM(method_missing); - ci->n = (arginfo >> 0) & 0x0f; - ci->nk = (arginfo >> 4) & 0x0f; + m = prepare_missing(mrb, ci, self, mid, mrb_nil_value(), FALSE); + } + else { + ci->mid = mid; } - ci->mid = mid; ci->proc = MRB_METHOD_PROC_P(m) ? MRB_METHOD_PROC(m) : NULL; if (MRB_METHOD_CFUNC_P(m)) { + mrb->exc = NULL; ci->stack[0] = self; val = MRB_METHOD_CFUNC(m)(mrb, self); cipop(mrb); + if (mrb->exc != NULL) { + mrb_exc_raise(mrb, mrb_obj_value(mrb->exc)); + } } else { + /* handle alias */ + MRB_PROC_RESOLVE_ALIAS(ci, ci->proc); ci->cci = CINFO_SKIP; val = mrb_run(mrb, ci->proc, self); } @@ -697,6 +868,23 @@ mrb_funcall_with_block(mrb_state *mrb, mrb_value self, mrb_sym mid, mrb_int argc return val; } +/** + * @brief Calls a method on an object with an array of arguments. + * + * This function is similar to `mrb_funcall_with_block` but takes arguments + * as a C array (`argv`) and does not take an explicit block argument. + * If a block is needed, `mrb_funcall_with_block` should be used. + * This function is essentially a convenience wrapper around + * `mrb_funcall_with_block` with `mrb_nil_value()` for the block. + * + * @param mrb The mruby state. + * @param self The receiver object of the method call. + * @param mid The symbol ID of the method to call. + * @param argc The number of arguments in `argv`. + * @param argv A pointer to an array of `mrb_value` arguments. + * @return The result of the method call. + * @see mrb_funcall_with_block + */ MRB_API mrb_value mrb_funcall_argv(mrb_state *mrb, mrb_value self, mrb_sym mid, mrb_int argc, const mrb_value *argv) { @@ -704,41 +892,52 @@ mrb_funcall_argv(mrb_state *mrb, mrb_value self, mrb_sym mid, mrb_int argc, cons } static void -check_method_noarg(mrb_state *mrb, const mrb_callinfo *ci) +check_argument_count(mrb_state *mrb, const mrb_callinfo *ci, mrb_aspec aspec) { - mrb_int argc = ci->n == CALL_MAXARGS ? RARRAY_LEN(ci->stack[1]) : ci->n; - if (ci->nk > 0) { + mrb_int argc = ci->n; + if (mrb_unlikely(argc == CALL_MAXARGS)) { + argc = RARRAY_LEN(ci->stack[1]); + } + /* keyword hash counts as positional if method doesn't accept keywords */ + if (ci->nk > 0 && MRB_ASPEC_KEY(aspec) == 0 && !MRB_ASPEC_KDICT(aspec)) { mrb_value kdict = ci->stack[mrb_ci_kidx(ci)]; - if (!(mrb_hash_p(kdict) && mrb_hash_empty_p(mrb, kdict))) { + if (mrb_hash_p(kdict) && !mrb_hash_empty_p(mrb, kdict)) { argc++; } } - if (argc > 0) { - mrb_argnum_error(mrb, argc, 0, 0); + int min = MRB_ASPEC_REQ(aspec) + MRB_ASPEC_POST(aspec); + int max = MRB_ASPEC_REST(aspec) ? -1 : min + MRB_ASPEC_OPT(aspec); + if (mrb_unlikely(argc < min || (max >= 0 && argc > max))) { + mrb_argnum_error(mrb, argc, min, max); } } static mrb_value -exec_irep(mrb_state *mrb, mrb_value self, struct RProc *p) +exec_irep(mrb_state *mrb, mrb_value self, const struct RProc *p) { mrb_callinfo *ci = mrb->c->ci; - mrb_int keep, nregs; ci->stack[0] = self; + /* handle alias */ + MRB_PROC_RESOLVE_ALIAS(ci, p); CI_PROC_SET(ci, p); if (MRB_PROC_CFUNC_P(p)) { - if (MRB_PROC_NOARG_P(p)) { - check_method_noarg(mrb, ci); + uint32_t caspec_bits = p->flags & MRB_PROC_CASPEC_MASK; + if (caspec_bits != 0) { + check_argument_count(mrb, ci, mrb_proc_decompress_caspec(caspec_bits)); + } + else if (MRB_PROC_NOARG_P(p) && (ci->n > 0 || ci->nk > 0)) { + check_argument_count(mrb, ci, 0); } return MRB_PROC_CFUNC(p)(mrb, self); } - nregs = p->body.irep->nregs; - keep = mrb_ci_bidx(ci)+1; + mrb_int nregs = p->body.irep->nregs; + mrb_int keep = ci_bidx(ci)+1; if (nregs < keep) { - mrb_stack_extend(mrb, keep); + stack_extend(mrb, keep); } else { - mrb_stack_extend(mrb, nregs); + stack_extend(mrb, nregs); stack_clear(ci->stack+keep, nregs-keep); } @@ -748,7 +947,7 @@ exec_irep(mrb_state *mrb, mrb_value self, struct RProc *p) } mrb_value -mrb_exec_irep(mrb_state *mrb, mrb_value self, struct RProc *p) +mrb_exec_irep(mrb_state *mrb, mrb_value self, const struct RProc *p) { mrb_callinfo *ci = mrb->c->ci; if (ci->cci == CINFO_NONE) { @@ -757,16 +956,18 @@ mrb_exec_irep(mrb_state *mrb, mrb_value self, struct RProc *p) else { mrb_value ret; if (MRB_PROC_CFUNC_P(p)) { - if (MRB_PROC_NOARG_P(p)) { - check_method_noarg(mrb, ci); + if (MRB_PROC_NOARG_P(p) && (ci->n > 0 || ci->nk > 0)) { + check_argument_count(mrb, ci, 0); } - cipush(mrb, 0, CINFO_DIRECT, CI_TARGET_CLASS(ci), p, NULL, ci->mid, ci->n|(ci->nk<<4)); + ci = cipush(mrb, 0, CINFO_DIRECT, CI_TARGET_CLASS(ci), p, NULL, ci->mid, ci->n|(ci->nk<<4)); + mrb->exc = NULL; ret = MRB_PROC_CFUNC(p)(mrb, self); cipop(mrb); } else { - mrb_int keep = mrb_ci_bidx(ci) + 1; /* receiver + block */ - ret = mrb_top_run(mrb, p, self, keep); + mrb_int keep = ci_bidx(ci) + 1; /* receiver + block */ + ci = cipush(mrb, 0, CINFO_SKIP, CI_TARGET_CLASS(ci), p, NULL, ci->mid, ci->n|(ci->nk<<4)); + ret = mrb_vm_run(mrb, p, self, keep); } if (mrb->exc && mrb->jmp) { mrb_exc_raise(mrb, mrb_obj_value(mrb->exc)); @@ -775,44 +976,47 @@ mrb_exec_irep(mrb_state *mrb, mrb_value self, struct RProc *p) } } -/* 15.3.1.3.4 */ -/* 15.3.1.3.44 */ -/* - * call-seq: - * obj.send(symbol [, args...]) -> obj - * obj.__send__(symbol [, args...]) -> obj - * - * Invokes the method identified by _symbol_, passing it any - * arguments specified. You can use __send__ if the name - * +send+ clashes with an existing method in _obj_. - * - * class Klass - * def hello(*args) - * "Hello " + args.join(' ') - * end - * end - * k = Klass.new - * k.send :hello, "gentle", "readers" #=> "Hello gentle readers" - */ mrb_value -mrb_f_send(mrb_state *mrb, mrb_value self) +mrb_object_exec(mrb_state *mrb, mrb_value self, struct RClass *target_class) +{ + mrb_callinfo *ci = mrb->c->ci; + mrb_int bidx = ci_bidx(ci); + mrb_value blk = ci->stack[bidx]; + if (mrb_nil_p(blk)) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "no block given"); + } + + mrb_assert(mrb_proc_p(blk)); + mrb_gc_protect(mrb, blk); + ci->stack[bidx] = mrb_nil_value(); + mrb_vm_ci_target_class_set(ci, target_class); + return mrb_exec_irep(mrb, self, mrb_proc_ptr(blk)); +} + +static mrb_noreturn void +vis_error(mrb_state *mrb, mrb_sym mid, mrb_value args, mrb_value recv, mrb_bool priv) +{ + mrb_no_method_error(mrb, mid, args, "%s method '%n' called for %T", (priv ? "private" : "protected"), mid, recv); +} + +static mrb_value +send_method(mrb_state *mrb, mrb_value self, mrb_bool pub) { - mrb_sym name; - mrb_value block, *regs; - mrb_method_t m; - struct RClass *c; mrb_callinfo *ci = mrb->c->ci; int n = ci->n; + mrb_sym name; if (ci->cci > CINFO_NONE) { funcall:; const mrb_value *argv; mrb_int argc; + mrb_value block; mrb_get_args(mrb, "n*&", &name, &argv, &argc, &block); return mrb_funcall_with_block(mrb, self, name, argc, argv, block); } - regs = mrb->c->ci->stack+1; + mrb_method_t m; + mrb_value *regs = mrb->c->ci->stack+1; if (n == 0) { argnum_error: @@ -826,12 +1030,28 @@ mrb_f_send(mrb_state *mrb, mrb_value self) name = mrb_obj_to_sym(mrb, regs[0]); } - c = mrb_class(mrb, self); - m = mrb_method_search_vm(mrb, &c, name); - if (MRB_METHOD_UNDEF_P(m)) { /* call method_mising */ + struct RClass *c = mrb_class(mrb, self); + m = mrb_vm_find_method(mrb, c, &c, name); + if (MRB_METHOD_UNDEF_P(m)) { /* call method_missing */ goto funcall; } + if (pub) { + mrb_bool priv = TRUE; + if (m.flags & MRB_METHOD_PRIVATE_FL) { + vis_err:; + if (n == 15) { + n = (int)(RARRAY_LEN(regs[0]) - 1); + regs = RARRAY_PTR(regs[0]); + } + vis_error(mrb, name, mrb_ary_new_from_values(mrb, n, regs+1), self, priv); + } + else if ((m.flags & MRB_METHOD_PROTECTED_FL) && mrb_obj_is_kind_of(mrb, self, ci->u.target_class)) { + priv = FALSE; + goto vis_err; + } + } + ci->mid = name; ci->u.target_class = c; /* remove first symbol from arguments */ @@ -849,18 +1069,66 @@ mrb_f_send(mrb_state *mrb, mrb_value self) ci->n--; } - if (MRB_METHOD_CFUNC_P(m)) { - if (MRB_METHOD_NOARG_P(m)) { - check_method_noarg(mrb, ci); + if (MRB_METHOD_FUNC_P(m)) { + check_argument_count(mrb, ci, MRB_MT_ASPEC(m.flags)); + return MRB_METHOD_FUNC(m)(mrb, self); + } + const struct RProc *p = MRB_METHOD_PROC(m); + MRB_PROC_RESOLVE_ALIAS(ci, p); + CI_PROC_SET(ci, p); + if (MRB_PROC_CFUNC_P(p)) { + uint32_t caspec_bits = p->flags & MRB_PROC_CASPEC_MASK; + if (caspec_bits != 0) { + check_argument_count(mrb, ci, mrb_proc_decompress_caspec(caspec_bits)); } - - if (MRB_METHOD_PROC_P(m)) { - const struct RProc *p = MRB_METHOD_PROC(m); - CI_PROC_SET(ci, p); + else if (MRB_PROC_NOARG_P(p) && (ci->n > 0 || ci->nk > 0)) { + check_argument_count(mrb, ci, 0); } - return MRB_METHOD_CFUNC(m)(mrb, self); + return MRB_PROC_CFUNC(p)(mrb, self); } - return exec_irep(mrb, self, MRB_METHOD_PROC(m)); + return exec_irep(mrb, self, p); +} + +/* 15.3.1.3.4 */ +/* 15.3.1.3.44 */ +/* + * call-seq: + * obj.send(symbol [, args...]) -> obj + * obj.__send__(symbol [, args...]) -> obj + * + * Invokes the method identified by _symbol_, passing it any + * arguments specified. You can use `__send__` if the name + * `send` clashes with an existing method in _obj_. + * + * class Klass + * def hello(*args) + * "Hello " + args.join(' ') + * end + * end + * k = Klass.new + * k.send :hello, "gentle", "readers" #=> "Hello gentle readers" + */ +mrb_value +mrb_f_send(mrb_state *mrb, mrb_value self) +{ + return send_method(mrb, self, FALSE); +} + +/* + * call-seq: + * obj.public_send(symbol [, args...]) -> obj + * + * Invokes the method identified by symbol, passing it any + * arguments specified. Unlike send, public_send calls public methods only. + * When the method is identified by a string, the string is converted to a + * symbol. + * + * 1.public_send(:puts, "hello") # causes NoMethodError + */ +mrb_value +mrb_f_public_send(mrb_state *mrb, mrb_value self) +{ + return send_method(mrb, self, TRUE); } static void @@ -877,35 +1145,34 @@ check_block(mrb_state *mrb, mrb_value blk) static mrb_value eval_under(mrb_state *mrb, mrb_value self, mrb_value blk, struct RClass *c) { - struct RProc *p; - mrb_callinfo *ci; - int nregs; - check_block(mrb, blk); - ci = mrb->c->ci; + mrb_callinfo *ci = mrb->c->ci; if (ci->cci == CINFO_DIRECT) { return mrb_yield_with_class(mrb, blk, 1, &self, self, c); } ci->u.target_class = c; - p = mrb_proc_ptr(blk); + const struct RProc *p = mrb_proc_ptr(blk); + /* just in case irep is NULL; #6065 */ + if (p->body.irep == NULL) return mrb_nil_value(); CI_PROC_SET(ci, p); ci->n = 1; ci->nk = 0; ci->mid = ci[-1].mid; + MRB_CI_SET_VISIBILITY_BREAK(ci); if (MRB_PROC_CFUNC_P(p)) { - mrb_stack_extend(mrb, 4); + stack_extend(mrb, 4); mrb->c->ci->stack[0] = self; mrb->c->ci->stack[1] = self; mrb->c->ci->stack[2] = mrb_nil_value(); return MRB_PROC_CFUNC(p)(mrb, self); } - nregs = p->body.irep->nregs; + int nregs = p->body.irep->nregs; if (nregs < 4) nregs = 4; - mrb_stack_extend(mrb, nregs); + stack_extend(mrb, nregs); mrb->c->ci->stack[0] = self; mrb->c->ci->stack[1] = self; stack_clear(mrb->c->ci->stack+2, nregs-2); - ci = cipush(mrb, 0, 0, NULL, NULL, NULL, 0, 0); + cipush(mrb, 0, 0, NULL, NULL, NULL, 0, 0); return self; } @@ -917,7 +1184,7 @@ eval_under(mrb_state *mrb, mrb_value self, mrb_value blk, struct RClass *c) * mod.module_eval {| | block } -> obj * * Evaluates block in the context of _mod_. This can - * be used to add methods to a class. module_eval returns + * be used to add methods to a class. `module_eval` returns * the result of evaluating its argument. */ mrb_value @@ -937,10 +1204,10 @@ mrb_mod_module_eval(mrb_state *mrb, mrb_value mod) * obj.instance_eval {| | block } -> obj * * Evaluates the given block,within the context of the receiver (_obj_). - * In order to set the context, the variable +self+ is set to _obj_ while + * In order to set the context, the variable `self` is set to _obj_ while * the code is executing, giving the code access to _obj_'s - * instance variables. In the version of instance_eval - * that takes a +String+, the optional second and third + * instance variables. In the version of `instance_eval` + * that takes a `String`, the optional second and third * parameters supply a filename and starting line number that are used * when reporting compilation errors. * @@ -963,29 +1230,40 @@ mrb_obj_instance_eval(mrb_state *mrb, mrb_value self) return eval_under(mrb, self, b, mrb_singleton_class_ptr(mrb, self)); } -MRB_API mrb_value -mrb_yield_with_class(mrb_state *mrb, mrb_value b, mrb_int argc, const mrb_value *argv, mrb_value self, struct RClass *c) +static mrb_value +yield_with_attr(mrb_state *mrb, mrb_value b, mrb_int argc, const mrb_value *argv, mrb_value self, struct RClass *c, + mrb_bool vis_break) { - struct RProc *p; - mrb_sym mid = mrb->c->ci->mid; - mrb_callinfo *ci; - mrb_value val; - mrb_int n; - check_block(mrb, b); - ci = mrb->c->ci; - n = mrb_ci_nregs(ci); - p = mrb_proc_ptr(b); - ci = cipush(mrb, n, CINFO_DIRECT, NULL, NULL, NULL, 0, 0); + + mrb_callinfo *ci = mrb->c->ci; + mrb_int n = mrb_ci_nregs(ci); + const struct RProc *p = mrb_proc_ptr(b); + mrb_sym mid; + + if (MRB_PROC_ENV_P(p)) { + mid = p->e.env->mid; + } + else { + mid = ci->mid; + } + ci = cipush(mrb, n, CINFO_DIRECT, NULL, NULL, NULL, mid, 0); funcall_args_capture(mrb, 0, argc, argv, mrb_nil_value(), ci); ci->u.target_class = c; - ci->mid = mid; ci->proc = p; + if (vis_break) { + MRB_CI_SET_VISIBILITY_BREAK(ci); + } + mrb_value val; if (MRB_PROC_CFUNC_P(p)) { + mrb->exc = NULL; ci->stack[0] = self; val = MRB_PROC_CFUNC(p)(mrb, self); cipop(mrb); + if (mrb->exc && mrb->jmp) { + mrb_exc_raise(mrb, mrb_obj_value(mrb->exc)); + } } else { ci->cci = CINFO_SKIP; @@ -994,31 +1272,94 @@ mrb_yield_with_class(mrb_state *mrb, mrb_value b, mrb_int argc, const mrb_value return val; } +/** + * @brief Yields to a block with a specific `self` object and class context. + * + * This function executes a given block (`b`) with the provided arguments (`argv`). + * The `self` object within the block will be `self`, and the class context + * will be `c`. This allows for more control over the execution environment of + * the block. The `vis_break` flag is set to TRUE, meaning visibility checks + * (public/private/protected) are enforced. + * + * @param mrb The mruby state. + * @param b The block (proc) to yield to. + * @param argc The number of arguments in `argv`. + * @param argv A pointer to an array of `mrb_value` arguments to pass to the block. + * @param self The object that will be `self` inside the block. + * @param c The class context for the block execution. + * @return The result of the block execution. + * @raise E_TYPE_ERROR if `b` is not a proc or nil. + * @see mrb_yield_argv + * @see mrb_yield + */ +MRB_API mrb_value +mrb_yield_with_class(mrb_state *mrb, mrb_value b, mrb_int argc, const mrb_value *argv, mrb_value self, struct RClass *c) +{ + return yield_with_attr(mrb, b, argc, argv, self, c, TRUE); +} + +/** + * @brief Yields to a block with an array of arguments. + * + * This function executes a given block (`b`) with the provided arguments (`argv`). + * The `self` object and class context for the block execution are determined + * from the block itself (its captured environment). + * Visibility checks (public/private/protected) are not strictly enforced + * in the same way as `mrb_yield_with_class` (vis_break is FALSE). + * + * @param mrb The mruby state. + * @param b The block (proc) to yield to. + * @param argc The number of arguments in `argv`. + * @param argv A pointer to an array of `mrb_value` arguments to pass to the block. + * @return The result of the block execution. + * @raise E_TYPE_ERROR if `b` is not a proc or nil. + * @see mrb_yield_with_class + * @see mrb_yield + */ MRB_API mrb_value mrb_yield_argv(mrb_state *mrb, mrb_value b, mrb_int argc, const mrb_value *argv) { - struct RProc *p = mrb_proc_ptr(b); + const struct RProc *p = mrb_proc_ptr(b); + struct RClass *tc; + mrb_value self = mrb_proc_get_self(mrb, p, &tc); - return mrb_yield_with_class(mrb, b, argc, argv, MRB_PROC_ENV(p)->stack[0], MRB_PROC_TARGET_CLASS(p)); + return yield_with_attr(mrb, b, argc, argv, self, tc, FALSE); } +/** + * @brief Yields to a block with a single argument. + * + * This function executes a given block (`b`) with a single argument (`arg`). + * It's a convenience function for the common case of yielding with one argument. + * The `self` object and class context for the block execution are determined + * from the block itself. + * Visibility checks are not strictly enforced (vis_break is FALSE). + * + * @param mrb The mruby state. + * @param b The block (proc) to yield to. + * @param arg The single `mrb_value` argument to pass to the block. + * @return The result of the block execution. + * @raise E_TYPE_ERROR if `b` is not a proc or nil. + * @see mrb_yield_with_class + * @see mrb_yield_argv + */ MRB_API mrb_value mrb_yield(mrb_state *mrb, mrb_value b, mrb_value arg) { - struct RProc *p = mrb_proc_ptr(b); + const struct RProc *p = mrb_proc_ptr(b); + struct RClass *tc; + mrb_value self = mrb_proc_get_self(mrb, p, &tc); - return mrb_yield_with_class(mrb, b, 1, &arg, MRB_PROC_ENV(p)->stack[0], MRB_PROC_TARGET_CLASS(p)); + return yield_with_attr(mrb, b, 1, &arg, self, tc, FALSE); } mrb_value mrb_yield_cont(mrb_state *mrb, mrb_value b, mrb_value self, mrb_int argc, const mrb_value *argv) { - struct RProc *p; - mrb_callinfo *ci; - check_block(mrb, b); - p = mrb_proc_ptr(b); - ci = mrb->c->ci; + + const struct RProc *p = mrb_proc_ptr(b); + mrb_callinfo *ci = mrb->c->ci; stack_extend_adjust(mrb, 4, &argv); mrb->c->ci->stack[1] = mrb_ary_new_from_values(mrb, argc, argv); @@ -1031,13 +1372,8 @@ mrb_yield_cont(mrb_state *mrb, mrb_value b, mrb_value self, mrb_int argc, const #define RBREAK_TAG_FOREACH(f) \ f(RBREAK_TAG_BREAK, 0) \ - f(RBREAK_TAG_BREAK_UPPER, 1) \ - f(RBREAK_TAG_BREAK_INTARGET, 2) \ - f(RBREAK_TAG_RETURN_BLOCK, 3) \ - f(RBREAK_TAG_RETURN, 4) \ - f(RBREAK_TAG_RETURN_TOPLEVEL, 5) \ - f(RBREAK_TAG_JUMP, 6) \ - f(RBREAK_TAG_STOP, 7) + f(RBREAK_TAG_JUMP, 1) \ + f(RBREAK_TAG_STOP, 2) #define RBREAK_TAG_DEFINE(tag, i) tag = i, enum { @@ -1063,12 +1399,12 @@ mrb_break_tag_set(struct RBreak *brk, uint32_t tag) } static struct RBreak* -break_new(mrb_state *mrb, uint32_t tag, const struct RProc *p, mrb_value val) +break_new(mrb_state *mrb, uint32_t tag, const mrb_callinfo *return_ci, mrb_value val) { - struct RBreak *brk; + mrb_assert((size_t)(return_ci - mrb->c->cibase) <= (size_t)(mrb->c->ci - mrb->c->cibase)); - brk = MRB_OBJ_ALLOC(mrb, MRB_TT_BREAK, NULL); - mrb_break_proc_set(brk, p); + struct RBreak *brk = MRB_OBJ_ALLOC(mrb, MRB_TT_BREAK, NULL); + brk->ci_break_index = return_ci - mrb->c->cibase; mrb_break_value_set(brk, val); mrb_break_tag_set(brk, tag); @@ -1080,28 +1416,21 @@ break_new(mrb_state *mrb, uint32_t tag, const struct RProc *p, mrb_value val) #define MRB_CATCH_FILTER_ALL (MRB_CATCH_FILTER_RESCUE | MRB_CATCH_FILTER_ENSURE) static const struct mrb_irep_catch_handler * -catch_handler_find(mrb_state *mrb, mrb_callinfo *ci, const mrb_code *pc, uint32_t filter) +catch_handler_find(const mrb_irep *irep, const mrb_code *pc, uint32_t filter) { - const mrb_irep *irep; - ptrdiff_t xpc; - size_t cnt; - const struct mrb_irep_catch_handler *e; - /* The comparison operators use `>` and `<=` because pc already points to the next instruction */ #define catch_cover_p(pc, beg, end) ((pc) > (ptrdiff_t)(beg) && (pc) <= (ptrdiff_t)(end)) - if (ci->proc == NULL || MRB_PROC_CFUNC_P(ci->proc)) return NULL; - irep = ci->proc->body.irep; - if (irep->clen < 1) return NULL; - xpc = pc - irep->iseq; + mrb_assert(irep && irep->clen > 0); + ptrdiff_t xpc = pc - irep->iseq; /* If it retry at the top level, pc will be 0, so check with -1 as the start position */ mrb_assert(catch_cover_p(xpc, -1, irep->ilen)); if (!catch_cover_p(xpc, -1, irep->ilen)) return NULL; /* Currently uses a simple linear search to avoid processing complexity. */ - cnt = irep->clen; - e = mrb_irep_catch_handler_table(irep) + cnt - 1; - for (; cnt > 0; cnt --, e --) { + size_t cnt = irep->clen; + const struct mrb_irep_catch_handler *e = mrb_irep_catch_handler_table(irep) + cnt - 1; + for (; cnt > 0; cnt--, e--) { if (((UINT32_C(1) << e->type) & filter) && catch_cover_p(xpc, mrb_irep_catch_handler_unpack(e->begin), mrb_irep_catch_handler_unpack(e->end))) { return e; @@ -1113,33 +1442,31 @@ catch_handler_find(mrb_state *mrb, mrb_callinfo *ci, const mrb_code *pc, uint32_ return NULL; } -typedef enum { - LOCALJUMP_ERROR_RETURN = 0, - LOCALJUMP_ERROR_BREAK = 1, - LOCALJUMP_ERROR_YIELD = 2 -} localjump_error_kind; +#define RAISE_EXC(mrb, exc) do { \ + mrb_value exc_value = (exc); \ + mrb_exc_set(mrb, exc_value); \ + goto L_RAISE; \ +} while (0) -static void -localjump_error(mrb_state *mrb, localjump_error_kind kind) -{ - char kind_str[3][7] = { "return", "break", "yield" }; - char kind_str_len[] = { 6, 5, 5 }; - static const char lead[] = "unexpected "; - mrb_value msg; - mrb_value exc; - - msg = mrb_str_new_capa(mrb, sizeof(lead) + 7); - mrb_str_cat(mrb, msg, lead, sizeof(lead) - 1); - mrb_str_cat(mrb, msg, kind_str[kind], kind_str_len[kind]); - exc = mrb_exc_new_str(mrb, E_LOCALJUMP_ERROR, msg); - mrb_exc_set(mrb, exc); -} +#define RAISE_LIT(mrb, c, str) RAISE_EXC(mrb, mrb_exc_new_lit(mrb, c, str)) +#define RAISE_FORMAT(mrb, c, fmt, ...) RAISE_EXC(mrb, mrb_exc_new_str(mrb, c, mrb_format(mrb, fmt, __VA_ARGS__))) + +/* return codes for extracted opcode handlers */ +#define VM_NEXT 0 /* continue to next instruction */ +#define VM_RAISE 1 /* exception: goto L_RAISE */ +#define VM_SEND_SYM 2 /* fallback send: goto L_SEND_SYM */ +#define VM_SENDB_SYM 3 /* fallback sendb: goto L_SENDB_SYM */ +#define VM_RETURN_NIL 4 /* nil irep: return nil via L_OP_RETURN */ + +#if defined(__GNUC__) || defined(__clang__) +#define MRB_FLATTEN __attribute__((flatten)) +#else +#define MRB_FLATTEN +#endif static void argnum_error(mrb_state *mrb, mrb_int num) { - mrb_value exc; - mrb_value str; mrb_int argc = mrb->c->ci->n; if (argc == 15) { @@ -1151,8 +1478,8 @@ argnum_error(mrb_state *mrb, mrb_int num) if (argc == 0 && mrb->c->ci->nk != 0 && !mrb_hash_empty_p(mrb, mrb->c->ci->stack[1])) { argc++; } - str = mrb_format(mrb, "wrong number of arguments (given %i, expected %i)", argc, num); - exc = mrb_exc_new_str(mrb, E_ARGUMENT_ERROR, str); + mrb_value str = mrb_format(mrb, "wrong number of arguments (given %i, expected %i)", argc, num); + mrb_value exc = mrb_exc_new_str(mrb, E_ARGUMENT_ERROR, str); mrb_exc_set(mrb, exc); } @@ -1163,27 +1490,28 @@ break_tag_p(struct RBreak *brk, uint32_t tag) } static void -prepare_tagged_break(mrb_state *mrb, uint32_t tag, const struct RProc *proc, mrb_value val) +prepare_tagged_break(mrb_state *mrb, uint32_t tag, const mrb_callinfo *return_ci, mrb_value val) { if (break_tag_p((struct RBreak*)mrb->exc, tag)) { mrb_break_tag_set((struct RBreak*)mrb->exc, tag); } else { - mrb->exc = (struct RObject*)break_new(mrb, tag, proc, val); + mrb->exc = (struct RObject*)break_new(mrb, tag, return_ci, val); } } -#define THROW_TAGGED_BREAK(mrb, tag, proc, val) \ +#define THROW_TAGGED_BREAK(mrb, tag, return_ci, val) \ do { \ - prepare_tagged_break(mrb, tag, proc, val); \ + prepare_tagged_break(mrb, tag, return_ci, val); \ goto L_CATCH_TAGGED_BREAK; \ } while (0) -#define UNWIND_ENSURE(mrb, ci, pc, tag, proc, val) \ +#define UNWIND_ENSURE(mrb, ci, pc, tag, return_ci, val) \ do { \ - ch = catch_handler_find(mrb, ci, pc, MRB_CATCH_FILTER_ENSURE); \ - if (ch) { \ - THROW_TAGGED_BREAK(mrb, tag, proc, val); \ + const struct RProc *proc = (ci)->proc; \ + if (proc && !MRB_PROC_CFUNC_P(proc) && (irep = proc->body.irep) && irep->clen > 0 && \ + (ch = catch_handler_find(irep, pc, MRB_CATCH_FILTER_ENSURE))) { \ + THROW_TAGGED_BREAK(mrb, tag, return_ci, val); \ } \ } while (0) @@ -1232,38 +1560,121 @@ prepare_tagged_break(mrb_state *mrb, uint32_t tag, const struct RProc *proc, mrb #define BYTECODE_DECODER(x) (x) #endif -#ifndef MRB_NO_DIRECT_THREADING -#if defined __GNUC__ || defined __clang__ || defined __INTEL_COMPILER -#define DIRECT_THREADED +#ifndef MRB_USE_VM_SWITCH_DISPATCH +#if !defined __GNUC__ && !defined __clang__ && !defined __INTEL_COMPILER +#define MRB_USE_VM_SWITCH_DISPATCH #endif -#endif /* ifndef MRB_NO_DIRECT_THREADING */ +#endif /* ifndef MRB_USE_VM_SWITCH_DISPATCH */ -#ifndef DIRECT_THREADED +#ifdef MRB_USE_VM_SWITCH_DISPATCH -#define INIT_DISPATCH for (;;) { insn = BYTECODE_DECODER(*pc); CODE_FETCH_HOOK(mrb, irep, pc, regs); switch (insn) { -#define CASE(insn,ops) case insn: pc++; FETCH_ ## ops (); mrb->c->ci->pc = pc; L_ ## insn ## _BODY: +#define INIT_DISPATCH for (;;) { CALL_CODE_HOOKS(); switch (insn) { +#define CASE(insn,ops) case insn: DECODE_OPERANDS(ops); L_ ## insn ## _BODY: #define NEXT goto L_END_DISPATCH #define JUMP NEXT -#define END_DISPATCH L_END_DISPATCH:;}} +#define END_DISPATCH L_END_DISPATCH: RETURN_IF_TASK_STOPPED(mrb);}} #else #define INIT_DISPATCH JUMP; return mrb_nil_value(); -#define CASE(insn,ops) L_ ## insn: pc++; FETCH_ ## ops (); mrb->c->ci->pc = pc; L_ ## insn ## _BODY: -#define NEXT insn=BYTECODE_DECODER(*pc); CODE_FETCH_HOOK(mrb, irep, pc, regs); goto *optable[insn] +#define CASE(insn,ops) L_ ## insn: DECODE_OPERANDS(ops); L_ ## insn ## _BODY: +#define NEXT RETURN_IF_TASK_STOPPED(mrb); CALL_CODE_HOOKS(); goto *optable[insn] #define JUMP NEXT - -#define END_DISPATCH +#define END_DISPATCH RETURN_IF_TASK_STOPPED(mrb) #endif -MRB_API mrb_value -mrb_vm_run(mrb_state *mrb, const struct RProc *proc, mrb_value self, mrb_int stack_keep) +#define DECODE_OPERANDS(ops) do { const mrb_code *pc = ci->pc+1; FETCH_ ## ops (); ci->pc = pc; } while (0) +#define CALL_CODE_HOOKS() do { insn = BYTECODE_DECODER(*ci->pc); CODE_FETCH_HOOK(mrb, irep, ci->pc, regs); } while (0) + +#ifdef MRB_USE_TASK_SCHEDULER +/* TRUE when the current context is executing across a C call boundary, i.e. + a C function on the stack re-entered the VM (mrb_funcall / mrb_yield / + mrb_vm_run). A task cannot be suspended at such a point: the C stack + frame between the scheduler's mrb_vm_exec and the current frame cannot + be saved or restored, and returning early from the inner mrb_vm_exec + would leave the call-info stack drifted, tripping the assertion in + mrb_vm_run (issues #6864, #6868). The scheduler defers the switch until + execution unwinds back to a frame with no C boundary. This mirrors the + cooperative guard in Task.pass, which raises rather than defers. + cibase is excluded: it is the entry frame of this mrb_vm_exec. */ +static mrb_bool +task_across_c_boundary(mrb_state *mrb) { - const mrb_irep *irep = proc->body.irep; - mrb_value result; - struct mrb_context *c = mrb->c; + for (mrb_callinfo *ci = mrb->c->ci; ci > mrb->c->cibase; ci--) { + if (ci->cci > 0) return TRUE; + } + return FALSE; +} + +/* Defer task switches while a C-level ObjectSpace walk holds gc.iterating + true. The walk runs callbacks (which may call back into mrb_vm_exec via + mrb_yield); returning early from an inner exec while the outer C + iteration is still active drifts the call-info stack and eventually + crashes (issue #6862). Switches resume at the next OP boundary after + the walk releases gc.iterating. A pending switch is also deferred while + executing across a C call boundary (see task_across_c_boundary). A + pending MRB_TASK_STOPPED is not deferred, since the task is going away. + + mrb->jmp is restored to prev_jmp before returning, exactly as the + normal return paths below do. mrb_vm_exec set mrb->jmp to its own + stack-local c_jmp on entry; leaving it dangling after this early return + means a later raise longjmps into a freed frame (issue #6863). + + This macro must only be expanded where prev_jmp is in scope, i.e. + inside mrb_vm_exec (via NEXT / END_DISPATCH). */ +#define RETURN_IF_TASK_STOPPED(mrb) do { \ + if (((mrb)->task.switching && !(mrb)->gc.iterating && \ + !task_across_c_boundary(mrb)) || \ + (mrb)->c->status == MRB_TASK_STOPPED) { \ + (mrb)->jmp = prev_jmp; \ + return mrb_nil_value(); \ + } \ +} while (0) +#define TASK_STOP(mrb) do { \ + if (mrb->c->status != MRB_TASK_STOPPED) \ + mrb->c->status = MRB_TASK_STOPPED; \ +} while (0) +#define TASK_RETURN_EXCEPTION_AS_VALUE(mrb) ((mrb)->task.exception_as_result) +#else +#define RETURN_IF_TASK_STOPPED(mrb) +#define TASK_STOP(mrb) +#define TASK_RETURN_EXCEPTION_AS_VALUE(mrb) FALSE +#endif + +/** + * @brief Executes a mruby bytecode sequence (iseq) within the VM. + * + * This function is a core part of the mruby execution process. It sets up + * the VM environment for executing the bytecode instructions associated with + * the given proc (Ruby procedure/method). + * + * It initializes the stack if necessary, extends it to accommodate the + * required number of registers for the proc, and then calls `mrb_vm_exec` + * to actually execute the bytecode. + * + * @param mrb The mruby state. + * @param proc The RProc object containing the bytecode (iseq) to execute. + * This proc represents a Ruby method or block. + * @param self The `self` object for the context of this execution. + * @param stack_keep The number of values to preserve on the stack from the + * previous context. This is used for managing nested calls + * and ensuring that arguments or local variables from the + * caller are accessible if needed, or that the stack is + * correctly cleared. + * @return The result of the bytecode execution (typically the value of the + * last evaluated expression). + * @see mrb_vm_exec + * @see mrb_top_run + */ +MRB_API mrb_value +mrb_vm_run(mrb_state *mrb, const struct RProc *proc, mrb_value self, mrb_int stack_keep) +{ + const mrb_irep *irep = proc->body.irep; + struct mrb_context *c = mrb->c; +#ifdef MRB_DEBUG ptrdiff_t cioff = c->ci - c->cibase; +#endif mrb_int nregs = irep->nregs; if (!c->stbase) { @@ -1271,19 +1682,19 @@ mrb_vm_run(mrb_state *mrb, const struct RProc *proc, mrb_value self, mrb_int sta } if (stack_keep > nregs) nregs = stack_keep; - mrb_stack_extend(mrb, nregs); - stack_clear(c->ci->stack + stack_keep, nregs - stack_keep); - c->ci->stack[0] = self; - result = mrb_vm_exec(mrb, proc, irep->iseq); - if (mrb->c != c) { - if (mrb->c->fib) { - mrb_write_barrier(mrb, (struct RBasic*)mrb->c->fib); + else { + struct REnv *e = CI_ENV(mrb->c->ci); + if (e && (stack_keep == 0 || irep->nlocals < MRB_ENV_LEN(e))) { + ci_env_set(mrb->c->ci, NULL); + mrb_env_unshare(mrb, e, FALSE); } - mrb->c = c; - } - else if (c->ci - c->cibase > cioff) { - c->ci = c->cibase + cioff; } + stack_extend(mrb, nregs); + stack_clear(c->ci->stack + stack_keep, nregs - stack_keep); + c->ci->stack[0] = self; + mrb_value result = mrb_vm_exec(mrb, proc, irep->iseq); + mrb_assert(mrb->c == c); /* do not switch fibers via mrb_vm_run(), unlike mrb_vm_exec() */ + mrb_assert(c->ci == c->cibase || (c->ci - c->cibase) == cioff - 1); return result; } @@ -1292,42 +1703,559 @@ check_target_class(mrb_state *mrb) { struct RClass *target = CI_TARGET_CLASS(mrb->c->ci); if (!target) { - return mrb->object_class; + mrb_raise(mrb, E_TYPE_ERROR, "no class/module to add method"); } return target; } -#define regs (mrb->c->ci->stack) +#define regs (ci->stack) static mrb_value hash_new_from_regs(mrb_state *mrb, mrb_int argc, mrb_int idx) { mrb_value hash = mrb_hash_new_capa(mrb, argc); + mrb_callinfo *ci = mrb->c->ci; while (argc--) { mrb_hash_set(mrb, hash, regs[idx+0], regs[idx+1]); + ci = mrb->c->ci; idx += 2; } return hash; } -static mrb_value -ary_new_from_regs(mrb_state *mrb, mrb_int argc, mrb_int idx) +#define ary_new_from_regs(mrb, argc, idx) mrb_ary_new_from_values(mrb, (argc), ®s[idx]); + +/* type pair for arithmetic/comparison dispatch */ +#define TYPES2(a,b) ((((uint16_t)(a))<<8)|(((uint16_t)(b))&0xff)) + +/* + * Extracted opcode handlers. + * These are static functions force-inlined back into mrb_vm_exec via + * __attribute__((flatten)). The source stays clean while the compiled + * output is identical to having the code inline. + * + * Return VM_NEXT to continue, VM_RAISE when an exception has been set. + * VM_SEND_SYM/VM_SENDB_SYM for method fallback (mid set via out-param). + */ + +static int +vm_op_blkpush(mrb_state *mrb, uint32_t a, uint16_t b) { - mrb_value ary = mrb_ary_new_capa(mrb, argc); - while (argc--) { - mrb_ary_push(mrb, ary, regs[idx]); - idx++; + mrb_callinfo *ci = mrb->c->ci; + int m1 = (b>>11)&0x3f; + int r = (b>>10)&0x1; + int m2 = (b>>5)&0x1f; + int kd = (b>>4)&0x1; + int lv = (b>>0)&0xf; + int offset = m1+r+m2+kd; + mrb_value *stack; + + if (lv == 0) stack = regs + 1; + else { + struct REnv *e = uvenv(mrb, lv-1); + if (!e || (!MRB_ENV_ONSTACK_P(e) && e->mid == 0) || + MRB_ENV_LEN(e) <= offset+1) { + mrb_exc_set(mrb, mrb_exc_new_lit(mrb, E_LOCALJUMP_ERROR, "unexpected yield")); + return VM_RAISE; + } + stack = e->stack + 1; } - return ary; + if (mrb_nil_p(stack[offset])) { + mrb_exc_set(mrb, mrb_exc_new_lit(mrb, E_LOCALJUMP_ERROR, "unexpected yield")); + return VM_RAISE; + } + regs[a] = stack[offset]; + return VM_NEXT; } -MRB_API mrb_value -mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) +static int +vm_op_argary(mrb_state *mrb, uint32_t a, uint16_t b) { - /* mrb_assert(MRB_PROC_CFUNC_P(proc)) */ - const mrb_irep *irep = proc->body.irep; - const mrb_pool_value *pool = irep->pool; - const mrb_sym *syms = irep->syms; + mrb_callinfo *ci = mrb->c->ci; + mrb_int m1 = (b>>11)&0x3f; + mrb_int r = (b>>10)&0x1; + mrb_int m2 = (b>>5)&0x1f; + mrb_int kd = (b>>4)&0x1; + mrb_int lv = (b>>0)&0xf; + mrb_value *stack; + + if (ci->mid == 0 || CI_TARGET_CLASS(ci) == NULL) { + L_NOSUPER: + mrb_exc_set(mrb, mrb_exc_new_lit(mrb, E_NOMETHOD_ERROR, "super called outside of method")); + return VM_RAISE; + } + if (lv == 0) stack = regs + 1; + else { + struct REnv *e = uvenv(mrb, lv-1); + if (!e) goto L_NOSUPER; + if (MRB_ENV_LEN(e) <= m1+r+m2+1) + goto L_NOSUPER; + stack = e->stack + 1; + } + if (r == 0) { + regs[a] = mrb_ary_new_from_values(mrb, m1+m2, stack); + } + else { + mrb_value *pp = NULL; + struct RArray *rest; + mrb_int len = 0; + + if (mrb_array_p(stack[m1])) { + struct RArray *ary = mrb_ary_ptr(stack[m1]); + + pp = ARY_PTR(ary); + len = ARY_LEN(ary); + } + regs[a] = mrb_ary_new_capa(mrb, m1+len+m2); + rest = mrb_ary_ptr(regs[a]); + if (m1 > 0) { + stack_copy(ARY_PTR(rest), stack, m1); + } + if (len > 0) { + stack_copy(ARY_PTR(rest)+m1, pp, len); + } + if (m2 > 0) { + stack_copy(ARY_PTR(rest)+m1+len, stack+m1+1, m2); + } + ARY_SET_LEN(rest, m1+len+m2); + } + if (kd) { + regs[a+1] = stack[m1+r+m2]; + regs[a+2] = stack[m1+r+m2+1]; + } + else { + regs[a+1] = stack[m1+r+m2]; + } + return VM_NEXT; +} + +static int +vm_op_enter(mrb_state *mrb, uint32_t a) +{ + mrb_callinfo *ci = mrb->c->ci; + const mrb_irep *irep = ci->proc->body.irep; + mrb_int argc = ci->n; + mrb_value *argv = regs+1; + + mrb_int m1 = MRB_ASPEC_REQ(a); + + /* no other args */ + if ((a & ~0x7c0001) == 0 && argc < 15 && MRB_PROC_STRICT_P(ci->proc)) { + if (mrb_unlikely(argc+(ci->nk==15) != m1)) { /* count kdict too */ + argnum_error(mrb, m1); + return VM_RAISE; + } + /* clear local (but non-argument) variables */ + mrb_int pos = m1+2; /* self+m1+blk */ + if (irep->nlocals-pos > 0) { + stack_clear(®s[pos], irep->nlocals-pos); + } + return VM_NEXT; + } + + mrb_int o = MRB_ASPEC_OPT(a); + mrb_int r = MRB_ASPEC_REST(a); + mrb_int m2 = MRB_ASPEC_POST(a); + mrb_int kd = (MRB_ASPEC_KEY(a) > 0 || MRB_ASPEC_KDICT(a))? 1 : 0; + /* unused + int b = MRB_ASPEC_BLOCK(a); + */ + mrb_int const len = m1 + o + r + m2; + + mrb_value * const argv0 = argv; + mrb_value blk = regs[ci_bidx(ci)]; + + /* &nil: reject block */ + if (MRB_ASPEC_NOBLOCK(a) && !mrb_nil_p(blk)) { + mrb_exc_set(mrb, mrb_exc_new_lit(mrb, E_ARGUMENT_ERROR, "no block accepted")); + return VM_RAISE; + } + + mrb_value kdict = mrb_nil_value(); + + /* keyword arguments */ + if (ci->nk == 15) { + kdict = regs[mrb_ci_kidx(ci)]; + } + if (!kd) { + if (!mrb_nil_p(kdict) && mrb_hash_p(kdict) && mrb_hash_size(mrb, kdict) > 0) { + if (argc < 14) { + ci->n++; + argc++; /* include kdict in normal arguments */ + } + else if (argc == 14) { + /* pack arguments and kdict */ + regs[1] = mrb_ary_new_from_values(mrb, argc+1, ®s[1]); + argc = ci->n = 15; + } + else {/* argc == 15 */ + /* push kdict to packed arguments */ + mrb_ary_push(mrb, regs[1], kdict); + } + } + kdict = mrb_nil_value(); + ci->nk = 0; + } + else if (!mrb_nil_p(kdict)) { + mrb_gc_protect(mrb, kdict); + } + + /* arguments is passed with Array */ + if (argc == 15) { + struct RArray *ary = mrb_ary_ptr(regs[1]); + argv = ARY_PTR(ary); + argc = (int)ARY_LEN(ary); + mrb_gc_protect(mrb, regs[1]); + } + + /* strict argument check */ + if (ci->proc && MRB_PROC_STRICT_P(ci->proc)) { + if (mrb_unlikely(argc < m1 + m2 || (r == 0 && argc > len))) { + argnum_error(mrb, m1+m2); + return VM_RAISE; + } + } + /* extract first argument array to arguments */ + else if (len > 1 && argc == 1 && mrb_array_p(argv[0])) { + mrb_gc_protect(mrb, argv[0]); + argc = (int)RARRAY_LEN(argv[0]); + argv = RARRAY_PTR(argv[0]); + } + + /* rest arguments */ + mrb_value rest; + if (argc < len) { + mrb_int mlen = m2; + if (argc < m1+m2) { + mlen = m1 < argc ? argc - m1 : 0; + } + + /* copy mandatory and optional arguments */ + if (argv0 != argv && argv) { + value_move(®s[1], argv, argc-mlen); /* m1 + o */ + } + if (argc < m1) { + stack_clear(®s[argc+1], m1-argc); + } + /* copy post mandatory arguments */ + if (mlen) { + value_move(®s[len-m2+1], &argv[argc-mlen], mlen); + } + if (mlen < m2) { + stack_clear(®s[len-m2+mlen+1], m2-mlen); + } + /* initialize rest arguments with empty Array */ + if (r) { + rest = mrb_ary_new_capa(mrb, 0); + regs[m1+o+1] = rest; + } + /* skip initializer of passed arguments */ + if (o > 0 && argc > m1+m2) + ci->pc += (argc - m1 - m2)*3; + } + else { + mrb_int rnum = 0; + if (argv0 != argv) { + mrb_gc_protect(mrb, blk); + value_move(®s[1], argv, m1+o); + } + if (r) { + rnum = argc-m1-o-m2; + rest = mrb_ary_new_from_values(mrb, rnum, argv+m1+o); + regs[m1+o+1] = rest; + } + if (m2 > 0 && argc-m2 > m1) { + value_move(®s[m1+o+r+1], &argv[m1+o+rnum], m2); + } + ci->pc += o*3; + } + + /* need to be update blk first to protect blk from GC */ + mrb_int const kw_pos = len + kd; /* where kwhash should be */ + mrb_int const blk_pos = kw_pos + 1; /* where block should be */ + regs[blk_pos] = blk; /* move block */ + if (kd) { + if (mrb_nil_p(kdict)) { + kdict = mrb_hash_new_capa(mrb, 0); + } + regs[kw_pos] = kdict; /* set kwhash */ + ci->nk = 15; + } + + /* format arguments for generated code */ + ci->n = (uint8_t)len; + + /* clear local (but non-argument) variables */ + if (irep->nlocals-blk_pos-1 > 0) { + stack_clear(®s[blk_pos+1], irep->nlocals-blk_pos-1); + } + return VM_NEXT; +} + +static int +vm_op_getidx(mrb_state *mrb, uint32_t a, mrb_sym *midp) +{ + mrb_callinfo *ci = mrb->c->ci; + mrb_value va = regs[a], vb = regs[a+1]; + enum mrb_vtype tt = mrb_type(va); + + /* Array case is most common - check first with branch hint */ + if (mrb_likely(tt == MRB_TT_ARRAY)) { + struct RArray *ary = mrb_ary_ptr(va); + /* optimize only for Array class; subclasses/singleton may override [] */ + if (mrb_unlikely(ary->c != mrb->array_class)) goto getidx_fallback; + if (mrb_likely(mrb_integer_p(vb))) { + mrb_int idx = mrb_integer(vb); + mrb_int len; + mrb_value *ptr; + + /* Single ARY_EMBED_P check instead of two */ +#ifndef MRB_ARY_NO_EMBED + if (ARY_EMBED_P(ary)) { + len = ARY_EMBED_LEN(ary); + ptr = ary->as.ary; + } + else +#endif + { + len = ary->as.heap.len; + ptr = ary->as.heap.ptr; + } + + /* Unsigned comparison: handles negative idx as large positive */ + if (mrb_likely((mrb_uint)idx < (mrb_uint)len)) { + regs[a] = ptr[idx]; + } + else { + regs[a] = mrb_ary_entry(va, idx); + } + return VM_NEXT; + } + goto getidx_fallback; + } + else if (tt == MRB_TT_HASH) { + /* optimize only for Hash class; subclasses/singleton may override [] */ + if (mrb_obj_ptr(va)->c != mrb->hash_class) goto getidx_fallback; + va = mrb_hash_get(mrb, va, vb); + ci = mrb->c->ci; + regs[a] = va; + return VM_NEXT; + } + else if (tt == MRB_TT_STRING) { + /* optimize only for String class; subclasses/singleton may override [] */ + if (mrb_obj_ptr(va)->c != mrb->string_class) goto getidx_fallback; + switch (mrb_type(vb)) { + case MRB_TT_INTEGER: + case MRB_TT_STRING: + case MRB_TT_RANGE: + va = mrb_str_aref(mrb, va, vb, mrb_undef_value()); + regs[a] = va; + return VM_NEXT; + default: + break; + } + } +getidx_fallback: + *midp = MRB_OPSYM(aref); + return VM_SEND_SYM; +} + +static int +vm_op_getidx0(mrb_state *mrb, uint32_t a, uint16_t b, mrb_sym *midp) +{ + mrb_callinfo *ci = mrb->c->ci; + mrb_value recv = regs[b]; + enum mrb_vtype tt = mrb_type(recv); + + if (mrb_likely(tt == MRB_TT_ARRAY)) { + struct RArray *ary = mrb_ary_ptr(recv); + if (mrb_unlikely(ary->c != mrb->array_class)) goto getidx0_fallback; +#ifndef MRB_ARY_NO_EMBED + if (ARY_EMBED_P(ary)) { + regs[a] = ARY_EMBED_LEN(ary) > 0 ? ary->as.ary[0] : mrb_nil_value(); + } + else +#endif + { + regs[a] = ary->as.heap.len > 0 ? ary->as.heap.ptr[0] : mrb_nil_value(); + } + return VM_NEXT; + } + else if (tt == MRB_TT_HASH) { + if (mrb_obj_ptr(recv)->c != mrb->hash_class) goto getidx0_fallback; + regs[a] = mrb_hash_get(mrb, recv, mrb_fixnum_value(0)); + return VM_NEXT; + } +getidx0_fallback: + regs[a] = recv; + SET_FIXNUM_VALUE(regs[a+1], 0); + *midp = MRB_OPSYM(aref); + return VM_SEND_SYM; +} + +static int +vm_op_setidx(mrb_state *mrb, uint32_t a, mrb_sym *midp) +{ + mrb_callinfo *ci = mrb->c->ci; + mrb_value va = regs[a], vb = regs[a+1], vc = regs[a+2]; + switch (mrb_type(va)) { + case MRB_TT_ARRAY: + /* optimize only for Array class; subclasses/singleton may override []= */ + if (mrb_obj_ptr(va)->c != mrb->array_class) goto setidx_fallback; + if (!mrb_integer_p(vb)) goto setidx_fallback; + mrb_ary_set(mrb, va, mrb_integer(vb), vc); + ci = mrb->c->ci; + regs[a] = vc; + return VM_NEXT; + case MRB_TT_HASH: + /* optimize only for Hash class; subclasses/singleton may override []= */ + if (mrb_obj_ptr(va)->c != mrb->hash_class) goto setidx_fallback; + mrb_hash_set(mrb, va, vb, vc); + ci = mrb->c->ci; + regs[a] = vc; + return VM_NEXT; + default: + setidx_fallback: + SET_NIL_VALUE(regs[a+3]); + *midp = MRB_OPSYM(aset); + return VM_SENDB_SYM; + } +} + +static int +vm_op_div(mrb_state *mrb, uint32_t a, mrb_sym *midp) +{ + mrb_callinfo *ci = mrb->c->ci; +#ifndef MRB_NO_FLOAT + mrb_float x, y, f; +#endif + + /* need to check if op is overridden */ + switch (TYPES2(mrb_type(regs[a]),mrb_type(regs[a+1]))) { + case TYPES2(MRB_TT_INTEGER,MRB_TT_INTEGER): + { + mrb_int x = mrb_integer(regs[a]); + mrb_int y = mrb_integer(regs[a+1]); + regs[a] = mrb_div_int_value(mrb, x, y); + } + return VM_NEXT; +#ifndef MRB_NO_FLOAT + case TYPES2(MRB_TT_INTEGER,MRB_TT_FLOAT): + x = (mrb_float)mrb_integer(regs[a]); + y = mrb_float(regs[a+1]); + break; + case TYPES2(MRB_TT_FLOAT,MRB_TT_INTEGER): + x = mrb_float(regs[a]); + y = (mrb_float)mrb_integer(regs[a+1]); + break; + case TYPES2(MRB_TT_FLOAT,MRB_TT_FLOAT): + x = mrb_float(regs[a]); + y = mrb_float(regs[a+1]); + break; +#endif + default: + *midp = MRB_OPSYM(div); + return VM_SEND_SYM; + } + +#ifndef MRB_NO_FLOAT + f = mrb_div_float(x, y); + SET_FLOAT_VALUE(mrb, regs[a], f); +#endif + return VM_NEXT; +} + +static mrb_sym +vm_define_method(mrb_state *mrb, struct RClass *tc, const mrb_irep *irep, uint16_t b, uint16_t c) +{ + struct RProc *p = mrb_proc_new(mrb, irep->reps[c]); + mrb_sym mid = irep->syms[b]; + mrb_method_t m; + + p->flags |= MRB_PROC_SCOPE | MRB_PROC_STRICT; + MRB_METHOD_FROM_PROC(m, p); + MRB_METHOD_SET_VISIBILITY(m, MRB_METHOD_VDEFAULT_FL); + mrb_define_method_raw(mrb, tc, mid, m); + mrb_method_added(mrb, tc, mid); + return mid; +} + +/* Common proc dispatch for OP_CALL and OP_BLKCALL. + Returns VM_NEXT, VM_RAISE, or VM_RETURN_NIL. */ +static int +vm_call_proc(mrb_state *mrb, const struct RProc *p, mrb_int nargs, + const mrb_irep **irepp, int ai) +{ + mrb_callinfo *ci = mrb->c->ci; + mrb_value recv = ci->stack[0]; + + /* handle alias */ + MRB_PROC_RESOLVE_ALIAS(ci, p); + if (MRB_PROC_ENV_P(p)) { + ci->mid = MRB_PROC_ENV(p)->mid; + } + ci->u.target_class = MRB_PROC_TARGET_CLASS(p); + CI_PROC_SET(ci, p); + + if (MRB_PROC_CFUNC_P(p)) { + recv = MRB_PROC_CFUNC(p)(mrb, recv); + mrb_gc_arena_shrink(mrb, ai); + if (mrb_unlikely(mrb->exc)) return VM_RAISE; + ci = cipop(mrb); + ci[1].stack[0] = recv; + *irepp = ci->proc->body.irep; + } + else { + const mrb_irep *irep = p->body.irep; + if (!irep) { + ci->stack[0] = mrb_nil_value(); + return VM_RETURN_NIL; + } + if (nargs < irep->nregs) { + stack_extend(mrb, irep->nregs); + stack_clear(ci->stack+nargs, irep->nregs-nargs); + } + if (MRB_PROC_ENV_P(p)) { + ci->stack[0] = MRB_PROC_ENV(p)->stack[0]; + } + ci->pc = irep->iseq; + *irepp = irep; + } + return VM_NEXT; +} + +/** + * @brief Executes a sequence of mruby bytecode instructions. + * + * This is the main bytecode interpreter loop. It takes a starting proc + * (`begin_proc`) and a pointer to the initial instruction (`iseq`) within + * that proc's instruction sequence. It then enters a loop, fetching and + * dispatching bytecode operations until an OP_STOP instruction is encountered, + * an exception occurs, or a C function call returns. + * + * This function handles the low-level details of instruction decoding, + * stack manipulation, exception handling (try/catch blocks within mruby code), + * and calling C functions or other mruby methods. + * + * @param mrb The mruby state. + * @param begin_proc The initial RProc whose bytecode is to be executed. + * While the name suggests it's the "beginning" proc, + * execution might involve other procs called from this one. + * @param iseq A pointer to the first bytecode instruction to execute within + * `begin_proc`'s instruction sequence. + * @return The result of the execution. This could be the return value of + * the executed Ruby code, an exception object if an unhandled + * exception occurred, or the result of a fiber switch. + * @note This function is highly complex and central to mruby's operation. + * It uses a jump table (`optable`) for efficient instruction dispatch + * when not using switch-based dispatch. It also manages the callinfo + * stack (`ci`) for tracking method/block calls. + */ +MRB_FLATTEN MRB_API mrb_value +mrb_vm_exec(mrb_state *mrb, const struct RProc *begin_proc, const mrb_code *iseq) +{ + /* mrb_assert(MRB_PROC_CFUNC_P(begin_proc)) */ + const mrb_irep *irep = begin_proc->body.irep; mrb_code insn; int ai = mrb_gc_arena_save(mrb); struct mrb_jmpbuf *prev_jmp = mrb->jmp; @@ -1338,28 +2266,45 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) mrb_sym mid; const struct mrb_irep_catch_handler *ch; -#ifdef DIRECT_THREADED +#ifndef MRB_USE_VM_SWITCH_DISPATCH static const void * const optable[] = { #define OPCODE(x,_) &&L_OP_ ## x, -#include "mruby/ops.h" +#include #undef OPCODE }; #endif - mrb_bool exc_catched = FALSE; + mrb->exc = NULL; + + mrb_callinfo *ci = mrb->c->ci; + CI_PROC_SET(ci, begin_proc); + ci->pc = iseq; + RETRY_TRY_BLOCK: MRB_TRY(&c_jmp) { - if (exc_catched) { - exc_catched = FALSE; + if (mrb_unlikely(mrb->exc)) { mrb_gc_arena_restore(mrb, ai); - if (mrb->exc && mrb->exc->tt == MRB_TT_BREAK) + if (mrb->exc->tt == MRB_TT_BREAK) goto L_BREAK; goto L_RAISE; } + /* Intentionally store stack variable address for exception handling. + * This is safe because the pointer is cleared before function returns. + * Suppress GCC 12+ warning about dangling pointer. */ +#if defined(__GNUC__) && !defined(__clang__) + #if __GNUC__ >= 12 + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wdangling-pointer" + #endif +#endif mrb->jmp = &c_jmp; - CI_PROC_SET(mrb->c->ci, proc); +#if defined(__GNUC__) && !defined(__clang__) + #if __GNUC__ >= 12 + #pragma GCC diagnostic pop + #endif +#endif INIT_DISPATCH { CASE(OP_NOP, Z) { @@ -1373,18 +2318,18 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) } CASE(OP_LOADL, BB) { - switch (pool[b].tt) { /* number */ + switch (irep->pool[b].tt) { /* number */ case IREP_TT_INT32: - regs[a] = mrb_int_value(mrb, (mrb_int)pool[b].u.i32); + regs[a] = mrb_int_value(mrb, (mrb_int)irep->pool[b].u.i32); break; case IREP_TT_INT64: #if defined(MRB_INT64) - regs[a] = mrb_int_value(mrb, (mrb_int)pool[b].u.i64); + regs[a] = mrb_int_value(mrb, (mrb_int)irep->pool[b].u.i64); break; #else #if defined(MRB_64BIT) - if (INT32_MIN <= pool[b].u.i64 && pool[b].u.i64 <= INT32_MAX) { - regs[a] = mrb_int_value(mrb, (mrb_int)pool[b].u.i64); + if (INT32_MIN <= irep->pool[b].u.i64 && irep->pool[b].u.i64 <= INT32_MAX) { + regs[a] = mrb_int_value(mrb, (mrb_int)irep->pool[b].u.i64); break; } #endif @@ -1393,8 +2338,8 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) case IREP_TT_BIGINT: #ifdef MRB_USE_BIGINT { - const char *s = pool[b].u.str; - regs[a] = mrb_bint_new_str(mrb, s+2, (uint8_t)s[0], s[1]); + const char *s = irep->pool[b].u.str; + regs[a] = mrb_bint_new_str(mrb, s+2, (uint8_t)s[0], (int8_t)s[1]); } break; #else @@ -1402,7 +2347,7 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) #endif #ifndef MRB_NO_FLOAT case IREP_TT_FLOAT: - regs[a] = mrb_float_value(mrb, pool[b].u.f); + regs[a] = mrb_float_value(mrb, irep->pool[b].u.f); break; #endif default: @@ -1413,7 +2358,7 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) NEXT; } - CASE(OP_LOADI, BB) { + CASE(OP_LOADI8, BB) { SET_FIXNUM_VALUE(regs[a], b); NEXT; } @@ -1448,7 +2393,7 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) } CASE(OP_LOADSYM, BB) { - SET_SYM_VALUE(regs[a], syms[b]); + SET_SYM_VALUE(regs[a], irep->syms[b]); NEXT; } @@ -1462,129 +2407,142 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) NEXT; } - CASE(OP_LOADT, B) { + CASE(OP_LOADTRUE, B) { SET_TRUE_VALUE(regs[a]); NEXT; } - CASE(OP_LOADF, B) { + CASE(OP_LOADFALSE, B) { SET_FALSE_VALUE(regs[a]); NEXT; } CASE(OP_GETGV, BB) { - mrb_value val = mrb_gv_get(mrb, syms[b]); + mrb_value val = mrb_gv_get(mrb, irep->syms[b]); + ci = mrb->c->ci; regs[a] = val; NEXT; } CASE(OP_SETGV, BB) { - mrb_gv_set(mrb, syms[b], regs[a]); + mrb_gv_set(mrb, irep->syms[b], regs[a]); + ci = mrb->c->ci; NEXT; } CASE(OP_GETSV, BB) { - mrb_value val = mrb_vm_special_get(mrb, syms[b]); + mrb_value val = mrb_vm_special_get(mrb, irep->syms[b]); + ci = mrb->c->ci; regs[a] = val; NEXT; } CASE(OP_SETSV, BB) { - mrb_vm_special_set(mrb, syms[b], regs[a]); + mrb_vm_special_set(mrb, irep->syms[b], regs[a]); + ci = mrb->c->ci; NEXT; } CASE(OP_GETIV, BB) { - regs[a] = mrb_iv_get(mrb, regs[0], syms[b]); + regs[a] = mrb_iv_get(mrb, regs[0], irep->syms[b]); + ci = mrb->c->ci; NEXT; } CASE(OP_SETIV, BB) { - mrb_iv_set(mrb, regs[0], syms[b], regs[a]); + mrb_iv_set(mrb, regs[0], irep->syms[b], regs[a]); + ci = mrb->c->ci; NEXT; } CASE(OP_GETCV, BB) { mrb_value val; - val = mrb_vm_cv_get(mrb, syms[b]); + val = mrb_vm_cv_get(mrb, irep->syms[b]); + ci = mrb->c->ci; regs[a] = val; NEXT; } CASE(OP_SETCV, BB) { - mrb_vm_cv_set(mrb, syms[b], regs[a]); + mrb_vm_cv_set(mrb, irep->syms[b], regs[a]); + ci = mrb->c->ci; NEXT; } CASE(OP_GETIDX, B) { - mrb_value va = regs[a], vb = regs[a+1]; - switch (mrb_type(va)) { - case MRB_TT_ARRAY: - if (!mrb_integer_p(vb)) goto getidx_fallback; - regs[a] = mrb_ary_entry(va, mrb_integer(vb)); - break; - case MRB_TT_HASH: - va = mrb_hash_get(mrb, va, vb); - regs[a] = va; - break; - case MRB_TT_STRING: - switch (mrb_type(vb)) { - case MRB_TT_INTEGER: - case MRB_TT_STRING: - case MRB_TT_RANGE: - va = mrb_str_aref(mrb, va, vb, mrb_undef_value()); - regs[a] = va; - break; - default: - goto getidx_fallback; - } - break; - default: - getidx_fallback: - mid = MRB_OPSYM(aref); - goto L_SEND_SYM; - } + int r = vm_op_getidx(mrb, a, &mid); + ci = mrb->c->ci; + if (r == VM_SEND_SYM) goto L_SEND_SYM; + NEXT; + } + + CASE(OP_GETIDX0, BB) { + int r = vm_op_getidx0(mrb, a, b, &mid); + ci = mrb->c->ci; + if (r == VM_SEND_SYM) goto L_SEND_SYM; NEXT; } CASE(OP_SETIDX, B) { - c = 2; - mid = MRB_OPSYM(aset); - SET_NIL_VALUE(regs[a+3]); - goto L_SENDB_SYM; + int r = vm_op_setidx(mrb, a, &mid); + ci = mrb->c->ci; + if (r == VM_SENDB_SYM) { c = 2; goto L_SENDB_SYM; } + NEXT; } CASE(OP_GETCONST, BB) { - mrb_value v = mrb_vm_const_get(mrb, syms[b]); - regs[a] = v; +#ifndef MRB_NO_CONST_CACHE + mrb_sym sym = irep->syms[b]; + uint32_t h = mrb_int_hash_func(mrb, ((intptr_t)irep) ^ sym) & (MRB_CONST_CACHE_SIZE-1); + struct mrb_const_cache_entry *cc = &mrb->const_cache[h]; + if (cc->irep == irep && cc->sym == sym) { + regs[a] = cc->value; + NEXT; + } +#endif + { + mrb_value v = mrb_vm_const_get(mrb, irep->syms[b]); + ci = mrb->c->ci; + regs[a] = v; +#ifndef MRB_NO_CONST_CACHE + cc->irep = irep; + cc->sym = sym; + cc->value = v; +#endif + } NEXT; } CASE(OP_SETCONST, BB) { - mrb_vm_const_set(mrb, syms[b], regs[a]); + ci = mrb->c->ci; + struct RClass *c = MRB_PROC_TARGET_CLASS(ci->proc); + if (!c) c = mrb->object_class; + mrb_const_set(mrb, mrb_obj_value(c), irep->syms[b], regs[a]); + ci = mrb->c->ci; NEXT; } CASE(OP_GETMCNST, BB) { - mrb_value v = mrb_const_get(mrb, regs[a], syms[b]); + mrb_value v = mrb_const_get(mrb, regs[a], irep->syms[b]); + ci = mrb->c->ci; regs[a] = v; NEXT; } CASE(OP_SETMCNST, BB) { - mrb_const_set(mrb, regs[a+1], syms[b], regs[a]); + mrb_const_set(mrb, regs[a+1], irep->syms[b], regs[a]); + ci = mrb->c->ci; NEXT; } CASE(OP_GETUPVAR, BBB) { - mrb_value *regs_a = regs + a; struct REnv *e = uvenv(mrb, c); if (e && b < MRB_ENV_LEN(e)) { - *regs_a = e->stack[b]; + regs[a] = e->stack[b]; } else { - *regs_a = mrb_nil_value(); + regs[a] = mrb_nil_value(); } NEXT; } @@ -1593,10 +2551,8 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) struct REnv *e = uvenv(mrb, c); if (e) { - mrb_value *regs_a = regs + a; - if (b < MRB_ENV_LEN(e)) { - e->stack[b] = *regs_a; + e->stack[b] = regs[a]; mrb_write_barrier(mrb, (struct RBasic*)e); } } @@ -1604,33 +2560,33 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) } CASE(OP_JMP, S) { - pc += (int16_t)a; + ci->pc += (int16_t)a; JUMP; } CASE(OP_JMPIF, BS) { if (mrb_test(regs[a])) { - pc += (int16_t)b; + ci->pc += (int16_t)b; JUMP; } NEXT; } CASE(OP_JMPNOT, BS) { if (!mrb_test(regs[a])) { - pc += (int16_t)b; + ci->pc += (int16_t)b; JUMP; } NEXT; } CASE(OP_JMPNIL, BS) { if (mrb_nil_p(regs[a])) { - pc += (int16_t)b; + ci->pc += (int16_t)b; JUMP; } NEXT; } CASE(OP_JMPUW, S) { - a = (uint32_t)((pc - irep->iseq) + (int16_t)a); + a = (uint32_t)((ci->pc - irep->iseq) + (int16_t)a); CHECKPOINT_RESTORE(RBREAK_TAG_JUMP) { struct RBreak *brk = (struct RBreak*)mrb->exc; mrb_value target = mrb_break_value_get(brk); @@ -1639,18 +2595,18 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) mrb_assert(a >= 0 && a < irep->ilen); } CHECKPOINT_MAIN(RBREAK_TAG_JUMP) { - ch = catch_handler_find(mrb, mrb->c->ci, pc, MRB_CATCH_FILTER_ENSURE); - if (ch) { + if (irep->clen > 0 && + (ch = catch_handler_find(irep, ci->pc, MRB_CATCH_FILTER_ENSURE))) { /* avoiding a jump from a catch handler into the same handler */ - if (a < mrb_irep_catch_handler_unpack(ch->begin) || a >= mrb_irep_catch_handler_unpack(ch->end)) { - THROW_TAGGED_BREAK(mrb, RBREAK_TAG_JUMP, proc, mrb_fixnum_value(a)); + if (a < mrb_irep_catch_handler_unpack(ch->begin) || a > mrb_irep_catch_handler_unpack(ch->end)) { + THROW_TAGGED_BREAK(mrb, RBREAK_TAG_JUMP, mrb->c->ci, mrb_fixnum_value(a)); } } } CHECKPOINT_END(RBREAK_TAG_JUMP); mrb->exc = NULL; /* clear break object */ - pc = irep->iseq + a; + ci->pc = irep->iseq + a; JUMP; } @@ -1686,14 +2642,7 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) case MRB_TT_MODULE: break; default: - { - mrb_value exc; - - exc = mrb_exc_new_lit(mrb, E_TYPE_ERROR, - "class or module required for rescue clause"); - mrb_exc_set(mrb, exc); - goto L_RAISE; - } + RAISE_LIT(mrb, E_TYPE_ERROR, "class or module required for rescue clause"); } ec = mrb_class_ptr(e); regs[b] = mrb_bool_value(mrb_obj_is_kind_of(mrb, exc, ec)); @@ -1701,21 +2650,81 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) } CASE(OP_RAISEIF, B) { - mrb_value exc = regs[a]; - if (mrb_break_p(exc)) { + mrb_value exc; + exc = regs[a]; + if (mrb_likely(mrb_nil_p(exc))) { + mrb->exc = NULL; + } + else if (mrb_break_p(exc)) { + struct RBreak *brk; mrb->exc = mrb_obj_ptr(exc); - goto L_BREAK; + L_BREAK: + brk = (struct RBreak*)mrb->exc; + switch (mrb_break_tag_get(brk)) { +#define DISPATCH_CHECKPOINTS(n, i) case n: goto CHECKPOINT_LABEL_MAKE(n); + RBREAK_TAG_FOREACH(DISPATCH_CHECKPOINTS) +#undef DISPATCH_CHECKPOINTS + default: + mrb_assert(!"wrong break tag"); + } } - mrb_exc_set(mrb, exc); - if (mrb->exc) { - goto L_RAISE; + else { + mrb_exc_set(mrb, exc); + L_RAISE: + ci = mrb->c->ci; + while (!ci->proc || MRB_PROC_CFUNC_P(ci->proc) || !(irep = ci->proc->body.irep) || irep->clen < 1 || + (ch = catch_handler_find(irep, ci->pc, MRB_CATCH_FILTER_ALL)) == NULL) { + if (ci != mrb->c->cibase) { + ci = cipop(mrb); + if (ci[1].cci == CINFO_SKIP) { + mrb_assert(prev_jmp != NULL); + mrb->jmp = prev_jmp; + MRB_THROW(prev_jmp); + } + } + else if (mrb->c == mrb->root_c) { + ci->stack = mrb->c->stbase; + mrb->jmp = prev_jmp; + return mrb_obj_value(mrb->exc); + } + else { + struct mrb_context *c = mrb->c; + + fiber_terminate(mrb, c, ci); + if (mrb_unlikely(!c->vmexec)) goto L_RAISE; + mrb->jmp = prev_jmp; + if (TASK_RETURN_EXCEPTION_AS_VALUE(mrb)) return mrb_obj_value(mrb->exc); + if (!prev_jmp) return mrb_obj_value(mrb->exc); + MRB_THROW(prev_jmp); + } + } + + if (FALSE) { + L_CATCH_TAGGED_BREAK: /* from THROW_TAGGED_BREAK() or UNWIND_ENSURE() */ + ci = mrb->c->ci; + } + irep = ci->proc->body.irep; + stack_extend(mrb, irep->nregs); + ci->pc = irep->iseq + mrb_irep_catch_handler_unpack(ch->target); + } + NEXT; + } + + CASE(OP_MATCHERR, B) { + if (mrb_unlikely(!mrb_test(regs[a]))) { + RAISE_LIT(mrb, mrb_exc_get_id(mrb, MRB_ERROR_SYM(NoMatchingPatternError)), "pattern not matched"); } NEXT; } CASE(OP_SSEND, BBB) { regs[a] = regs[0]; - insn = OP_SEND; + } + goto L_SENDB; + + CASE(OP_SSEND0, BB) { + regs[a] = regs[0]; + c = 0; } goto L_SENDB; @@ -1727,6 +2736,11 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) CASE(OP_SEND, BBB) goto L_SENDB; + CASE(OP_SEND0, BB) { + c = 0; + } + goto L_SENDB; + L_SEND_SYM: c = 1; /* push nil after arguments */ @@ -1735,176 +2749,157 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) CASE(OP_SENDB, BBB) L_SENDB: - mid = syms[b]; + mid = irep->syms[b]; L_SENDB_SYM: { - mrb_callinfo *ci; mrb_method_t m; - struct RClass *cls; mrb_value recv, blk; - int n = c&0xf; - int nk = (c>>4)&0xf; - mrb_int bidx = a + mrb_bidx(n,nk); + mrb_int bidx, new_bidx; - if (nk == CALL_MAXARGS) { - mrb_ensure_hash_type(mrb, regs[a+(n==CALL_MAXARGS?1:n)+1]); + if (mrb_likely(c < CALL_MAXARGS)) { + /* fast path limited to fixed length arguments of less than 15 */ + bidx = a + c + 1 /* self */; + new_bidx = bidx; } - else if (nk > 0) { /* pack keyword arguments */ - mrb_int kidx = a+(n==CALL_MAXARGS?1:n)+1; - mrb_value kdict = hash_new_from_regs(mrb, nk, kidx); - regs[kidx] = kdict; - nk = CALL_MAXARGS; - c = n | (nk<<4); + else { + int n = c&0xf; + int nk = (c>>4)&0xf; + bidx = a + mrb_bidx(n,nk); + new_bidx = bidx; + if (nk == CALL_MAXARGS) { + mrb_ensure_hash_type(mrb, regs[a+(n==CALL_MAXARGS?1:n)+1]); + } + else if (nk > 0) { /* pack keyword arguments */ + mrb_int kidx = a+(n==CALL_MAXARGS?1:n)+1; + mrb_value kdict = hash_new_from_regs(mrb, nk, kidx); + ci = mrb->c->ci; + regs[kidx] = kdict; + nk = CALL_MAXARGS; + c = n | (nk<<4); + new_bidx = a+mrb_bidx(n, nk); + } } mrb_assert(bidx < irep->nregs); - mrb_int new_bidx = a+mrb_bidx(n, nk); - if (insn == OP_SEND) { + if (insn == OP_SEND || insn == OP_SEND0 || insn == OP_SSEND || insn == OP_SSEND0) { /* clear block argument */ SET_NIL_VALUE(regs[new_bidx]); SET_NIL_VALUE(blk); } else { blk = ensure_block(mrb, regs[bidx]); + ci = mrb->c->ci; regs[new_bidx] = blk; } - recv = regs[a]; - ci = mrb->c->ci; - cls = (insn == OP_SUPER) ? CI_TARGET_CLASS(ci)->super : mrb_class(mrb, recv); - m = mrb_method_search_vm(mrb, &cls, mid); ci = cipush(mrb, a, CINFO_DIRECT, NULL, NULL, BLK_PTR(blk), 0, c); - if (MRB_METHOD_UNDEF_P(m)) { - m = prepare_missing(mrb, recv, mid, &cls, 0, &c, blk, (insn == OP_SUPER ? 1 : 0)); - mid = MRB_SYM(method_missing); - ci->n = (c >> 0) & 0x0f; - ci->nk = (c >> 4) & 0x0f; + recv = regs[0]; + ci->u.target_class = (insn == OP_SUPER) ? CI_TARGET_CLASS(ci - 1)->super : mrb_class(mrb, recv); + m = mrb_vm_find_method(mrb, ci->u.target_class, &ci->u.target_class, mid); + if (mrb_unlikely(MRB_METHOD_UNDEF_P(m))) { + m = prepare_missing(mrb, ci, recv, mid, blk, (insn == OP_SUPER)); + } + else { + ci->mid = mid; + } + if (insn == OP_SEND || insn == OP_SEND0 || insn == OP_SENDB) { + mrb_bool priv = TRUE; + if (m.flags & MRB_METHOD_PRIVATE_FL) { + vis_err:; + mrb_value args = (ci->n == 15) ? regs[1] : mrb_ary_new_from_values(mrb, ci->n, regs+1); + vis_error(mrb, mid, args, recv, priv); + } + else if ((m.flags & MRB_METHOD_PROTECTED_FL) && mrb_obj_is_kind_of(mrb, recv, ci->u.target_class)) { + priv = FALSE; + goto vis_err; + } } ci->cci = CINFO_NONE; - ci->mid = mid; - ci->u.target_class = cls; - if (!mrb_nil_p(blk)) ci->blk = mrb_proc_ptr(blk); - - if (MRB_METHOD_CFUNC_P(m)) { - if (MRB_METHOD_PROC_P(m)) { - struct RProc *p = MRB_METHOD_PROC(m); - CI_PROC_SET(ci, p); - recv = p->body.func(mrb, recv); + + if (MRB_METHOD_PROC_P(m)) { + const struct RProc *p = MRB_METHOD_PROC(m); + /* handle alias */ + MRB_PROC_RESOLVE_ALIAS(ci, p); + CI_PROC_SET(ci, p); + if (!MRB_PROC_CFUNC_P(p)) { + /* setup environment for calling method */ + irep = p->body.irep; + stack_extend(mrb, (irep->nregs < 4) ? 4 : irep->nregs); + ci->pc = irep->iseq; + JUMP; } else { - if (MRB_METHOD_NOARG_P(m)) { - check_method_noarg(mrb, ci); - } - recv = MRB_METHOD_FUNC(m)(mrb, recv); - } - mrb_gc_arena_shrink(mrb, ai); - if (mrb->exc) goto L_RAISE; - ci = mrb->c->ci; - if (!ci->u.target_class) { /* return from context modifying method (resume/yield) */ - if (ci->cci == CINFO_RESUMED) { - mrb->jmp = prev_jmp; - return recv; - } - else { - mrb_assert(!MRB_PROC_CFUNC_P(ci[-1].proc)); - proc = ci[-1].proc; - irep = proc->body.irep; - pool = irep->pool; - syms = irep->syms; + if (MRB_PROC_NOARG_P(p) && (ci->n > 0 || ci->nk > 0)) { + check_argument_count(mrb, ci, 0); } + recv = MRB_PROC_CFUNC(p)(mrb, recv); } - ci->stack[0] = recv; - /* pop stackpos */ - ci = cipop(mrb); - pc = ci->pc; } else { - /* setup environment for calling method */ - proc = MRB_METHOD_PROC(m); - CI_PROC_SET(ci, proc); - irep = proc->body.irep; - pool = irep->pool; - syms = irep->syms; - mrb_stack_extend(mrb, (irep->nregs < 4) ? 4 : irep->nregs); - pc = irep->iseq; + check_argument_count(mrb, ci, MRB_MT_ASPEC(m.flags)); + recv = MRB_METHOD_FUNC(m)(mrb, recv); } - } - JUMP; - CASE(OP_CALL, Z) { - mrb_callinfo *ci = mrb->c->ci; - mrb_value recv = ci->stack[0]; - struct RProc *m = mrb_proc_ptr(recv); - - /* replace callinfo */ - ci->u.target_class = MRB_PROC_TARGET_CLASS(m); - CI_PROC_SET(ci, m); - if (MRB_PROC_ENV_P(m)) { - ci->mid = MRB_PROC_ENV(m)->mid; - } - - /* prepare stack */ - if (MRB_PROC_CFUNC_P(m)) { - recv = MRB_PROC_CFUNC(m)(mrb, recv); - mrb_gc_arena_shrink(mrb, ai); - if (mrb->exc) goto L_RAISE; - /* pop stackpos */ - ci = cipop(mrb); - pc = ci->pc; - ci[1].stack[0] = recv; - irep = mrb->c->ci->proc->body.irep; - } - else { - /* setup environment for calling method */ - proc = m; - irep = m->body.irep; - if (!irep) { - mrb->c->ci->stack[0] = mrb_nil_value(); - a = 0; - c = OP_R_NORMAL; - goto L_OP_RETURN_BODY; - } - mrb_int nargs = mrb_ci_bidx(ci)+1; - if (nargs < irep->nregs) { - mrb_stack_extend(mrb, irep->nregs); - stack_clear(regs+nargs, irep->nregs-nargs); + /* cfunc epilogue */ + mrb_gc_arena_shrink(mrb, ai); + if (mrb_unlikely(mrb->exc)) goto L_RAISE; + ci = mrb->c->ci; + if (!ci->u.keep_context) { /* return from context modifying method (resume/yield) */ + if (ci->cci == CINFO_RESUMED) { + mrb->jmp = prev_jmp; + return recv; } - if (MRB_PROC_ENV_P(m)) { - regs[0] = MRB_PROC_ENV(m)->stack[0]; + else { + mrb_assert(!MRB_PROC_CFUNC_P(ci[-1].proc)); + irep = ci[-1].proc->body.irep; } - pc = irep->iseq; } - pool = irep->pool; - syms = irep->syms; + mrb_assert(ci > mrb->c->cibase); + ci->stack[0] = recv; + /* pop stackpos */ + ci = cipop(mrb); + JUMP; + } + + CASE(OP_CALL, Z) { + const struct RProc *p = mrb_proc_ptr(ci->stack[0]); + int r = vm_call_proc(mrb, p, ci_bidx(ci)+1, &irep, ai); + ci = mrb->c->ci; + if (r == VM_RAISE) goto L_RAISE; + if (r == VM_RETURN_NIL) { a = 0; goto L_OP_RETURN_BODY; } + JUMP; + } + + CASE(OP_BLKCALL, BB) { + /* Direct block call: R[a] = R[a].call(R[a+1],...,R[a+b]) */ + if (mrb_unlikely(!mrb_proc_p(regs[a]))) { + mrb_raisef(mrb, E_TYPE_ERROR, "wrong type %T (expected Proc)", regs[a]); + } + const struct RProc *p = mrb_proc_ptr(regs[a]); + ci = cipush(mrb, a, CINFO_DIRECT, NULL, NULL, NULL, 0, b); + ci->cci = CINFO_NONE; /* mark as VM-to-VM call for proper break handling */ + int r = vm_call_proc(mrb, p, b+1, &irep, ai); + ci = mrb->c->ci; + if (r == VM_RAISE) goto L_RAISE; + if (r == VM_RETURN_NIL) { a = 0; goto L_OP_RETURN_BODY; } JUMP; } CASE(OP_SUPER, BB) { - mrb_callinfo *ci = mrb->c->ci; mrb_value recv; - const struct RProc *p = ci->proc; struct RClass* target_class = CI_TARGET_CLASS(ci); mid = ci->mid; - if (MRB_PROC_ENV_P(p) && p->e.env->mid && p->e.env->mid != mid) { /* alias support */ - mid = p->e.env->mid; /* restore old mid */ - } - if (mid == 0 || !target_class) { - mrb_value exc = mrb_exc_new_lit(mrb, E_NOMETHOD_ERROR, "super called outside of method"); - mrb_exc_set(mrb, exc); - goto L_RAISE; + RAISE_LIT(mrb, E_NOMETHOD_ERROR, "super called outside of method"); } if ((target_class->flags & MRB_FL_CLASS_IS_PREPENDED) || target_class->tt == MRB_TT_MODULE) { goto super_typeerror; } recv = regs[0]; if (!mrb_obj_is_kind_of(mrb, recv, target_class)) { - super_typeerror: ; - mrb_value exc = mrb_exc_new_lit(mrb, E_TYPE_ERROR, - "self has wrong type to call super in this context"); - mrb_exc_set(mrb, exc); - goto L_RAISE; + super_typeerror: + RAISE_LIT(mrb, E_TYPE_ERROR, "self has wrong type to call super in this context"); } c = b; // arg info @@ -1913,567 +2908,232 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) } CASE(OP_ARGARY, BS) { - mrb_int m1 = (b>>11)&0x3f; - mrb_int r = (b>>10)&0x1; - mrb_int m2 = (b>>5)&0x1f; - mrb_int kd = (b>>4)&0x1; - mrb_int lv = (b>>0)&0xf; - mrb_value *stack; - - if (mrb->c->ci->mid == 0 || CI_TARGET_CLASS(mrb->c->ci) == NULL) { - mrb_value exc; - - L_NOSUPER: - exc = mrb_exc_new_lit(mrb, E_NOMETHOD_ERROR, "super called outside of method"); - mrb_exc_set(mrb, exc); - goto L_RAISE; - } - if (lv == 0) stack = regs + 1; - else { - struct REnv *e = uvenv(mrb, lv-1); - if (!e) goto L_NOSUPER; - if (MRB_ENV_LEN(e) <= m1+r+m2+1) - goto L_NOSUPER; - stack = e->stack + 1; - } - if (r == 0) { - regs[a] = mrb_ary_new_from_values(mrb, m1+m2, stack); - } - else { - mrb_value *pp = NULL; - struct RArray *rest; - mrb_int len = 0; - - if (mrb_array_p(stack[m1])) { - struct RArray *ary = mrb_ary_ptr(stack[m1]); - - pp = ARY_PTR(ary); - len = ARY_LEN(ary); - } - regs[a] = mrb_ary_new_capa(mrb, m1+len+m2); - rest = mrb_ary_ptr(regs[a]); - if (m1 > 0) { - stack_copy(ARY_PTR(rest), stack, m1); - } - if (len > 0) { - stack_copy(ARY_PTR(rest)+m1, pp, len); - } - if (m2 > 0) { - stack_copy(ARY_PTR(rest)+m1+len, stack+m1+1, m2); - } - ARY_SET_LEN(rest, m1+len+m2); - } - if (kd) { - regs[a+1] = stack[m1+r+m2]; - regs[a+2] = stack[m1+r+m2+1]; - } - else { - regs[a+1] = stack[m1+r+m2]; - } + if (vm_op_argary(mrb, a, b) == VM_RAISE) goto L_RAISE; mrb_gc_arena_restore(mrb, ai); NEXT; } CASE(OP_ENTER, W) { - mrb_int m1 = MRB_ASPEC_REQ(a); - mrb_int o = MRB_ASPEC_OPT(a); - mrb_int r = MRB_ASPEC_REST(a); - mrb_int m2 = MRB_ASPEC_POST(a); - mrb_int kd = (MRB_ASPEC_KEY(a) > 0 || MRB_ASPEC_KDICT(a))? 1 : 0; - /* unused - int b = MRB_ASPEC_BLOCK(a); - */ - mrb_int const len = m1 + o + r + m2; - - mrb_callinfo *ci = mrb->c->ci; - mrb_int argc = ci->n; - mrb_value *argv = regs+1; - mrb_value * const argv0 = argv; - mrb_value blk = regs[mrb_ci_bidx(ci)]; - mrb_value kdict = mrb_nil_value(); - - /* keyword arguments */ - if (ci->nk == 15) { - kdict = regs[mrb_ci_kidx(ci)]; - } - if (!kd) { - if (!mrb_nil_p(kdict) && mrb_hash_size(mrb, kdict) > 0) { - if (argc < 14) { - ci->n++; - argc++; /* include kdict in normal arguments */ - } - else if (argc == 14) { - /* pack arguments and kdict */ - regs[1] = ary_new_from_regs(mrb, argc+1, 1); - argc = ci->n = 15; - } - else {/* argc == 15 */ - /* push kdict to packed arguments */ - mrb_ary_push(mrb, regs[1], kdict); - } - } - kdict = mrb_nil_value(); - ci->nk = 0; - } - else if (MRB_ASPEC_KEY(a) > 0 && !mrb_nil_p(kdict)) { - kdict = mrb_hash_dup(mrb, kdict); - } - - /* arguments is passed with Array */ - if (argc == 15) { - struct RArray *ary = mrb_ary_ptr(regs[1]); - argv = ARY_PTR(ary); - argc = (int)ARY_LEN(ary); - mrb_gc_protect(mrb, regs[1]); - if (kd && !mrb_nil_p(kdict)) mrb_gc_protect(mrb, kdict); - } - - /* strict argument check */ - if (ci->proc && MRB_PROC_STRICT_P(ci->proc)) { - if (argc < m1 + m2 || (r == 0 && argc > len)) { - argnum_error(mrb, m1+m2); - goto L_RAISE; - } - } - /* extract first argument array to arguments */ - else if (len > 1 && argc == 1 && mrb_array_p(argv[0])) { - mrb_gc_protect(mrb, argv[0]); - argc = (int)RARRAY_LEN(argv[0]); - argv = RARRAY_PTR(argv[0]); - } - - /* rest arguments */ - mrb_value rest = mrb_nil_value(); - if (argc < len) { - mrb_int mlen = m2; - if (argc < m1+m2) { - mlen = m1 < argc ? argc - m1 : 0; - } - - /* copy mandatory and optional arguments */ - if (argv0 != argv && argv) { - value_move(®s[1], argv, argc-mlen); /* m1 + o */ - } - if (argc < m1) { - stack_clear(®s[argc+1], m1-argc); - } - /* copy post mandatory arguments */ - if (mlen) { - value_move(®s[len-m2+1], &argv[argc-mlen], mlen); - } - if (mlen < m2) { - stack_clear(®s[len-m2+mlen+1], m2-mlen); - } - /* initialize rest arguments with empty Array */ - if (r) { - rest = mrb_ary_new_capa(mrb, 0); - regs[m1+o+1] = rest; - } - /* skip initializer of passed arguments */ - if (o > 0 && argc > m1+m2) - pc += (argc - m1 - m2)*3; - } - else { - mrb_int rnum = 0; - if (argv0 != argv) { - value_move(®s[1], argv, m1+o); - } - if (r) { - rnum = argc-m1-o-m2; - rest = mrb_ary_new_from_values(mrb, rnum, argv+m1+o); - regs[m1+o+1] = rest; - } - if (m2 > 0 && argc-m2 > m1) { - value_move(®s[m1+o+r+1], &argv[m1+o+rnum], m2); - } - pc += o*3; - } - - /* need to be update blk first to protect blk from GC */ - mrb_int const kw_pos = len + kd; /* where kwhash should be */ - mrb_int const blk_pos = kw_pos + 1; /* where block should be */ - regs[blk_pos] = blk; /* move block */ - if (kd) { - if (mrb_nil_p(kdict)) - kdict = mrb_hash_new_capa(mrb, 0); - regs[kw_pos] = kdict; /* set kwhash */ - ci->nk = 15; - } - - /* format arguments for generated code */ - mrb->c->ci->n = (uint8_t)len; - - /* clear local (but non-argument) variables */ - if (irep->nlocals-blk_pos-1 > 0) { - stack_clear(®s[blk_pos+1], irep->nlocals-blk_pos-1); - } + if (vm_op_enter(mrb, a) == VM_RAISE) goto L_RAISE; + ci = mrb->c->ci; + irep = ci->proc->body.irep; JUMP; } CASE(OP_KARG, BB) { - mrb_value k = mrb_symbol_value(syms[b]); - mrb_int kidx = mrb_ci_kidx(mrb->c->ci); + mrb_value k = mrb_symbol_value(irep->syms[b]); + mrb_int kidx = mrb_ci_kidx(ci); mrb_value kdict, v; if (kidx < 0 || !mrb_hash_p(kdict=regs[kidx]) || !mrb_hash_key_p(mrb, kdict, k)) { - mrb_value str = mrb_format(mrb, "missing keyword: %v", k); - mrb_exc_set(mrb, mrb_exc_new_str(mrb, E_ARGUMENT_ERROR, str)); - goto L_RAISE; + RAISE_FORMAT(mrb, E_ARGUMENT_ERROR, "missing keyword: %v", k); } - v = mrb_hash_get(mrb, kdict, k); + + v = mrb_hash_delete_key(mrb, kdict, k); + ci = mrb->c->ci; regs[a] = v; - mrb_hash_delete_key(mrb, kdict, k); NEXT; } CASE(OP_KEY_P, BB) { - mrb_value k = mrb_symbol_value(syms[b]); - mrb_int kidx = mrb_ci_kidx(mrb->c->ci); + mrb_value k = mrb_symbol_value(irep->syms[b]); + mrb_int kidx = mrb_ci_kidx(ci); mrb_value kdict; mrb_bool key_p = FALSE; if (kidx >= 0 && mrb_hash_p(kdict=regs[kidx])) { key_p = mrb_hash_key_p(mrb, kdict, k); + ci = mrb->c->ci; } regs[a] = mrb_bool_value(key_p); NEXT; } CASE(OP_KEYEND, Z) { - mrb_int kidx = mrb_ci_kidx(mrb->c->ci); + mrb_int kidx = mrb_ci_kidx(ci); mrb_value kdict; if (kidx >= 0 && mrb_hash_p(kdict=regs[kidx]) && !mrb_hash_empty_p(mrb, kdict)) { - mrb_value keys = mrb_hash_keys(mrb, kdict); - mrb_value key1 = RARRAY_PTR(keys)[0]; - mrb_value str = mrb_format(mrb, "unknown keyword: %v", key1); - mrb_exc_set(mrb, mrb_exc_new_str(mrb, E_ARGUMENT_ERROR, str)); - goto L_RAISE; + mrb_value key1 = mrb_hash_first_key(mrb, kdict); + RAISE_FORMAT(mrb, E_ARGUMENT_ERROR, "unknown keyword: %v", key1); } NEXT; } CASE(OP_BREAK, B) { - c = OP_R_BREAK; - goto L_RETURN; + if (MRB_PROC_STRICT_P(ci->proc)) goto NORMAL_RETURN; + if (!MRB_PROC_ORPHAN_P(ci->proc) && MRB_PROC_ENV_P(ci->proc) && ci->proc->e.env->cxt == mrb->c) { + const struct RProc *dst = ci->proc->upper; + for (ptrdiff_t i = ci - mrb->c->cibase; i > 0; i--, ci--) { + if (ci[-1].proc == dst) { + goto L_UNWINDING; + } + } + } + RAISE_LIT(mrb, E_LOCALJUMP_ERROR, "break from proc-closure"); + /* not reached */ } CASE(OP_RETURN_BLK, B) { - c = OP_R_RETURN; - goto L_RETURN; - } - CASE(OP_RETURN, B) - c = OP_R_NORMAL; - L_RETURN: - { - mrb_callinfo *ci; + if (!MRB_PROC_ENV_P(ci->proc) || MRB_PROC_STRICT_P(ci->proc)) { + goto NORMAL_RETURN; + } - ci = mrb->c->ci; - if (mrb->exc) { - L_RAISE: - ci = mrb->c->ci; - if (ci == mrb->c->cibase) { - ch = catch_handler_find(mrb, ci, pc, MRB_CATCH_FILTER_ALL); - if (ch == NULL) goto L_FTOP; - goto L_CATCH; - } - while ((ch = catch_handler_find(mrb, ci, pc, MRB_CATCH_FILTER_ALL)) == NULL) { - ci = cipop(mrb); - if (ci[1].cci == CINFO_SKIP && prev_jmp) { - mrb->jmp = prev_jmp; - MRB_THROW(prev_jmp); - } - pc = ci[0].pc; - if (ci == mrb->c->cibase) { - ch = catch_handler_find(mrb, ci, pc, MRB_CATCH_FILTER_ALL); - if (ch == NULL) { - L_FTOP: /* fiber top */ - if (mrb->c == mrb->root_c) { - mrb->c->ci->stack = mrb->c->stbase; - goto L_STOP; - } - else { - struct mrb_context *c = mrb->c; - - c->status = MRB_FIBER_TERMINATED; - mrb->c = c->prev; - c->prev = NULL; - goto L_RAISE; - } - } - break; + const struct REnv *env = ci->u.env; + const struct RProc *dst = top_proc(mrb, ci->proc, &env); + if (!MRB_PROC_ENV_P(dst) || dst->e.env->cxt == mrb->c) { + /* check jump destination */ + for (ptrdiff_t i = ci - mrb->c->cibase; i >= 0; i--, ci--) { + if (ci->u.env == env) { + goto L_UNWINDING; } } - L_CATCH: - if (ch == NULL) goto L_STOP; - if (FALSE) { - L_CATCH_TAGGED_BREAK: /* from THROW_TAGGED_BREAK() or UNWIND_ENSURE() */ + } + /* no jump destination */ + RAISE_LIT(mrb, E_LOCALJUMP_ERROR, "unexpected return"); + /* not reached */ + } + CASE(OP_RETSELF, Z) { + a = 0; + goto NORMAL_RETURN; + } + CASE(OP_RETNIL, Z) { + a = 0; + goto L_RETURN_NIL; + } + CASE(OP_RETTRUE, Z) { + a = 0; + goto L_RETURN_TRUE; + } + CASE(OP_RETFALSE, Z) { + a = 0; + goto L_RETURN_FALSE; + } + CASE(OP_RETURN, B) { + mrb_int acc; + mrb_value v; + mrb_callinfo *return_ci; + + NORMAL_RETURN: + v = regs[a]; + goto L_RETURN; + L_RETURN_NIL: + v = mrb_nil_value(); + goto L_RETURN; + L_RETURN_TRUE: + v = mrb_true_value(); + goto L_RETURN; + L_RETURN_FALSE: + v = mrb_false_value(); + L_RETURN: + mrb_gc_protect(mrb, v); + return_ci = ci; + CHECKPOINT_RESTORE(RBREAK_TAG_BREAK) { + if (TRUE) { + struct RBreak *brk = (struct RBreak*)mrb->exc; + return_ci = &mrb->c->cibase[brk->ci_break_index]; + v = mrb_break_value_get(brk); + } + else { + L_UNWINDING: + return_ci = ci; ci = mrb->c->ci; + v = ci->stack[a]; } - proc = ci->proc; - irep = proc->body.irep; - pool = irep->pool; - syms = irep->syms; - mrb_stack_extend(mrb, irep->nregs); - pc = irep->iseq + mrb_irep_catch_handler_unpack(ch->target); + mrb_gc_protect(mrb, v); } - else { - mrb_int acc; - mrb_value v; + CHECKPOINT_MAIN(RBREAK_TAG_BREAK) { + for (;;) { + UNWIND_ENSURE(mrb, ci, ci->pc, RBREAK_TAG_BREAK, return_ci, v); - ci = mrb->c->ci; - v = regs[a]; - mrb_gc_protect(mrb, v); - switch (c) { - case OP_R_RETURN: - /* Fall through to OP_R_NORMAL otherwise */ - if (ci->cci == CINFO_NONE && MRB_PROC_ENV_P(proc) && !MRB_PROC_STRICT_P(proc)) { - const struct RProc *dst; - mrb_callinfo *cibase; - cibase = mrb->c->cibase; - dst = top_proc(mrb, proc); - - if (MRB_PROC_ENV_P(dst)) { - struct REnv *e = MRB_PROC_ENV(dst); - - if (!MRB_ENV_ONSTACK_P(e) || (e->cxt && e->cxt != mrb->c)) { - localjump_error(mrb, LOCALJUMP_ERROR_RETURN); - goto L_RAISE; - } - } - /* check jump destination */ - while (cibase <= ci && ci->proc != dst) { - if (ci->cci > CINFO_NONE) { /* jump cross C boundary */ - localjump_error(mrb, LOCALJUMP_ERROR_RETURN); - goto L_RAISE; - } - ci--; - } - if (ci <= cibase) { /* no jump destination */ - localjump_error(mrb, LOCALJUMP_ERROR_RETURN); - goto L_RAISE; - } - ci = mrb->c->ci; - while (cibase <= ci && ci->proc != dst) { - CHECKPOINT_RESTORE(RBREAK_TAG_RETURN_BLOCK) { - cibase = mrb->c->cibase; - dst = top_proc(mrb, proc); - } - CHECKPOINT_MAIN(RBREAK_TAG_RETURN_BLOCK) { - UNWIND_ENSURE(mrb, ci, pc, RBREAK_TAG_RETURN_BLOCK, proc, v); - } - CHECKPOINT_END(RBREAK_TAG_RETURN_BLOCK); - ci = cipop(mrb); - pc = ci->pc; - } - proc = ci->proc; - mrb->exc = NULL; /* clear break object */ + if (ci == return_ci) { break; } - /* fallthrough */ - case OP_R_NORMAL: - NORMAL_RETURN: - if (ci == mrb->c->cibase) { - struct mrb_context *c; - c = mrb->c; - - if (!c->prev) { /* toplevel return */ - regs[irep->nlocals] = v; - goto CHECKPOINT_LABEL_MAKE(RBREAK_TAG_STOP); - } - if (!c->vmexec && c->prev->ci == c->prev->cibase) { - mrb_value exc = mrb_exc_new_lit(mrb, E_FIBER_ERROR, "double resume"); - mrb_exc_set(mrb, exc); - goto L_RAISE; - } - CHECKPOINT_RESTORE(RBREAK_TAG_RETURN_TOPLEVEL) { - c = mrb->c; - } - CHECKPOINT_MAIN(RBREAK_TAG_RETURN_TOPLEVEL) { - UNWIND_ENSURE(mrb, ci, pc, RBREAK_TAG_RETURN_TOPLEVEL, proc, v); - } - CHECKPOINT_END(RBREAK_TAG_RETURN_TOPLEVEL); - /* automatic yield at the end */ - c->status = MRB_FIBER_TERMINATED; - mrb->c = c->prev; - mrb->c->status = MRB_FIBER_RUNNING; - c->prev = NULL; - if (c->vmexec) { - mrb_gc_arena_restore(mrb, ai); - c->vmexec = FALSE; - mrb->jmp = prev_jmp; - return v; - } - ci = mrb->c->ci; - } - CHECKPOINT_RESTORE(RBREAK_TAG_RETURN) { - /* do nothing */ - } - CHECKPOINT_MAIN(RBREAK_TAG_RETURN) { - UNWIND_ENSURE(mrb, ci, pc, RBREAK_TAG_RETURN, proc, v); - } - CHECKPOINT_END(RBREAK_TAG_RETURN); - mrb->exc = NULL; /* clear break object */ - break; - case OP_R_BREAK: - if (MRB_PROC_STRICT_P(proc)) goto NORMAL_RETURN; - if (MRB_PROC_ORPHAN_P(proc)) { - mrb_value exc; - - L_BREAK_ERROR: - exc = mrb_exc_new_lit(mrb, E_LOCALJUMP_ERROR, - "break from proc-closure"); - mrb_exc_set(mrb, exc); - goto L_RAISE; - } - if (!MRB_PROC_ENV_P(proc) || !MRB_ENV_ONSTACK_P(MRB_PROC_ENV(proc))) { - goto L_BREAK_ERROR; - } - else { - struct REnv *e = MRB_PROC_ENV(proc); - - if (e->cxt != mrb->c) { - goto L_BREAK_ERROR; - } - } - CHECKPOINT_RESTORE(RBREAK_TAG_BREAK) { - /* do nothing */ - } - CHECKPOINT_MAIN(RBREAK_TAG_BREAK) { - UNWIND_ENSURE(mrb, ci, pc, RBREAK_TAG_BREAK, proc, v); - } - CHECKPOINT_END(RBREAK_TAG_BREAK); - /* break from fiber block */ - if (ci == mrb->c->cibase && ci->pc) { - struct mrb_context *c = mrb->c; - - mrb->c = c->prev; - c->prev = NULL; - ci = mrb->c->ci; - } - if (ci->cci > CINFO_NONE) { - ci = cipop(mrb); - mrb->exc = (struct RObject*)break_new(mrb, RBREAK_TAG_BREAK, proc, v); + ci = cipop(mrb); + if (ci[1].cci != CINFO_NONE) { + mrb_assert(prev_jmp != NULL); + mrb->exc = (struct RObject*)break_new(mrb, RBREAK_TAG_BREAK, return_ci, v); mrb_gc_arena_restore(mrb, ai); mrb->c->vmexec = FALSE; mrb->jmp = prev_jmp; MRB_THROW(prev_jmp); } - if (FALSE) { - struct RBreak *brk; - - L_BREAK: - brk = (struct RBreak*)mrb->exc; - proc = mrb_break_proc_get(brk); - v = mrb_break_value_get(brk); - ci = mrb->c->ci; + } + } + CHECKPOINT_END(RBREAK_TAG_BREAK); + mrb->exc = NULL; /* clear break object */ - switch (mrb_break_tag_get(brk)) { -#define DISPATCH_CHECKPOINTS(n, i) case n: goto CHECKPOINT_LABEL_MAKE(n); - RBREAK_TAG_FOREACH(DISPATCH_CHECKPOINTS) -#undef DISPATCH_CHECKPOINTS - default: - mrb_assert(!"wrong break tag"); - } - } - while (mrb->c->cibase < ci && ci[-1].proc != proc->upper) { - if (ci[-1].cci == CINFO_SKIP) { - goto L_BREAK_ERROR; - } - CHECKPOINT_RESTORE(RBREAK_TAG_BREAK_UPPER) { - /* do nothing */ - } - CHECKPOINT_MAIN(RBREAK_TAG_BREAK_UPPER) { - UNWIND_ENSURE(mrb, ci, pc, RBREAK_TAG_BREAK_UPPER, proc, v); - } - CHECKPOINT_END(RBREAK_TAG_BREAK_UPPER); - ci = cipop(mrb); - pc = ci->pc; - } - CHECKPOINT_RESTORE(RBREAK_TAG_BREAK_INTARGET) { - /* do nothing */ - } - CHECKPOINT_MAIN(RBREAK_TAG_BREAK_INTARGET) { - UNWIND_ENSURE(mrb, ci, pc, RBREAK_TAG_BREAK_INTARGET, proc, v); - } - CHECKPOINT_END(RBREAK_TAG_BREAK_INTARGET); - if (ci == mrb->c->cibase) { - goto L_BREAK_ERROR; - } - mrb->exc = NULL; /* clear break object */ - break; - default: - /* cannot happen */ - break; + if (ci == mrb->c->cibase) { + struct mrb_context *c = mrb->c; + if (c == mrb->root_c) { + /* toplevel return */ + mrb_gc_arena_restore(mrb, ai); + mrb->jmp = prev_jmp; + return v; } - mrb_assert(ci == mrb->c->ci); - mrb_assert(mrb->exc == NULL); - if (mrb->c->vmexec && !CI_TARGET_CLASS(ci)) { +#ifdef MRB_USE_TASK_SCHEDULER + if (mrb->c->status == MRB_TASK_CREATED) { mrb_gc_arena_restore(mrb, ai); - mrb->c->vmexec = FALSE; mrb->jmp = prev_jmp; + TASK_STOP(mrb); return v; } - acc = ci->cci; - ci = cipop(mrb); - if (acc == CINFO_SKIP || acc == CINFO_DIRECT) { +#endif + + fiber_terminate(mrb, c, ci); + if (c->vmexec || + (mrb->c == mrb->root_c && mrb->c->ci == mrb->c->cibase) /* case using Fiber#transfer in mrb_fiber_resume() */) { mrb_gc_arena_restore(mrb, ai); + c->vmexec = FALSE; mrb->jmp = prev_jmp; return v; } - pc = ci->pc; - DEBUG(fprintf(stderr, "from :%s\n", mrb_sym_name(mrb, ci->mid))); - proc = ci->proc; - irep = proc->body.irep; - pool = irep->pool; - syms = irep->syms; - - ci[1].stack[0] = v; + ci = mrb->c->ci; + } + + if (mrb->c->vmexec && !ci->u.keep_context) { + mrb_gc_arena_restore(mrb, ai); + mrb->c->vmexec = FALSE; + mrb->jmp = prev_jmp; + return v; + } + acc = ci->cci; + ci = cipop(mrb); + if (acc == CINFO_SKIP || acc == CINFO_DIRECT) { mrb_gc_arena_restore(mrb, ai); + mrb->jmp = prev_jmp; + return v; } + DEBUG(fprintf(stderr, "from :%s\n", mrb_sym_name(mrb, ci->mid))); + irep = ci->proc->body.irep; + + ci[1].stack[0] = v; + mrb_gc_arena_restore(mrb, ai); JUMP; } CASE(OP_BLKPUSH, BS) { - int m1 = (b>>11)&0x3f; - int r = (b>>10)&0x1; - int m2 = (b>>5)&0x1f; - int kd = (b>>4)&0x1; - int lv = (b>>0)&0xf; - mrb_value *stack; - - if (lv == 0) stack = regs + 1; - else { - struct REnv *e = uvenv(mrb, lv-1); - if (!e || (!MRB_ENV_ONSTACK_P(e) && e->mid == 0) || - MRB_ENV_LEN(e) <= m1+r+m2+1) { - localjump_error(mrb, LOCALJUMP_ERROR_YIELD); - goto L_RAISE; - } - stack = e->stack + 1; - } - if (mrb_nil_p(stack[m1+r+m2+kd])) { - localjump_error(mrb, LOCALJUMP_ERROR_YIELD); - goto L_RAISE; - } - regs[a] = stack[m1+r+m2+kd]; + if (vm_op_blkpush(mrb, a, b) == VM_RAISE) goto L_RAISE; NEXT; } #if !defined(MRB_USE_BIGINT) || defined(MRB_INT32) L_INT_OVERFLOW: - { - mrb_value exc = mrb_exc_new_lit(mrb, E_RANGE_ERROR, "integer overflow"); - mrb_exc_set(mrb, exc); - } - goto L_RAISE; + RAISE_LIT(mrb, E_RANGE_ERROR, "integer overflow"); #endif -#define TYPES2(a,b) ((((uint16_t)(a))<<8)|(((uint16_t)(b))&0xff)) -#define OP_MATH(op_name) \ +#define OP_MATH(op_name) do { \ /* need to check if op is overridden */ \ - switch (TYPES2(mrb_type(regs[a]),mrb_type(regs[a+1]))) { \ - OP_MATH_CASE_INTEGER(op_name); \ + uint16_t tt = TYPES2(mrb_type(regs[a]),mrb_type(regs[a+1])); \ + if (mrb_likely(tt == TYPES2(MRB_TT_INTEGER, MRB_TT_INTEGER))) { \ + mrb_int x = mrb_integer(regs[a]), y = mrb_integer(regs[a+1]), z; \ + if (mrb_int_##op_name##_overflow(x, y, &z)) { \ + OP_MATH_OVERFLOW_INT(op_name,x,y); \ + } \ + else \ + SET_INT_VALUE(mrb,regs[a], z); \ + } \ + else switch (tt) { \ OP_MATH_CASE_FLOAT(op_name, integer, float); \ OP_MATH_CASE_FLOAT(op_name, float, integer); \ OP_MATH_CASE_FLOAT(op_name, float, float); \ @@ -2482,6 +3142,7 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) mid = MRB_OPSYM(op_name); \ goto L_SEND_SYM; \ } \ +} while(0); \ NEXT; #define OP_MATH_CASE_INTEGER(op_name) \ case TYPES2(MRB_TT_INTEGER, MRB_TT_INTEGER): \ @@ -2536,55 +3197,30 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) } CASE(OP_DIV, B) { -#ifndef MRB_NO_FLOAT - mrb_float x, y, f; -#endif - - /* need to check if op is overridden */ - switch (TYPES2(mrb_type(regs[a]),mrb_type(regs[a+1]))) { - case TYPES2(MRB_TT_INTEGER,MRB_TT_INTEGER): - { - mrb_int x = mrb_integer(regs[a]); - mrb_int y = mrb_integer(regs[a+1]); - regs[a] = mrb_div_int_value(mrb, x, y); - } - NEXT; -#ifndef MRB_NO_FLOAT - case TYPES2(MRB_TT_INTEGER,MRB_TT_FLOAT): - x = (mrb_float)mrb_integer(regs[a]); - y = mrb_float(regs[a+1]); - break; - case TYPES2(MRB_TT_FLOAT,MRB_TT_INTEGER): - x = mrb_float(regs[a]); - y = (mrb_float)mrb_integer(regs[a+1]); - break; - case TYPES2(MRB_TT_FLOAT,MRB_TT_FLOAT): - x = mrb_float(regs[a]); - y = mrb_float(regs[a+1]); - break; -#endif - default: - mid = MRB_OPSYM(div); - goto L_SEND_SYM; - } - -#ifndef MRB_NO_FLOAT - f = mrb_div_float(x, y); - SET_FLOAT_VALUE(mrb, regs[a], f); -#endif + int r = vm_op_div(mrb, a, &mid); + ci = mrb->c->ci; + if (r == VM_SEND_SYM) goto L_SEND_SYM; NEXT; } -#define OP_MATHI(op_name) \ +#define OP_MATHI(op_name) do { \ /* need to check if op is overridden */ \ - switch (mrb_type(regs[a])) { \ - OP_MATHI_CASE_INTEGER(op_name); \ + if (mrb_likely(mrb_integer_p(regs[a]))) { \ + mrb_int x = mrb_integer(regs[a]), y = (mrb_int)b, z; \ + if (mrb_int_##op_name##_overflow(x, y, &z)) { \ + OP_MATH_OVERFLOW_INT(op_name,x,y); \ + } \ + else \ + SET_INT_VALUE(mrb,regs[a], z); \ + } \ + else switch (mrb_type(regs[a])) { \ OP_MATHI_CASE_FLOAT(op_name); \ default: \ SET_INT_VALUE(mrb,regs[a+1], b); \ mid = MRB_OPSYM(op_name); \ goto L_SEND_SYM; \ } \ +} while(0); \ NEXT; #define OP_MATHI_CASE_INTEGER(op_name) \ case MRB_TT_INTEGER: \ @@ -2617,17 +3253,58 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) OP_MATHI(sub); } +#ifdef MRB_NO_FLOAT +#define OP_MATHILV_CASE_FLOAT(op_name) (void)0 +#else +#define OP_MATHILV_CASE_FLOAT(op_name) \ + case MRB_TT_FLOAT: \ + { \ + mrb_float z = mrb_float(regs[a]) OP_MATH_OP_##op_name c; \ + SET_FLOAT_VALUE(mrb, regs[a], z); \ + } \ + break +#endif +#define OP_MATHILV(op_name) \ + /* a=local, b=working space, c=immediate */ \ + switch (mrb_type(regs[a])) { \ + case MRB_TT_INTEGER: \ + { \ + mrb_int x = mrb_integer(regs[a]), y = (mrb_int)c, z; \ + if (mrb_int_##op_name##_overflow(x, y, &z)) { \ + OP_MATH_OVERFLOW_INT(op_name,x,y); \ + } \ + else { \ + SET_INT_VALUE(mrb,regs[a], z); \ + } \ + } \ + break; \ + OP_MATHILV_CASE_FLOAT(op_name); \ + default: \ + SET_INT_VALUE(mrb,regs[a+1], c); \ + mid = MRB_OPSYM(op_name); \ + goto L_SEND_SYM; \ + } \ + NEXT + + CASE(OP_ADDILV, BBB) { + OP_MATHILV(add); + } + + CASE(OP_SUBILV, BBB) { + OP_MATHILV(sub); + } + #define OP_CMP_BODY(op,v1,v2) (v1(regs[a]) op v2(regs[a+1])) #ifdef MRB_NO_FLOAT #define OP_CMP(op,sym) do {\ int result;\ - /* need to check if - is overridden */\ - switch (TYPES2(mrb_type(regs[a]),mrb_type(regs[a+1]))) {\ - case TYPES2(MRB_TT_INTEGER,MRB_TT_INTEGER):\ + /* need to check if op is overridden */\ + if (mrb_likely(TYPES2(mrb_type(regs[a]),mrb_type(regs[a+1])) == \ + TYPES2(MRB_TT_INTEGER,MRB_TT_INTEGER))) {\ result = OP_CMP_BODY(op,mrb_fixnum,mrb_fixnum);\ - break;\ - default:\ + }\ + else {\ mid = MRB_OPSYM(sym);\ goto L_SEND_SYM;\ }\ @@ -2641,16 +3318,17 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) #else #define OP_CMP(op, sym) do {\ int result;\ - /* need to check if - is overridden */\ - switch (TYPES2(mrb_type(regs[a]),mrb_type(regs[a+1]))) {\ - case TYPES2(MRB_TT_INTEGER,MRB_TT_INTEGER):\ - result = OP_CMP_BODY(op,mrb_fixnum,mrb_fixnum);\ - break;\ + /* need to check if op is overridden */\ + uint16_t tt = TYPES2(mrb_type(regs[a]),mrb_type(regs[a+1]));\ + if (mrb_likely(tt == TYPES2(MRB_TT_INTEGER,MRB_TT_INTEGER))) {\ + result = OP_CMP_BODY(op,mrb_integer,mrb_integer);\ + }\ + else switch (tt) {\ case TYPES2(MRB_TT_INTEGER,MRB_TT_FLOAT):\ - result = OP_CMP_BODY(op,mrb_fixnum,mrb_float);\ + result = OP_CMP_BODY(op,mrb_integer,mrb_float);\ break;\ case TYPES2(MRB_TT_FLOAT,MRB_TT_INTEGER):\ - result = OP_CMP_BODY(op,mrb_float,mrb_fixnum);\ + result = OP_CMP_BODY(op,mrb_float,mrb_integer);\ break;\ case TYPES2(MRB_TT_FLOAT,MRB_TT_FLOAT):\ result = OP_CMP_BODY(op,mrb_float,mrb_float);\ @@ -2672,6 +3350,9 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) if (mrb_obj_eq(mrb, regs[a], regs[a+1])) { SET_TRUE_VALUE(regs[a]); } + else if (mrb_symbol_p(regs[a])) { + SET_FALSE_VALUE(regs[a]); + } else { OP_CMP(==,eq); } @@ -2711,11 +3392,12 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) CASE(OP_ARYCAT, B) { mrb_value splat = mrb_ary_splat(mrb, regs[a+1]); + ci = mrb->c->ci; if (mrb_nil_p(regs[a])) { regs[a] = splat; } else { - mrb_assert(mrb_array_p(regs[a])); + mrb_ensure_array_type(mrb, regs[a]); mrb_ary_concat(mrb, regs[a], splat); } mrb_gc_arena_restore(mrb, ai); @@ -2723,22 +3405,18 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) } CASE(OP_ARYPUSH, BB) { - mrb_assert(mrb_array_p(regs[a])); + mrb_ensure_array_type(mrb, regs[a]); for (mrb_int i=0; ic->ci; regs[a] = ary; + mrb_gc_arena_restore(mrb, ai); NEXT; } @@ -2761,7 +3439,7 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) } CASE(OP_ASET, BBB) { - mrb_assert(mrb_array_p(regs[a])); + mrb_ensure_array_type(mrb, regs[b]); mrb_ary_set(mrb, regs[b], c, regs[a]); NEXT; } @@ -2770,14 +3448,12 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) mrb_value v = regs[a]; int pre = b; int post = c; - struct RArray *ary; - int len, idx; if (!mrb_array_p(v)) { v = ary_new_from_regs(mrb, 1, a); } - ary = mrb_ary_ptr(v); - len = (int)ARY_LEN(ary); + struct RArray *ary = mrb_ary_ptr(v); + int len = (int)ARY_LEN(ary); if (len > pre + post) { v = mrb_ary_new_from_values(mrb, len - pre - post, ARY_PTR(ary)+pre); regs[a++] = v; @@ -2788,6 +3464,8 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) else { v = mrb_ary_new_capa(mrb, 0); regs[a++] = v; + + int idx; for (idx=0; idx+pre> 2; - if (pool[b].tt & IREP_TT_SFLAG) { - sym = mrb_intern_static(mrb, pool[b].u.str, len); + mrb_assert((irep->pool[b].tt&IREP_TT_NFLAG)==0); + len = irep->pool[b].tt >> 2; + if (irep->pool[b].tt & IREP_TT_SFLAG) { + sym = mrb_intern_static(mrb, irep->pool[b].u.str, len); } else { - sym = mrb_intern(mrb, pool[b].u.str, len); + sym = mrb_intern(mrb, irep->pool[b].u.str, len); } regs[a] = mrb_symbol_value(sym); NEXT; @@ -2826,31 +3504,32 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) CASE(OP_STRING, BB) { mrb_int len; - mrb_assert((pool[b].tt&IREP_TT_NFLAG)==0); - len = pool[b].tt >> 2; - if (pool[b].tt & IREP_TT_SFLAG) { - regs[a] = mrb_str_new_static(mrb, pool[b].u.str, len); + mrb_assert((irep->pool[b].tt&IREP_TT_NFLAG)==0); + len = irep->pool[b].tt >> 2; + if (irep->pool[b].tt & IREP_TT_SFLAG) { + regs[a] = mrb_str_new_static(mrb, irep->pool[b].u.str, len); } else { - regs[a] = mrb_str_new(mrb, pool[b].u.str, len); + regs[a] = mrb_str_new(mrb, irep->pool[b].u.str, len); } mrb_gc_arena_restore(mrb, ai); NEXT; } CASE(OP_STRCAT, B) { - mrb_assert(mrb_string_p(regs[a])); + mrb_ensure_string_type(mrb, regs[a]); mrb_str_concat(mrb, regs[a], regs[a+1]); + ci = mrb->c->ci; NEXT; } CASE(OP_HASH, BB) { mrb_value hash = mrb_hash_new_capa(mrb, b); - int i; int lim = a+b*2; - for (i=a; ic->ci; } regs[a] = hash; mrb_gc_arena_restore(mrb, ai); @@ -2859,13 +3538,13 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) CASE(OP_HASHADD, BB) { mrb_value hash; - int i; int lim = a+b*2+1; hash = regs[a]; mrb_ensure_hash_type(mrb, hash); - for (i=a+1; ic->ci; } mrb_gc_arena_restore(mrb, ai); NEXT; @@ -2873,8 +3552,9 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) CASE(OP_HASHCAT, B) { mrb_value hash = regs[a]; - mrb_assert(mrb_hash_p(hash)); + mrb_ensure_hash_type(mrb, hash); mrb_hash_merge(mrb, hash, regs[a+1]); + ci = mrb->c->ci; mrb_gc_arena_restore(mrb, ai); NEXT; } @@ -2909,6 +3589,7 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) CASE(OP_RANGE_INC, B) { mrb_value v = mrb_range_new(mrb, regs[a], regs[a+1], FALSE); + ci = mrb->c->ci; regs[a] = v; mrb_gc_arena_restore(mrb, ai); NEXT; @@ -2916,6 +3597,7 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) CASE(OP_RANGE_EXC, B) { mrb_value v = mrb_range_new(mrb, regs[a], regs[a+1], TRUE); + ci = mrb->c->ci; regs[a] = v; mrb_gc_arena_restore(mrb, ai); NEXT; @@ -2928,17 +3610,17 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) CASE(OP_CLASS, BB) { struct RClass *c = 0, *baseclass; - mrb_value base, super; - mrb_sym id = syms[b]; + mrb_sym id = irep->syms[b]; + mrb_value base = regs[a]; + mrb_value super = regs[a+1]; - base = regs[a]; - super = regs[a+1]; if (mrb_nil_p(base)) { - baseclass = MRB_PROC_TARGET_CLASS(mrb->c->ci->proc); + baseclass = MRB_PROC_TARGET_CLASS(ci->proc); if (!baseclass) baseclass = mrb->object_class; base = mrb_obj_value(baseclass); } c = mrb_vm_define_class(mrb, base, super, id); + ci = mrb->c->ci; regs[a] = mrb_obj_value(c); mrb_gc_arena_restore(mrb, ai); NEXT; @@ -2946,16 +3628,16 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) CASE(OP_MODULE, BB) { struct RClass *cls = 0, *baseclass; - mrb_value base; - mrb_sym id = syms[b]; + mrb_sym id = irep->syms[b]; + mrb_value base = regs[a]; - base = regs[a]; if (mrb_nil_p(base)) { - baseclass = MRB_PROC_TARGET_CLASS(mrb->c->ci->proc); + baseclass = MRB_PROC_TARGET_CLASS(ci->proc); if (!baseclass) baseclass = mrb->object_class; base = mrb_obj_value(baseclass); } cls = mrb_vm_define_module(mrb, base, id); + ci = mrb->c->ci; regs[a] = mrb_obj_value(cls); mrb_gc_arena_restore(mrb, ai); NEXT; @@ -2964,37 +3646,56 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) CASE(OP_EXEC, BB) { mrb_value recv = regs[a]; - struct RProc *p; + struct RClass *c = mrb_class_ptr(recv); const mrb_irep *nirep = irep->reps[b]; /* prepare closure */ - p = mrb_proc_new(mrb, nirep); + struct RProc *p = mrb_proc_new(mrb, nirep); p->c = NULL; - mrb_field_write_barrier(mrb, (struct RBasic*)p, (struct RBasic*)proc); - MRB_PROC_SET_TARGET_CLASS(p, mrb_class_ptr(recv)); + mrb_field_write_barrier(mrb, (struct RBasic*)p, (struct RBasic*)ci->proc); + MRB_PROC_SET_TARGET_CLASS(p, c); p->flags |= MRB_PROC_SCOPE; /* prepare call stack */ - cipush(mrb, a, 0, mrb_class_ptr(recv), p, NULL, 0, 0); + ci = cipush(mrb, a, 0, c, p, NULL, 0, 0); irep = p->body.irep; - pool = irep->pool; - syms = irep->syms; - mrb_stack_extend(mrb, irep->nregs); + stack_extend(mrb, irep->nregs); stack_clear(regs+1, irep->nregs-1); - pc = irep->iseq; + ci->pc = irep->iseq; JUMP; } CASE(OP_DEF, BB) { struct RClass *target = mrb_class_ptr(regs[a]); - struct RProc *p = mrb_proc_ptr(regs[a+1]); + const struct RProc *p = mrb_proc_ptr(regs[a+1]); mrb_method_t m; - mrb_sym mid = syms[b]; + mrb_sym mid = irep->syms[b]; MRB_METHOD_FROM_PROC(m, p); + MRB_METHOD_SET_VISIBILITY(m, MRB_METHOD_VDEFAULT_FL); mrb_define_method_raw(mrb, target, mid, m); mrb_method_added(mrb, target, mid); + ci = mrb->c->ci; + mrb_gc_arena_restore(mrb, ai); + regs[a] = mrb_symbol_value(mid); + NEXT; + } + + CASE(OP_TDEF, BBB) { + struct RClass *tc = check_target_class(mrb); + if (mrb_unlikely(!tc)) goto L_RAISE; + mid = vm_define_method(mrb, tc, irep, b, c); + ci = mrb->c->ci; + mrb_gc_arena_restore(mrb, ai); + regs[a] = mrb_symbol_value(mid); + NEXT; + } + + CASE(OP_SDEF, BBB) { + struct RClass *tc = mrb_class_ptr(mrb_singleton_class(mrb, regs[a])); + mid = vm_define_method(mrb, tc, irep, b, c); + ci = mrb->c->ci; mrb_gc_arena_restore(mrb, ai); regs[a] = mrb_symbol_value(mid); NEXT; @@ -3008,7 +3709,7 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) CASE(OP_TCLASS, B) { struct RClass *target = check_target_class(mrb); - if (!target) goto L_RAISE; + if (mrb_unlikely(!target)) goto L_RAISE; regs[a] = mrb_obj_value(target); NEXT; } @@ -3016,23 +3717,24 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) CASE(OP_ALIAS, BB) { struct RClass *target = check_target_class(mrb); - if (!target) goto L_RAISE; - mrb_alias_method(mrb, target, syms[a], syms[b]); - mrb_method_added(mrb, target, syms[a]); + if (mrb_unlikely(!target)) goto L_RAISE; + mrb_alias_method(mrb, target, irep->syms[a], irep->syms[b]); + mrb_method_added(mrb, target, irep->syms[a]); + ci = mrb->c->ci; NEXT; } CASE(OP_UNDEF, B) { struct RClass *target = check_target_class(mrb); - if (!target) goto L_RAISE; - mrb_undef_method_id(mrb, target, syms[a]); + if (mrb_unlikely(!target)) goto L_RAISE; + mrb_undef_method_id(mrb, target, irep->syms[a]); + ci = mrb->c->ci; NEXT; } - CASE(OP_DEBUG, Z) { - FETCH_BBB(); + CASE(OP_DEBUG, BBB) { #ifdef MRB_USE_DEBUG_HOOK - mrb->debug_op_hook(mrb, irep, pc, regs); + if (mrb->debug_op_hook) mrb->debug_op_hook(mrb, irep, ci->pc, regs); #else #ifndef MRB_NO_STDIO printf("OP_DEBUG %d %d %d\n", a, b, c); @@ -3044,61 +3746,65 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) } CASE(OP_ERR, B) { - size_t len = pool[a].tt >> 2; + size_t len = irep->pool[a].tt >> 2; mrb_value exc; - mrb_assert((pool[a].tt&IREP_TT_NFLAG)==0); - exc = mrb_exc_new(mrb, E_LOCALJUMP_ERROR, pool[a].u.str, len); - mrb_exc_set(mrb, exc); - goto L_RAISE; + mrb_assert((irep->pool[a].tt&IREP_TT_NFLAG)==0); + exc = mrb_exc_new(mrb, E_LOCALJUMP_ERROR, irep->pool[a].u.str, len); + RAISE_EXC(mrb, exc); } CASE(OP_EXT1, Z) { + const mrb_code *pc = ci->pc; insn = READ_B(); switch (insn) { -#define OPCODE(insn,ops) case OP_ ## insn: FETCH_ ## ops ## _1(); mrb->c->ci->pc = pc; goto L_OP_ ## insn ## _BODY; -#include "mruby/ops.h" +#define OPCODE(insn,ops) case OP_ ## insn: FETCH_ ## ops ## _1(); ci->pc = pc; goto L_OP_ ## insn ## _BODY; +#include #undef OPCODE } - pc--; NEXT; } CASE(OP_EXT2, Z) { + const mrb_code *pc = ci->pc; insn = READ_B(); switch (insn) { -#define OPCODE(insn,ops) case OP_ ## insn: FETCH_ ## ops ## _2(); mrb->c->ci->pc = pc; goto L_OP_ ## insn ## _BODY; -#include "mruby/ops.h" +#define OPCODE(insn,ops) case OP_ ## insn: FETCH_ ## ops ## _2(); ci->pc = pc; goto L_OP_ ## insn ## _BODY; +#include #undef OPCODE } - pc--; NEXT; } CASE(OP_EXT3, Z) { + const mrb_code *pc = ci->pc; insn = READ_B(); switch (insn) { -#define OPCODE(insn,ops) case OP_ ## insn: FETCH_ ## ops ## _3(); mrb->c->ci->pc = pc; goto L_OP_ ## insn ## _BODY; -#include "mruby/ops.h" +#define OPCODE(insn,ops) case OP_ ## insn: FETCH_ ## ops ## _3(); ci->pc = pc; goto L_OP_ ## insn ## _BODY; +#include #undef OPCODE } - pc--; NEXT; } CASE(OP_STOP, Z) { /* stop VM */ + mrb_value v; + v = mrb->exc ? mrb_obj_value(mrb->exc) : mrb_nil_value(); CHECKPOINT_RESTORE(RBREAK_TAG_STOP) { - /* do nothing */ + struct RBreak *brk = (struct RBreak*)mrb->exc; + v = mrb_break_value_get(brk); } CHECKPOINT_MAIN(RBREAK_TAG_STOP) { - UNWIND_ENSURE(mrb, mrb->c->ci, pc, RBREAK_TAG_STOP, proc, mrb_nil_value()); + UNWIND_ENSURE(mrb, ci, ci->pc, RBREAK_TAG_STOP, ci, v); } CHECKPOINT_END(RBREAK_TAG_STOP); - L_STOP: mrb->jmp = prev_jmp; - if (mrb->exc) { - mrb_assert(mrb->exc->tt == MRB_TT_EXCEPTION); - return mrb_obj_value(mrb->exc); + if (!mrb_nil_p(v)) { + mrb->exc = mrb_obj_ptr(v); + TASK_STOP(mrb); + return v; } + mrb->exc = NULL; + TASK_STOP(mrb); return regs[irep->nlocals]; } } @@ -3106,12 +3812,12 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) #undef regs } MRB_CATCH(&c_jmp) { - mrb_callinfo *ci = mrb->c->ci; + mrb_assert(mrb->exc != NULL); + + ci = mrb->c->ci; while (ci > mrb->c->cibase && ci->cci == CINFO_DIRECT) { ci = cipop(mrb); } - exc_catched = TRUE; - pc = ci->pc; goto RETRY_TRY_BLOCK; } MRB_END_EXC(&c_jmp); @@ -3120,30 +3826,36 @@ mrb_vm_exec(mrb_state *mrb, const struct RProc *proc, const mrb_code *pc) static mrb_value mrb_run(mrb_state *mrb, const struct RProc *proc, mrb_value self) { - return mrb_vm_run(mrb, proc, self, mrb_ci_bidx(mrb->c->ci) + 1); + return mrb_vm_run(mrb, proc, self, ci_bidx(mrb->c->ci) + 1); } +/** + * @brief Executes a mruby proc in the top-level environment. + * + * This function is used to execute a proc (like a script loaded from a file + * or a string) at the top level of the mruby environment. It's similar to + * `mrb_vm_run` but is specifically designed for top-level execution. + * + * It ensures that if there's an existing callinfo stack, the new execution + * is pushed on top with `CINFO_SKIP`, indicating it's a new, distinct + * execution context rather than a nested call from within the VM. + * + * @param mrb The mruby state. + * @param proc The RProc object (representing the script or code) to execute. + * @param self The `self` object for this top-level execution. Typically, + * this is the main `top_self` object in mruby. + * @param stack_keep The number of values to preserve on the stack. For + * top-level execution, this is often 0 or a small number + * to set up initial local variables if any. + * @return The result of the proc's execution. + * @see mrb_vm_run + */ MRB_API mrb_value mrb_top_run(mrb_state *mrb, const struct RProc *proc, mrb_value self, mrb_int stack_keep) { - mrb_value v; - - if (!mrb->c->cibase) { - return mrb_vm_run(mrb, proc, self, stack_keep); + if (mrb->c->cibase && mrb->c->ci > mrb->c->cibase) { + cipush(mrb, 0, CINFO_SKIP, mrb->object_class, NULL, NULL, 0, 0); } - if (mrb->c->ci == mrb->c->cibase) { - mrb_vm_ci_env_set(mrb->c->ci, NULL); - return mrb_vm_run(mrb, proc, self, stack_keep); - } - cipush(mrb, 0, CINFO_SKIP, mrb->object_class, NULL, NULL, 0, 0); - v = mrb_vm_run(mrb, proc, self, stack_keep); - - return v; + return mrb_vm_run(mrb, proc, self, stack_keep); } - -#if defined(MRB_USE_CXX_EXCEPTION) && defined(__cplusplus) -# if !defined(MRB_USE_CXX_ABI) -} /* end of extern "C" */ -# endif -mrb_int mrb_jmpbuf::jmpbuf_id = 0; -#endif +#undef CASE diff --git a/super-linter.report/.keep b/super-linter.report/.keep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tasks/amalgam.rake b/tasks/amalgam.rake new file mode 100644 index 0000000000..1b22e69618 --- /dev/null +++ b/tasks/amalgam.rake @@ -0,0 +1,34 @@ +require_relative '../lib/mruby/amalgam' + +MRuby.each_target do + next unless libmruby_enabled? + + amalgam_dir = "#{build_dir}/amalgam" + header_file = "#{amalgam_dir}/mruby.h" + source_file = "#{amalgam_dir}/mruby.c" + + # Header depends on presym generation (for presym headers) + file header_file => [:gensym] do |t| + amalgam = MRuby::Amalgam.new(self) + amalgam.generate_header(t.name) + end + + # Source depends on generated files (mrblib.c, gem_init.c, etc.) + mrblib_src = "#{build_dir}/mrblib/mrblib.c" + gem_init_src = "#{build_dir}/mrbgems/gem_init.c" + + source_deps = [:gensym] + source_deps << mrblib_src if File.exist?(mrblib_src) || libmruby_enabled? + + file source_file => source_deps do |t| + amalgam = MRuby::Amalgam.new(self) + amalgam.generate_source(t.name) + end + + desc "Generate amalgamated mruby.h and mruby.c in #{amalgam_dir}" + task :amalgam => [header_file, source_file] do + puts "Amalgamation complete:" + puts " Header: #{header_file}" + puts " Source: #{source_file}" + end +end diff --git a/tasks/benchmark.rake b/tasks/benchmark.rake index b32d165bf0..9b40f9b135 100644 --- a/tasks/benchmark.rake +++ b/tasks/benchmark.rake @@ -87,6 +87,7 @@ file plot_file => $dat_files do plot end +desc "run benchmark tests" task :benchmark => plot_file do plot end diff --git a/tasks/doc.rake b/tasks/doc.rake index 8a9108f1ae..d36df8edd0 100644 --- a/tasks/doc.rake +++ b/tasks/doc.rake @@ -1,3 +1,5 @@ +MRuby.autoload :Documentation, 'mruby/doc' + desc 'generate document' task :doc => %w[doc:api doc:capi] @@ -7,8 +9,11 @@ namespace :doc do begin sh "mrbdoc" rescue - puts "ERROR: To generate yard documentation, you should install yard-mruby gem." - puts " $ gem install yard-mruby yard-coderay" + puts "ERROR: To generate YARD documentation, you should install the yard-coderay and yard-mruby gems." + puts " $ gem install yard-coderay yard-mruby" + puts "https://yardoc.org/" + puts "https://rubygems.org/gems/yard-mruby" + puts "https://rubygems.org/gems/yard-coderay" end end @@ -17,11 +22,15 @@ namespace :doc do begin sh "doxygen Doxyfile" rescue - puts "ERROR: To generate C API documents, you need Doxygen." + puts "ERROR: To generate C API documentation, you should install Doxygen and Graphviz." puts "On Debian-based systems:" - puts " $ sudo apt-get install doxygen" + puts " $ sudo apt-get install doxygen graphviz" puts "On RHEL-based systems:" - puts " $ sudo dnf install doxygen" + puts " $ sudo dnf install doxygen graphviz" + puts "On macOS-based systems:" + puts " $ brew install doxygen graphviz" + puts "https://www.doxygen.nl/" + puts "https://graphviz.org/" end end @@ -31,7 +40,7 @@ namespace :doc do namespace :clean do desc 'clean yard docs' task :api do - rm_rf 'doc/api' + rm_rf %w(doc/api .yardoc) end desc 'clean doxygen docs' @@ -43,13 +52,59 @@ namespace :doc do namespace :view do desc 'open yard docs' task :api do - sh 'xdg-open doc/api/index.html' + if RUBY_PLATFORM.include?('darwin') + sh 'open doc/api/index.html' + else + sh 'xdg-open doc/api/index.html' + end end desc 'open doxygen docs' task :capi do - sh 'xdg-open doc/capi/html/index.html' + if RUBY_PLATFORM.include?('darwin') + sh 'open doc/capi/html/index.html' + else + sh 'xdg-open doc/capi/html/index.html' + end + end + end + + desc 'update doc/internal/opcode.md' + task 'update-opcode.md' do + unless system(*%W(git --git-dir #{MRUBY_ROOT}/.git --work-tree #{MRUBY_ROOT} diff --quiet @ -- doc/internal/opcode.md)) + abort <<~'ERRMESG' + The file "doc/internal/opcode.md" has been modified but not committed. + To avoid loss of your edits, the automatic update process has been aborted. + ERRMESG end + + MRuby::Documentation.update_opcode_md + end + + task 'update-index' do + rev_order = %w(doc/internal/ doc/guides/ doc/) + cmd = %W(git --git-dir #{MRUBY_ROOT}/.git --work-tree #{MRUBY_ROOT} ls-files -- doc/*.md) + doc = IO.popen(cmd, "r") { |io| io.read.split("\n") } + doc.sort_by! { |e| [-rev_order.index { |o| e.start_with?(o) }, e] } + readme_path = File.join(MRUBY_ROOT, "README.md") + readme = File.read(readme_path) + matched = false + mark_begin = "\n" + mark_end = "\n" + readme1 = readme.sub(/^#{mark_begin}\n\K.*(?=^\n#{mark_end})/m) { + matched = true + doc.each_with_object("") { |d, a| + summary = File.open(File.join(MRUBY_ROOT, d)) { |f| + f.each_line.first.slice(/^/, 1) + } + if summary + summary = "Internal Implementation / #{summary}" if d.start_with?("doc/internal/") + a << "- [#{summary}](#{d})\n" + end + } + } + raise "missing marker for document index in README.md" unless matched + File.write(readme_path, readme1, mode: "wb") unless readme == readme1 end end diff --git a/tasks/install.rake b/tasks/install.rake new file mode 100644 index 0000000000..3980d59403 --- /dev/null +++ b/tasks/install.rake @@ -0,0 +1,40 @@ +desc "install compiled products (on host)" +task :install => "install:full:host" + +desc "install compiled executable (on host)" +task :install_bin => "install:bin:host" + +desc "install compiled products (all build targets)" +task "install:full" + +desc "install compiled executable (all build targets)" +task "install:bin" + +MRuby.each_target do |build| + next if build.internal? + + prefix = File.join(MRuby::INSTALL_DESTDIR, build.install_prefix) + exclude_filter = build.install_excludes.flatten + + task "install:full" => "install:full:#{build.name}" + + task "install:full:#{build.name}" => "install:bin:#{build.name}" do + Dir.glob(File.join(build.build_dir.gsub(/[\[\{\*\?]/, "\\\0"), "{include,#{libdir_name}}/**/*")) do |path| + next unless File.file? path + file = path.relative_path_from(build.build_dir) + next if exclude_filter.any? { |filter| filter.respond_to?(:call) ? filter.call(file) : filter.match?(file) } + install_D path, File.join(prefix, file) + end + end + + task "install:bin" => "install:bin:#{build.name}" + + task "install:bin:#{build.name}" => "all" do + Dir.glob(File.join(build.build_dir.gsub(/[\[\{\*\?]/, "\\\0"), "{bin,host-bin}/**/*")) do |path| + next unless File.file? path + file = path.relative_path_from(build.build_dir) + next if exclude_filter.any? { |filter| filter.respond_to?(:call) ? filter.call(file) : filter.match?(file) } + install_D path, File.join(prefix, file) + end + end +end diff --git a/tasks/libmruby.rake b/tasks/libmruby.rake index 1fb3cbc313..423b4fa296 100644 --- a/tasks/libmruby.rake +++ b/tasks/libmruby.rake @@ -7,31 +7,82 @@ MRuby.each_target do next unless libmruby_enabled? + copy_headers_task = "expose_header_files:#{self.name}" file libmruby_static => libmruby_objs.flatten do |t| + Rake::Task[copy_headers_task].invoke archiver.run t.name, t.prerequisites end - file "#{build_dir}/lib/libmruby.flags.mak" => [__FILE__, libmruby_static] do |t| + task copy_headers_task do |t| + # Since header files may be generated dynamically and it is hard to know all of them, + # the task is executed depending on when libmruby.a is generated. + + gemsbasedir = File.join(build_dir, "include/mruby/gems") + dirmap = { + MRUBY_ROOT => build_dir + } + gems.each { |g| + dirmap[g.dir] = File.join(gemsbasedir, g.name) + dirmap[g.build_dir] = File.join(gemsbasedir, g.name) + } + + dirs = each_header_files.to_a + dirs.uniq! + dirs.replace_prefix_by(dirmap).zip(dirs).each do |dest, src| + if File.mtime(src).to_i > (File.mtime(dest).to_i rescue 0) + mkpath File.dirname(dest) + cp src, dest + end + end + end + + file "#{build_dir}/#{libdir_name}/libmruby.flags.mak" => [__FILE__, libmruby_static] do |t| mkdir_p File.dirname t.name open(t.name, 'w') do |f| - gemincs = gems.map { |g| g.export_include_paths.map { |n| g.filename(n) } }.flatten.uniq - f.puts "MRUBY_CFLAGS = #{cc.all_flags([], gemincs)}" + f.puts <<~FLAGS_MAKE + # GNU make is required to use this file. + MRUBY_PACKAGE_DIR_GNU := $(shell dirname "$(lastword $(MAKEFILE_LIST))") + MRUBY_PACKAGE_DIR != dirname "$(MRUBY_PACKAGE_DIR_GNU)" + FLAGS_MAKE + + [ + [cc, "MRUBY_CC", "MRUBY_CFLAGS"], + [cxx, "MRUBY_CXX", "MRUBY_CXXFLAGS"], + [asm, "MRUBY_AS", "MRUBY_ASFLAGS"], + [objc, "MRUBY_OBJC", "MRUBY_OBJCFLAGS"] + ].each do |cc, cmd, flags| + incpaths = cc.include_paths.dup + dirmaps = { + MRUBY_ROOT => "$(MRUBY_PACKAGE_DIR)", + build_dir => "$(MRUBY_PACKAGE_DIR)" + } + gems.each do |g| + incpaths.concat g.export_include_paths + dirmaps[g.dir] = "$(MRUBY_PACKAGE_DIR)/include/mruby/gems/#{g.name}" + dirmaps[g.build_dir] = "$(MRUBY_PACKAGE_DIR)/include/mruby/gems/#{g.name}" + end + modcc = cc.clone + modcc.include_paths = incpaths.replace_prefix_by(dirmaps).uniq + + f.puts "#{cmd} = #{cc.command}" + f.puts "#{flags} = #{modcc.all_flags}" + end - f.puts "MRUBY_CC = #{cc.command}" f.puts "MRUBY_LD = #{linker.command}" libgems = gems.reject{|g| g.bin?} gem_flags = libgems.map {|g| g.linker.flags } gem_library_paths = libgems.map {|g| g.linker.library_paths } - f.puts "MRUBY_LDFLAGS = #{linker.all_flags(gem_library_paths, gem_flags)} #{linker.option_library_path % "#{build_dir}/lib"}" + f.puts "MRUBY_LDFLAGS = #{linker.all_flags(gem_library_paths, gem_flags)} #{linker.option_library_path % "$(MRUBY_PACKAGE_DIR)/#{libdir_name}"}" gem_flags_before_libraries = libgems.map {|g| g.linker.flags_before_libraries } f.puts "MRUBY_LDFLAGS_BEFORE_LIBS = #{[linker.flags_before_libraries, gem_flags_before_libraries].flatten.join(' ')}" gem_libraries = libgems.map {|g| g.linker.libraries } - f.puts "MRUBY_LIBS = #{linker.option_library % 'mruby'} #{linker.library_flags(gem_libraries)}" + libmruby = (toolchains.find { |e| e == "visualcpp" }) ? "libmruby" : "mruby" + f.puts "MRUBY_LIBS = #{linker.option_library % libmruby} #{linker.library_flags(gem_libraries)}" - f.puts "MRUBY_LIBMRUBY_PATH = #{libmruby_static}" + f.puts "MRUBY_LIBMRUBY_PATH = #{libmruby_static.replace_prefix_by(build_dir => "$(MRUBY_PACKAGE_DIR)")}" end end diff --git a/tasks/mrbgems.rake b/tasks/mrbgems.rake index 4f24cfd93f..55a49c61d0 100644 --- a/tasks/mrbgems.rake +++ b/tasks/mrbgems.rake @@ -2,8 +2,7 @@ MRuby.each_target do active_gems_txt = "#{build_dir}/mrbgems/active_gems.txt" if enable_gems? - # set up all gems - gems.each(&:setup) + gems.setup_build gems.check self # loader all gems @@ -13,50 +12,92 @@ MRuby.each_target do mkdir_p "#{build_dir}/mrbgems" open(t.name, 'w') do |f| gem_func_gems = gems.select { |g| g.generate_functions } - gem_func_decls = gem_func_gems.each_with_object('') do |g, s| - s << "void GENERATED_TMP_mrb_#{g.funcname}_gem_init(mrb_state*);\n" \ - "void GENERATED_TMP_mrb_#{g.funcname}_gem_final(mrb_state*);\n" - end - gem_init_calls = gem_func_gems.each_with_object('') do |g, s| - s << " GENERATED_TMP_mrb_#{g.funcname}_gem_init(mrb);\n" - end - gem_final_calls = gem_func_gems.reverse_each.with_object('') do |g, s| - s << " GENERATED_TMP_mrb_#{g.funcname}_gem_final(mrb);\n" + gem_func_decls = +'' + gem_funcs = +'' + gem_func_gems.each do |g| + init = "GENERATED_TMP_mrb_#{g.funcname}_gem_init" + final = "GENERATED_TMP_mrb_#{g.funcname}_gem_final" + gem_func_decls << "void #{init}(mrb_state*);\n" \ + "void #{final}(mrb_state*);\n" + gem_funcs << " { #{init}, #{final} },\n" end f.puts %Q[/*] f.puts %Q[ * This file contains a list of all] f.puts %Q[ * initializing methods which are] f.puts %Q[ * necessary to bootstrap all gems.] f.puts %Q[ *] + f.puts %Q[ * This file was generated by mruby/#{__FILE__.relative_path_from(MRUBY_ROOT)}.] + f.puts %Q[ *] f.puts %Q[ * IMPORTANT:] f.puts %Q[ * This file was generated!] f.puts %Q[ * All manual changes will get lost.] f.puts %Q[ */] f.puts %Q[] f.puts %Q[#include ] + f.puts %Q[#include ] + f.puts %Q[#include ] f.puts %Q[] - f.write gem_func_decls - unless gem_final_calls.empty? + unless gem_funcs.empty? + f.write gem_func_decls + f.puts %Q[] + f.puts %Q[static const struct {] + f.puts %Q[ void (*init)(mrb_state*);] + f.puts %Q[ void (*final)(mrb_state*);] + f.puts %Q[} gem_funcs[] = {] + f.write gem_funcs + f.puts %Q[};] + f.puts %Q[] + f.puts %Q[#define NUM_GEMS ((int)(sizeof(gem_funcs) / sizeof(gem_funcs[0])))] + f.puts %Q[] + f.puts %Q[struct final_mrbgems {] + f.puts %Q[ int i;] + f.puts %Q[ int ai;] + f.puts %Q[};] + f.puts %Q[] + f.puts %Q[static mrb_value] + f.puts %Q[final_mrbgems_body(mrb_state *mrb, void *ud) {] + f.puts %Q[ struct final_mrbgems *p = (struct final_mrbgems*)ud;] + f.puts %Q[ for (; p->i >= 0; p->i--) {] + f.puts %Q[ gem_funcs[p->i].final(mrb);] + f.puts %Q[ mrb_gc_arena_restore(mrb, p->ai);] + f.puts %Q[ }] + f.puts %Q[ return mrb_nil_value();] + f.puts %Q[}] f.puts %Q[] f.puts %Q[static void] f.puts %Q[mrb_final_mrbgems(mrb_state *mrb) {] - f.write gem_final_calls + f.puts %Q[ struct final_mrbgems a = { NUM_GEMS - 1, mrb_gc_arena_save(mrb) };] + f.puts %Q[ for (; a.i >= 0; a.i--) {] + f.puts %Q[ mrb_protect_error(mrb, final_mrbgems_body, &a, NULL);] + f.puts %Q[ mrb_gc_arena_restore(mrb, a.ai);] + f.puts %Q[ }] f.puts %Q[}] + f.puts %Q[] end - f.puts %Q[] f.puts %Q[void] f.puts %Q[mrb_init_mrbgems(mrb_state *mrb) {] - f.write gem_init_calls - f.puts %Q[ mrb_state_atexit(mrb, mrb_final_mrbgems);] unless gem_final_calls.empty? + unless gem_funcs.empty? + f.puts %Q[ int ai = mrb_gc_arena_save(mrb);] + f.puts %Q[ for (int i = 0; i < NUM_GEMS; i++) {] + f.puts %Q[ gem_funcs[i].init(mrb);] + f.puts %Q[ mrb_gc_arena_restore(mrb, ai);] + f.puts %Q[ mrb_vm_ci_env_clear(mrb, mrb->c->cibase);] + f.puts %Q[ if (mrb->exc) {] + f.puts %Q[ mrb_exc_raise(mrb, mrb_obj_value(mrb->exc));] + f.puts %Q[ }] + f.puts %Q[ }] + f.puts %Q[ mrb_state_atexit(mrb, mrb_final_mrbgems);] + end f.puts %Q[}] end end end file active_gems_txt => :generate_active_gems_txt + desc "generate the active gems text files" task :generate_active_gems_txt do |t| def t.timestamp; Time.at(0) end - active_gems = gems.sort_by(&:name).inject(""){|s, g| s << "#{g.name}\n"} + active_gems = gems.sort_by(&:name).inject(+""){|s, g| s << "#{g.name}\n"} if !File.exist?(active_gems_txt) || File.read(active_gems_txt) != active_gems mkdir_p File.dirname(active_gems_txt) File.write(active_gems_txt, active_gems) diff --git a/tasks/mrblib.rake b/tasks/mrblib.rake index 485375e55c..cf863f9d51 100644 --- a/tasks/mrblib.rake +++ b/tasks/mrblib.rake @@ -7,13 +7,6 @@ MRuby.each_target do self.libmruby_objs << objfile(src.ext) file src => [mrbcfile, __FILE__, *rbfiles] do |t| - if presym_enabled? - cdump = true - suffix = "proc" - else - cdump = false - suffix = "irep" - end mkdir_p File.dirname(t.name) File.open(t.name, 'w') do |f| _pp "GEN", "mrblib/*.rb", "#{t.name.relative_path}" @@ -24,16 +17,12 @@ MRuby.each_target do f.puts %Q[ * This file was generated!] f.puts %Q[ * All manual changes will get lost.] f.puts %Q[ */] - unless presym_enabled? - f.puts %Q[#include ] - f.puts %Q[#include ] - end - mrbc.run f, rbfiles, "mrblib_#{suffix}", cdump: cdump, static: true + mrbc.run f, rbfiles, "mrblib_proc", cdump: true, static: true f.puts %Q[void] f.puts %Q[mrb_init_mrblib(mrb_state *mrb)] f.puts %Q[{] - f.puts %Q[ mrblib_#{suffix}_init_syms(mrb);] if cdump - f.puts %Q[ mrb_load_#{suffix}(mrb, mrblib_#{suffix});] + f.puts %Q[ mrblib_proc_init_syms(mrb);] + f.puts %Q[ mrb_load_proc(mrb, mrblib_proc);] f.puts %Q[}] end end diff --git a/tasks/presym.rake b/tasks/presym.rake index 1ebc1a7371..7a9262fad2 100644 --- a/tasks/presym.rake +++ b/tasks/presym.rake @@ -7,9 +7,6 @@ all_prerequisites = ->(task_name, prereqs) do end MRuby.each_target do |build| - gensym_task = task(:gensym) - next unless build.presym_enabled? - presym = build.presym include_dir = "#{build.build_dir}/include" @@ -32,17 +29,44 @@ MRuby.each_target do |build| end end - file presym.list_path => ppps do + presym_task = file presym.list_path => ppps do presyms = presym.scan(ppps) current_presyms = presym.read_list if File.exist?(presym.list_path) - update = presyms != current_presyms - presym.write_list(presyms) if update - mkdir_p presym.header_dir - %w[id table].each do |type| - next if !update && File.exist?(presym.send("#{type}_header_path")) - presym.send("write_#{type}_header", presyms) + if presyms != current_presyms + mkdir_p presym.header_dir + %w[id table].each do |type| + presym.send("write_#{type}_header", presyms) + end + presym.write_list(presyms) end end - gensym_task.enhance([presym.list_path]) + # Don't directly write dependency tasks in the "task" arguments. + # The rake system tracks dependencies recursively + # (see Rake::Task#all_prerequisite_tasks and #collect_prerequisites). + # Therefore, indirect dependencies from ".o" to ".pi" must be eliminated. + # ref. https://github.com/mruby/mruby/issues/6721 + # This task acts a proxy-like for the "presym.list_path" task. + presym_proxy = task "gensym:update:#{build.name}" do + presym_task.invoke + end + + # Override the "timestamp" method to reflect the presym file. + presym_proxy.define_singleton_method :timestamp do + presym_task.timestamp + end + + # Ensure .o files depend on presym headers being generated. + # This is critical when a build's .o files are compiled during another + # build's presym scanning chain (before :gensym completes), e.g.: + # - internal sub-builds (mrbc) triggered by their parent build + # - the implicit host build triggered by a cross build needing mrbc + prereqs.each_key do |prereq| + next unless File.extname(prereq) == build.exts.object + next unless prereq.start_with?(build_dir) + next if mrbc_build_dir && prereq.start_with?(mrbc_build_dir) + file prereq => presym_proxy + end + + task gensym: presym.list_path end diff --git a/tasks/test.rake b/tasks/test.rake index 32a03fce60..68d4ef6056 100644 --- a/tasks/test.rake +++ b/tasks/test.rake @@ -36,6 +36,21 @@ namespace :test do |test_ns| desc "run command binaries tests" task :bin end + + desc "run all mruby tests serially" + task "run:serial" => "build" do + Rake::Task["test:run"].prerequisite_tasks.each(&:invoke) + end + + desc "run library tests serially" + task "run:serial:bin" => "build:bin" do + Rake::Task["test:run:lib"].prerequisite_tasks.each(&:invoke) + end + + desc "run command binaries tests serially" + task "run:serial:lib" => "build:lib" do + Rake::Task["test:run:bin"].prerequisite_tasks.each(&:invoke) + end end MRuby.each_target do |build| diff --git a/tasks/toolchains/android.rake b/tasks/toolchains/android.rake index da53cfdde2..98e413042e 100644 --- a/tasks/toolchains/android.rake +++ b/tasks/toolchains/android.rake @@ -2,7 +2,7 @@ require "json" class MRuby::Toolchain::Android - DEFAULT_ARCH = 'armeabi' # TODO : Revise if arch should have a default + DEFAULT_ARCH = 'armeabi-v7a' # TODO : Revise if arch should have a default DEFAULT_TOOLCHAIN = :clang @@ -15,14 +15,14 @@ class MRuby::Toolchain::Android %LOCALAPPDATA%/Android/Sdk/ndk/* ~/Library/Android/sdk/ndk-bundle ~/Library/Android/ndk + /opt/android-ndk } - TOOLCHAINS = [:clang, :gcc] + TOOLCHAINS = [:clang] ARCHITECTURES = %w{ - armeabi armeabi-v7a arm64-v8a + armeabi-v7a arm64-v8a x86 x86_64 - mips mips64 } class AndroidNDKHomeNotFound < StandardError @@ -40,21 +40,6 @@ Set ANDROID_NDK_HOME environment variable or set :ndk_home parameter @params = params end - def bin_gcc(command) - command = command.to_s - - command = case arch - when /armeabi/ then 'arm-linux-androideabi-' - when /arm64-v8a/ then 'aarch64-linux-android-' - when /x86_64/ then 'x86_64-linux-android-' - when /x86/ then 'i686-linux-android-' - when /mips64/ then 'mips64el-linux-android-' - when /mips/ then 'mipsel-linux-android-' - end + command - - gcc_toolchain_path.join('bin', command).to_s - end - def bin(command) command = command.to_s toolchain_path.join('bin', command).to_s @@ -72,7 +57,7 @@ Set ANDROID_NDK_HOME environment variable or set :ndk_home parameter next nil unless path[-1] == "*" dirs = Dir.glob(path).collect do |d| m = d.match(/(\d+)\.(\d+)\.(\d+)$/) - m ? [m[1], m[2], m[3]].collect { |v| v.to_i } : nil + m ? [m[1], m[2], m[3]].collect(&:to_i) : nil end dirs.compact! dirs.sort! do |before, after| @@ -97,38 +82,7 @@ Set ANDROID_NDK_HOME environment variable or set :ndk_home parameter end def toolchain_path - @toolchain_path ||= case toolchain - when :gcc - gcc_toolchain_path - when :clang - home_path.join('toolchains', 'llvm' , 'prebuilt', host_platform) - end - end - - def gcc_toolchain_path - if @gcc_toolchain_path === nil then - prefix = case arch - when /armeabi/ then 'arm-linux-androideabi-' - when /arm64-v8a/ then 'aarch64-linux-android-' - when /x86_64/ then 'x86_64-' - when /x86/ then 'x86-' - when /mips64/ then 'mips64el-linux-android-' - when /mips/ then 'mipsel-linux-android-' - end - - test = case arch - when /armeabi/ then 'arm-linux-androideabi-*' - when /arm64-v8a/ then 'aarch64-linux-android-*' - when /x86_64/ then 'x86_64-*' - when /x86/ then 'x86-*' - when /mips64/ then 'mips64el-linux-android-*' - when /mips/ then 'mipsel-linux-android-*' - end - - gcc_toolchain_version = Dir[home_path.join('toolchains', test)].map{|t| t.match(/-(\d+\.\d+)$/); $1.to_f }.max - @gcc_toolchain_path = home_path.join('toolchains', prefix + gcc_toolchain_version.to_s, 'prebuilt', host_platform) - end - @gcc_toolchain_path + @toolchain_path ||= home_path.join('toolchains', 'llvm' , 'prebuilt', host_platform) end def host_platform @@ -189,15 +143,13 @@ Set ANDROID_NDK_HOME environment variable or set :ndk_home parameter def cc case toolchain - when :gcc then bin_gcc('gcc') when :clang then bin('clang') end end def ar case toolchain - when :gcc then bin_gcc('ar') - when :clang then bin_gcc('ar') + when :clang then bin('llvm-ar') end end @@ -206,38 +158,15 @@ Set ANDROID_NDK_HOME environment variable or set :ndk_home parameter v = sdk_version case toolchain - when :gcc - case arch - when /armeabi-v7a/ then flags += %W(-march=armv7-a) - when /armeabi/ then flags += %W(-march=armv5te) - when /arm64-v8a/ then flags += %W(-march=armv8-a) - when /x86_64/ then flags += %W(-march=x86-64) - when /x86/ then flags += %W(-march=i686) - when /mips64/ then flags += %W(-march=mips64r6) - when /mips/ then flags += %W(-march=mips32) - end when :clang case arch - when /armeabi-v7a/ then flags += %W(-target armv7-none-linux-androideabi#{v}) - when /armeabi/ then flags += %W(-target armv5te-none-linux-androideabi#{v}) - when /arm64-v8a/ then flags += %W(-target aarch64-none-linux-android#{v}) - when /x86_64/ then flags += %W(-target x86_64-none-linux-android#{v}) - when /x86/ then flags += %W(-target i686-none-linux-android#{v}) - when /mips64/ then flags += %W(-target mips64el-none-linux-android#{v}) - when /mips/ then flags += %W(-target mipsel-none-linux-android#{v}) + when /armeabi-v7a/ then flags += %W(-target armv7a-linux-androideabi#{v} -mfpu=#{armeabi_v7a_mfpu} -mfloat-abi=#{armeabi_v7a_mfloat_abi}) + when /arm64-v8a/ then flags += %W(-target aarch64-linux-android#{v}) + when /x86_64/ then flags += %W(-target x86_64-linux-android#{v}) + when /x86/ then flags += %W(-target i686-linux-android#{v}) end end - case arch - when /armeabi-v7a/ then flags += %W(-mfpu=#{armeabi_v7a_mfpu} -mfloat-abi=#{armeabi_v7a_mfloat_abi}) - when /armeabi/ then flags += %W(-mtune=xscale -msoft-float) - when /arm64-v8a/ then flags += %W() - when /x86_64/ then flags += %W() - when /x86/ then flags += %W() - when /mips64/ then flags += %W(-fmessage-length=0) - when /mips/ then flags += %W(-fmessage-length=0) - end - flags end @@ -252,11 +181,6 @@ Set ANDROID_NDK_HOME environment variable or set :ndk_home parameter flags += %W(-MMD -MP -D__android__ -DANDROID) flags += ctarget - case toolchain - when :gcc - when :clang - flags += %W(-gcc-toolchain "#{gcc_toolchain_path}" -Wno-invalid-command-line-argument -Wno-unused-command-line-argument) - end flags += %W(-fpic -ffunction-sections -funwind-tables -fstack-protector-strong -no-canonical-prefixes) flags @@ -273,20 +197,12 @@ Set ANDROID_NDK_HOME environment variable or set :ndk_home parameter v = sdk_version case toolchain - when :gcc - case arch - when /armeabi-v7a/ then flags += %W(-Wl#{no_warn_mismatch}) - end when :clang - flags += %W(-gcc-toolchain "#{gcc_toolchain_path.to_s}") case arch when /armeabi-v7a/ then flags += %W(-target armv7-none-linux-androideabi#{v} -Wl,--fix-cortex-a8#{no_warn_mismatch}) - when /armeabi/ then flags += %W(-target armv5te-none-linux-androideabi#{v}) when /arm64-v8a/ then flags += %W(-target aarch64-none-linux-android#{v}) when /x86_64/ then flags += %W(-target x86_64-none-linux-android#{v}) when /x86/ then flags += %W(-target i686-none-linux-android#{v}) - when /mips64/ then flags += %W(-target mips64el-none-linux-android#{v}) - when /mips/ then flags += %W(-target mipsel-none-linux-android#{v}) end end flags += %W(-no-canonical-prefixes) diff --git a/tasks/toolchains/emscripten.rake b/tasks/toolchains/emscripten.rake new file mode 100644 index 0000000000..ce4be4a3f1 --- /dev/null +++ b/tasks/toolchains/emscripten.rake @@ -0,0 +1,57 @@ +MRuby::Toolchain.new(:emscripten) do |conf| + toolchain :clang + + # See: + # - https://emscripten.org/docs/tools_reference/emcc.html + # - https://emscripten.org/docs/tools_reference/settings_reference.html + # - https://github.com/emscripten-core/emscripten/blob/main/src/settings.js + # + # == WASM Exception Handling == + # + # mruby uses setjmp/longjmp for non-local exits (exceptions, break/return + # from blocks). This toolchain uses native WASM exception handling which + # is more efficient than Asyncify-based emulation: + # + # - Minimal memory overhead (no shadow stack) + # - No code size penalty + # - Works with both C and C++ code + # + # Supported runtimes (as of 2024): + # - Chrome 95+, Firefox 100+, Safari 15.2+ + # - Node.js 17+, Wasmtime, Wasmer + # + # For older runtimes, override with CFLAGS/LDFLAGS environment variables. + # + # See: https://emscripten.org/docs/porting/exceptions.html + # https://emscripten.org/docs/porting/setjmp-longjmp.html + compile_and_link_flags = [ + '-fwasm-exceptions', + ] + compile_flags = [ + *compile_and_link_flags, + '-Wno-unused-but-set-variable', + ] + link_flags = [ + *compile_and_link_flags, + '-sSUPPORT_LONGJMP=wasm', + ] + + conf.cc do |cc| + cc.command = 'emcc' + cc.flags.concat(compile_flags) unless ENV['CFLAGS'] + end + + conf.cxx do |cxx| + cxx.command = 'em++' + cxx.flags.concat(compile_flags) unless ENV['CXXFLAGS'] || ENV['CFLAGS'] + end + + conf.linker do |linker| + linker.command = 'emcc' + linker.flags.concat(link_flags) unless ENV['LDFLAGS'] + end + + conf.archiver do |archiver| + archiver.command = 'emar' + end +end diff --git a/tasks/toolchains/gcc.rake b/tasks/toolchains/gcc.rake index aa1cf777dc..675c26880e 100644 --- a/tasks/toolchains/gcc.rake +++ b/tasks/toolchains/gcc.rake @@ -20,6 +20,10 @@ MRuby::Toolchain.new(:gcc) do |conf, params| compiler.cxx_compile_flag = '-x c++ -std=gnu++03' compiler.cxx_exception_flag = '-fexceptions' compiler.cxx_invalid_flags = c_mandatory_flags + cxx_invalid_flags + + def compiler.setup_debug(conf) + self.flags << %w(-g3 -O0) + end end conf.linker do |linker| diff --git a/tasks/toolchains/visualcpp.rake b/tasks/toolchains/visualcpp.rake index 00e3640827..b23793f5a2 100644 --- a/tasks/toolchains/visualcpp.rake +++ b/tasks/toolchains/visualcpp.rake @@ -33,14 +33,9 @@ MRuby::Toolchain.new(:visualcpp) do |conf, _params| archiver.archive_options = '/nologo /OUT:"%{outfile}" %{objs}' end - conf.yacc do |yacc| - yacc.command = ENV['YACC'] || 'bison.exe' - yacc.compile_options = %q[-o "%{outfile}" "%{infile}"] - end - conf.gperf do |gperf| gperf.command = 'gperf.exe' - gperf.compile_options = %q[-L ANSI-C -C -p -j1 -i 1 -g -o -t -N mrb_reserved_word -k"1,3,$" "%{infile}" > "%{outfile}"] + gperf.compile_options = %q[-L ANSI-C -C -j1 -i 1 -o -t -N mrb_reserved_word -k"1,3,$" "%{infile}" > "%{outfile}"] end conf.exts do |exts| diff --git a/test/assert.rb b/test/assert.rb index 8cd2b375b8..b5e61e740a 100644 --- a/test/assert.rb +++ b/test/assert.rb @@ -4,7 +4,7 @@ $kill_test = 0 $warning_test = 0 $skip_test = 0 -$asserts = [] +$asserts = [] $test_start = Time.now if Object.const_defined?(:Time) # For bintest on Ruby @@ -245,25 +245,25 @@ def _assert_operator(affirmed, obj1, op, obj2 = $undefined, msg = nil) end ## -# Fail unless +str+ matches against +pattern+. +# Fail unless `str` matches against `pattern`. # -# +pattern+ is interpreted as pattern for File.fnmatch?. It may contain the +# `pattern` is interpreted as pattern for File.fnmatch?. It may contain the # following metacharacters: # -# * :: +# `*` :: # Matches any string. # -# ? :: +# `?` :: # Matches any one character. # -# [_SET_], [^_SET_] ([!_SET_]) :: -# Matches any one character in _SET_. Behaves like character sets in -# Regexp, including set negation ([^a-z]). +# `[_SET_]`, `[^_SET_]` (`[!_SET_]`) :: +# Matches any one character in _SET_. Behaves like character sets in +# Regexp, including set negation (`[^a-z]`). # -# {_A_,_B_} :: +# `{_A_,_B_}` :: # Matches pattern _A_ or pattern _B_. # -# \ :: +# ` \ ` :: # Escapes the next character. def assert_match(*args); _assert_match(true, *args) end def assert_not_match(*args); _assert_match(false, *args) end @@ -275,7 +275,7 @@ def _assert_match(affirmed, pattern, str, msg = nil) end ## -# Fails unless +obj+ is a kind of +cls+. +# Fails unless `obj` is a kind of `cls`. def assert_kind_of(cls, obj, msg = nil) unless ret = obj.kind_of?(cls) diff = " Expected #{obj.inspect} to be a kind of #{cls}, not #{obj.class}." @@ -284,7 +284,7 @@ def assert_kind_of(cls, obj, msg = nil) end ## -# Fails unless +exp+ is equal to +act+ in terms of a Float +# Fails unless `exp` is equal to `act` in terms of a Float def assert_float(exp, act, msg = nil) e, a = exp.to_f, act.to_f if e.finite? && a.finite? && (n = (e - a).abs) > Mrbtest::FLOAT_TOLERANCE diff --git a/test/bintest.rb b/test/bintest.rb index 773d61edc8..e5ec309ff0 100644 --- a/test/bintest.rb +++ b/test/bintest.rb @@ -1,29 +1,29 @@ $:.unshift File.dirname(File.dirname(File.expand_path(__FILE__))) +require 'shellwords' require 'test/assert.rb' GEMNAME = "" -def cmd_list(s) +def cmd_bin(s) path = s == "mrbc" ? ENV['MRBCFILE'] : "#{ENV['BUILD_DIR']}/bin/#{s}" path = path.sub(/\.exe\z/, "") if /mswin(?!ce)|mingw|bccwin/ =~ RbConfig::CONFIG['host_os'] path = "#{path}.exe".tr("/", "\\") end + path +end - path_list = [path] +def cmd_list(s) + path_list = [cmd_bin(s)] emu = ENV['EMULATOR'] - path_list.unshift emu if emu && !emu.empty? + path_list.unshift(*Shellwords.split(emu)) if emu && !emu.empty? path_list end def cmd(s) - return cmd_list(s).join(' ') -end - -def cmd_bin(s) - return cmd_list(s).pop + cmd_list(s).join(' ') end def shellquote(s) @@ -44,7 +44,7 @@ def shellquote(s) case RbConfig::CONFIG['host_os'] when /mswin(?!ce)|mingw|bccwin/ - gem = gem.gsub('\\', '/') + gem = gem.tr('\\', '/') end Dir["#{gem}/bintest/**/*.rb"].each do |file| diff --git a/test/t/array.rb b/test/t/array.rb index eccb435ef9..bccdbb8159 100644 --- a/test/t/array.rb +++ b/test/t/array.rb @@ -33,7 +33,7 @@ class SubArray < Array assert_equal([1, 1, 1], [1].*(3)) assert_equal([], [1].*(0)) assert_equal('abc', ['a', 'b', 'c'].*('')) - assert_equal('0, 0, 1, {:foo=>0}', [0, [0, 1], {foo: 0}].*(', ')) + assert_equal('0, 0, 1, {foo: 0}', [0, [0, 1], {foo: 0}].*(', ')) end assert('Array#<<', '15.2.12.5.3') do @@ -107,6 +107,17 @@ class SubArray < Array a = [1,2,3] a[-1,0] = a assert_equal([1,2,1,2,3,3], a) + + # passing self with length above ARY_REPLACE_SHARED_MIN (=20). + # ary_dup -> ary_replace converts the source to shared as a + # copy-on-write optimization; without re-modifying `a` afterwards, + # ARY_CAPA(a) reads from aux.shared's pointer bits and the + # expand-capa check silently mis-sizes -> heap-buffer-overflow in + # value_move. Reported via clusterfuzz mruby_fuzzer. + a = (0..30).to_a + a[3, 2] = a + assert_equal(60, a.length) + assert_equal([0, 1, 2] + (0..30).to_a + (5..30).to_a, a) end assert('Array#clear', '15.2.12.5.6') do @@ -189,11 +200,16 @@ class SubArray < Array assert_equal(nil, a.index(0)) end +assert("Array#index (block)") do + assert_nil (1..10).to_a.index { |i| i % 5 == 0 and i % 7 == 0 } + assert_equal 34, (1..100).to_a.index { |i| i % 5 == 0 and i % 7 == 0 } +end + assert('Array#initialize', '15.2.12.5.15') do - a = [].initialize(1) - b = [].initialize(2) - c = [].initialize(2, 1) - d = [].initialize(2) {|i| i} + a = [].__send__(:initialize,1) + b = [].__send__(:initialize,2) + c = [].__send__(:initialize,2, 1) + d = [].__send__(:initialize,2) {|i| i} assert_equal([nil], a) assert_equal([nil,nil], b) @@ -203,7 +219,7 @@ class SubArray < Array assert('Array#initialize_copy', '15.2.12.5.16') do a = [1,2,3] - b = [].initialize_copy(a) + b = [].__send__(:initialize_copy, a) assert_equal([1,2,3], b) end @@ -288,6 +304,11 @@ class SubArray < Array assert_equal(nil, a.rindex(0)) end +assert("Array#rindex (block)") do + assert_nil (1..10).to_a.rindex { |i| i % 5 == 0 and i % 7 == 0 } + assert_equal 69, (1..100).to_a.rindex { |i| i % 5 == 0 and i % 7 == 0 } +end + assert('Array#shift', '15.2.12.5.27') do a = [1,2,3] b = a.shift @@ -445,3 +466,24 @@ def ==(*) a[0] = 1 end end + +assert('Array#delete') do + a = ["a", "b", "c"] + assert_equal nil, a.delete("x") + assert_equal "x", a.delete("x") { _1 } + assert_equal ["a", "b", "c"], a + assert_equal "a", a.delete("a") + assert_equal ["b", "c"], a + + a = [nil] + assert_equal nil, a.delete(nil) { "?" } + assert_equal [], a +end + +assert('Array#hash with self-referencing arrays') do + a = [] + a << a + b = [] + b << b + assert_equal a.hash, b.hash +end diff --git a/test/t/bs_block.rb b/test/t/bs_block.rb index f4e4da375f..8853db8b05 100644 --- a/test/t/bs_block.rb +++ b/test/t/bs_block.rb @@ -313,7 +313,7 @@ def m end assert('BS Block 26') do - def m a + def m(a) yield a end assert_equal(2) do @@ -330,7 +330,8 @@ def m a 3.times{|ib| 2.times{|jb| sum += ib + jb - }} + } + } assert_equal sum, 9 end @@ -374,7 +375,7 @@ def m end assert('BS Block [ruby-dev:31160]') do - def m() + def m yield end assert_nil m {|(v,(*))|} @@ -389,7 +390,7 @@ def m(a, *b) end assert('BS Block 31') do - def m() + def m yield end assert_nil m {|((*))|} @@ -532,3 +533,89 @@ def iter } end end + +assert('BS Block 40 (https://github.com/mruby/mruby/issues/6411)') do + assert_equal "GOOD" do + Object.new.instance_eval do + def test(&b) + if b + b.call + else + test { return "GOOD" } + end + "BAD" + end + + test + end + end + + assert_equal "GOOD" do + Object.new.instance_eval do + # since Kernel#proc is defined in proc-ext + def make_proc(&b) + b + end + + def chocolate(&b) + biscuit(&b) + end + + def biscuit(&b) + if b + b.call + else + b = make_proc { return "GOOD" } + chocolate(&b) + end + "BAD" + end + + biscuit + end + end + + assert_equal [0, 1, 2, 3] do + Object.new.instance_eval do + def test(a = [], &b) + if b + b.call + else + if a.empty? + a << 0 + test(a) + else + a << 1 + test(a) { return 1 } + end + a << 2 + end + a << 3 + end + + test + end + end + + assert_equal [0, 1, 3, 2, 3, 2, 3] do + Object.new.instance_eval do + def test(a = [], &b) + if b + b.call + else + if a.empty? + a << 0 + test(a) + else + a << 1 + test(a, &-> { return 1 }) + end + a << 2 + end + a << 3 + end + + test + end + end +end diff --git a/test/t/class.rb b/test/t/class.rb index 1b4b84890d..fcdb7a88b0 100644 --- a/test/t/class.rb +++ b/test/t/class.rb @@ -40,7 +40,7 @@ def initialize_copy(obj) end class TestClass - def initialize args, &block + def initialize(args, &block) @result = if not args.nil? and block.nil? # only arguments :only_args @@ -477,3 +477,65 @@ def a=(v) assert_raise(TypeError) { class 0::C1; end } assert_raise(TypeError) { class []::C2; end } end + +assert('module with extended callback') do + module FooWithExtended + @@extended = [] + + def self.extended(base) + @@extended << base + end + + def self.extended_classes + @@extended + end + + def answer + 42 + end + end + + class BarBeingExtended + extend FooWithExtended + end + + assert_equal [BarBeingExtended], FooWithExtended.extended_classes + assert_true BarBeingExtended.respond_to?(:answer) + assert_equal 42, BarBeingExtended.answer +end + +assert("inherited hook runs before block body") do + class A + def self.values + @values ||= [] + end + + def self.inherited(mod) + mod.values << 1 + end + end + + klass = Class.new(A) do + self.values << 2 + end + + assert_equal [1, 2], klass.values +end + +assert("inherited hook runs before class body") do + class A + def self.values + @values ||= [] + end + + def self.inherited(mod) + mod.values << 1 + end + end + + class B < A + self.values << 2 + end + + assert_equal [1, 2], B.values +end diff --git a/test/t/codegen.rb b/test/t/codegen.rb index acb9e1bf53..1e4d375594 100644 --- a/test/t/codegen.rb +++ b/test/t/codegen.rb @@ -56,12 +56,11 @@ def args_to_ary(*args) end end -assert('next in normal loop with 127 arguments') do - assert_raise NameError do - while true - next A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A - end - end +assert('break in normal loop with 127 arguments') do + assert_equal 127, + 1.times{ + break 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 + }.size end assert('negate literal register alignment') do @@ -179,7 +178,7 @@ class Regexp; end # NODE_REGX assert_raise(NoMethodError){ /static/ } assert_raise(NoMethodError){ /static/iu } - Object.remove_const :Regexp + Object.__send__(:remove_const,:Regexp) # NODE_UNDEF assert_nothing_raised do @@ -195,3 +194,29 @@ class << Object.new end end end + +assert('bare `nil?` in if/unless uses self as receiver (#6874)') do + klass = Class.new do + def unless_form + reached = false + unless nil? + reached = true + end + reached + end + + def if_form + if nil? + :yes + else + :no + end + end + end + + assert_true klass.new.unless_form + assert_equal :no, klass.new.if_form + # Sanity: explicit literal nil receiver still optimized correctly. + result = if nil.nil? then :yes else :no end + assert_equal :yes, result +end diff --git a/test/t/comparable.rb b/test/t/comparable.rb index 2ee28de7b4..08d1f308c3 100644 --- a/test/t/comparable.rb +++ b/test/t/comparable.rb @@ -1,4 +1,3 @@ - assert('Comparable#<', '15.3.3.2.1') do class Foo include Comparable diff --git a/test/t/exception.rb b/test/t/exception.rb index 6f517a5c3e..d842c6ce19 100644 --- a/test/t/exception.rb +++ b/test/t/exception.rb @@ -36,7 +36,7 @@ assert('Exception.exception', '15.2.22.4.1') do e = Exception.exception() - e.initialize('a') + e.__send__(:initialize,'a') assert_equal 'a', e.message end @@ -46,7 +46,7 @@ raise NameError.new end - e = NameError.new "msg", "name" + e = NameError.new("msg", "name") assert_equal "msg", e.message assert_equal "name", e.name end @@ -355,7 +355,7 @@ def z assert('Exception#inspect') do assert_equal "Exception", Exception.new.inspect assert_equal "Exception", Exception.new("").inspect - assert_equal "error! (Exception)", Exception.new("error!").inspect + assert_equal "#", Exception.new("error!").inspect end assert('Exception#backtrace') do diff --git a/test/t/float.rb b/test/t/float.rb index e4c25b34ea..73ca7cf030 100644 --- a/test/t/float.rb +++ b/test/t/float.rb @@ -216,7 +216,7 @@ end assert('Float#divmod') do - def check_floats exp, act + def check_floats(exp, act) assert_float exp[0], act[0] assert_float exp[1], act[1] end @@ -239,28 +239,6 @@ def check_floats exp, act assert_not_predicate(-1.0/0.0, :nan?) end -assert('Float#<<') do - # Left Shift by one - assert_equal 46, 23.0 << 1 - - # Left Shift by a negative is Right Shift - assert_equal 23, 46.0 << -1 -end - -assert('Float#>>') do - # Right Shift by one - assert_equal 23, 46.0 >> 1 - - # Right Shift by a negative is Left Shift - assert_equal 46, 23.0 >> -1 - - # Don't raise on large Right Shift - assert_equal 0, 23.0 >> 128 - - # Don't raise on large Right Shift - assert_equal(-1, -23.0 >> 128) -end - assert('Float#to_s') do uses_float = 4e38.infinite? # enable MRB_USE_FLOAT32? @@ -315,4 +293,13 @@ def check_floats exp, act assert_equal(0.0, f.abs) end +assert('Float literal underflow') do + # Regression: float literals with exponents below POW10_MIN used to + # index pow10_tab out of bounds in mrb_read_float. They must round + # cleanly to 0.0. + assert_equal 0.0, 1.0e-400 + assert_equal 0.0, 9.99e-344 + assert_equal(-0.0, -92170141183460469231731687303715884105729e-383) +end + end # const_defined?(:Float) diff --git a/test/t/gc.rb b/test/t/gc.rb index 4b800e945a..3ea6b7ef8c 100644 --- a/test/t/gc.rb +++ b/test/t/gc.rb @@ -33,6 +33,56 @@ end end +assert('GC.step_limit=') do + origin = GC.step_limit + begin + assert_equal 0, origin # default: unlimited + assert_equal 512, (GC.step_limit = 512) + assert_equal 512, GC.step_limit + assert_equal 0, (GC.step_limit = 0) # back to unlimited + ensure + GC.step_limit = origin + end +end + +assert('GC.step_limit - GC completes with small limit') do + origin = GC.step_limit + begin + GC.step_limit = 64 + # GC should still complete even with a small step limit + GC.start + assert_true GC.stat[:live] > 0 + ensure + GC.step_limit = origin + end +end + +assert('GC.malloc_threshold=') do + origin = GC.malloc_threshold + begin + assert_equal 0, origin # default: disabled + assert_equal 65536, (GC.malloc_threshold = 65536) + assert_equal 65536, GC.malloc_threshold + assert_equal 0, (GC.malloc_threshold = 0) # back to disabled + ensure + GC.malloc_threshold = origin + end +end + +assert('GC.malloc_threshold - triggers GC on large allocations') do + origin = GC.malloc_threshold + begin + GC.malloc_threshold = 4096 + GC.start # reset malloc_increase + # allocate large strings to exceed threshold + 100.times { "x" * 1024 } + stat = GC.stat + assert_true stat[:malloc_increase] >= 0 + ensure + GC.malloc_threshold = origin + end +end + assert('GC.generational_mode=') do origin = GC.generational_mode begin diff --git a/test/t/hash.rb b/test/t/hash.rb index 9ff066b8c2..c278ff2ff9 100644 --- a/test/t/hash.rb +++ b/test/t/hash.rb @@ -857,16 +857,16 @@ def h.default(k); self[k] = 1; end assert("Hash##{meth}") do assert_equal('{}', Hash.new.__send__(meth)) - h1 = {:s => 0, :a => [1,2], 37 => :b, :d => "del", "c" => nil} + h1 = {s: 0, a: [1,2], 37 => :b, d: "del", "c" => nil} h1.shift h1.delete(:d) - s1 = ':a=>[1, 2], 37=>:b, "c"=>nil' + s1 = 'a: [1, 2], 37 => :b, "c" => nil' h2 = Hash.new(100) (1..14).each{h2[_1] = _1 * 2} h2 = {**h2, **h1} - s2 = "1=>2, 2=>4, 3=>6, 4=>8, 5=>10, 6=>12, 7=>14, 8=>16, " \ - "9=>18, 10=>20, 11=>22, 12=>24, 13=>26, 14=>28, #{s1}" + s2 = "1 => 2, 2 => 4, 3 => 6, 4 => 8, 5 => 10, 6 => 12, 7 => 14, 8 => 16, " \ + "9 => 18, 10 => 20, 11 => 22, 12 => 24, 13 => 26, 14 => 28, #{s1}" [[h1, s1], [h2, s2]].each do |h, s| assert_equal("{#{s}}", h.__send__(meth)) @@ -874,22 +874,11 @@ def h.default(k); self[k] = 1; end hh = {} hh[:recur] = hh h.each{|k, v| hh[k] = v} - assert_equal("{:recur=>{...}, #{s}}", hh.__send__(meth)) + assert_equal("{recur: {...}, #{s}}", hh.__send__(meth)) hh = h.dup hh[hh] = :recur - assert_equal("{#{s}, {...}=>:recur}", hh.__send__(meth)) - end - - [ar_entries, ht_entries].each do |entries| - cls = Class.new do - attr_accessor :h - def inspect; @h.replace(@h.dup); to_s; end - end - v = cls.new - h = entries.hash_for({_k: v}) - v.h = h - assert_nothing_raised{h.__send__(meth)} + assert_equal("{#{s}, {...} => :recur}", hh.__send__(meth)) end end end @@ -914,7 +903,6 @@ def inspect; @h.replace(@h.dup); to_s; end pairs1 = pairs.dup pairs1.delete([:_del, h.delete(:_del)]) exp_pairs1 = pairs1.hash_for.to_a - h.freeze assert_same(h, h.rehash) assert_equal(exp_pairs1, h.to_a) assert_equal(exp_pairs1.size, h.size) @@ -959,43 +947,12 @@ def inspect; @h.replace(@h.dup); to_s; end [1, 17].each{assert_equal(_1 * 2, h[_1])} end -assert('#eql? receiver should be specified key') do - [ar_entries, ht_entries].each do |entries| - h = entries.hash_for - k0 = HashKey[-99] - h[k0] = 1 - - k1 = HashKey[-3, error: :eql?] - assert_raise{h[k1]} - k0.error = :eql? - k1.error = false - assert_nothing_raised{h[k1]} - - k0.error = false - k1.error = :eql? - assert_raise{h[k1] = 1} - k0.error = :eql? - k1.error = false - assert_nothing_raised{h[k1] = 1} - - k0.error = false - k2 = HashKey[-6, error: :eql?] - assert_raise{h.delete(k2)} - k0.error = :eql? - k2.error = false - assert_nothing_raised{h.delete(k2)} - - k0.error = false - k3 = HashKey[-9, error: :eql?] - %i[has_key? include? key? member?].each do |m| - assert_raise{h.__send__(m, k3)} - end - k0.error = :eql? - k3.error = false - %i[has_key? include? key? member?].each do |m| - assert_nothing_raised{h.__send__(m, k3)} - end - end +assert('Hash#assoc, Hash#rassoc') do + h = {foo: 0, bar: 1, baz: 2} + assert_equal([:bar, 1], h.assoc(:bar)) + assert_nil(h.assoc(:quux)) + assert_equal([:foo, 0], h.rassoc(0)) + assert_nil(h.rassoc(4)) end assert('#== receiver should be specified value') do @@ -1012,7 +969,7 @@ def inspect; @h.replace(@h.dup); to_s; end end end -assert('test value ommision') do +assert('test value omission') do x = 1 y = 2 assert_equal({x:1, y:2}, {x:, y:}) diff --git a/test/t/integer.rb b/test/t/integer.rb index 5a5098b92d..b51826829a 100644 --- a/test/t/integer.rb +++ b/test/t/integer.rb @@ -49,12 +49,7 @@ assert_equal 2.0, b end -if Object.const_defined?(:Rational) - assert('Integer#quo') do - a = 6.quo(5) - assert_equal 5/6r, a - end -elsif Object.const_defined?(:Float) +if Object.const_defined?(:Float) assert('Integer#quo') do a = 6.quo(5) assert_equal 1.2, a @@ -68,7 +63,7 @@ d = 2%-5 e = -2%5 f = -2%-5 - g = 2%-2 + g = 2%-2 h = -2%2 i = -2%-2 @@ -134,6 +129,18 @@ assert_equal 6, 5 ^ 3 end +assert('Integer bitwise ops reject non-Integer operands') do + # A non-Integer operand has no bit pattern to combine, so &, |, ^ raise + # TypeError instead of silently reading garbage (Float used to return a + # bogus value via an unchecked union access). + assert_raise(TypeError) { 5 & 5.0 } + assert_raise(TypeError) { 5 | 5.0 } + assert_raise(TypeError) { 5 ^ 5.0 } + assert_raise(TypeError) { 5 | "3" } + assert_raise(TypeError) { 5 & nil } + assert_raise(TypeError) { 5 ^ :sym } +end + assert('Integer#<<', '15.2.8.3.12') do # Left Shift by one # 00010111 (23) diff --git a/test/t/kernel.rb b/test/t/kernel.rb index 0a70ec0d24..9b9f076252 100644 --- a/test/t/kernel.rb +++ b/test/t/kernel.rb @@ -5,65 +5,24 @@ assert_equal Module, Kernel.class end -assert('Kernel.block_given?', '15.3.1.2.2') do - def bg_try(&b) - if Kernel.block_given? - yield - else - "no block" - end - end - - assert_false Kernel.block_given? - # test without block - assert_equal "no block", bg_try - # test with block - assert_equal "block" do - bg_try { "block" } - end - # test with block - assert_equal "block" do - bg_try do - "block" - end - end -end +# Kernel.block_given? is not provided by mruby. '15.3.1.2.2' # Kernel.eval is provided by the mruby-eval mrbgem. '15.3.1.2.3' -assert('Kernel.iterator?', '15.3.1.2.5') do - assert_false Kernel.iterator? -end - -assert('Kernel.lambda', '15.3.1.2.6') do - l = Kernel.lambda do - true - end - - m = Kernel.lambda(&l) - - assert_true l.call - assert_equal Proc, l.class - assert_true m.call - assert_equal Proc, m.class -end - -assert('Kernel.loop', '15.3.1.2.8') do - i = 0 +# Kernel.iterator? is not provided by mruby. '15.3.1.2.5' - Kernel.loop do - i += 1 - break if i == 100 - end +# Kernel.lambda is not provided by mruby. '15.3.1.2.6' - assert_equal 100, i -end +# Kernel.loop is not provided by mruby. '15.3.1.2.8' -# Kernel.p is provided by the mruby-print mrbgem. '15.3.1.2.9' +# Kernel.p test is skipped due to the side effect. '15.3.1.2.9' +#assert('Kernel.p', '15.3.1.2.9') do +# assert_equal 1, Kernel.__send__(:p, 1) +#end -# Kernel.print is provided by the mruby-print mrbgem. '15.3.1.2.10' +# Kernel.print is provided by the mruby-io mrbgem. '15.3.1.2.10' -# Kernel.puts is provided by the mruby-print mrbgem. '15.3.1.2.11' +# Kernel.puts is provided by the mruby-io mrbgem. '15.3.1.2.11' assert('Kernel.raise', '15.3.1.2.12') do assert_raise RuntimeError do @@ -346,13 +305,13 @@ def no_super_method_named_this end end no_super_test = NoSuperMethodTestClass.new - msg = "no superclass method 'no_super_method_named_this'" + msg = "no superclass method 'no_super_method_named_this' for NoSuperMethodTestClass" assert_raise_with_message(NoMethodError, msg) do no_super_test.no_super_method_named_this end a = String.new - msg = "undefined method 'no_method_named_this'" + msg = "undefined method 'no_method_named_this' for String" assert_raise_with_message(NoMethodError, msg) do a.no_method_named_this end @@ -377,11 +336,16 @@ def no_super_method_named_this assert_kind_of Numeric, 1.0.object_id end -# Kernel#p is defined in mruby-print mrbgem. '15.3.1.3.34' +# Kernel#p test is skipped due to the side effect. '15.3.1.3.34' +#assert('Kernel#p', '15.3.1.3.34') do +# assert_equal nil, p +# assert_equal nil, p(p) +# assert_equal [:a, :b], p(:a, :b) +#end -# Kernel#print is defined in mruby-print mrbgem. '15.3.1.3.35' +# Kernel#print is defined in mruby-io mrbgem. '15.3.1.3.35' -# Kernel#puts is defined in mruby-print mrbgem. '15.3.1.3.39' +# Kernel#puts is defined in mruby-io mrbgem. '15.3.1.3.39' assert('Kernel#raise', '15.3.1.3.40') do assert_raise RuntimeError do diff --git a/test/t/lang.rb b/test/t/lang.rb index 4ed1b3daff..df889c9fcb 100755 --- a/test/t/lang.rb +++ b/test/t/lang.rb @@ -1,4 +1,4 @@ -# The aim of these tests is to detect pitfall for optimized VM. +# The aim of these tests is to detect pitfall for optimized VM. # Test for or/and # @@ -12,15 +12,15 @@ # # compiles to the following byte code: # -# 1 000 OP_LOADI R1 0 ; R1:i -# 2 001 OP_MOVE R2 R1 ; R1:i -# 2 002 OP_LOADI R3 0 -# 2 003 OP_GT R2 :> 1 -# 2 004 OP_JMPNOT R2 008 -# 2 005 OP_MOVE R2 R1 ; R1:i -# 2 006 OP_LOADI R3 10 -# 2 007 OP_LT R2 :< 1 -# 2 008 OP_JMPNOT R2 (The address of end of then part) +# 1 000 LOADI_0 R1 (0) ; R1:i +# 2 002 MOVE R2 R1 ; R1:i +# 2 005 LOADI_0 R3 (0) +# 2 007 GT R2 R3 +# 2 009 JMPNOT R2 021 +# 2 013 MOVE R2 R1 ; R1:i +# 2 016 LOADI8 R3 10 +# 2 019 LT R2 R3 +# 2 021 JMPNOT R2 (The address of end of then part) # # When the instruction fusion the OP_GT and OP_JMPNOT you fell into the pitfalls. # The deleted intermediate boolean value is used in OP_JMPNOT (address 008). diff --git a/test/t/literals.rb b/test/t/literals.rb index f939686500..de30e816ba 100644 --- a/test/t/literals.rb +++ b/test/t/literals.rb @@ -233,8 +233,8 @@ assert_equal " iii\n", i assert_equal [" j1j\n", " j2j\n", " j\#{3}j\n"], j assert_equal 123, k - assert_equal ["x{:x=>\"mm3\\n\"}y\nmm1\n", "mm2\n"], m - assert_equal ({:x=>"mm3\n"}), m2 + assert_equal ["x{x: \"mm3\\n\"}y\nmm1\n", "mm2\n"], m + assert_equal ({x: "mm3\n"}), m2 assert_equal [1, "nn1\n", 3, 4], n assert_equal "a $ q\n $ c $ d", q1 assert_equal "l $ mqq\nn $ o", q2 @@ -383,4 +383,20 @@ assert_equal :'{foo bar}', h end +assert('operator override with negative integer literal', '#2557') do + cls = Class.new { + def +(x); ['add', x]; end + def -(x); ['sub', x]; end + } + q = cls.new + assert_equal ['add', 5], q + 5 + assert_equal ['add', -5], q + -5 + assert_equal ['sub', 5], q - 5 + assert_equal ['sub', -5], q - -5 + assert_equal ['add', 500], q + 500 + assert_equal ['add', -500], q + -500 + assert_equal ['sub', 500], q - 500 + assert_equal ['sub', -500], q - -500 +end + # Not Implemented ATM assert('Literals Regular expression', '8.7.6.5') do diff --git a/test/t/module.rb b/test/t/module.rb index 9aa0182802..f2a0e1164e 100644 --- a/test/t/module.rb +++ b/test/t/module.rb @@ -56,20 +56,6 @@ class Test4ModuleAncestors assert_true r.include?(Object) end -assert('Module#append_features', '15.2.2.4.10') do - module Test4AppendFeatures - def self.append_features(mod) - Test4AppendFeatures2.const_set(:Const4AppendFeatures2, mod) - end - end - module Test4AppendFeatures2 - include Test4AppendFeatures - end - - assert_equal Test4AppendFeatures2, Test4AppendFeatures2.const_get(:Const4AppendFeatures2) - assert_raise(FrozenError) { Module.new.append_features Class.new.freeze } -end - assert('Module#attr NameError') do %w[ foo? @@ -177,6 +163,9 @@ def iattr_val=(val) AttrTestReader.cattr_val = 'test' assert_equal 'test', AttrTestReader.cattr + + assert_raise(ArgumentError) { attr_instance.iattr(1) } + assert_raise(ArgumentError) { attr_instance.iattr(1, 2, 3) } end assert('Module#attr_writer', '15.2.2.4.14') do @@ -231,6 +220,18 @@ module Test4ConstDefined assert_true Test4ConstDefined.const_defined?(:Const4Test4ConstDefined) assert_false Test4ConstDefined.const_defined?(:NotExisting) assert_wrong_const_name{ Test4ConstDefined.const_defined?(:wrong_name) } + + # shared empty iv_tbl (include) + m = Module.new + c = Class.new{include m} + m::CONST = 1 + assert_true c.const_defined?(:CONST) + + # shared empty iv_tbl (prepend) + m = Module.new + c = Class.new{prepend m} + m::CONST = 1 + assert_true c.const_defined?(:CONST) end assert('Module#const_get', '15.2.2.4.21') do @@ -246,6 +247,18 @@ module Test4ConstGet assert_uninitialized_const{ Test4ConstGet.const_get(:I_DO_NOT_EXIST) } assert_uninitialized_const{ Test4ConstGet.const_get("I_DO_NOT_EXIST::ME_NEITHER") } assert_wrong_const_name{ Test4ConstGet.const_get(:wrong_name) } + + # shared empty iv_tbl (include) + m = Module.new + c = Class.new{include m} + m::CONST = 1 + assert_equal 1, c.const_get(:CONST) + + # shared empty iv_tbl (prepend) + m = Module.new + c = Class.new{prepend m} + m::CONST = 1 + assert_equal 1, c.const_get(:CONST) end assert('Module#const_set', '15.2.2.4.23') do @@ -264,16 +277,15 @@ module Test4ConstSet module Test4RemoveConst ExistingConst = 23 end - - assert_equal 23, Test4RemoveConst.remove_const(:ExistingConst) + assert_equal 23, Test4RemoveConst.__send__(:remove_const,:ExistingConst) assert_false Test4RemoveConst.const_defined?(:ExistingConst) assert_raise_with_message_pattern(NameError, "constant * not defined") do - Test4RemoveConst.remove_const(:NonExistingConst) + Test4RemoveConst.__send__(:remove_const,:NonExistingConst) end %i[x X!].each do |n| - assert_wrong_const_name { Test4RemoveConst.remove_const(n) } + assert_wrong_const_name { Test4RemoveConst.__send__(:remove_const,n) } end - assert_raise(FrozenError) { Test4RemoveConst.freeze.remove_const(:A) } + assert_raise(FrozenError) { Test4RemoveConst.freeze.__send__(:remove_const,:A) } end assert('Module#const_missing', '15.2.2.4.22') do @@ -286,18 +298,6 @@ def self.const_missing(sym) assert_equal 42, Test4ConstMissing.const_get(:ConstDoesntExist) end -assert('Module#extend_object', '15.2.2.4.25') do - cls = Class.new - mod = Module.new { def foo; end } - a = cls.new - b = cls.new - mod.extend_object(b) - assert_false a.respond_to?(:foo) - assert_true b.respond_to?(:foo) - assert_raise(FrozenError) { mod.extend_object(cls.new.freeze) } - assert_raise(FrozenError, TypeError) { mod.extend_object(1) } -end - assert('Module#include', '15.2.2.4.27') do module Test4Include Const4Include = 42 @@ -356,16 +356,16 @@ module Test4Included2 assert('Module#method_defined?', '15.2.2.4.34') do module Test4MethodDefined module A - def method1() end + def method1() end end class B - def method2() end + def method2() end end class C < B include A - def method3() end + def method3() end end end @@ -445,15 +445,6 @@ def imeth; :imeth end end end -assert 'Module#prepend_features' do - mod = Module.new { def m; :mod end } - cls = Class.new { def m; :cls end } - assert_equal :cls, cls.new.m - mod.prepend_features(cls) - assert_equal :mod, cls.new.m - assert_raise(FrozenError) { Module.new.prepend_features(Class.new.freeze) } -end - # @!group prepend assert('Module#prepend') do module M0 @@ -542,8 +533,8 @@ class << self bug6662 = '[ruby-dev:45868]' c2 = labeled_class("c2", c) - anc = c2.ancestors - assert_equal([c2, m, c, Object], anc[0..anc.index(Object)], bug6662) + as = c2.ancestors + assert_equal([c2, m, c, Object], as[0..as.index(Object)], bug6662) end assert 'Module#prepend + Module#ancestors' do @@ -686,7 +677,7 @@ class << self # assert_nothing_raised(SystemStackError, bug10847) do # 0.3.numerator # end - # end; + # end #end assert 'Module#prepend to frozen class' do @@ -729,14 +720,14 @@ module Test4to_sModules assert('Issue 1467') do module M1 - def initialize() + def initialize super() end end class C1 include M1 - def initialize() + def initialize super() end end @@ -763,6 +754,106 @@ class B assert_true(B.new.foo) end +assert('method visibility') do + class CallTypeTest + def test_private(&block) + func(&block) + end + def test_protected(&block) + self.func(&block) + end + private + def func + yield + end + end + + v = CallTypeTest.new + + assert_raise_with_message_pattern(NameError, "private method 'func' called for CallTypeTest") do + v.func { :test } + end + assert_equal :test, v.test_private { :test } + + class CallTypeTest + protected :func + end + + assert_raise_with_message_pattern(NameError, "protected method 'func' called for CallTypeTest") do + v.func { :test } + end + assert_equal :test, v.test_protected { :test } + assert_equal :test, v.test_private { :test } + + class CallTypeTest + public def public_func + :test + end + + public :func + end + + assert_equal :test, v.public_func + assert_equal :test, v.func { :test } + assert_equal :test, v.test_protected { :test } + assert_equal :test, v.test_private { :test } +end + +assert('method visibility with meta programming') do + assert_equal "GOOD!" do + f = nil + c = Class.new { + private + f = ->(&blk) { + class_eval(&blk) + } + } + f.call { + def good! + "GOOD!" + end + } + c.new.good! + end + + assert_equal "GOOD!" do + c = Class.new + c.class_eval { + private + c.class_eval { + def good! + "GOOD!" + end + } + } + c.new.good! + end + + assert_raise NoMethodError do + f = nil + c = Class.new { + private + f = -> { + def bad! + "BAD!" + end + } + } + f.call + c.new.bad! + end + + assert_raise NoMethodError do + c = Class.new { + -> { private }.call + def bad! + "BAD!" + end + } + c.new.bad! + end +end + assert('Module#module_function') do module M def modfunc; end @@ -770,6 +861,9 @@ def modfunc; end end assert_true M.respond_to?(:modfunc) + assert_equal nil do + M.modfunc + end end assert('module with non-class/module outer raises TypeError') do @@ -779,7 +873,7 @@ def modfunc; end assert('module to return the last value') do m = module M; :m end - assert_equal(m, :m) + assert_equal(:m, m) end assert('module to return nil if body is empty') do @@ -796,3 +890,45 @@ class << self assert_equal("value", actual) end + +assert('shared empty iv_tbl (include)') do + m1 = Module.new + m2 = Module.new{include m1} + c = Class.new{include m2} + m1::CONST1 = 1 + assert_equal 1, m2::CONST1 + assert_equal 1, c::CONST1 + m2::CONST2 = 2 + assert_equal 2, c::CONST2 +end + +assert('shared empty iv_tbl (prepend)') do + m1 = Module.new + m2 = Module.new{prepend m1} + c = Class.new{include m2} + m1::CONST1 = 1 + assert_equal 1, m2::CONST1 + assert_equal 1, c::CONST1 + m2::CONST2 = 2 + assert_equal 2, c::CONST2 +end + +assert('constant lookup #6506') do + Module.new do + module X + module A + class WWW; end + end + end + + module X::Y; end + + module X::Y::Z + extend X::A + + class << self + assert_nothing_raised{WWW} + end + end + end +end diff --git a/test/t/nomethoderror.rb b/test/t/nomethoderror.rb index 5fed796893..0eb8fc69e2 100644 --- a/test/t/nomethoderror.rb +++ b/test/t/nomethoderror.rb @@ -9,7 +9,7 @@ end assert('NoMethodError#args', '15.2.32.2.1') do - a = NoMethodError.new 'test', :test, [1, 2] + a = NoMethodError.new('test', :test, [1, 2]) assert_equal [1, 2], a.args assert_nothing_raised do diff --git a/test/t/numeric.rb b/test/t/numeric.rb index 8baf6c883b..5c256ea903 100644 --- a/test/t/numeric.rb +++ b/test/t/numeric.rb @@ -66,7 +66,7 @@ def assert_step(exp, receiver, args, inf: false) skip unless Object.const_defined?(:Float) inf = Float::INFINITY assert_raise(ArgumentError) { 1.step(2, 0.0) { break } } - assert_step([2.0, 3.0, 4.0], 2, [4.0]) + assert_step([2, 3, 4], 2, [4.0]) assert_step([7.0, 4.0, 1.0, -2.0], 7, [-4, -3.0]) assert_step([2.0, 3.0, 4.0], 2.0, [4]) assert_step([10.0, 11.0, 12.0, 13.0], 10.0, [], inf: true) diff --git a/test/t/proc.rb b/test/t/proc.rb index b17b21e8c1..f9c4b4be1d 100644 --- a/test/t/proc.rb +++ b/test/t/proc.rb @@ -178,3 +178,33 @@ def m(&b) b end m{ break }.call end end + +assert('#6345: dup of a block from method is treated as orphan') do + def m(&b) b.dup end + + # The dup is orphan, so calling it raises LocalJumpError on break. + assert_raise LocalJumpError do + m { break 1 }.call + end + + # A dup of a block without break still returns normally. + assert_equal 42, m { 42 }.call +end + +assert('identity check for proc object') do + b = [] + t = 2 + while t > 0 + b << ->{} + t -= 1 + end + assert_true b[0] == b[1] + assert_equal b[0].hash, b[1].hash + + b = [] + 2.times { + b << ->{} + } + assert_false b[0] == b[1] + assert_not_equal b[0].hash, b[1].hash +end diff --git a/test/t/string.rb b/test/t/string.rb index 447493227b..d74132d869 100644 --- a/test/t/string.rb +++ b/test/t/string.rb @@ -1,4 +1,3 @@ -# coding: utf-8 ## # String ISO Test @@ -130,7 +129,7 @@ assert('String#[]=') do # length of args is 1 a = 'abc' - a[0] = 'X' + assert_equal 'X', (a[0] = 'X') assert_equal 'Xbc', a b = 'abc' @@ -153,6 +152,10 @@ assert_equal 'aXc', e end + f = 'abc' + assert_equal 'X', f.[]=(0, 'X') + assert_equal 'Xbc', f + assert_raise(TypeError) { 'a'[0] = 1 } assert_raise(TypeError) { 'a'[:a] = '1' } @@ -177,15 +180,19 @@ assert_equal 'Xabc', d1 e1 = 'abc' - e1[1, 3] = 'X' + assert_equal 'X', (e1[1, 3] = 'X') assert_equal 'aX', e1 + f1 = 'abc' + assert_equal 'X', f1.[]=(0, 1, 'X') + assert_equal 'Xbc', f1 + # args is RegExp # It will be tested in mrbgems. # args is String a3 = 'abc' - a3['bc'] = 'X' + assert_equal 'X', (a3['bc'] = 'X') assert_equal a3, 'aX' b3 = 'abc' @@ -193,6 +200,10 @@ b3['XX'] = 'Y' end + c3 = 'abc' + assert_equal 'X', c3.[]=('bc', 'X') + assert_equal 'aX', c3 + assert_raise(TypeError) { 'a'[:a, 0] = '1' } assert_raise(TypeError) { 'a'[0, :a] = '1' } assert_raise(TypeError) { 'a'[0, 1] = 1 } @@ -474,16 +485,16 @@ assert('String#initialize', '15.2.10.5.23') do a = '' - a.initialize('abc') + a.__send__(:initialize,'abc') assert_equal 'abc', a - a.initialize('abcdefghijklmnopqrstuvwxyz') + a.__send__(:initialize,'abcdefghijklmnopqrstuvwxyz') assert_equal 'abcdefghijklmnopqrstuvwxyz', a end assert('String#initialize_copy', '15.2.10.5.24') do a = '' - a.initialize_copy('abc') + a.__send__(:initialize_copy, 'abc') assert_equal 'abc', a end @@ -507,8 +518,8 @@ b = 'abc' * 10 c = ('cba' * 10).dup - b.replace(c); - c.replace(b); + b.replace(c) + c.replace(b) assert_equal c, b # shared string @@ -899,3 +910,72 @@ assert_equal("o", str1.byteslice(4.0)) assert_equal("\x82ab", str2.byteslice(2.0, 3.0)) end + +assert('String#bytesplice') do + # range, replace (len1=len2) + a = "0123456789" + assert_equal "0ab3456789", a.bytesplice(1..2, "ab") + + # range, replace (len1>len2) + a = "0123456789" + assert_equal "0ab456789", a.bytesplice(1..3, "ab") + + # range, replace (len1len2) + a = "0123456789" + assert_equal "0ab456789", a.bytesplice(1, 3, "ab") + + # idx, len, replace (len1len2) + a = "0123456789" + assert_equal "0bc456789", a.bytesplice(1..3, b, 1..2) + + # range, replace, range (len1len2) + a = "0123456789" + assert_equal "0bc456789", a.bytesplice(1, 3, b, 1, 2) + + # idx, len, replace, idx, len (len1 0 + a.push i * 3 + 1 + raise e + rescue + a.push i * 3 + 2 + redo + ensure + a.push i * 3 + 3 + end + end + assert_equal [1, 2, 3, 1, 2, 3, 3], a + + a = [] + limit = 3 + e = RuntimeError.new("!") + for i in 0...3 + a.push i * 4 + 1 + begin + limit -= 1 + break unless limit > 0 + a.push i * 4 + 2 + raise e + rescue + a.push i * 4 + 3 + redo + ensure + a.push i * 4 + 4 + end + end + assert_equal [1, 2, 3, 4, 1, 2, 3, 4, 1, 4], a end assert('Abbreviated variable assignment', '11.4.2.3.2') do @@ -211,6 +305,7 @@ def const1 end assert_equal "hello world", Syntax4Const::CONST1 assert_equal "hello world", Syntax4Const::Const2.new.const1 + assert_raise(NameError) { Syntax4Const::Object } end assert('Abbreviated variable assignment as returns') do @@ -531,9 +626,9 @@ def m(a = b = 1, &c) [a, b, c ? true : nil] end this is a comment that has extra after =begin and =end with spaces after it =end line = __LINE__ -=begin this is a comment +=begin this is a comment this is a comment that has extra after =begin and =end with tabs after it -=end xxxxxxxxxxxxxxxxxxxxxxxxxx +=end xxxxxxxxxxxxxxxxxxxxxxxxxx assert_equal(line + 4, __LINE__) end @@ -585,6 +680,20 @@ def m(a=1, **k) [a, k] end assert_equal [{a: 1}, {b: 2}], m({a: 1}, b: 2) assert_raise(ArgumentError) { m({a: 1}, {b: 2}) } + def m1(a: {}) a end + assert_equal({}, m1) + assert_equal(:abc, m1(a: :abc)) + + def m2(a: +1) a end + assert_equal(1, m2) + + assert_nothing_raised do + def m3 arg: + 123 + end + end + assert_equal(123, m3(arg: 456)) + def m(*, a:) a end assert_equal 1, m(a: 1) assert_equal 3, m(1, 2, a: 3) @@ -710,6 +819,7 @@ def m(a: b = 1, c:) [a, b, c] end assert('numbered parameters') do assert_equal(15, [1,2,3,4,5].reduce {_1+_2}) assert_equal(45, Proc.new do _1 + _2 + _3 + _4 + _5 + _6 + _7 + _8 + _9 end.call(*[1, 2, 3, 4, 5, 6, 7, 8, 9])) + assert_equal(5, -> { _1 }.call(5)) end assert('_0 is not numbered parameter') do @@ -717,6 +827,22 @@ def m(a: b = 1, c:) [a, b, c] end assert_equal(:l, ->{_0}.call) end +assert('numbered parameters in symbol name (https://github.com/mruby/mruby/issues/5295)') do + assert_equal([:_1], Array.new(1) {:_1}) +end + +assert('numbered parameters as hash key') do + h = {_1: 3} + assert_equal(3, h[:_1]) + assert_equal(7, -> { _1 }.call(7)) +end + +assert('numbered parameters as singleton') do + o = Object.new + lambda { def _1.a(b) = "a#{b}" }.call(o) + assert_equal('ab', o.a('b')) +end + assert('argument forwarding') do c = Class.new { def a0(*a,&b) @@ -770,3 +896,457 @@ def self.cs3(x) = s3 x + 1 assert_equal(43, c.cs1) assert_equal(45, c.cs3(c.cs2)) end + +assert('at-least-once loop') do + # basic at-least-once loop that executes once + count = 0 + begin + count += 1 + end while false + assert_equal 1, count + + # at-least-once loop with true condition + count = 0 + begin + count += 1 + end while count < 3 + assert_equal 3, count + + # at-least-once loop with break + count = 0 + begin + count += 1 + break if count == 2 + end while count < 10 + assert_equal 2, count + + # at-least-once loop with next + count = 0 + sum = 0 + begin + count += 1 + if count == 2 + next + end + sum += count + end while count < 4 + assert_equal 4, count + assert_equal 8, sum # 1 + 3 + 4 = 8 (skipped 2) + + # nested at-least-once loops + outer_count = 0 + total = 0 + begin + outer_count += 1 + inner_count = 0 + begin + inner_count += 1 + total += inner_count + end while inner_count < 2 + end while outer_count < 2 + assert_equal 2, outer_count + assert_equal 6, total # (1+2) + (1+2) = 6 + + # at-least-once loop with exception handling + count = 0 + begin + count += 1 + raise "error" if count == 2 + rescue + count += 10 + end while count < 5 + assert_equal 12, count # 1, then 2+10=12 +end + +assert('pattern matching - basic case/in') do + # literal patterns + result = case 1 + in 1 then :one + in 2 then :two + end + assert_equal :one, result + + # variable binding + case 42 + in x + assert_equal 42, x + end + + # else clause + result = case 3 + in 1 then :one + in 2 then :two + else :other + end + assert_equal :other, result +end + +assert('pattern matching - string literal patterns') do + result = case "hello" + in "hello" then :match + end + assert_equal :match, result + + # double-quoted vs single-quoted should both work + result = case 'world' + in "world" then :double + end + assert_equal :double, result + + # alternation with strings + result = case "ab" + in "ab" | "cd" then :alt + end + assert_equal :alt, result + + # string interpolation in pattern + expected = "lo" + result = case "hello" + in "hel#{expected}" then :interp + end + assert_equal :interp, result +end + +assert('pattern matching - array patterns') do + # simple array pattern + case [1, 2, 3] + in [a, b, c] + assert_equal 1, a + assert_equal 2, b + assert_equal 3, c + end + + # array pattern with rest + case [1, 2, 3, 4, 5] + in [first, *rest] + assert_equal 1, first + assert_equal [2, 3, 4, 5], rest + end + + # array pattern with rest in middle + case [1, 2, 3, 4, 5] + in [first, *middle, last] + assert_equal 1, first + assert_equal [2, 3, 4], middle + assert_equal 5, last + end + + # nested array pattern + case [[1, 2], [3, 4]] + in [[a, b], [c, d]] + assert_equal [1, 2, 3, 4], [a, b, c, d] + end + + # array pattern with literal match + result = case [1, 2, 3] + in [1, 2, x] + x + end + assert_equal 3, result + + # array literal with splat as case value (#6854): + # the array-literal-length optimization must bail out for splat, + # since the runtime length is unknown statically. + a = [1, 2] + result = case [*a] + in [1, 2] then :match + else :nomatch + end + assert_equal :match, result + + # same bug in one-line `in` pattern + assert_true ([*a] in [1, 2]) + assert_false ([*a] in [1, 2, 3]) + assert_true ([1, *a, 4] in [1, 1, 2, 4]) +end + +assert('pattern matching - find patterns') do + # find pattern - element at end + case [1, 2, 3] + in [*pre, 3, *post] + assert_equal [1, 2], pre + assert_equal [], post + end + + # find pattern - element at beginning + case [1, 2, 3] + in [*pre, 1, *post] + assert_equal [], pre + assert_equal [2, 3], post + end + + # find pattern - element in middle + case [1, 2, 3, 4, 5] + in [*pre, 3, *post] + assert_equal [1, 2], pre + assert_equal [4, 5], post + end + + # find pattern - multiple middle elements + case [1, 2, 3, 4, 5] + in [*pre, 2, 3, *post] + assert_equal [1], pre + assert_equal [4, 5], post + end + + # find pattern - anonymous rest + result = case [1, 2, 3, 4, 5] + in [*, 3, *] + :found + else + :not_found + end + assert_equal :found, result + + # find pattern - no match + result = case [1, 2, 4, 5] + in [*pre, 3, *post] + :found + else + :not_found + end + assert_equal :not_found, result + + # find pattern with literal values + case [1, 2, 3, 4, 5] + in [*pre, 2, x, *post] + assert_equal [1], pre + assert_equal 3, x + assert_equal [4, 5], post + end +end + +assert('pattern matching - hash patterns') do + # simple hash pattern + case {a: 1, b: 2} + in {a: x, b: y} + assert_equal 1, x + assert_equal 2, y + end + + # shorthand hash pattern + case {a: 1, b: 2} + in {a:, b:} + assert_equal 1, a + assert_equal 2, b + end + + # hash pattern with extra keys (partial match) + case {a: 1, b: 2, c: 3} + in {a: x} + assert_equal 1, x + end + + # hash pattern with rest (captures unmatched keys) + case {a: 1, b: 2, c: 3} + in {a:, **rest} + assert_equal 1, a + assert_equal({b: 2, c: 3}, rest) + end + + # hash value extraction + h = {user: "Alice"} + case h + in {user: u} + assert_equal "Alice", u + end +end + +assert('pattern matching - guard clauses') do + # if guard + result = case 10 + in x if x > 5 + :big + in x + :small + end + assert_equal :big, result + + # unless guard + result = case 3 + in x unless x > 5 + :small + in x + :big + end + assert_equal :small, result + + # guard with pattern + result = case [1, 2, 3] + in [a, b, c] if a + b + c > 5 + :sum_big + in [a, b, c] + :sum_small + end + assert_equal :sum_big, result +end + +assert('pattern matching - alternative patterns') do + # simple alternatives + result = case 2 + in 1 | 2 | 3 + :found + else + :not_found + end + assert_equal :found, result + + # alternatives in array + result = case [1, 2] + in [1, 2] | [3, 4] + :match + else + :no_match + end + assert_equal :match, result +end + +assert('pattern matching - pin operator') do + x = 1 + # pin matches exact value + result = case 1 + in ^x + :matched + else + :not_matched + end + assert_equal :matched, result + + # pin in array pattern + expected = 42 + case [42, 100] + in [^expected, y] + assert_equal 100, y + end + + # pin prevents rebinding + a = 1 + result = case 2 + in ^a + :same + else + :different + end + assert_equal :different, result +end + +assert('pattern matching - as pattern') do + # bind entire match + case [1, 2, 3] + in [x, *rest] => whole + assert_equal 1, x + assert_equal [2, 3], rest + assert_equal [1, 2, 3], whole + end + + # as pattern with hash + case {a: 1, b: 2} + in {a:} => h + assert_equal 1, a + assert_equal({a: 1, b: 2}, h) + end +end + +assert('pattern matching - one-line in') do + # basic true/false + assert_true((1 in 1)) + assert_false((1 in 2)) + + # with variable binding + assert_true(([1, 2] in [x, y])) + + # in conditional + matched = false + if [1, 2, 3] in [a, *rest] + matched = true + end + assert_true matched + + # with hash + assert_true(({a: 1} in {a: 1})) + assert_false(({a: 1} in {a: 2})) +end + +assert('pattern matching - one-line =>') do + # simple binding + 1 => x + assert_equal 1, x + + # array destructuring + [1, 2, 3] => [a, b, c] + assert_equal [1, 2, 3], [a, b, c] + + # hash destructuring + {name: "Bob", age: 25} => {name:, age:} + assert_equal "Bob", name + assert_equal 25, age + + # with rest + [1, 2, 3, 4] => [first, *middle, last] + assert_equal 1, first + assert_equal [2, 3], middle + assert_equal 4, last +end + +assert('pattern matching - NoMatchingPatternError') do + # => raises on mismatch + assert_raise(NoMatchingPatternError) do + 1 => 2 + end + + # can be rescued + begin + [1, 2] => [1, 2, 3] + rescue NoMatchingPatternError => e + assert_true e.message.is_a?(String) + end + + # case/in without else raises NoMatchingPatternError + assert_raise(NoMatchingPatternError) do + case 5 + in 1 then :one + in 2 then :two + end + end +end + +assert('pattern matching - complex patterns') do + # array of hashes + records = [{id: 1, value: "a"}, {id: 2, value: "b"}] + case records + in [first, second] + assert_equal({id: 1, value: "a"}, first) + assert_equal({id: 2, value: "b"}, second) + end + + # hash with array value + data = {items: [1, 2, 3]} + case data + in {items: [a, *rest]} + assert_equal 1, a + assert_equal [2, 3], rest + end +end + +assert('&nil in formal parameters') do + def m(&nil); end + m + m(&nil) + assert_raise(ArgumentError) { m {} } + + def m2(a, b, &nil); end + m2(1, 2) + assert_raise(ArgumentError) { m2(1, 2) {} } + + def m3(a, b=1, *c, &nil); end + m3(1) + assert_raise(ArgumentError) { m3(1) {} } + + def m4(a:, &nil); end + m4(a: 1) + assert_raise(ArgumentError) { m4(a: 1) {} } + + f = ->(&nil) { :ok } + assert_equal :ok, f.call + assert_raise(ArgumentError) { f.call {} } +end diff --git a/test/t/vformat.rb b/test/t/vformat.rb index f645351ee7..06a30a9694 100644 --- a/test/t/vformat.rb +++ b/test/t/vformat.rb @@ -1,4 +1,3 @@ -# coding: utf-8-emacs def sclass(v) class << v self @@ -22,7 +21,7 @@ class << v assert_match '#>>', vf.v('%t', sclass({})) assert_equal 'string and length', vf.l('string %l length', 'andante', 3) assert_equal '`n`: sym', vf.n('`n`: %n', :sym) - assert_equal '%C文字列%', vf.s('%s', '%C文字列%') + assert_equal '%C文字列%', vf.s('%s', '%C文字列%') assert_equal '`C`: Kernel module', vf.C('`C`: %C module', Kernel) assert_equal '`C`: NilClass', vf.C('`C`: %C', nil.class) assert_match '#>', vf.C('%C', sclass("")) @@ -40,10 +39,10 @@ class << v assert_equal 'Class', vf.v('%Y', sclass({})) assert_match '#>', vf.v('%v', sclass("")) assert_equal '`v`: 1...3', vf.v('`v`: %v', 1...3) - assert_equal '`S`: {:a=>1, "b"=>"c"}', vf.v('`S`: %S', {a: 1, "b" => ?c}) + assert_equal '`S`: {a: 1, "b" => "c"}', vf.v('`S`: %S', {a: 1, "b" => ?c}) assert_equal 'percent: %', vf.z('percent: %%') assert_equal '"I": inspect char', vf.c('%!c: inspect char', ?I) - assert_equal '709: inspect mrb_int', vf.i('%!d: inspect mrb_int', 709) + assert_equal '709: inspect mrb_int', vf.i('%!i: inspect mrb_int', 709) assert_equal '"a\x00b\xff"', vf.l('%!l', "a\000b\xFFc\000d", 4) assert_equal ':"&.": inspect symbol', vf.n('%!n: inspect symbol', :'&.') assert_equal 'inspect "String"', vf.v('inspect %!v', 'String') diff --git a/tools/gen_pow10_tab.rb b/tools/gen_pow10_tab.rb new file mode 100755 index 0000000000..07cec1885d --- /dev/null +++ b/tools/gen_pow10_tab.rb @@ -0,0 +1,68 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +# Generate pow10 table for mruby unrounded scaling (fp_uscale.c) +# +# Uses exact integer arithmetic only. +# For each p in [-343, 341], computes (hi, lo) such that: +# 10^p ~= (hi * 2^64 - lo) * 2^pe +# where pe = floor(p * log2(10)) - 127 +# +# The 128-bit value pm = hi * 2^64 - lo is in [2^127, 2^128). + +POW10_MIN = -343 +POW10_MAX = 341 + +def generate_entry(p) + if p >= 0 + val = 10**p + bit_len = val.bit_length + pe = bit_len - 128 + if pe >= 0 + mask = (1 << pe) - 1 + pm = (val >> pe) + ((val & mask) != 0 ? 1 : 0) + else + pm = val << (-pe) + end + else + abs_p = -p + denom = 10**abs_p + # pe = floor(p * log2(10)) - 127 + # Ruby's integer division of negative numbers does floor division + pe_est = (p * 108853 >> 15) - 127 + + numerator = 1 << (-pe_est) + pm = (numerator + denom - 1) / denom + + # Adjust pe if pm is out of range [2^127, 2^128) + while pm >= (1 << 128) + pe_est += 1 + numerator = 1 << (-pe_est) + pm = (numerator + denom - 1) / denom + end + while pm < (1 << 127) + pe_est -= 1 + numerator = 1 << (-pe_est) + pm = (numerator + denom - 1) / denom + end + end + + raise "pm out of range for p=#{p}: #{pm.bit_length}" unless pm.bit_length == 128 + + hi = (pm >> 64) + ((pm & ((1 << 64) - 1)) != 0 ? 1 : 0) + lo = (hi << 64) - pm + + raise "hi out of range for p=#{p}" unless hi >= (1 << 63) && hi < (1 << 64) + + { hi: hi, lo: lo } +end + +def main + entries = (POW10_MIN..POW10_MAX).map { |p| [p, generate_entry(p)] } + + entries.each do |p, e| + printf(" {0x%016xULL, 0x%016xULL},\n", e[:hi], e[:lo]) + end +end + +main diff --git a/tools/lrama/LEGAL.md b/tools/lrama/LEGAL.md new file mode 100644 index 0000000000..5f2e1dbddf --- /dev/null +++ b/tools/lrama/LEGAL.md @@ -0,0 +1,12 @@ +# LEGAL NOTICE INFORMATION + +All the files in this distribution are covered under the MIT License except some files +mentioned below. + +## GNU General Public License version 3 + +These files are licensed under the GNU General Public License version 3 or later. See these files for more information. + +- template/bison/\_yacc.h +- template/bison/yacc.c +- template/bison/yacc.h diff --git a/tools/lrama/MIT b/tools/lrama/MIT new file mode 100644 index 0000000000..b23d5210d5 --- /dev/null +++ b/tools/lrama/MIT @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2023 Yuichiro Kaneko + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/tools/lrama/NEWS.md b/tools/lrama/NEWS.md new file mode 100644 index 0000000000..dcf40b50b2 --- /dev/null +++ b/tools/lrama/NEWS.md @@ -0,0 +1,696 @@ +# NEWS for Lrama + +## Lrama 0.7.0 (2025-01-21) + +## [EXPERIMENTAL] Support the generation of the IELR(1) parser described in this paper + +Support the generation of the IELR(1) parser described in this paper. +https://www.sciencedirect.com/science/article/pii/S0167642309001191 + +If you use IELR(1) parser, you can write the following directive in your grammar file. + +```yacc +%define lr.type ielr +``` + +But, currently IELR(1) parser is experimental feature. If you find any bugs, please report it to us. Thank you. + +## Support `-t` option as same as `--debug` option + +Support to `-t` option as same as `--debug` option. +These options align with Bison behavior. So same as `--debug` option. + +## Trace only explicit rules + +Support to trace only explicit rules. +If you use `--trace=rules` option, it shows include mid-rule actions. If you want to show only explicit rules, you can use `--trace=only-explicit-rules` option. + +Example: + +```yacc +%{ +%} +%union { + int i; +} +%token number +%type program +%% +program : number { printf("%d", $1); } number { $$ = $1 + $3; } + ; +%% +``` + +Result of `--trace=rules`: + +```console +$ exe/lrama --trace=rules sample.y +Grammar rules: +$accept -> program YYEOF +$@1 -> ε +program -> number $@1 number +``` + +Result of `--trace=only-explicit-rules`: + +```console +$ exe/lrama --trace=explicit-rules sample.y +Grammar rules: +$accept -> program YYEOF +program -> number number +``` + +## Lrama 0.6.11 (2024-12-23) + +### Add support for %type declarations using %nterm in Nonterminal Symbols + +Allow to use `%nterm` in Nonterminal Symbols for `%type` declarations. + +```yacc +%nterm nonterminal… +``` + +This directive is also supported for compatibility with Bison, and only non-terminal symbols are allowed. In other words, definitions like the following will result in an error: + +```yacc +%{ +// Prologue +%} + +%token EOI 0 "EOI" +%nterm EOI + +%% + +program: /* empty */ + ; +``` + +It show an error message like the following: + +```command +❯ exe/lrama nterm.y +nterm.y:6:7: symbol EOI redeclared as a nonterminal +%nterm EOI + ^^^ +``` + +## Lrama 0.6.10 (2024-09-11) + +### Aliased Named References for actions of RHS in parameterizing rules + +Allow to use aliased named references for actions of RHS in parameterizing rules. + +```yacc +%rule sum(X, Y): X[summand] '+' Y[addend] { $$ = $summand + $addend } + ; +``` + +https://github.com/ruby/lrama/pull/410 + +### Named References for actions of RHS in parameterizing rules caller side + +Allow to use named references for actions of RHS in parameterizing rules caller side. + +```yacc +opt_nl: '\n'?[nl] { $$ = $nl; } + ; +``` + +https://github.com/ruby/lrama/pull/414 + +### Widen the definable position of parameterizing rules + +Allow to define parameterizing rules in the middle of the grammar. + +```yacc +%rule defined_option(X): /* empty */ + | X + ; + +%% + +program : defined_option(number) + | defined_list(number) + ; + +%rule defined_list(X): /* empty */ /* <--- here */ + | defined_list(X) number + ; +``` + +https://github.com/ruby/lrama/pull/420 + +### Report unused terminal symbols + +Support to report unused terminal symbols. +Run `exe/lrama --report=terms` to show unused terminal symbols. + +```console +$ exe/lrama --report=terms sample/calc.y + 11 Unused Terms + 0 YYerror + 1 YYUNDEF + 2 '\\\\' + 3 '\\13' + 4 keyword_class2 + 5 tNUMBER + 6 tPLUS + 7 tMINUS + 8 tEQ + 9 tEQEQ + 10 '>' +``` + +https://github.com/ruby/lrama/pull/439 + +### Report unused rules + +Support to report unused rules. +Run `exe/lrama --report=rules` to show unused rules. + +```console +$ exe/lrama --report=rules sample/calc.y + 3 Unused Rules + 0 unused_option + 1 unused_list + 2 unused_nonempty_list +``` + +https://github.com/ruby/lrama/pull/441 + +### Ensure compatibility with Bison for `%locations` directive + +Support `%locations` directive to ensure compatibility with Bison. +Change to `%locations` directive not set by default. + +https://github.com/ruby/lrama/pull/446 + +### Diagnostics report for parameterizing rules redefine + +Support to warning redefined parameterizing rules. +Run `exe/lrama -W` or `exe/lrama --warnings` to show redefined parameterizing rules. + +```console +$ exe/lrama -W sample/calc.y +parameterizing rule redefined: redefined_method(X) +parameterizing rule redefined: redefined_method(X) +``` + +https://github.com/ruby/lrama/pull/448 + +### Support `-v` and `--verbose` option + +Support to `-v` and `--verbose` option. +These options align with Bison behavior. So same as '--report=state' option. + +https://github.com/ruby/lrama/pull/457 + +## Lrama 0.6.9 (2024-05-02) + +### Callee side tag specification of parameterizing rules + +Allow to specify tag on callee side of parameterizing rules. + +```yacc +%union { + int i; +} + +%rule with_tag(X) : X { $$ = $1; } + ; +``` + +### Named References for actions of RHS in parameterizing rules + +Allow to use named references for actions of RHS in parameterizing rules. + +```yacc +%rule option(number): /* empty */ + | number { $$ = $number; } + ; +``` + +## Lrama 0.6.8 (2024-04-29) + +### Nested parameterizing rules with tag + +Allow to nested parameterizing rules with tag. + +```yacc +%union { + int i; +} + +%rule nested_nested_option(X): /* empty */ + | X + ; + +%rule nested_option(X): /* empty */ + | nested_nested_option(X) + ; + +%rule option(Y): /* empty */ + | nested_option(Y) + ; +``` + +## Lrama 0.6.7 (2024-04-28) + +### RHS of user defined parameterizing rules contains `'symbol'?`, `'symbol'+` and `'symbol'*`. + +User can use `'symbol'?`, `'symbol'+` and `'symbol'*` in RHS of user defined parameterizing rules. + +``` +%rule with_word_seps(X): /* empty */ + | X ' '+ + ; +``` + +## Lrama 0.6.6 (2024-04-27) + +### Trace actions + +Support trace actions for debugging. +Run `exe/lrama --trace=actions` to show grammar rules with actions. + +```console +$ exe/lrama --trace=actions sample/calc.y +Grammar rules with actions: +$accept -> list, YYEOF {} +list -> ε {} +list -> list, LF {} +list -> list, expr, LF { printf("=> %d\n", $2); } +expr -> NUM {} +expr -> expr, '+', expr { $$ = $1 + $3; } +expr -> expr, '-', expr { $$ = $1 - $3; } +expr -> expr, '*', expr { $$ = $1 * $3; } +expr -> expr, '/', expr { $$ = $1 / $3; } +expr -> '(', expr, ')' { $$ = $2; } +``` + +### Inlining + +Support inlining for rules. +The `%inline` directive causes all references to symbols to be replaced with its definition. + +```yacc +%rule %inline op: PLUS { + } + | TIMES { * } + ; + +%% + +expr : number { $$ = $1; } + | expr op expr { $$ = $1 $2 $3; } + ; +``` + +as same as + +```yacc +expr : number { $$ = $1; } + | expr '+' expr { $$ = $1 + $3; } + | expr '*' expr { $$ = $1 * $3; } + ; +``` + +## Lrama 0.6.5 (2024-03-25) + +### Typed Midrule Actions + +User can specify the type of mid rule action by tag (``) instead of specifying it with in an action. + +```yacc +primary: k_case expr_value terms? + { + $$ = p->case_labels; + p->case_labels = Qnil; + } + case_body + k_end + { + ... + } +``` + +can be written as + +```yacc +primary: k_case expr_value terms? + { + $$ = p->case_labels; + p->case_labels = Qnil; + } + case_body + k_end + { + ... + } +``` + +`%destructor` for midrule action is invoked only when tag is specified by Typed Midrule Actions. + +Difference from Bison's Typed Midrule Actions is that tag is postposed in Lrama however it's preposed in Bison. + +Bison supports this feature from 3.1. + +## Lrama 0.6.4 (2024-03-22) + +### Parameterizing rules (preceded, terminated, delimited) + +Support `preceded`, `terminated` and `delimited` rules. + +```text +program: preceded(opening, X) + +// Expanded to + +program: preceded_opening_X +preceded_opening_X: opening X +``` + +``` +program: terminated(X, closing) + +// Expanded to + +program: terminated_X_closing +terminated_X_closing: X closing +``` + +``` +program: delimited(opening, X, closing) + +// Expanded to + +program: delimited_opening_X_closing +delimited_opening_X_closing: opening X closing +``` + +https://github.com/ruby/lrama/pull/382 + +### Support `%destructor` declaration + +User can set codes for freeing semantic value resources by using `%destructor`. +In general, these resources are freed by actions or after parsing. +However if syntax error happens in parsing, these codes may not be executed. +Codes associated to `%destructor` are executed when semantic value is popped from the stack by an error. + +```yacc +%token NUM +%type expr2 +%type expr + +%destructor { + printf("destructor for val1: %d\n", $$); +} // printer for TAG + +%destructor { + printf("destructor for val2: %d\n", $$); +} + +%destructor { + printf("destructor for expr: %d\n", $$); +} expr // printer for symbol +``` + +Bison supports this feature from 1.75b. + +https://github.com/ruby/lrama/pull/385 + +## Lrama 0.6.3 (2024-02-15) + +### Bring Your Own Stack + +Provide functionalities for Bring Your Own Stack. + +Ruby’s Ripper library requires their own semantic value stack to manage Ruby Objects returned by user defined callback method. Currently Ripper uses semantic value stack (`yyvsa`) which is used by parser to manage Node. This hack introduces some limitation on Ripper. For example, Ripper can not execute semantic analysis depending on Node structure. + +Lrama introduces two features to support another semantic value stack by parser generator users. + +1. Callback entry points + +User can emulate semantic value stack by these callbacks. +Lrama provides these five callbacks. Registered functions are called when each event happen. For example %after-shift function is called when shift happens on original semantic value stack. + +- `%after-shift` function_name +- `%before-reduce` function_name +- `%after-reduce` function_name +- `%after-shift-error-token` function_name +- `%after-pop-stack` function_name + +2. `$:n` variable to access index of each grammar symbols + +User also needs to access semantic value of their stack in grammar action. `$:n` provides the way to access to it. `$:n` is translated to the minus index from the top of the stack. +For example + +```yacc +primary: k_if expr_value then compstmt if_tail k_end + { + /*% ripper: if!($:2, $:4, $:5) %*/ + /* $:2 = -5, $:4 = -3, $:5 = -2. */ + } +``` + +https://github.com/ruby/lrama/pull/367 + +## Lrama 0.6.2 (2024-01-27) + +### %no-stdlib directive + +If `%no-stdlib` directive is set, Lrama doesn't load Lrama standard library for +parameterizing rules, stdlib.y. + +https://github.com/ruby/lrama/pull/344 + +## Lrama 0.6.1 (2024-01-13) + +### Nested parameterizing rules + +Allow to pass an instantiated rule to other parameterizing rules. + +```yacc +%rule constant(X) : X + ; + +%rule option(Y) : /* empty */ + | Y + ; + +%% + +program : option(constant(number)) // Nested rule + ; +%% +``` + +Allow to use nested parameterizing rules when define parameterizing rules. + +```yacc +%rule option(x) : /* empty */ + | X + ; + +%rule double(Y) : Y Y + ; + +%rule double_opt(A) : option(double(A)) // Nested rule + ; + +%% + +program : double_opt(number) + ; + +%% +``` + +https://github.com/ruby/lrama/pull/337 + +## Lrama 0.6.0 (2023-12-25) + +### User defined parameterizing rules + +Allow to define parameterizing rule by `%rule` directive. + +```yacc +%rule pair(X, Y): X Y { $$ = $1 + $2; } + ; + +%% + +program: stmt + ; + +stmt: pair(ODD, EVEN) + | pair(EVEN, ODD) + ; +``` + +https://github.com/ruby/lrama/pull/285 + +## Lrama 0.5.11 (2023-12-02) + +### Type specification of parameterizing rules + +Allow to specify type of rules by specifying tag, `` in below example. +Tag is post-modification style. + +```yacc +%union { + int i; +} + +%% + +program : option(number) + | number_alias? + ; +``` + +https://github.com/ruby/lrama/pull/272 + +## Lrama 0.5.10 (2023-11-18) + +### Parameterizing rules (option, nonempty_list, list) + +Support function call style parameterizing rules for `option`, `nonempty_list` and `list`. + +https://github.com/ruby/lrama/pull/197 + +### Parameterizing rules (separated_list) + +Support `separated_list` and `separated_nonempty_list` parameterizing rules. + +```text +program: separated_list(',', number) + +// Expanded to + +program: separated_list_number +separated_list_number: ε +separated_list_number: separated_nonempty_list_number +separated_nonempty_list_number: number +separated_nonempty_list_number: separated_nonempty_list_number ',' number +``` + +``` +program: separated_nonempty_list(',', number) + +// Expanded to + +program: separated_nonempty_list_number +separated_nonempty_list_number: number +separated_nonempty_list_number: separated_nonempty_list_number ',' number +``` + +https://github.com/ruby/lrama/pull/204 + +## Lrama 0.5.9 (2023-11-05) + +### Parameterizing rules (suffix) + +Parameterizing rules are template of rules. +It's very common pattern to write "list" grammar rule like: + +```yacc +opt_args: /* none */ + | args + ; + +args: arg + | args arg +``` + +Lrama supports these suffixes: + +- `?`: option +- `+`: nonempty list +- `*`: list + +Idea of Parameterizing rules comes from Menhir LR(1) parser generator (https://gallium.inria.fr/~fpottier/menhir/manual.html#sec32). + +https://github.com/ruby/lrama/pull/181 + +## Lrama 0.5.7 (2023-10-23) + +### Racc parser + +Replace Lrama's parser from hand written parser to LR parser generated by Racc. +Lrama uses `--embedded` option to generate LR parser because Racc is changed from default gem to bundled gem by Ruby 3.3 (https://github.com/ruby/lrama/pull/132). + +https://github.com/ruby/lrama/pull/62 + +## Lrama 0.5.4 (2023-08-17) + +### Runtime configuration for error recovery + +Make error recovery function configurable on runtime by two new macros. + +- `YYMAXREPAIR`: Expected to return max length of repair operations. `%parse-param` is passed to this function. +- `YYERROR_RECOVERY_ENABLED`: Expected to return bool value to determine error recovery is enabled or not. `%parse-param` is passed to this function. + +https://github.com/ruby/lrama/pull/74 + +## Lrama 0.5.3 (2023-08-05) + +### Error Recovery + +Support token insert base Error Recovery. +`-e` option is needed to generate parser with error recovery functions. + +https://github.com/ruby/lrama/pull/44 + +## Lrama 0.5.2 (2023-06-14) + +### Named References + +Instead of positional references like `$1` or `$$`, +named references allow to access to symbol by name. + +```yacc +primary: k_class cpath superclass bodystmt k_end + { + $primary = new_class($cpath, $bodystmt, $superclass); + } +``` + +Alias name can be declared. + +```yacc +expr[result]: expr[ex-left] '+' expr[ex.right] + { + $result = $[ex-left] + $[ex.right]; + } +``` + +Bison supports this feature from 2.5. + +### Add parse params to some macros and functions + +`%parse-param` are added to these macros and functions to remove ytab.sed hack from Ruby. + +- `YY_LOCATION_PRINT` +- `YY_SYMBOL_PRINT` +- `yy_stack_print` +- `YY_STACK_PRINT` +- `YY_REDUCE_PRINT` +- `yysyntax_error` + +https://github.com/ruby/lrama/pull/40 + +See also: https://github.com/ruby/ruby/pull/7807 + +## Lrama 0.5.0 (2023-05-17) + +### stdin mode + +When `-` is given as grammar file name, reads the grammar source from STDIN, and takes the next argument as the input file name. This mode helps pre-process a grammar source. + +https://github.com/ruby/lrama/pull/8 + +## Lrama 0.4.0 (2023-05-13) + +This is the first version migrated to Ruby. +This version generates "parse.c" compatible with Bison 3.8.2. diff --git a/tools/lrama/exe/lrama b/tools/lrama/exe/lrama new file mode 100755 index 0000000000..1aece5d141 --- /dev/null +++ b/tools/lrama/exe/lrama @@ -0,0 +1,7 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +$LOAD_PATH << File.join(__dir__, "../lib") +require "lrama" + +Lrama::Command.new.run(ARGV.dup) diff --git a/tools/lrama/lib/lrama.rb b/tools/lrama/lib/lrama.rb new file mode 100644 index 0000000000..fe2e05807c --- /dev/null +++ b/tools/lrama/lib/lrama.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +require_relative "lrama/bitmap" +require_relative "lrama/command" +require_relative "lrama/context" +require_relative "lrama/counterexamples" +require_relative "lrama/diagnostics" +require_relative "lrama/digraph" +require_relative "lrama/grammar" +require_relative "lrama/grammar_validator" +require_relative "lrama/lexer" +require_relative "lrama/logger" +require_relative "lrama/option_parser" +require_relative "lrama/options" +require_relative "lrama/output" +require_relative "lrama/parser" +require_relative "lrama/report" +require_relative "lrama/state" +require_relative "lrama/states" +require_relative "lrama/states_reporter" +require_relative "lrama/trace_reporter" +require_relative "lrama/version" diff --git a/tools/lrama/lib/lrama/bitmap.rb b/tools/lrama/lib/lrama/bitmap.rb new file mode 100644 index 0000000000..098c6e0b77 --- /dev/null +++ b/tools/lrama/lib/lrama/bitmap.rb @@ -0,0 +1,34 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + module Bitmap + # @rbs (Array[Integer] ary) -> Integer + def self.from_array(ary) + bit = 0 + + ary.each do |int| + bit |= (1 << int) + end + + bit + end + + # @rbs (Integer int) -> Array[Integer] + def self.to_array(int) + a = [] #: Array[Integer] + i = 0 + + while int > 0 do + if int & 1 == 1 + a << i + end + + i += 1 + int >>= 1 + end + + a + end + end +end diff --git a/tools/lrama/lib/lrama/command.rb b/tools/lrama/lib/lrama/command.rb new file mode 100644 index 0000000000..3ff39d578d --- /dev/null +++ b/tools/lrama/lib/lrama/command.rb @@ -0,0 +1,68 @@ +# frozen_string_literal: true + +module Lrama + class Command + LRAMA_LIB = File.realpath(File.join(File.dirname(__FILE__))) + STDLIB_FILE_PATH = File.join(LRAMA_LIB, 'grammar', 'stdlib.y') + + def run(argv) + begin + options = OptionParser.new.parse(argv) + rescue => e + message = e.message + message = message.gsub(/.+/, "\e[1m\\&\e[m") if Exception.to_tty? + abort message + end + + Report::Duration.enable if options.trace_opts[:time] + + text = options.y.read + options.y.close if options.y != STDIN + begin + grammar = Lrama::Parser.new(text, options.grammar_file, options.debug, options.define).parse + unless grammar.no_stdlib + stdlib_grammar = Lrama::Parser.new(File.read(STDLIB_FILE_PATH), STDLIB_FILE_PATH, options.debug).parse + grammar.insert_before_parameterizing_rules(stdlib_grammar.parameterizing_rules) + end + grammar.prepare + grammar.validate! + rescue => e + raise e if options.debug + message = e.message + message = message.gsub(/.+/, "\e[1m\\&\e[m") if Exception.to_tty? + abort message + end + states = Lrama::States.new(grammar, trace_state: (options.trace_opts[:automaton] || options.trace_opts[:closure])) + states.compute + states.compute_ielr if grammar.ielr_defined? + context = Lrama::Context.new(states) + + if options.report_file + reporter = Lrama::StatesReporter.new(states) + File.open(options.report_file, "w+") do |f| + reporter.report(f, **options.report_opts) + end + end + + reporter = Lrama::TraceReporter.new(grammar) + reporter.report(**options.trace_opts) + + File.open(options.outfile, "w+") do |f| + Lrama::Output.new( + out: f, + output_file_path: options.outfile, + template_name: options.skeleton, + grammar_file_path: options.grammar_file, + header_file_path: options.header_file, + context: context, + grammar: grammar, + error_recovery: options.error_recovery, + ).render + end + + logger = Lrama::Logger.new + exit false unless Lrama::GrammarValidator.new(grammar, states, logger).valid? + Lrama::Diagnostics.new(grammar, states, logger).run(options.diagnostic) + end + end +end diff --git a/tools/lrama/lib/lrama/context.rb b/tools/lrama/lib/lrama/context.rb new file mode 100644 index 0000000000..9f406f8de0 --- /dev/null +++ b/tools/lrama/lib/lrama/context.rb @@ -0,0 +1,499 @@ +# frozen_string_literal: true + +require_relative "report/duration" + +module Lrama + # This is passed to a template + class Context + include Report::Duration + + ErrorActionNumber = -Float::INFINITY + BaseMin = -Float::INFINITY + + # TODO: It might be better to pass `states` to Output directly? + attr_reader :states, :yylast, :yypact_ninf, :yytable_ninf, :yydefact, :yydefgoto + + def initialize(states) + @states = states + @yydefact = nil + @yydefgoto = nil + # Array of array + @_actions = [] + + compute_tables + end + + # enum yytokentype + def yytokentype + @states.terms.reject do |term| + 0 < term.token_id && term.token_id < 128 + end.map do |term| + [term.id.s_value, term.token_id, term.display_name] + end.unshift(["YYEMPTY", -2, nil]) + end + + # enum yysymbol_kind_t + def yysymbol_kind_t + @states.symbols.map do |sym| + [sym.enum_name, sym.number, sym.comment] + end.unshift(["YYSYMBOL_YYEMPTY", -2, nil]) + end + + # State number of final (accepted) state + def yyfinal + @states.states.find do |state| + state.items.find do |item| + item.lhs.accept_symbol? && item.end_of_rule? + end + end.id + end + + # Number of terms + def yyntokens + @states.terms.count + end + + # Number of nterms + def yynnts + @states.nterms.count + end + + # Number of rules + def yynrules + @states.rules.count + end + + # Number of states + def yynstates + @states.states.count + end + + # Last token number + def yymaxutok + @states.terms.map(&:token_id).max + end + + # YYTRANSLATE + # + # yytranslate is a mapping from token id to symbol number + def yytranslate + # 2 is YYSYMBOL_YYUNDEF + a = Array.new(yymaxutok, 2) + + @states.terms.each do |term| + a[term.token_id] = term.number + end + + return a + end + + def yytranslate_inverted + a = Array.new(@states.symbols.count, @states.undef_symbol.token_id) + + @states.terms.each do |term| + a[term.number] = term.token_id + end + + return a + end + + # Mapping from rule number to line number of the rule is defined. + # Dummy rule is appended as the first element whose value is 0 + # because 0 means error in yydefact. + def yyrline + a = [0] + + @states.rules.each do |rule| + a << rule.lineno + end + + return a + end + + # Mapping from symbol number to its name + def yytname + @states.symbols.sort_by(&:number).map do |sym| + sym.display_name + end + end + + def yypact + @base[0...yynstates] + end + + def yypgoto + @base[yynstates..-1] + end + + def yytable + @table + end + + def yycheck + @check + end + + def yystos + @states.states.map do |state| + state.accessing_symbol.number + end + end + + # Mapping from rule number to symbol number of LHS. + # Dummy rule is appended as the first element whose value is 0 + # because 0 means error in yydefact. + def yyr1 + a = [0] + + @states.rules.each do |rule| + a << rule.lhs.number + end + + return a + end + + # Mapping from rule number to length of RHS. + # Dummy rule is appended as the first element whose value is 0 + # because 0 means error in yydefact. + def yyr2 + a = [0] + + @states.rules.each do |rule| + a << rule.rhs.count + end + + return a + end + + private + + # Compute these + # + # See also: "src/tables.c" of Bison. + # + # * yydefact + # * yydefgoto + # * yypact and yypgoto + # * yytable + # * yycheck + # * yypact_ninf + # * yytable_ninf + def compute_tables + report_duration(:compute_yydefact) { compute_yydefact } + report_duration(:compute_yydefgoto) { compute_yydefgoto } + report_duration(:sort_actions) { sort_actions } + # debug_sorted_actions + report_duration(:compute_packed_table) { compute_packed_table } + end + + def vectors_count + @states.states.count + @states.nterms.count + end + + # In compressed table, rule 0 is appended as an error case + # and reduce is represented as minus number. + def rule_id_to_action_number(rule_id) + (rule_id + 1) * -1 + end + + # Symbol number is assigned to term first then nterm. + # This method calculates sequence_number for nterm. + def nterm_number_to_sequence_number(nterm_number) + nterm_number - @states.terms.count + end + + # Vector is states + nterms + def nterm_number_to_vector_number(nterm_number) + @states.states.count + (nterm_number - @states.terms.count) + end + + def compute_yydefact + # Default action (shift/reduce/error) for each state. + # Index is state id, value is `rule id + 1` of a default reduction. + @yydefact = Array.new(@states.states.count, 0) + + @states.states.each do |state| + # Action number means + # + # * number = 0, default action + # * number = -Float::INFINITY, error by %nonassoc + # * number > 0, shift then move to state "number" + # * number < 0, reduce by "-number" rule. Rule "number" is already added by 1. + actions = Array.new(@states.terms.count, 0) + + if state.reduces.map(&:selected_look_ahead).any? {|la| !la.empty? } + # Iterate reduces with reverse order so that first rule is used. + state.reduces.reverse_each do |reduce| + reduce.look_ahead.each do |term| + actions[term.number] = rule_id_to_action_number(reduce.rule.id) + end + end + end + + # Shift is selected when S/R conflict exists. + state.selected_term_transitions.each do |shift, next_state| + actions[shift.next_sym.number] = next_state.id + end + + state.resolved_conflicts.select do |conflict| + conflict.which == :error + end.each do |conflict| + actions[conflict.symbol.number] = ErrorActionNumber + end + + # If default_reduction_rule, replace default_reduction_rule in + # actions with zero. + if state.default_reduction_rule + actions.map! do |e| + if e == rule_id_to_action_number(state.default_reduction_rule.id) + 0 + else + e + end + end + end + + # If no default_reduction_rule, default behavior is an + # error then replace ErrorActionNumber with zero. + unless state.default_reduction_rule + actions.map! do |e| + if e == ErrorActionNumber + 0 + else + e + end + end + end + + s = actions.each_with_index.map do |n, i| + [i, n] + end.reject do |i, n| + # Remove default_reduction_rule entries + n == 0 + end + + if s.count != 0 + # Entry of @_actions is an array of + # + # * State id + # * Array of tuple, [from, to] where from is term number and to is action. + # * The number of "Array of tuple" used by sort_actions + # * "width" used by sort_actions + @_actions << [state.id, s, s.count, s.last[0] - s.first[0] + 1] + end + + @yydefact[state.id] = state.default_reduction_rule ? state.default_reduction_rule.id + 1 : 0 + end + end + + def compute_yydefgoto + # Default GOTO (nterm transition) for each nterm. + # Index is sequence number of nterm, value is state id + # of a default nterm transition destination. + @yydefgoto = Array.new(@states.nterms.count, 0) + # Mapping from nterm to next_states + nterm_to_next_states = {} + + @states.states.each do |state| + state.nterm_transitions.each do |shift, next_state| + key = shift.next_sym + nterm_to_next_states[key] ||= [] + nterm_to_next_states[key] << [state, next_state] # [from_state, to_state] + end + end + + @states.nterms.each do |nterm| + if (states = nterm_to_next_states[nterm]) + default_state = states.map(&:last).group_by {|s| s }.max_by {|_, v| v.count }.first + default_goto = default_state.id + not_default_gotos = [] + states.each do |from_state, to_state| + next if to_state.id == default_goto + not_default_gotos << [from_state.id, to_state.id] + end + else + default_goto = 0 + not_default_gotos = [] + end + + k = nterm_number_to_sequence_number(nterm.number) + @yydefgoto[k] = default_goto + + if not_default_gotos.count != 0 + v = nterm_number_to_vector_number(nterm.number) + + # Entry of @_actions is an array of + # + # * Nterm number as vector number + # * Array of tuple, [from, to] where from is state number and to is state number. + # * The number of "Array of tuple" used by sort_actions + # * "width" used by sort_actions + @_actions << [v, not_default_gotos, not_default_gotos.count, not_default_gotos.last[0] - not_default_gotos.first[0] + 1] + end + end + end + + def sort_actions + # This is not same with #sort_actions + # + # @sorted_actions = @_actions.sort_by do |_, _, count, width| + # [-width, -count] + # end + + @sorted_actions = [] + + @_actions.each do |action| + if @sorted_actions.empty? + @sorted_actions << action + next + end + + j = @sorted_actions.count - 1 + _state_id, _froms_and_tos, count, width = action + + while (j >= 0) do + case + when @sorted_actions[j][3] < width + j -= 1 + when @sorted_actions[j][3] == width && @sorted_actions[j][2] < count + j -= 1 + else + break + end + end + + @sorted_actions.insert(j + 1, action) + end + end + + def debug_sorted_actions + ary = Array.new + @sorted_actions.each do |state_id, froms_and_tos, count, width| + ary[state_id] = [state_id, froms_and_tos, count, width] + end + + print sprintf("table_print:\n\n") + + print sprintf("order [\n") + vectors_count.times do |i| + print sprintf("%d, ", @sorted_actions[i] ? @sorted_actions[i][0] : 0) + print "\n" if i % 10 == 9 + end + print sprintf("]\n\n") + + print sprintf("width [\n") + vectors_count.times do |i| + print sprintf("%d, ", ary[i] ? ary[i][3] : 0) + print "\n" if i % 10 == 9 + end + print sprintf("]\n\n") + + print sprintf("tally [\n") + vectors_count.times do |i| + print sprintf("%d, ", ary[i] ? ary[i][2] : 0) + print "\n" if i % 10 == 9 + end + print sprintf("]\n\n") + end + + def compute_packed_table + # yypact and yypgoto + @base = Array.new(vectors_count, BaseMin) + # yytable + @table = [] + # yycheck + @check = [] + # Key is froms_and_tos, value is index position + pushed = {} + used_res = {} + lowzero = 0 + high = 0 + + @sorted_actions.each do |state_id, froms_and_tos, _, _| + if (res = pushed[froms_and_tos]) + @base[state_id] = res + next + end + + res = lowzero - froms_and_tos.first[0] + + while true do + ok = true + + froms_and_tos.each do |from, to| + loc = res + from + + if @table[loc] + # If the cell of table is set, can not use the cell. + ok = false + break + end + end + + if ok && used_res[res] + ok = false + end + + if ok + break + else + res += 1 + end + end + + loc = 0 + + froms_and_tos.each do |from, to| + loc = res + from + + @table[loc] = to + @check[loc] = from + end + + while (@table[lowzero]) do + lowzero += 1 + end + + high = loc if high < loc + + @base[state_id] = res + pushed[froms_and_tos] = res + used_res[res] = true + end + + @yylast = high + + # replace_ninf + @yypact_ninf = (@base.reject {|i| i == BaseMin } + [0]).min - 1 + @base.map! do |i| + case i + when BaseMin + @yypact_ninf + else + i + end + end + + @yytable_ninf = (@table.compact.reject {|i| i == ErrorActionNumber } + [0]).min - 1 + @table.map! do |i| + case i + when nil + 0 + when ErrorActionNumber + @yytable_ninf + else + i + end + end + + @check.map! do |i| + case i + when nil + -1 + else + i + end + end + end + end +end diff --git a/tools/lrama/lib/lrama/counterexamples.rb b/tools/lrama/lib/lrama/counterexamples.rb new file mode 100644 index 0000000000..ee2b5d5959 --- /dev/null +++ b/tools/lrama/lib/lrama/counterexamples.rb @@ -0,0 +1,298 @@ +# frozen_string_literal: true + +require "set" + +require_relative "counterexamples/derivation" +require_relative "counterexamples/example" +require_relative "counterexamples/path" +require_relative "counterexamples/production_path" +require_relative "counterexamples/start_path" +require_relative "counterexamples/state_item" +require_relative "counterexamples/transition_path" +require_relative "counterexamples/triple" + +module Lrama + # See: https://www.cs.cornell.edu/andru/papers/cupex/cupex.pdf + # 4. Constructing Nonunifying Counterexamples + class Counterexamples + attr_reader :transitions, :productions + + def initialize(states) + @states = states + setup_transitions + setup_productions + end + + def to_s + "#" + end + alias :inspect :to_s + + def compute(conflict_state) + conflict_state.conflicts.flat_map do |conflict| + case conflict.type + when :shift_reduce + # @type var conflict: State::ShiftReduceConflict + shift_reduce_example(conflict_state, conflict) + when :reduce_reduce + # @type var conflict: State::ReduceReduceConflict + reduce_reduce_examples(conflict_state, conflict) + end + end.compact + end + + private + + def setup_transitions + # Hash [StateItem, Symbol] => StateItem + @transitions = {} + # Hash [StateItem, Symbol] => Set(StateItem) + @reverse_transitions = {} + + @states.states.each do |src_state| + trans = {} #: Hash[Grammar::Symbol, State] + + src_state.transitions.each do |shift, next_state| + trans[shift.next_sym] = next_state + end + + src_state.items.each do |src_item| + next if src_item.end_of_rule? + sym = src_item.next_sym + dest_state = trans[sym] + + dest_state.kernels.each do |dest_item| + next unless (src_item.rule == dest_item.rule) && (src_item.position + 1 == dest_item.position) + src_state_item = StateItem.new(src_state, src_item) + dest_state_item = StateItem.new(dest_state, dest_item) + + @transitions[[src_state_item, sym]] = dest_state_item + + # @type var key: [StateItem, Grammar::Symbol] + key = [dest_state_item, sym] + @reverse_transitions[key] ||= Set.new + @reverse_transitions[key] << src_state_item + end + end + end + end + + def setup_productions + # Hash [StateItem] => Set(Item) + @productions = {} + # Hash [State, Symbol] => Set(Item). Symbol is nterm + @reverse_productions = {} + + @states.states.each do |state| + # LHS => Set(Item) + h = {} #: Hash[Grammar::Symbol, Set[States::Item]] + + state.closure.each do |item| + sym = item.lhs + + h[sym] ||= Set.new + h[sym] << item + end + + state.items.each do |item| + next if item.end_of_rule? + next if item.next_sym.term? + + sym = item.next_sym + state_item = StateItem.new(state, item) + # @type var key: [State, Grammar::Symbol] + key = [state, sym] + + @productions[state_item] = h[sym] + + @reverse_productions[key] ||= Set.new + @reverse_productions[key] << item + end + end + end + + def shift_reduce_example(conflict_state, conflict) + conflict_symbol = conflict.symbols.first + # @type var shift_conflict_item: ::Lrama::States::Item + shift_conflict_item = conflict_state.items.find { |item| item.next_sym == conflict_symbol } + path2 = shortest_path(conflict_state, conflict.reduce.item, conflict_symbol) + path1 = find_shift_conflict_shortest_path(path2, conflict_state, shift_conflict_item) + + Example.new(path1, path2, conflict, conflict_symbol, self) + end + + def reduce_reduce_examples(conflict_state, conflict) + conflict_symbol = conflict.symbols.first + path1 = shortest_path(conflict_state, conflict.reduce1.item, conflict_symbol) + path2 = shortest_path(conflict_state, conflict.reduce2.item, conflict_symbol) + + Example.new(path1, path2, conflict, conflict_symbol, self) + end + + def find_shift_conflict_shortest_path(reduce_path, conflict_state, conflict_item) + state_items = find_shift_conflict_shortest_state_items(reduce_path, conflict_state, conflict_item) + build_paths_from_state_items(state_items) + end + + def find_shift_conflict_shortest_state_items(reduce_path, conflict_state, conflict_item) + target_state_item = StateItem.new(conflict_state, conflict_item) + result = [target_state_item] + reversed_reduce_path = reduce_path.to_a.reverse + # Index for state_item + i = 0 + + while (path = reversed_reduce_path[i]) + # Index for prev_state_item + j = i + 1 + _j = j + + while (prev_path = reversed_reduce_path[j]) + if prev_path.production? + j += 1 + else + break + end + end + + state_item = path.to + prev_state_item = prev_path&.to + + if target_state_item == state_item || target_state_item.item.start_item? + result.concat( + reversed_reduce_path[_j..-1] #: Array[StartPath|TransitionPath|ProductionPath] + .map(&:to)) + break + end + + if target_state_item.item.beginning_of_rule? + queue = [] #: Array[Array[StateItem]] + queue << [target_state_item] + + # Find reverse production + while (sis = queue.shift) + si = sis.last + + # Reach to start state + if si.item.start_item? + sis.shift + result.concat(sis) + target_state_item = si + break + end + + if si.item.beginning_of_rule? + # @type var key: [State, Grammar::Symbol] + key = [si.state, si.item.lhs] + @reverse_productions[key].each do |item| + state_item = StateItem.new(si.state, item) + queue << (sis + [state_item]) + end + else + # @type var key: [StateItem, Grammar::Symbol] + key = [si, si.item.previous_sym] + @reverse_transitions[key].each do |prev_target_state_item| + next if prev_target_state_item.state != prev_state_item&.state + sis.shift + result.concat(sis) + result << prev_target_state_item + target_state_item = prev_target_state_item + i = j + queue.clear + break + end + end + end + else + # Find reverse transition + # @type var key: [StateItem, Grammar::Symbol] + key = [target_state_item, target_state_item.item.previous_sym] + @reverse_transitions[key].each do |prev_target_state_item| + next if prev_target_state_item.state != prev_state_item&.state + result << prev_target_state_item + target_state_item = prev_target_state_item + i = j + break + end + end + end + + result.reverse + end + + def build_paths_from_state_items(state_items) + state_items.zip([nil] + state_items).map do |si, prev_si| + case + when prev_si.nil? + StartPath.new(si) + when si.item.beginning_of_rule? + ProductionPath.new(prev_si, si) + else + TransitionPath.new(prev_si, si) + end + end + end + + def shortest_path(conflict_state, conflict_reduce_item, conflict_term) + # queue: is an array of [Triple, [Path]] + queue = [] #: Array[[Triple, Array[StartPath|TransitionPath|ProductionPath]]] + visited = {} #: Hash[Triple, true] + start_state = @states.states.first #: Lrama::State + raise "BUG: Start state should be just one kernel." if start_state.kernels.count != 1 + + start = Triple.new(start_state, start_state.kernels.first, Set.new([@states.eof_symbol])) + + queue << [start, [StartPath.new(start.state_item)]] + + while true + triple, paths = queue.shift + + next if visited[triple] + visited[triple] = true + + # Found + if triple.state == conflict_state && triple.item == conflict_reduce_item && triple.l.include?(conflict_term) + return paths + end + + # transition + triple.state.transitions.each do |shift, next_state| + next unless triple.item.next_sym && triple.item.next_sym == shift.next_sym + next_state.kernels.each do |kernel| + next if kernel.rule != triple.item.rule + t = Triple.new(next_state, kernel, triple.l) + queue << [t, paths + [TransitionPath.new(triple.state_item, t.state_item)]] + end + end + + # production step + triple.state.closure.each do |item| + next unless triple.item.next_sym && triple.item.next_sym == item.lhs + l = follow_l(triple.item, triple.l) + t = Triple.new(triple.state, item, l) + queue << [t, paths + [ProductionPath.new(triple.state_item, t.state_item)]] + end + + break if queue.empty? + end + + return nil + end + + def follow_l(item, current_l) + # 1. follow_L (A -> X1 ... Xn-1 • Xn) = L + # 2. follow_L (A -> X1 ... Xk • Xk+1 Xk+2 ... Xn) = {Xk+2} if Xk+2 is a terminal + # 3. follow_L (A -> X1 ... Xk • Xk+1 Xk+2 ... Xn) = FIRST(Xk+2) if Xk+2 is a nonnullable nonterminal + # 4. follow_L (A -> X1 ... Xk • Xk+1 Xk+2 ... Xn) = FIRST(Xk+2) + follow_L (A -> X1 ... Xk+1 • Xk+2 ... Xn) if Xk+2 is a nullable nonterminal + case + when item.number_of_rest_symbols == 1 + current_l + when item.next_next_sym.term? + Set.new([item.next_next_sym]) + when !item.next_next_sym.nullable + item.next_next_sym.first_set + else + item.next_next_sym.first_set + follow_l(item.new_by_next_position, current_l) + end + end + end +end diff --git a/tools/lrama/lib/lrama/counterexamples/derivation.rb b/tools/lrama/lib/lrama/counterexamples/derivation.rb new file mode 100644 index 0000000000..368d7f1032 --- /dev/null +++ b/tools/lrama/lib/lrama/counterexamples/derivation.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +module Lrama + class Counterexamples + class Derivation + attr_reader :item, :left, :right + attr_writer :right + + def initialize(item, left, right = nil) + @item = item + @left = left + @right = right + end + + def to_s + "#" + end + alias :inspect :to_s + + def render_strings_for_report + result = [] #: Array[String] + _render_for_report(self, 0, result, 0) + result.map(&:rstrip) + end + + def render_for_report + render_strings_for_report.join("\n") + end + + private + + def _render_for_report(derivation, offset, strings, index) + item = derivation.item + if strings[index] + strings[index] << " " * (offset - strings[index].length) + else + strings[index] = " " * offset + end + str = strings[index] + str << "#{item.rule_id}: #{item.symbols_before_dot.map(&:display_name).join(" ")} " + + if derivation.left + len = str.length + str << "#{item.next_sym.display_name}" + length = _render_for_report(derivation.left, len, strings, index + 1) + # I want String#ljust! + str << " " * (length - str.length) if length > str.length + else + str << " • #{item.symbols_after_dot.map(&:display_name).join(" ")} " + return str.length + end + + if derivation.right&.left + left = derivation.right&.left #: Derivation + length = _render_for_report(left, str.length, strings, index + 1) + str << "#{item.symbols_after_dot[1..-1].map(&:display_name).join(" ")} " # steep:ignore + str << " " * (length - str.length) if length > str.length + elsif item.next_next_sym + str << "#{item.symbols_after_dot[1..-1].map(&:display_name).join(" ")} " # steep:ignore + end + + return str.length + end + end + end +end diff --git a/tools/lrama/lib/lrama/counterexamples/example.rb b/tools/lrama/lib/lrama/counterexamples/example.rb new file mode 100644 index 0000000000..bb08428fcd --- /dev/null +++ b/tools/lrama/lib/lrama/counterexamples/example.rb @@ -0,0 +1,129 @@ +# frozen_string_literal: true + +module Lrama + class Counterexamples + class Example + attr_reader :path1, :path2, :conflict, :conflict_symbol + + # path1 is shift conflict when S/R conflict + # path2 is always reduce conflict + def initialize(path1, path2, conflict, conflict_symbol, counterexamples) + @path1 = path1 + @path2 = path2 + @conflict = conflict + @conflict_symbol = conflict_symbol + @counterexamples = counterexamples + end + + def type + @conflict.type + end + + def path1_item + @path1.last.to.item + end + + def path2_item + @path2.last.to.item + end + + def derivations1 + @derivations1 ||= _derivations(path1) + end + + def derivations2 + @derivations2 ||= _derivations(path2) + end + + private + + def _derivations(paths) + derivation = nil #: Derivation + current = :production + last_path = paths.last #: Path + lookahead_sym = last_path.to.item.end_of_rule? ? @conflict_symbol : nil + + paths.reverse_each do |path| + item = path.to.item + + case current + when :production + case path + when StartPath + derivation = Derivation.new(item, derivation) + current = :start + when TransitionPath + derivation = Derivation.new(item, derivation) + current = :transition + when ProductionPath + derivation = Derivation.new(item, derivation) + current = :production + else + raise "Unexpected. #{path}" + end + + if lookahead_sym && item.next_next_sym && item.next_next_sym.first_set.include?(lookahead_sym) + state_item = @counterexamples.transitions[[path.to, item.next_sym]] + derivation2 = find_derivation_for_symbol(state_item, lookahead_sym) + derivation.right = derivation2 # steep:ignore + lookahead_sym = nil + end + + when :transition + case path + when StartPath + derivation = Derivation.new(item, derivation) + current = :start + when TransitionPath + # ignore + current = :transition + when ProductionPath + # ignore + current = :production + end + else + raise "BUG: Unknown #{current}" + end + + break if current == :start + end + + derivation + end + + def find_derivation_for_symbol(state_item, sym) + queue = [] #: Array[Array[StateItem]] + queue << [state_item] + + while (sis = queue.shift) + si = sis.last + next_sym = si.item.next_sym + + if next_sym == sym + derivation = nil + + sis.reverse_each do |si| + derivation = Derivation.new(si.item, derivation) + end + + return derivation + end + + if next_sym.nterm? && next_sym.first_set.include?(sym) + @counterexamples.productions[si].each do |next_item| + next if next_item.empty_rule? + next_si = StateItem.new(si.state, next_item) + next if sis.include?(next_si) + queue << (sis + [next_si]) + end + + if next_sym.nullable + next_si = @counterexamples.transitions[[si, next_sym]] + queue << (sis + [next_si]) + end + end + end + end + end + end +end diff --git a/tools/lrama/lib/lrama/counterexamples/path.rb b/tools/lrama/lib/lrama/counterexamples/path.rb new file mode 100644 index 0000000000..0a5823dd21 --- /dev/null +++ b/tools/lrama/lib/lrama/counterexamples/path.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +module Lrama + class Counterexamples + class Path + def initialize(from_state_item, to_state_item) + @from_state_item = from_state_item + @to_state_item = to_state_item + end + + def from + @from_state_item + end + + def to + @to_state_item + end + + def to_s + "#" + end + alias :inspect :to_s + + def type + raise NotImplementedError + end + end + end +end diff --git a/tools/lrama/lib/lrama/counterexamples/production_path.rb b/tools/lrama/lib/lrama/counterexamples/production_path.rb new file mode 100644 index 0000000000..0a230c7fce --- /dev/null +++ b/tools/lrama/lib/lrama/counterexamples/production_path.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +module Lrama + class Counterexamples + class ProductionPath < Path + def type + :production + end + + def transition? + false + end + + def production? + true + end + end + end +end diff --git a/tools/lrama/lib/lrama/counterexamples/start_path.rb b/tools/lrama/lib/lrama/counterexamples/start_path.rb new file mode 100644 index 0000000000..c0351c8248 --- /dev/null +++ b/tools/lrama/lib/lrama/counterexamples/start_path.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +module Lrama + class Counterexamples + class StartPath < Path + def initialize(to_state_item) + super nil, to_state_item + end + + def type + :start + end + + def transition? + false + end + + def production? + false + end + end + end +end diff --git a/tools/lrama/lib/lrama/counterexamples/state_item.rb b/tools/lrama/lib/lrama/counterexamples/state_item.rb new file mode 100644 index 0000000000..c919818324 --- /dev/null +++ b/tools/lrama/lib/lrama/counterexamples/state_item.rb @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +module Lrama + class Counterexamples + class StateItem < Struct.new(:state, :item) + end + end +end diff --git a/tools/lrama/lib/lrama/counterexamples/transition_path.rb b/tools/lrama/lib/lrama/counterexamples/transition_path.rb new file mode 100644 index 0000000000..47bfbc4f98 --- /dev/null +++ b/tools/lrama/lib/lrama/counterexamples/transition_path.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +module Lrama + class Counterexamples + class TransitionPath < Path + def type + :transition + end + + def transition? + true + end + + def production? + false + end + end + end +end diff --git a/tools/lrama/lib/lrama/counterexamples/triple.rb b/tools/lrama/lib/lrama/counterexamples/triple.rb new file mode 100644 index 0000000000..64014ee223 --- /dev/null +++ b/tools/lrama/lib/lrama/counterexamples/triple.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +module Lrama + class Counterexamples + # s: state + # itm: item within s + # l: precise lookahead set + class Triple < Struct.new(:s, :itm, :l) + alias :state :s + alias :item :itm + alias :precise_lookahead_set :l + + def state_item + StateItem.new(state, item) + end + + def inspect + "#{state.inspect}. #{item.display_name}. #{l.map(&:id).map(&:s_value)}" + end + alias :to_s :inspect + end + end +end diff --git a/tools/lrama/lib/lrama/diagnostics.rb b/tools/lrama/lib/lrama/diagnostics.rb new file mode 100644 index 0000000000..e9da398c89 --- /dev/null +++ b/tools/lrama/lib/lrama/diagnostics.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +module Lrama + class Diagnostics + def initialize(grammar, states, logger) + @grammar = grammar + @states = states + @logger = logger + end + + def run(diagnostic) + if diagnostic + diagnose_conflict + diagnose_parameterizing_redefined + end + end + + private + + def diagnose_conflict + if @states.sr_conflicts_count != 0 + @logger.warn("shift/reduce conflicts: #{@states.sr_conflicts_count} found") + end + + if @states.rr_conflicts_count != 0 + @logger.warn("reduce/reduce conflicts: #{@states.rr_conflicts_count} found") + end + end + + def diagnose_parameterizing_redefined + @grammar.parameterizing_rule_resolver.redefined_rules.each do |rule| + @logger.warn("parameterizing rule redefined: #{rule}") + end + end + end +end diff --git a/tools/lrama/lib/lrama/digraph.rb b/tools/lrama/lib/lrama/digraph.rb new file mode 100644 index 0000000000..2161f30474 --- /dev/null +++ b/tools/lrama/lib/lrama/digraph.rb @@ -0,0 +1,83 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + # Algorithm Digraph of https://dl.acm.org/doi/pdf/10.1145/69622.357187 (P. 625) + # + # @rbs generic X < Object -- Type of a member of `sets` + # @rbs generic Y < _Or -- Type of sets assigned to a member of `sets` + class Digraph + # TODO: rbs-inline 0.10.0 doesn't support instance variables. + # Move these type declarations above instance variable definitions, once it's supported. + # + # @rbs! + # interface _Or + # def |: (self) -> self + # end + # @sets: Array[X] + # @relation: Hash[X, Array[X]] + # @base_function: Hash[X, Y] + # @stack: Array[X] + # @h: Hash[X, (Integer|Float)?] + # @result: Hash[X, Y] + + # @rbs sets: Array[X] + # @rbs relation: Hash[X, Array[X]] + # @rbs base_function: Hash[X, Y] + # @rbs return: void + def initialize(sets, relation, base_function) + + # X in the paper + @sets = sets + + # R in the paper + @relation = relation + + # F' in the paper + @base_function = base_function + + # S in the paper + @stack = [] + + # N in the paper + @h = Hash.new(0) + + # F in the paper + @result = {} + end + + # @rbs () -> Hash[X, Y] + def compute + @sets.each do |x| + next if @h[x] != 0 + traverse(x) + end + + return @result + end + + private + + # @rbs (X x) -> void + def traverse(x) + @stack.push(x) + d = @stack.count + @h[x] = d + @result[x] = @base_function[x] # F x = F' x + + @relation[x]&.each do |y| + traverse(y) if @h[y] == 0 + @h[x] = [@h[x], @h[y]].min + @result[x] |= @result[y] # F x = F x + F y + end + + if @h[x] == d + while (z = @stack.pop) do + @h[z] = Float::INFINITY + break if z == x + @result[z] = @result[x] # F (Top of S) = F x + end + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar.rb b/tools/lrama/lib/lrama/grammar.rb new file mode 100644 index 0000000000..214ca1a3f2 --- /dev/null +++ b/tools/lrama/lib/lrama/grammar.rb @@ -0,0 +1,407 @@ +# frozen_string_literal: true + +require "forwardable" +require_relative "grammar/auxiliary" +require_relative "grammar/binding" +require_relative "grammar/code" +require_relative "grammar/counter" +require_relative "grammar/destructor" +require_relative "grammar/error_token" +require_relative "grammar/parameterizing_rule" +require_relative "grammar/percent_code" +require_relative "grammar/precedence" +require_relative "grammar/printer" +require_relative "grammar/reference" +require_relative "grammar/rule" +require_relative "grammar/rule_builder" +require_relative "grammar/symbol" +require_relative "grammar/symbols" +require_relative "grammar/type" +require_relative "grammar/union" +require_relative "lexer" + +module Lrama + # Grammar is the result of parsing an input grammar file + class Grammar + extend Forwardable + + attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux, :parameterizing_rule_resolver + attr_accessor :union, :expect, :printers, :error_tokens, :lex_param, :parse_param, :initial_action, + :after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack, + :symbols_resolver, :types, :rules, :rule_builders, :sym_to_rules, :no_stdlib, :locations, :define + + def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term, :find_term_by_s_value, + :find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol, + :find_symbol_by_s_value!, :fill_symbol_number, :fill_nterm_type, + :fill_printer, :fill_destructor, :fill_error_token, :sort_by_number! + + def initialize(rule_counter, define = {}) + @rule_counter = rule_counter + + # Code defined by "%code" + @percent_codes = [] + @printers = [] + @destructors = [] + @error_tokens = [] + @symbols_resolver = Grammar::Symbols::Resolver.new + @types = [] + @rule_builders = [] + @rules = [] + @sym_to_rules = {} + @parameterizing_rule_resolver = ParameterizingRule::Resolver.new + @empty_symbol = nil + @eof_symbol = nil + @error_symbol = nil + @undef_symbol = nil + @accept_symbol = nil + @aux = Auxiliary.new + @no_stdlib = false + @locations = false + @define = define.map {|d| d.split('=') }.to_h + + append_special_symbols + end + + def create_rule_builder(rule_counter, midrule_action_counter) + RuleBuilder.new(rule_counter, midrule_action_counter, @parameterizing_rule_resolver) + end + + def add_percent_code(id:, code:) + @percent_codes << PercentCode.new(id.s_value, code.s_value) + end + + def add_destructor(ident_or_tags:, token_code:, lineno:) + @destructors << Destructor.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno) + end + + def add_printer(ident_or_tags:, token_code:, lineno:) + @printers << Printer.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno) + end + + def add_error_token(ident_or_tags:, token_code:, lineno:) + @error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno) + end + + def add_type(id:, tag:) + @types << Type.new(id: id, tag: tag) + end + + def add_nonassoc(sym, precedence) + set_precedence(sym, Precedence.new(type: :nonassoc, precedence: precedence)) + end + + def add_left(sym, precedence) + set_precedence(sym, Precedence.new(type: :left, precedence: precedence)) + end + + def add_right(sym, precedence) + set_precedence(sym, Precedence.new(type: :right, precedence: precedence)) + end + + def add_precedence(sym, precedence) + set_precedence(sym, Precedence.new(type: :precedence, precedence: precedence)) + end + + def set_precedence(sym, precedence) + raise "" if sym.nterm? + sym.precedence = precedence + end + + def set_union(code, lineno) + @union = Union.new(code: code, lineno: lineno) + end + + def add_rule_builder(builder) + @rule_builders << builder + end + + def add_parameterizing_rule(rule) + @parameterizing_rule_resolver.add_parameterizing_rule(rule) + end + + def parameterizing_rules + @parameterizing_rule_resolver.rules + end + + def insert_before_parameterizing_rules(rules) + @parameterizing_rule_resolver.rules = rules + @parameterizing_rule_resolver.rules + end + + def prologue_first_lineno=(prologue_first_lineno) + @aux.prologue_first_lineno = prologue_first_lineno + end + + def prologue=(prologue) + @aux.prologue = prologue + end + + def epilogue_first_lineno=(epilogue_first_lineno) + @aux.epilogue_first_lineno = epilogue_first_lineno + end + + def epilogue=(epilogue) + @aux.epilogue = epilogue + end + + def prepare + resolve_inline_rules + normalize_rules + collect_symbols + set_lhs_and_rhs + fill_default_precedence + fill_symbols + fill_sym_to_rules + compute_nullable + compute_first_set + set_locations + end + + # TODO: More validation methods + # + # * Validation for no_declared_type_reference + def validate! + @symbols_resolver.validate! + validate_rule_lhs_is_nterm! + end + + def find_rules_by_symbol!(sym) + find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found") + end + + def find_rules_by_symbol(sym) + @sym_to_rules[sym.number] + end + + def ielr_defined? + @define.key?('lr.type') && @define['lr.type'] == 'ielr' + end + + private + + def compute_nullable + @rules.each do |rule| + case + when rule.empty_rule? + rule.nullable = true + when rule.rhs.any?(&:term) + rule.nullable = false + else + # noop + end + end + + while true do + rs = @rules.select {|e| e.nullable.nil? } + nts = nterms.select {|e| e.nullable.nil? } + rule_count_1 = rs.count + nterm_count_1 = nts.count + + rs.each do |rule| + if rule.rhs.all?(&:nullable) + rule.nullable = true + end + end + + nts.each do |nterm| + find_rules_by_symbol!(nterm).each do |rule| + if rule.nullable + nterm.nullable = true + end + end + end + + rule_count_2 = @rules.count {|e| e.nullable.nil? } + nterm_count_2 = nterms.count {|e| e.nullable.nil? } + + if (rule_count_1 == rule_count_2) && (nterm_count_1 == nterm_count_2) + break + end + end + + rules.select {|r| r.nullable.nil? }.each do |rule| + rule.nullable = false + end + + nterms.select {|e| e.nullable.nil? }.each do |nterm| + nterm.nullable = false + end + end + + def compute_first_set + terms.each do |term| + term.first_set = Set.new([term]).freeze + term.first_set_bitmap = Lrama::Bitmap.from_array([term.number]) + end + + nterms.each do |nterm| + nterm.first_set = Set.new([]).freeze + nterm.first_set_bitmap = Lrama::Bitmap.from_array([]) + end + + while true do + changed = false + + @rules.each do |rule| + rule.rhs.each do |r| + if rule.lhs.first_set_bitmap | r.first_set_bitmap != rule.lhs.first_set_bitmap + changed = true + rule.lhs.first_set_bitmap = rule.lhs.first_set_bitmap | r.first_set_bitmap + end + + break unless r.nullable + end + end + + break unless changed + end + + nterms.each do |nterm| + nterm.first_set = Lrama::Bitmap.to_array(nterm.first_set_bitmap).map do |number| + find_symbol_by_number!(number) + end.to_set + end + end + + def setup_rules + @rule_builders.each do |builder| + builder.setup_rules + end + end + + def append_special_symbols + # YYEMPTY (token_id: -2, number: -2) is added when a template is evaluated + # term = add_term(id: Token.new(Token::Ident, "YYEMPTY"), token_id: -2) + # term.number = -2 + # @empty_symbol = term + + # YYEOF + term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYEOF"), alias_name: "\"end of file\"", token_id: 0) + term.number = 0 + term.eof_symbol = true + @eof_symbol = term + + # YYerror + term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYerror"), alias_name: "error") + term.number = 1 + term.error_symbol = true + @error_symbol = term + + # YYUNDEF + term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYUNDEF"), alias_name: "\"invalid token\"") + term.number = 2 + term.undef_symbol = true + @undef_symbol = term + + # $accept + term = add_nterm(id: Lrama::Lexer::Token::Ident.new(s_value: "$accept")) + term.accept_symbol = true + @accept_symbol = term + end + + def resolve_inline_rules + while @rule_builders.any?(&:has_inline_rules?) do + @rule_builders = @rule_builders.flat_map do |builder| + if builder.has_inline_rules? + builder.resolve_inline_rules + else + builder + end + end + end + end + + def normalize_rules + # Add $accept rule to the top of rules + rule_builder = @rule_builders.first # : RuleBuilder + lineno = rule_builder ? rule_builder.line : 0 + @rules << Rule.new(id: @rule_counter.increment, _lhs: @accept_symbol.id, _rhs: [rule_builder.lhs, @eof_symbol.id], token_code: nil, lineno: lineno) + + setup_rules + + @rule_builders.each do |builder| + builder.rules.each do |rule| + add_nterm(id: rule._lhs, tag: rule.lhs_tag) + @rules << rule + end + end + + @rules.sort_by!(&:id) + end + + # Collect symbols from rules + def collect_symbols + @rules.flat_map(&:_rhs).each do |s| + case s + when Lrama::Lexer::Token::Char + add_term(id: s) + when Lrama::Lexer::Token + # skip + else + raise "Unknown class: #{s}" + end + end + end + + def set_lhs_and_rhs + @rules.each do |rule| + rule.lhs = token_to_symbol(rule._lhs) if rule._lhs + + rule.rhs = rule._rhs.map do |t| + token_to_symbol(t) + end + end + end + + # Rule inherits precedence from the last term in RHS. + # + # https://www.gnu.org/software/bison/manual/html_node/How-Precedence.html + def fill_default_precedence + @rules.each do |rule| + # Explicitly specified precedence has the highest priority + next if rule.precedence_sym + + precedence_sym = nil + rule.rhs.each do |sym| + precedence_sym = sym if sym.term? + end + + rule.precedence_sym = precedence_sym + end + end + + def fill_symbols + fill_symbol_number + fill_nterm_type(@types) + fill_printer(@printers) + fill_destructor(@destructors) + fill_error_token(@error_tokens) + sort_by_number! + end + + def fill_sym_to_rules + @rules.each do |rule| + key = rule.lhs.number + @sym_to_rules[key] ||= [] + @sym_to_rules[key] << rule + end + end + + def validate_rule_lhs_is_nterm! + errors = [] #: Array[String] + + rules.each do |rule| + next if rule.lhs.nterm? + + errors << "[BUG] LHS of #{rule.display_name} (line: #{rule.lineno}) is term. It should be nterm." + end + + return if errors.empty? + + raise errors.join("\n") + end + + def set_locations + @locations = @locations || @rules.any? {|rule| rule.contains_at_reference? } + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/auxiliary.rb b/tools/lrama/lib/lrama/grammar/auxiliary.rb new file mode 100644 index 0000000000..2bacee6f1a --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/auxiliary.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +module Lrama + class Grammar + # Grammar file information not used by States but by Output + class Auxiliary < Struct.new(:prologue_first_lineno, :prologue, :epilogue_first_lineno, :epilogue, keyword_init: true) + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/binding.rb b/tools/lrama/lib/lrama/grammar/binding.rb new file mode 100644 index 0000000000..2efb918a0b --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/binding.rb @@ -0,0 +1,67 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Grammar + class Binding + # @rbs @actual_args: Array[Lexer::Token] + # @rbs @param_to_arg: Hash[String, Lexer::Token] + + # @rbs (Array[Lexer::Token] params, Array[Lexer::Token] actual_args) -> void + def initialize(params, actual_args) + @actual_args = actual_args + @param_to_arg = map_params_to_args(params, @actual_args) + end + + # @rbs (Lexer::Token sym) -> Lexer::Token + def resolve_symbol(sym) + if sym.is_a?(Lexer::Token::InstantiateRule) + Lrama::Lexer::Token::InstantiateRule.new( + s_value: sym.s_value, location: sym.location, args: resolved_args(sym), lhs_tag: sym.lhs_tag + ) + else + param_to_arg(sym) + end + end + + # @rbs (Lexer::Token::InstantiateRule token) -> String + def concatenated_args_str(token) + "#{token.rule_name}_#{token_to_args_s_values(token).join('_')}" + end + + private + + # @rbs (Array[Lexer::Token] params, Array[Lexer::Token] actual_args) -> Hash[String, Lexer::Token] + def map_params_to_args(params, actual_args) + params.zip(actual_args).map do |param, arg| + [param.s_value, arg] + end.to_h + end + + # @rbs (Lexer::Token::InstantiateRule sym) -> Array[Lexer::Token] + def resolved_args(sym) + sym.args.map { |arg| resolve_symbol(arg) } + end + + # @rbs (Lexer::Token sym) -> Lexer::Token + def param_to_arg(sym) + if (arg = @param_to_arg[sym.s_value].dup) + arg.alias_name = sym.alias_name + end + arg || sym + end + + # @rbs (Lexer::Token::InstantiateRule token) -> Array[String] + def token_to_args_s_values(token) + token.args.flat_map do |arg| + resolved = resolve_symbol(arg) + if resolved.is_a?(Lexer::Token::InstantiateRule) + [resolved.s_value] + resolved.args.map(&:s_value) + else + [resolved.s_value] + end + end + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/code.rb b/tools/lrama/lib/lrama/grammar/code.rb new file mode 100644 index 0000000000..b6c1cc49e7 --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/code.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +require "forwardable" +require_relative "code/destructor_code" +require_relative "code/initial_action_code" +require_relative "code/no_reference_code" +require_relative "code/printer_code" +require_relative "code/rule_action" + +module Lrama + class Grammar + class Code + extend Forwardable + + def_delegators "token_code", :s_value, :line, :column, :references + + attr_reader :type, :token_code + + def initialize(type:, token_code:) + @type = type + @token_code = token_code + end + + def ==(other) + self.class == other.class && + self.type == other.type && + self.token_code == other.token_code + end + + # $$, $n, @$, @n are translated to C code + def translated_code + t_code = s_value.dup + + references.reverse_each do |ref| + first_column = ref.first_column + last_column = ref.last_column + + str = reference_to_c(ref) + + t_code[first_column...last_column] = str + end + + return t_code + end + + private + + def reference_to_c(ref) + raise NotImplementedError.new("#reference_to_c is not implemented") + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/code/destructor_code.rb b/tools/lrama/lib/lrama/grammar/code/destructor_code.rb new file mode 100644 index 0000000000..794017257c --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/code/destructor_code.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +module Lrama + class Grammar + class Code + class DestructorCode < Code + def initialize(type:, token_code:, tag:) + super(type: type, token_code: token_code) + @tag = tag + end + + private + + # * ($$) *yyvaluep + # * (@$) *yylocationp + # * ($:$) error + # * ($1) error + # * (@1) error + # * ($:1) error + def reference_to_c(ref) + case + when ref.type == :dollar && ref.name == "$" # $$ + member = @tag.member + "((*yyvaluep).#{member})" + when ref.type == :at && ref.name == "$" # @$ + "(*yylocationp)" + when ref.type == :index && ref.name == "$" # $:$ + raise "$:#{ref.value} can not be used in #{type}." + when ref.type == :dollar # $n + raise "$#{ref.value} can not be used in #{type}." + when ref.type == :at # @n + raise "@#{ref.value} can not be used in #{type}." + when ref.type == :index # $:n + raise "$:#{ref.value} can not be used in #{type}." + else + raise "Unexpected. #{self}, #{ref}" + end + end + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/code/initial_action_code.rb b/tools/lrama/lib/lrama/grammar/code/initial_action_code.rb new file mode 100644 index 0000000000..02f2badc9e --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/code/initial_action_code.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +module Lrama + class Grammar + class Code + class InitialActionCode < Code + private + + # * ($$) yylval + # * (@$) yylloc + # * ($:$) error + # * ($1) error + # * (@1) error + # * ($:1) error + def reference_to_c(ref) + case + when ref.type == :dollar && ref.name == "$" # $$ + "yylval" + when ref.type == :at && ref.name == "$" # @$ + "yylloc" + when ref.type == :index && ref.name == "$" # $:$ + raise "$:#{ref.value} can not be used in initial_action." + when ref.type == :dollar # $n + raise "$#{ref.value} can not be used in initial_action." + when ref.type == :at # @n + raise "@#{ref.value} can not be used in initial_action." + when ref.type == :index # $:n + raise "$:#{ref.value} can not be used in initial_action." + else + raise "Unexpected. #{self}, #{ref}" + end + end + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/code/no_reference_code.rb b/tools/lrama/lib/lrama/grammar/code/no_reference_code.rb new file mode 100644 index 0000000000..ab12f32e29 --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/code/no_reference_code.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module Lrama + class Grammar + class Code + class NoReferenceCode < Code + private + + # * ($$) error + # * (@$) error + # * ($:$) error + # * ($1) error + # * (@1) error + # * ($:1) error + def reference_to_c(ref) + case + when ref.type == :dollar # $$, $n + raise "$#{ref.value} can not be used in #{type}." + when ref.type == :at # @$, @n + raise "@#{ref.value} can not be used in #{type}." + when ref.type == :index # $:$, $:n + raise "$:#{ref.value} can not be used in #{type}." + else + raise "Unexpected. #{self}, #{ref}" + end + end + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/code/printer_code.rb b/tools/lrama/lib/lrama/grammar/code/printer_code.rb new file mode 100644 index 0000000000..c0b8d24306 --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/code/printer_code.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +module Lrama + class Grammar + class Code + class PrinterCode < Code + def initialize(type:, token_code:, tag:) + super(type: type, token_code: token_code) + @tag = tag + end + + private + + # * ($$) *yyvaluep + # * (@$) *yylocationp + # * ($:$) error + # * ($1) error + # * (@1) error + # * ($:1) error + def reference_to_c(ref) + case + when ref.type == :dollar && ref.name == "$" # $$ + member = @tag.member + "((*yyvaluep).#{member})" + when ref.type == :at && ref.name == "$" # @$ + "(*yylocationp)" + when ref.type == :index && ref.name == "$" # $:$ + raise "$:#{ref.value} can not be used in #{type}." + when ref.type == :dollar # $n + raise "$#{ref.value} can not be used in #{type}." + when ref.type == :at # @n + raise "@#{ref.value} can not be used in #{type}." + when ref.type == :index # $:n + raise "$:#{ref.value} can not be used in #{type}." + else + raise "Unexpected. #{self}, #{ref}" + end + end + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/code/rule_action.rb b/tools/lrama/lib/lrama/grammar/code/rule_action.rb new file mode 100644 index 0000000000..363ecdf25d --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/code/rule_action.rb @@ -0,0 +1,92 @@ +# frozen_string_literal: true + +module Lrama + class Grammar + class Code + class RuleAction < Code + def initialize(type:, token_code:, rule:) + super(type: type, token_code: token_code) + @rule = rule + end + + private + + # * ($$) yyval + # * (@$) yyloc + # * ($:$) error + # * ($1) yyvsp[i] + # * (@1) yylsp[i] + # * ($:1) i - 1 + # + # + # Consider a rule like + # + # class: keyword_class { $1 } tSTRING { $2 + $3 } keyword_end { $class = $1 + $keyword_end } + # + # For the semantic action of original rule: + # + # "Rule" class: keyword_class { $1 } tSTRING { $2 + $3 } keyword_end { $class = $1 + $keyword_end } + # "Position in grammar" $1 $2 $3 $4 $5 + # "Index for yyvsp" -4 -3 -2 -1 0 + # "$:n" $:1 $:2 $:3 $:4 $:5 + # "index of $:n" -5 -4 -3 -2 -1 + # + # + # For the first midrule action: + # + # "Rule" class: keyword_class { $1 } tSTRING { $2 + $3 } keyword_end { $class = $1 + $keyword_end } + # "Position in grammar" $1 + # "Index for yyvsp" 0 + # "$:n" $:1 + def reference_to_c(ref) + case + when ref.type == :dollar && ref.name == "$" # $$ + tag = ref.ex_tag || lhs.tag + raise_tag_not_found_error(ref) unless tag + # @type var tag: Lexer::Token::Tag + "(yyval.#{tag.member})" + when ref.type == :at && ref.name == "$" # @$ + "(yyloc)" + when ref.type == :index && ref.name == "$" # $:$ + raise "$:$ is not supported" + when ref.type == :dollar # $n + i = -position_in_rhs + ref.index + tag = ref.ex_tag || rhs[ref.index - 1].tag + raise_tag_not_found_error(ref) unless tag + # @type var tag: Lexer::Token::Tag + "(yyvsp[#{i}].#{tag.member})" + when ref.type == :at # @n + i = -position_in_rhs + ref.index + "(yylsp[#{i}])" + when ref.type == :index # $:n + i = -position_in_rhs + ref.index + "(#{i} - 1)" + else + raise "Unexpected. #{self}, #{ref}" + end + end + + def position_in_rhs + # If rule is not derived rule, User Code is only action at + # the end of rule RHS. In such case, the action is located on + # `@rule.rhs.count`. + @rule.position_in_original_rule_rhs || @rule.rhs.count + end + + # If this is midrule action, RHS is an RHS of the original rule. + def rhs + (@rule.original_rule || @rule).rhs + end + + # Unlike `rhs`, LHS is always an LHS of the rule. + def lhs + @rule.lhs + end + + def raise_tag_not_found_error(ref) + raise "Tag is not specified for '$#{ref.value}' in '#{@rule.display_name}'" + end + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/counter.rb b/tools/lrama/lib/lrama/grammar/counter.rb new file mode 100644 index 0000000000..dc91b87b71 --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/counter.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module Lrama + class Grammar + class Counter + def initialize(number) + @number = number + end + + def increment + n = @number + @number += 1 + n + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/destructor.rb b/tools/lrama/lib/lrama/grammar/destructor.rb new file mode 100644 index 0000000000..a2b6fde0ed --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/destructor.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +module Lrama + class Grammar + class Destructor < Struct.new(:ident_or_tags, :token_code, :lineno, keyword_init: true) + def translated_code(tag) + Code::DestructorCode.new(type: :destructor, token_code: token_code, tag: tag).translated_code + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/error_token.rb b/tools/lrama/lib/lrama/grammar/error_token.rb new file mode 100644 index 0000000000..50eaafeebc --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/error_token.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +module Lrama + class Grammar + class ErrorToken < Struct.new(:ident_or_tags, :token_code, :lineno, keyword_init: true) + def translated_code(tag) + Code::PrinterCode.new(type: :error_token, token_code: token_code, tag: tag).translated_code + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/parameterizing_rule.rb b/tools/lrama/lib/lrama/grammar/parameterizing_rule.rb new file mode 100644 index 0000000000..ddc1a467ce --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/parameterizing_rule.rb @@ -0,0 +1,5 @@ +# frozen_string_literal: true + +require_relative 'parameterizing_rule/resolver' +require_relative 'parameterizing_rule/rhs' +require_relative 'parameterizing_rule/rule' diff --git a/tools/lrama/lib/lrama/grammar/parameterizing_rule/resolver.rb b/tools/lrama/lib/lrama/grammar/parameterizing_rule/resolver.rb new file mode 100644 index 0000000000..06f2f1cef7 --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/parameterizing_rule/resolver.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: true + +module Lrama + class Grammar + class ParameterizingRule + class Resolver + attr_accessor :rules, :created_lhs_list + + def initialize + @rules = [] + @created_lhs_list = [] + end + + def add_parameterizing_rule(rule) + @rules << rule + end + + def find_rule(token) + select_rules(@rules, token).last + end + + def find_inline(token) + @rules.reverse.find { |rule| rule.name == token.s_value && rule.is_inline } + end + + def created_lhs(lhs_s_value) + @created_lhs_list.reverse.find { |created_lhs| created_lhs.s_value == lhs_s_value } + end + + def redefined_rules + @rules.select { |rule| @rules.count { |r| r.name == rule.name && r.required_parameters_count == rule.required_parameters_count } > 1 } + end + + private + + def select_rules(rules, token) + rules = select_not_inline_rules(rules) + rules = select_rules_by_name(rules, token.rule_name) + rules = rules.select { |rule| rule.required_parameters_count == token.args_count } + if rules.empty? + raise "Invalid number of arguments. `#{token.rule_name}`" + else + rules + end + end + + def select_not_inline_rules(rules) + rules.select { |rule| !rule.is_inline } + end + + def select_rules_by_name(rules, rule_name) + rules = rules.select { |rule| rule.name == rule_name } + if rules.empty? + raise "Parameterizing rule does not exist. `#{rule_name}`" + else + rules + end + end + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/parameterizing_rule/rhs.rb b/tools/lrama/lib/lrama/grammar/parameterizing_rule/rhs.rb new file mode 100644 index 0000000000..f60781c053 --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/parameterizing_rule/rhs.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +module Lrama + class Grammar + class ParameterizingRule + class Rhs + attr_accessor :symbols, :user_code, :precedence_sym + + def initialize + @symbols = [] + @user_code = nil + @precedence_sym = nil + end + + def resolve_user_code(bindings) + return unless user_code + + resolved = Lexer::Token::UserCode.new(s_value: user_code.s_value, location: user_code.location) + var_to_arg = {} #: Hash[String, String] + symbols.each do |sym| + resolved_sym = bindings.resolve_symbol(sym) + if resolved_sym != sym + var_to_arg[sym.s_value] = resolved_sym.s_value + end + end + + var_to_arg.each do |var, arg| + resolved.references.each do |ref| + if ref.name == var + ref.name = arg + end + end + end + + return resolved + end + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/parameterizing_rule/rule.rb b/tools/lrama/lib/lrama/grammar/parameterizing_rule/rule.rb new file mode 100644 index 0000000000..cc200d2fb6 --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/parameterizing_rule/rule.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +module Lrama + class Grammar + class ParameterizingRule + class Rule + attr_reader :name, :parameters, :rhs_list, :required_parameters_count, :tag, :is_inline + + def initialize(name, parameters, rhs_list, tag: nil, is_inline: false) + @name = name + @parameters = parameters + @rhs_list = rhs_list + @tag = tag + @is_inline = is_inline + @required_parameters_count = parameters.count + end + + def to_s + "#{@name}(#{@parameters.map(&:s_value).join(', ')})" + end + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/percent_code.rb b/tools/lrama/lib/lrama/grammar/percent_code.rb new file mode 100644 index 0000000000..416a2d2753 --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/percent_code.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module Lrama + class Grammar + class PercentCode + attr_reader :name, :code + + def initialize(name, code) + @name = name + @code = code + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/precedence.rb b/tools/lrama/lib/lrama/grammar/precedence.rb new file mode 100644 index 0000000000..13cf960c32 --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/precedence.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +module Lrama + class Grammar + class Precedence < Struct.new(:type, :precedence, keyword_init: true) + include Comparable + + def <=>(other) + self.precedence <=> other.precedence + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/printer.rb b/tools/lrama/lib/lrama/grammar/printer.rb new file mode 100644 index 0000000000..b78459e819 --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/printer.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +module Lrama + class Grammar + class Printer < Struct.new(:ident_or_tags, :token_code, :lineno, keyword_init: true) + def translated_code(tag) + Code::PrinterCode.new(type: :printer, token_code: token_code, tag: tag).translated_code + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/reference.rb b/tools/lrama/lib/lrama/grammar/reference.rb new file mode 100644 index 0000000000..b044516bdb --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/reference.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +module Lrama + class Grammar + # type: :dollar or :at + # name: String (e.g. $$, $foo, $expr.right) + # number: Integer (e.g. $1) + # index: Integer + # ex_tag: "$1" (Optional) + class Reference < Struct.new(:type, :name, :number, :index, :ex_tag, :first_column, :last_column, keyword_init: true) + def value + name || number + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/rule.rb b/tools/lrama/lib/lrama/grammar/rule.rb new file mode 100644 index 0000000000..445752ae0d --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/rule.rb @@ -0,0 +1,75 @@ +# frozen_string_literal: true + +module Lrama + class Grammar + # _rhs holds original RHS element. Use rhs to refer to Symbol. + class Rule < Struct.new(:id, :_lhs, :lhs, :lhs_tag, :_rhs, :rhs, :token_code, :position_in_original_rule_rhs, :nullable, :precedence_sym, :lineno, keyword_init: true) + attr_accessor :original_rule + + def ==(other) + self.class == other.class && + self.lhs == other.lhs && + self.lhs_tag == other.lhs_tag && + self.rhs == other.rhs && + self.token_code == other.token_code && + self.position_in_original_rule_rhs == other.position_in_original_rule_rhs && + self.nullable == other.nullable && + self.precedence_sym == other.precedence_sym && + self.lineno == other.lineno + end + + def display_name + l = lhs.id.s_value + r = empty_rule? ? "ε" : rhs.map {|r| r.id.s_value }.join(" ") + "#{l} -> #{r}" + end + + def display_name_without_action + l = lhs.id.s_value + r = empty_rule? ? "ε" : rhs.map do |r| + r.id.s_value if r.first_set.any? + end.compact.join(" ") + + "#{l} -> #{r}" + end + + # Used by #user_actions + def as_comment + l = lhs.id.s_value + r = empty_rule? ? "%empty" : rhs.map(&:display_name).join(" ") + + "#{l}: #{r}" + end + + def with_actions + "#{display_name} {#{token_code&.s_value}}" + end + + # opt_nl: ε <-- empty_rule + # | '\n' <-- not empty_rule + def empty_rule? + rhs.empty? + end + + def precedence + precedence_sym&.precedence + end + + def initial_rule? + id == 0 + end + + def translated_code + return nil unless token_code + + Code::RuleAction.new(type: :rule_action, token_code: token_code, rule: self).translated_code + end + + def contains_at_reference? + return false unless token_code + + token_code.references.any? {|r| r.type == :at } + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/rule_builder.rb b/tools/lrama/lib/lrama/grammar/rule_builder.rb new file mode 100644 index 0000000000..481a3780f4 --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/rule_builder.rb @@ -0,0 +1,255 @@ +# frozen_string_literal: true + +module Lrama + class Grammar + class RuleBuilder + attr_accessor :lhs, :line + attr_reader :lhs_tag, :rhs, :user_code, :precedence_sym + + def initialize(rule_counter, midrule_action_counter, parameterizing_rule_resolver, position_in_original_rule_rhs = nil, lhs_tag: nil, skip_preprocess_references: false) + @rule_counter = rule_counter + @midrule_action_counter = midrule_action_counter + @parameterizing_rule_resolver = parameterizing_rule_resolver + @position_in_original_rule_rhs = position_in_original_rule_rhs + @skip_preprocess_references = skip_preprocess_references + + @lhs = nil + @lhs_tag = lhs_tag + @rhs = [] + @user_code = nil + @precedence_sym = nil + @line = nil + @rules = [] + @rule_builders_for_parameterizing_rules = [] + @rule_builders_for_derived_rules = [] + @parameterizing_rules = [] + @midrule_action_rules = [] + end + + def add_rhs(rhs) + @line ||= rhs.line + + flush_user_code + + @rhs << rhs + end + + def user_code=(user_code) + @line ||= user_code&.line + + flush_user_code + + @user_code = user_code + end + + def precedence_sym=(precedence_sym) + flush_user_code + + @precedence_sym = precedence_sym + end + + def complete_input + freeze_rhs + end + + def setup_rules + preprocess_references unless @skip_preprocess_references + process_rhs + build_rules + end + + def rules + @parameterizing_rules + @midrule_action_rules + @rules + end + + def has_inline_rules? + rhs.any? { |token| @parameterizing_rule_resolver.find_inline(token) } + end + + def resolve_inline_rules + resolved_builders = [] #: Array[RuleBuilder] + rhs.each_with_index do |token, i| + if (inline_rule = @parameterizing_rule_resolver.find_inline(token)) + inline_rule.rhs_list.each do |inline_rhs| + rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterizing_rule_resolver, lhs_tag: lhs_tag) + if token.is_a?(Lexer::Token::InstantiateRule) + resolve_inline_rhs(rule_builder, inline_rhs, i, Binding.new(inline_rule.parameters, token.args)) + else + resolve_inline_rhs(rule_builder, inline_rhs, i) + end + rule_builder.lhs = lhs + rule_builder.line = line + rule_builder.precedence_sym = precedence_sym + rule_builder.user_code = replace_inline_user_code(inline_rhs, i) + resolved_builders << rule_builder + end + break + end + end + resolved_builders + end + + private + + def freeze_rhs + @rhs.freeze + end + + def preprocess_references + numberize_references + end + + def build_rules + tokens = @replaced_rhs + + rule = Rule.new( + id: @rule_counter.increment, _lhs: lhs, _rhs: tokens, lhs_tag: lhs_tag, token_code: user_code, + position_in_original_rule_rhs: @position_in_original_rule_rhs, precedence_sym: precedence_sym, lineno: line + ) + @rules = [rule] + @parameterizing_rules = @rule_builders_for_parameterizing_rules.map do |rule_builder| + rule_builder.rules + end.flatten + @midrule_action_rules = @rule_builders_for_derived_rules.map do |rule_builder| + rule_builder.rules + end.flatten + @midrule_action_rules.each do |r| + r.original_rule = rule + end + end + + # rhs is a mixture of variety type of tokens like `Ident`, `InstantiateRule`, `UserCode` and so on. + # `#process_rhs` replaces some kind of tokens to `Ident` so that all `@replaced_rhs` are `Ident` or `Char`. + def process_rhs + return if @replaced_rhs + + @replaced_rhs = [] + + rhs.each_with_index do |token, i| + case token + when Lrama::Lexer::Token::Char + @replaced_rhs << token + when Lrama::Lexer::Token::Ident + @replaced_rhs << token + when Lrama::Lexer::Token::InstantiateRule + parameterizing_rule = @parameterizing_rule_resolver.find_rule(token) + raise "Unexpected token. #{token}" unless parameterizing_rule + + bindings = Binding.new(parameterizing_rule.parameters, token.args) + lhs_s_value = bindings.concatenated_args_str(token) + if (created_lhs = @parameterizing_rule_resolver.created_lhs(lhs_s_value)) + @replaced_rhs << created_lhs + else + lhs_token = Lrama::Lexer::Token::Ident.new(s_value: lhs_s_value, location: token.location) + @replaced_rhs << lhs_token + @parameterizing_rule_resolver.created_lhs_list << lhs_token + parameterizing_rule.rhs_list.each do |r| + rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterizing_rule_resolver, lhs_tag: token.lhs_tag || parameterizing_rule.tag) + rule_builder.lhs = lhs_token + r.symbols.each { |sym| rule_builder.add_rhs(bindings.resolve_symbol(sym)) } + rule_builder.line = line + rule_builder.precedence_sym = r.precedence_sym + rule_builder.user_code = r.resolve_user_code(bindings) + rule_builder.complete_input + rule_builder.setup_rules + @rule_builders_for_parameterizing_rules << rule_builder + end + end + when Lrama::Lexer::Token::UserCode + prefix = token.referred ? "@" : "$@" + tag = token.tag || lhs_tag + new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + @midrule_action_counter.increment.to_s) + @replaced_rhs << new_token + + rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterizing_rule_resolver, i, lhs_tag: tag, skip_preprocess_references: true) + rule_builder.lhs = new_token + rule_builder.user_code = token + rule_builder.complete_input + rule_builder.setup_rules + + @rule_builders_for_derived_rules << rule_builder + else + raise "Unexpected token. #{token}" + end + end + end + + def resolve_inline_rhs(rule_builder, inline_rhs, index, bindings = nil) + rhs.each_with_index do |token, i| + if index == i + inline_rhs.symbols.each { |sym| rule_builder.add_rhs(bindings.nil? ? sym : bindings.resolve_symbol(sym)) } + else + rule_builder.add_rhs(token) + end + end + end + + def replace_inline_user_code(inline_rhs, index) + return user_code if inline_rhs.user_code.nil? + return user_code if user_code.nil? + + code = user_code.s_value.gsub(/\$#{index + 1}/, inline_rhs.user_code.s_value) + user_code.references.each do |ref| + next if ref.index.nil? || ref.index <= index # nil is a case for `$$` + code = code.gsub(/\$#{ref.index}/, "$#{ref.index + (inline_rhs.symbols.count-1)}") + code = code.gsub(/@#{ref.index}/, "@#{ref.index + (inline_rhs.symbols.count-1)}") + end + Lrama::Lexer::Token::UserCode.new(s_value: code, location: user_code.location) + end + + def numberize_references + # Bison n'th component is 1-origin + (rhs + [user_code]).compact.each.with_index(1) do |token, i| + next unless token.is_a?(Lrama::Lexer::Token::UserCode) + + token.references.each do |ref| + ref_name = ref.name + + if ref_name + if ref_name == '$' + ref.name = '$' + else + candidates = ([lhs] + rhs).each_with_index.select {|token, _i| token.referred_by?(ref_name) } + + if candidates.size >= 2 + token.invalid_ref(ref, "Referring symbol `#{ref_name}` is duplicated.") + end + + unless (referring_symbol = candidates.first) + token.invalid_ref(ref, "Referring symbol `#{ref_name}` is not found.") + end + + if referring_symbol[1] == 0 # Refers to LHS + ref.name = '$' + else + ref.number = referring_symbol[1] + end + end + end + + if ref.number + ref.index = ref.number + end + + # TODO: Need to check index of @ too? + next if ref.type == :at + + if ref.index + # TODO: Prohibit $0 even so Bison allows it? + # See: https://www.gnu.org/software/bison/manual/html_node/Actions.html + token.invalid_ref(ref, "Can not refer following component. #{ref.index} >= #{i}.") if ref.index >= i + rhs[ref.index - 1].referred = true + end + end + end + end + + def flush_user_code + if (c = @user_code) + @rhs << c + @user_code = nil + end + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/stdlib.y b/tools/lrama/lib/lrama/grammar/stdlib.y new file mode 100644 index 0000000000..d6e89c908c --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/stdlib.y @@ -0,0 +1,122 @@ +/********************************************************************** + + stdlib.y + + This is lrama's standard library. It provides a number of + parameterizing rule definitions, such as options and lists, + that should be useful in a number of situations. + +**********************************************************************/ + +// ------------------------------------------------------------------- +// Options + +/* + * program: option(number) + * + * => + * + * program: option_number + * option_number: %empty + * option_number: number + */ +%rule option(X): /* empty */ + | X + ; + +// ------------------------------------------------------------------- +// Sequences + +/* + * program: preceded(opening, X) + * + * => + * + * program: preceded_opening_X + * preceded_opening_X: opening X + */ +%rule preceded(opening, X): opening X { $$ = $2; } + ; + +/* + * program: terminated(X, closing) + * + * => + * + * program: terminated_X_closing + * terminated_X_closing: X closing + */ +%rule terminated(X, closing): X closing { $$ = $1; } + ; + +/* + * program: delimited(opening, X, closing) + * + * => + * + * program: delimited_opening_X_closing + * delimited_opening_X_closing: opening X closing + */ +%rule delimited(opening, X, closing): opening X closing { $$ = $2; } + ; + +// ------------------------------------------------------------------- +// Lists + +/* + * program: list(number) + * + * => + * + * program: list_number + * list_number: %empty + * list_number: list_number number + */ +%rule list(X): /* empty */ + | list(X) X + ; + +/* + * program: nonempty_list(number) + * + * => + * + * program: nonempty_list_number + * nonempty_list_number: number + * nonempty_list_number: nonempty_list_number number + */ +%rule nonempty_list(X): X + | nonempty_list(X) X + ; + +/* + * program: separated_nonempty_list(comma, number) + * + * => + * + * program: separated_nonempty_list_comma_number + * separated_nonempty_list_comma_number: number + * separated_nonempty_list_comma_number: separated_nonempty_list_comma_number comma number + */ +%rule separated_nonempty_list(separator, X): X + | separated_nonempty_list(separator, X) separator X + ; + +/* + * program: separated_list(comma, number) + * + * => + * + * program: separated_list_comma_number + * separated_list_comma_number: option_separated_nonempty_list_comma_number + * option_separated_nonempty_list_comma_number: %empty + * option_separated_nonempty_list_comma_number: separated_nonempty_list_comma_number + * separated_nonempty_list_comma_number: number + * separated_nonempty_list_comma_number: comma separated_nonempty_list_comma_number number + */ +%rule separated_list(separator, X): option(separated_nonempty_list(separator, X)) + ; + +%% + +%union{}; diff --git a/tools/lrama/lib/lrama/grammar/symbol.rb b/tools/lrama/lib/lrama/grammar/symbol.rb new file mode 100644 index 0000000000..f9dffcad6c --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/symbol.rb @@ -0,0 +1,105 @@ +# frozen_string_literal: true + +# Symbol is both of nterm and term +# `number` is both for nterm and term +# `token_id` is tokentype for term, internal sequence number for nterm +# +# TODO: Add validation for ASCII code range for Token::Char + +module Lrama + class Grammar + class Symbol + attr_accessor :id, :alias_name, :tag, :number, :token_id, :nullable, :precedence, + :printer, :destructor, :error_token, :first_set, :first_set_bitmap + attr_reader :term + attr_writer :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol + + def initialize(id:, term:, alias_name: nil, number: nil, tag: nil, token_id: nil, nullable: nil, precedence: nil, printer: nil, destructor: nil) + @id = id + @alias_name = alias_name + @number = number + @tag = tag + @term = term + @token_id = token_id + @nullable = nullable + @precedence = precedence + @printer = printer + @destructor = destructor + end + + def term? + term + end + + def nterm? + !term + end + + def eof_symbol? + !!@eof_symbol + end + + def error_symbol? + !!@error_symbol + end + + def undef_symbol? + !!@undef_symbol + end + + def accept_symbol? + !!@accept_symbol + end + + def display_name + alias_name || id.s_value + end + + # name for yysymbol_kind_t + # + # See: b4_symbol_kind_base + # @type var name: String + def enum_name + case + when accept_symbol? + name = "YYACCEPT" + when eof_symbol? + name = "YYEOF" + when term? && id.is_a?(Lrama::Lexer::Token::Char) + name = number.to_s + display_name + when term? && id.is_a?(Lrama::Lexer::Token::Ident) + name = id.s_value + when nterm? && (id.s_value.include?("$") || id.s_value.include?("@")) + name = number.to_s + id.s_value + when nterm? + name = id.s_value + else + raise "Unexpected #{self}" + end + + "YYSYMBOL_" + name.gsub(/\W+/, "_") + end + + # comment for yysymbol_kind_t + def comment + case + when accept_symbol? + # YYSYMBOL_YYACCEPT + id.s_value + when eof_symbol? + # YYEOF + alias_name + when (term? && 0 < token_id && token_id < 128) + # YYSYMBOL_3_backslash_, YYSYMBOL_14_ + alias_name || id.s_value + when id.s_value.include?("$") || id.s_value.include?("@") + # YYSYMBOL_21_1 + id.s_value + else + # YYSYMBOL_keyword_class, YYSYMBOL_strings_1 + alias_name || id.s_value + end + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/symbols.rb b/tools/lrama/lib/lrama/grammar/symbols.rb new file mode 100644 index 0000000000..337241d1b2 --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/symbols.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +require_relative "symbols/resolver" diff --git a/tools/lrama/lib/lrama/grammar/symbols/resolver.rb b/tools/lrama/lib/lrama/grammar/symbols/resolver.rb new file mode 100644 index 0000000000..52f4ff90bd --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/symbols/resolver.rb @@ -0,0 +1,301 @@ +# frozen_string_literal: true + +module Lrama + class Grammar + class Symbols + class Resolver + attr_reader :terms, :nterms + + def initialize + @terms = [] + @nterms = [] + end + + def symbols + @symbols ||= (@terms + @nterms) + end + + def sort_by_number! + symbols.sort_by!(&:number) + end + + def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false) + if token_id && (sym = find_symbol_by_token_id(token_id)) + if replace + sym.id = id + sym.alias_name = alias_name + sym.tag = tag + end + + return sym + end + + if (sym = find_symbol_by_id(id)) + return sym + end + + @symbols = nil + term = Symbol.new( + id: id, alias_name: alias_name, number: nil, tag: tag, + term: true, token_id: token_id, nullable: false + ) + @terms << term + term + end + + def add_nterm(id:, alias_name: nil, tag: nil) + if (sym = find_symbol_by_id(id)) + return sym + end + + @symbols = nil + nterm = Symbol.new( + id: id, alias_name: alias_name, number: nil, tag: tag, + term: false, token_id: nil, nullable: nil, + ) + @nterms << nterm + nterm + end + + def find_term_by_s_value(s_value) + terms.find { |s| s.id.s_value == s_value } + end + + def find_symbol_by_s_value(s_value) + symbols.find { |s| s.id.s_value == s_value } + end + + def find_symbol_by_s_value!(s_value) + find_symbol_by_s_value(s_value) || (raise "Symbol not found. value: `#{s_value}`") + end + + def find_symbol_by_id(id) + symbols.find do |s| + s.id == id || s.alias_name == id.s_value + end + end + + def find_symbol_by_id!(id) + find_symbol_by_id(id) || (raise "Symbol not found. #{id}") + end + + def find_symbol_by_token_id(token_id) + symbols.find {|s| s.token_id == token_id } + end + + def find_symbol_by_number!(number) + sym = symbols[number] + + raise "Symbol not found. number: `#{number}`" unless sym + raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number + + sym + end + + def fill_symbol_number + # YYEMPTY = -2 + # YYEOF = 0 + # YYerror = 1 + # YYUNDEF = 2 + @number = 3 + fill_terms_number + fill_nterms_number + end + + def fill_nterm_type(types) + types.each do |type| + nterm = find_nterm_by_id!(type.id) + nterm.tag = type.tag + end + end + + def fill_printer(printers) + symbols.each do |sym| + printers.each do |printer| + printer.ident_or_tags.each do |ident_or_tag| + case ident_or_tag + when Lrama::Lexer::Token::Ident + sym.printer = printer if sym.id == ident_or_tag + when Lrama::Lexer::Token::Tag + sym.printer = printer if sym.tag == ident_or_tag + else + raise "Unknown token type. #{printer}" + end + end + end + end + end + + def fill_destructor(destructors) + symbols.each do |sym| + destructors.each do |destructor| + destructor.ident_or_tags.each do |ident_or_tag| + case ident_or_tag + when Lrama::Lexer::Token::Ident + sym.destructor = destructor if sym.id == ident_or_tag + when Lrama::Lexer::Token::Tag + sym.destructor = destructor if sym.tag == ident_or_tag + else + raise "Unknown token type. #{destructor}" + end + end + end + end + end + + def fill_error_token(error_tokens) + symbols.each do |sym| + error_tokens.each do |token| + token.ident_or_tags.each do |ident_or_tag| + case ident_or_tag + when Lrama::Lexer::Token::Ident + sym.error_token = token if sym.id == ident_or_tag + when Lrama::Lexer::Token::Tag + sym.error_token = token if sym.tag == ident_or_tag + else + raise "Unknown token type. #{token}" + end + end + end + end + end + + def token_to_symbol(token) + case token + when Lrama::Lexer::Token + find_symbol_by_id!(token) + else + raise "Unknown class: #{token}" + end + end + + def validate! + validate_number_uniqueness! + validate_alias_name_uniqueness! + end + + private + + def find_nterm_by_id!(id) + @nterms.find do |s| + s.id == id + end || (raise "Symbol not found. #{id}") + end + + def fill_terms_number + # Character literal in grammar file has + # token id corresponding to ASCII code by default, + # so start token_id from 256. + token_id = 256 + + @terms.each do |sym| + while used_numbers[@number] do + @number += 1 + end + + if sym.number.nil? + sym.number = @number + used_numbers[@number] = true + @number += 1 + end + + # If id is Token::Char, it uses ASCII code + if sym.token_id.nil? + if sym.id.is_a?(Lrama::Lexer::Token::Char) + # Ignore ' on the both sides + case sym.id.s_value[1..-2] + when "\\b" + sym.token_id = 8 + when "\\f" + sym.token_id = 12 + when "\\n" + sym.token_id = 10 + when "\\r" + sym.token_id = 13 + when "\\t" + sym.token_id = 9 + when "\\v" + sym.token_id = 11 + when "\"" + sym.token_id = 34 + when "'" + sym.token_id = 39 + when "\\\\" + sym.token_id = 92 + when /\A\\(\d+)\z/ + unless (id = Integer($1, 8)).nil? + sym.token_id = id + else + raise "Unknown Char s_value #{sym}" + end + when /\A(.)\z/ + unless (id = $1&.bytes&.first).nil? + sym.token_id = id + else + raise "Unknown Char s_value #{sym}" + end + else + raise "Unknown Char s_value #{sym}" + end + else + sym.token_id = token_id + token_id += 1 + end + end + end + end + + def fill_nterms_number + token_id = 0 + + @nterms.each do |sym| + while used_numbers[@number] do + @number += 1 + end + + if sym.number.nil? + sym.number = @number + used_numbers[@number] = true + @number += 1 + end + + if sym.token_id.nil? + sym.token_id = token_id + token_id += 1 + end + end + end + + def used_numbers + return @used_numbers if defined?(@used_numbers) + + @used_numbers = {} + symbols.map(&:number).each do |n| + @used_numbers[n] = true + end + @used_numbers + end + + def validate_number_uniqueness! + invalid = symbols.group_by(&:number).select do |number, syms| + syms.count > 1 + end + + return if invalid.empty? + + raise "Symbol number is duplicated. #{invalid}" + end + + def validate_alias_name_uniqueness! + invalid = symbols.select(&:alias_name).group_by(&:alias_name).select do |alias_name, syms| + syms.count > 1 + end + + return if invalid.empty? + + raise "Symbol alias name is duplicated. #{invalid}" + end + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/type.rb b/tools/lrama/lib/lrama/grammar/type.rb new file mode 100644 index 0000000000..65537288b3 --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/type.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +module Lrama + class Grammar + class Type + attr_reader :id, :tag + + def initialize(id:, tag:) + @id = id + @tag = tag + end + + def ==(other) + self.class == other.class && + self.id == other.id && + self.tag == other.tag + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar/union.rb b/tools/lrama/lib/lrama/grammar/union.rb new file mode 100644 index 0000000000..5f1bee0069 --- /dev/null +++ b/tools/lrama/lib/lrama/grammar/union.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +module Lrama + class Grammar + class Union < Struct.new(:code, :lineno, keyword_init: true) + def braces_less_code + # Braces is already removed by lexer + code.s_value + end + end + end +end diff --git a/tools/lrama/lib/lrama/grammar_validator.rb b/tools/lrama/lib/lrama/grammar_validator.rb new file mode 100644 index 0000000000..7790499589 --- /dev/null +++ b/tools/lrama/lib/lrama/grammar_validator.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +module Lrama + class GrammarValidator + def initialize(grammar, states, logger) + @grammar = grammar + @states = states + @logger = logger + end + + def valid? + conflicts_within_threshold? + end + + private + + def conflicts_within_threshold? + return true unless @grammar.expect + + [sr_conflicts_within_threshold(@grammar.expect), rr_conflicts_within_threshold(0)].all? + end + + def sr_conflicts_within_threshold(expected) + return true if expected == @states.sr_conflicts_count + + @logger.error("shift/reduce conflicts: #{@states.sr_conflicts_count} found, #{expected} expected") + false + end + + def rr_conflicts_within_threshold(expected) + return true if expected == @states.rr_conflicts_count + + @logger.error("reduce/reduce conflicts: #{@states.rr_conflicts_count} found, #{expected} expected") + false + end + end +end diff --git a/tools/lrama/lib/lrama/lexer.rb b/tools/lrama/lib/lrama/lexer.rb new file mode 100644 index 0000000000..c50af82ae4 --- /dev/null +++ b/tools/lrama/lib/lrama/lexer.rb @@ -0,0 +1,191 @@ +# frozen_string_literal: true + +require "strscan" + +require_relative "lexer/grammar_file" +require_relative "lexer/location" +require_relative "lexer/token" + +module Lrama + class Lexer + attr_reader :head_line, :head_column, :line + attr_accessor :status, :end_symbol + + SYMBOLS = ['%{', '%}', '%%', '{', '}', '\[', '\]', '\(', '\)', '\,', ':', '\|', ';'].freeze + PERCENT_TOKENS = %w( + %union + %token + %type + %nterm + %left + %right + %nonassoc + %expect + %define + %require + %printer + %destructor + %lex-param + %parse-param + %initial-action + %precedence + %prec + %error-token + %before-reduce + %after-reduce + %after-shift-error-token + %after-shift + %after-pop-stack + %empty + %code + %rule + %no-stdlib + %inline + %locations + ).freeze + + def initialize(grammar_file) + @grammar_file = grammar_file + @scanner = StringScanner.new(grammar_file.text) + @head_column = @head = @scanner.pos + @head_line = @line = 1 + @status = :initial + @end_symbol = nil + end + + def next_token + case @status + when :initial + lex_token + when :c_declaration + lex_c_code + end + end + + def column + @scanner.pos - @head + end + + def location + Location.new( + grammar_file: @grammar_file, + first_line: @head_line, first_column: @head_column, + last_line: line, last_column: column + ) + end + + def lex_token + until @scanner.eos? do + case + when @scanner.scan(/\n/) + newline + when @scanner.scan(/\s+/) + # noop + when @scanner.scan(/\/\*/) + lex_comment + when @scanner.scan(/\/\/.*(?\n)?/) + newline if @scanner[:newline] + else + break + end + end + + reset_first_position + + case + when @scanner.eos? + return + when @scanner.scan(/#{SYMBOLS.join('|')}/) + return [@scanner.matched, @scanner.matched] + when @scanner.scan(/#{PERCENT_TOKENS.join('|')}/) + return [@scanner.matched, @scanner.matched] + when @scanner.scan(/[\?\+\*]/) + return [@scanner.matched, @scanner.matched] + when @scanner.scan(/<\w+>/) + return [:TAG, Lrama::Lexer::Token::Tag.new(s_value: @scanner.matched, location: location)] + when @scanner.scan(/'.'/) + return [:CHARACTER, Lrama::Lexer::Token::Char.new(s_value: @scanner.matched, location: location)] + when @scanner.scan(/'\\\\'|'\\b'|'\\t'|'\\f'|'\\r'|'\\n'|'\\v'|'\\13'/) + return [:CHARACTER, Lrama::Lexer::Token::Char.new(s_value: @scanner.matched, location: location)] + when @scanner.scan(/".*?"/) + return [:STRING, %Q(#{@scanner.matched})] + when @scanner.scan(/\d+/) + return [:INTEGER, Integer(@scanner.matched)] + when @scanner.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/) + token = Lrama::Lexer::Token::Ident.new(s_value: @scanner.matched, location: location) + type = + if @scanner.check(/\s*(\[\s*[a-zA-Z_.][-a-zA-Z0-9_.]*\s*\])?\s*:/) + :IDENT_COLON + else + :IDENTIFIER + end + return [type, token] + else + raise ParseError, "Unexpected token: #{@scanner.peek(10).chomp}." + end + end + + def lex_c_code + nested = 0 + code = '' + reset_first_position + + until @scanner.eos? do + case + when @scanner.scan(/{/) + code += @scanner.matched + nested += 1 + when @scanner.scan(/}/) + if nested == 0 && @end_symbol == '}' + @scanner.unscan + return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)] + else + code += @scanner.matched + nested -= 1 + end + when @scanner.check(/#{@end_symbol}/) + return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)] + when @scanner.scan(/\n/) + code += @scanner.matched + newline + when @scanner.scan(/".*?"/) + code += %Q(#{@scanner.matched}) + @line += @scanner.matched.count("\n") + when @scanner.scan(/'.*?'/) + code += %Q(#{@scanner.matched}) + when @scanner.scan(/[^\"'\{\}\n]+/) + code += @scanner.matched + when @scanner.scan(/#{Regexp.escape(@end_symbol)}/) + code += @scanner.matched + else + code += @scanner.getch + end + end + raise ParseError, "Unexpected code: #{code}." + end + + private + + def lex_comment + until @scanner.eos? do + case + when @scanner.scan_until(/[\s\S]*?\*\//) + @scanner.matched.count("\n").times { newline } + return + when @scanner.scan_until(/\n/) + newline + end + end + end + + def reset_first_position + @head_line = line + @head_column = column + end + + def newline + @line += 1 + @head = @scanner.pos + end + end +end diff --git a/tools/lrama/lib/lrama/lexer/grammar_file.rb b/tools/lrama/lib/lrama/lexer/grammar_file.rb new file mode 100644 index 0000000000..37e82ff18d --- /dev/null +++ b/tools/lrama/lib/lrama/lexer/grammar_file.rb @@ -0,0 +1,40 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Lexer + class GrammarFile + class Text < String + # @rbs () -> String + def inspect + length <= 50 ? super : "#{self[0..47]}...".inspect + end + end + + attr_reader :path #: String + attr_reader :text #: String + + # @rbs (String path, String text) -> void + def initialize(path, text) + @path = path + @text = Text.new(text).freeze + end + + # @rbs () -> String + def inspect + "<#{self.class}: @path=#{path}, @text=#{text.inspect}>" + end + + # @rbs (GrammarFile other) -> bool + def ==(other) + self.class == other.class && + self.path == other.path + end + + # @rbs () -> Array[String] + def lines + @lines ||= text.split("\n") + end + end + end +end diff --git a/tools/lrama/lib/lrama/lexer/location.rb b/tools/lrama/lib/lrama/lexer/location.rb new file mode 100644 index 0000000000..defdbf8a0b --- /dev/null +++ b/tools/lrama/lib/lrama/lexer/location.rb @@ -0,0 +1,115 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Lexer + class Location + attr_reader :grammar_file #: GrammarFile + attr_reader :first_line #: Integer + attr_reader :first_column #: Integer + attr_reader :last_line #: Integer + attr_reader :last_column #: Integer + + # @rbs (grammar_file: GrammarFile, first_line: Integer, first_column: Integer, last_line: Integer, last_column: Integer) -> void + def initialize(grammar_file:, first_line:, first_column:, last_line:, last_column:) + @grammar_file = grammar_file + @first_line = first_line + @first_column = first_column + @last_line = last_line + @last_column = last_column + end + + # @rbs (Location other) -> bool + def ==(other) + self.class == other.class && + self.grammar_file == other.grammar_file && + self.first_line == other.first_line && + self.first_column == other.first_column && + self.last_line == other.last_line && + self.last_column == other.last_column + end + + # @rbs (Integer left, Integer right) -> Location + def partial_location(left, right) + offset = -first_column + new_first_line = -1 + new_first_column = -1 + new_last_line = -1 + new_last_column = -1 + + _text.each.with_index do |line, index| + new_offset = offset + line.length + 1 + + if offset <= left && left <= new_offset + new_first_line = first_line + index + new_first_column = left - offset + end + + if offset <= right && right <= new_offset + new_last_line = first_line + index + new_last_column = right - offset + end + + offset = new_offset + end + + Location.new( + grammar_file: grammar_file, + first_line: new_first_line, first_column: new_first_column, + last_line: new_last_line, last_column: new_last_column + ) + end + + # @rbs () -> String + def to_s + "#{path} (#{first_line},#{first_column})-(#{last_line},#{last_column})" + end + + # @rbs (String error_message) -> String + def generate_error_message(error_message) + <<~ERROR.chomp + #{path}:#{first_line}:#{first_column}: #{error_message} + #{line_with_carets} + ERROR + end + + # @rbs () -> String + def line_with_carets + <<~TEXT + #{text} + #{carets} + TEXT + end + + private + + # @rbs () -> String + def path + grammar_file.path + end + + # @rbs () -> String + def blanks + (text[0...first_column] or raise "#{first_column} is invalid").gsub(/[^\t]/, ' ') + end + + # @rbs () -> String + def carets + blanks + '^' * (last_column - first_column) + end + + # @rbs () -> String + def text + @text ||= _text.join("\n") + end + + # @rbs () -> Array[String] + def _text + @_text ||=begin + range = (first_line - 1)...last_line + grammar_file.lines[range] or raise "#{range} is invalid" + end + end + end + end +end diff --git a/tools/lrama/lib/lrama/lexer/token.rb b/tools/lrama/lib/lrama/lexer/token.rb new file mode 100644 index 0000000000..63da8be4a4 --- /dev/null +++ b/tools/lrama/lib/lrama/lexer/token.rb @@ -0,0 +1,70 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +require_relative 'token/char' +require_relative 'token/ident' +require_relative 'token/instantiate_rule' +require_relative 'token/tag' +require_relative 'token/user_code' + +module Lrama + class Lexer + class Token + attr_reader :s_value #: String + attr_reader :location #: Location + attr_accessor :alias_name #: String + attr_accessor :referred #: bool + + # @rbs (s_value: String, ?alias_name: String, ?location: Location) -> void + def initialize(s_value:, alias_name: nil, location: nil) + s_value.freeze + @s_value = s_value + @alias_name = alias_name + @location = location + end + + # @rbs () -> String + def to_s + "value: `#{s_value}`, location: #{location}" + end + + # @rbs (String string) -> bool + def referred_by?(string) + [self.s_value, self.alias_name].compact.include?(string) + end + + # @rbs (Token other) -> bool + def ==(other) + self.class == other.class && self.s_value == other.s_value + end + + # @rbs () -> Integer + def first_line + location.first_line + end + alias :line :first_line + + # @rbs () -> Integer + def first_column + location.first_column + end + alias :column :first_column + + # @rbs () -> Integer + def last_line + location.last_line + end + + # @rbs () -> Integer + def last_column + location.last_column + end + + # @rbs (Lrama::Grammar::Reference ref, String message) -> bot + def invalid_ref(ref, message) + location = self.location.partial_location(ref.first_column, ref.last_column) + raise location.generate_error_message(message) + end + end + end +end diff --git a/tools/lrama/lib/lrama/lexer/token/char.rb b/tools/lrama/lib/lrama/lexer/token/char.rb new file mode 100644 index 0000000000..fcab7a588f --- /dev/null +++ b/tools/lrama/lib/lrama/lexer/token/char.rb @@ -0,0 +1,11 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Lexer + class Token + class Char < Token + end + end + end +end diff --git a/tools/lrama/lib/lrama/lexer/token/ident.rb b/tools/lrama/lib/lrama/lexer/token/ident.rb new file mode 100644 index 0000000000..8b1328a040 --- /dev/null +++ b/tools/lrama/lib/lrama/lexer/token/ident.rb @@ -0,0 +1,11 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Lexer + class Token + class Ident < Token + end + end + end +end diff --git a/tools/lrama/lib/lrama/lexer/token/instantiate_rule.rb b/tools/lrama/lib/lrama/lexer/token/instantiate_rule.rb new file mode 100644 index 0000000000..37d412aa83 --- /dev/null +++ b/tools/lrama/lib/lrama/lexer/token/instantiate_rule.rb @@ -0,0 +1,30 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Lexer + class Token + class InstantiateRule < Token + attr_reader :args #: Array[Lexer::Token] + attr_reader :lhs_tag #: Lexer::Token::Tag? + + # @rbs (s_value: String, ?alias_name: String, ?location: Location, ?args: Array[Lexer::Token], ?lhs_tag: Lexer::Token::Tag?) -> void + def initialize(s_value:, alias_name: nil, location: nil, args: [], lhs_tag: nil) + super s_value: s_value, alias_name: alias_name, location: location + @args = args + @lhs_tag = lhs_tag + end + + # @rbs () -> String + def rule_name + s_value + end + + # @rbs () -> Integer + def args_count + args.count + end + end + end + end +end diff --git a/tools/lrama/lib/lrama/lexer/token/tag.rb b/tools/lrama/lib/lrama/lexer/token/tag.rb new file mode 100644 index 0000000000..b346ef7c5c --- /dev/null +++ b/tools/lrama/lib/lrama/lexer/token/tag.rb @@ -0,0 +1,16 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Lexer + class Token + class Tag < Token + # @rbs () -> String + def member + # Omit "<>" + s_value[1..-2] or raise "Unexpected Tag format (#{s_value})" + end + end + end + end +end diff --git a/tools/lrama/lib/lrama/lexer/token/user_code.rb b/tools/lrama/lib/lrama/lexer/token/user_code.rb new file mode 100644 index 0000000000..4ef40e6dc8 --- /dev/null +++ b/tools/lrama/lib/lrama/lexer/token/user_code.rb @@ -0,0 +1,83 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +require "strscan" + +module Lrama + class Lexer + class Token + class UserCode < Token + attr_accessor :tag #: Lexer::Token::Tag + + # @rbs () -> Array[Lrama::Grammar::Reference] + def references + @references ||= _references + end + + private + + # @rbs () -> Array[Lrama::Grammar::Reference] + def _references + scanner = StringScanner.new(s_value) + references = [] #: Array[Grammar::Reference] + + until scanner.eos? do + case + when reference = scan_reference(scanner) + references << reference + when scanner.scan(/\/\*/) + scanner.scan_until(/\*\//) + else + scanner.getch + end + end + + references + end + + # @rbs (StringScanner scanner) -> Lrama::Grammar::Reference? + def scan_reference(scanner) + start = scanner.pos + case + # $ references + # It need to wrap an identifier with brackets to use ".-" for identifiers + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $$ + tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil + return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos) + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $1 + tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil + return Lrama::Grammar::Reference.new(type: :dollar, number: Integer(scanner[2]), index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos) + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $program (named reference without brackets) + tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil + return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos) + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $[expr.right] (named reference with brackets) + tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil + return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos) + + # @ references + # It need to wrap an identifier with brackets to use ".-" for identifiers + when scanner.scan(/@\$/) # @$ + return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos) + when scanner.scan(/@(\d+)/) # @1 + return Lrama::Grammar::Reference.new(type: :at, number: Integer(scanner[1]), index: Integer(scanner[1]), first_column: start, last_column: scanner.pos) + when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets) + return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos) + when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets) + return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos) + + # $: references + when scanner.scan(/\$:\$/) # $:$ + return Lrama::Grammar::Reference.new(type: :index, name: "$", first_column: start, last_column: scanner.pos) + when scanner.scan(/\$:(\d+)/) # $:1 + return Lrama::Grammar::Reference.new(type: :index, number: Integer(scanner[1]), first_column: start, last_column: scanner.pos) + when scanner.scan(/\$:([a-zA-Z_][a-zA-Z0-9_]*)/) # $:foo, $:expr (named reference without brackets) + return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos) + when scanner.scan(/\$:\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $:[expr.right], $:[expr-right] (named reference with brackets) + return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos) + + end + end + end + end + end +end diff --git a/tools/lrama/lib/lrama/logger.rb b/tools/lrama/lib/lrama/logger.rb new file mode 100644 index 0000000000..88bb920960 --- /dev/null +++ b/tools/lrama/lib/lrama/logger.rb @@ -0,0 +1,21 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Logger + # @rbs (IO out) -> void + def initialize(out = STDERR) + @out = out + end + + # @rbs (String message) -> void + def warn(message) + @out << message << "\n" + end + + # @rbs (String message) -> void + def error(message) + @out << message << "\n" + end + end +end diff --git a/tools/lrama/lib/lrama/option_parser.rb b/tools/lrama/lib/lrama/option_parser.rb new file mode 100644 index 0000000000..23988a5fbb --- /dev/null +++ b/tools/lrama/lib/lrama/option_parser.rb @@ -0,0 +1,169 @@ +# frozen_string_literal: true + +require 'optparse' + +module Lrama + # Handle option parsing for the command line interface. + class OptionParser + def initialize + @options = Options.new + @trace = [] + @report = [] + end + + def parse(argv) + parse_by_option_parser(argv) + + @options.trace_opts = validate_trace(@trace) + @options.report_opts = validate_report(@report) + @options.grammar_file = argv.shift + + unless @options.grammar_file + abort "File should be specified\n" + end + + if @options.grammar_file == '-' + @options.grammar_file = argv.shift or abort "File name for STDIN should be specified\n" + else + @options.y = File.open(@options.grammar_file, 'r') + end + + if !@report.empty? && @options.report_file.nil? && @options.grammar_file + @options.report_file = File.dirname(@options.grammar_file) + "/" + File.basename(@options.grammar_file, ".*") + ".output" + end + + if !@options.header_file && @options.header + case + when @options.outfile + @options.header_file = File.dirname(@options.outfile) + "/" + File.basename(@options.outfile, ".*") + ".h" + when @options.grammar_file + @options.header_file = File.dirname(@options.grammar_file) + "/" + File.basename(@options.grammar_file, ".*") + ".h" + end + end + + @options + end + + private + + def parse_by_option_parser(argv) + ::OptionParser.new do |o| + o.banner = <<~BANNER + Lrama is LALR (1) parser generator written by Ruby. + + Usage: lrama [options] FILE + BANNER + o.separator '' + o.separator 'STDIN mode:' + o.separator 'lrama [options] - FILE read grammar from STDIN' + o.separator '' + o.separator 'Tuning the Parser:' + o.on('-S', '--skeleton=FILE', 'specify the skeleton to use') {|v| @options.skeleton = v } + o.on('-t', '--debug', 'display debugging outputs of internal parser') {|v| @options.debug = true } + o.on('-D', '--define=NAME[=VALUE]', Array, "similar to '%define NAME VALUE'") {|v| @options.define = v } + o.separator '' + o.separator 'Output:' + o.on('-H', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v } + o.on('-d', 'also produce a header file') { @options.header = true } + o.on('-r', '--report=REPORTS', Array, 'also produce details on the automaton') {|v| @report = v } + o.on_tail '' + o.on_tail 'REPORTS is a list of comma-separated words that can include:' + o.on_tail ' states describe the states' + o.on_tail ' itemsets complete the core item sets with their closure' + o.on_tail ' lookaheads explicitly associate lookahead tokens to items' + o.on_tail ' solved describe shift/reduce conflicts solving' + o.on_tail ' counterexamples, cex generate conflict counterexamples' + o.on_tail ' rules list unused rules' + o.on_tail ' terms list unused terminals' + o.on_tail ' verbose report detailed internal state and analysis results' + o.on_tail ' all include all the above reports' + o.on_tail ' none disable all reports' + o.on('--report-file=FILE', 'also produce details on the automaton output to a file named FILE') {|v| @options.report_file = v } + o.on('-o', '--output=FILE', 'leave output to FILE') {|v| @options.outfile = v } + o.on('--trace=TRACES', Array, 'also output trace logs at runtime') {|v| @trace = v } + o.on_tail '' + o.on_tail 'TRACES is a list of comma-separated words that can include:' + o.on_tail ' automaton display states' + o.on_tail ' closure display states' + o.on_tail ' rules display grammar rules' + o.on_tail ' only-explicit-rules display only explicit grammar rules' + o.on_tail ' actions display grammar rules with actions' + o.on_tail ' time display generation time' + o.on_tail ' all include all the above traces' + o.on_tail ' none disable all traces' + o.on('-v', '--verbose', "same as '--report=state'") {|_v| @report << 'states' } + o.separator '' + o.separator 'Diagnostics:' + o.on('-W', '--warnings', 'report the warnings') {|v| @options.diagnostic = true } + o.separator '' + o.separator 'Error Recovery:' + o.on('-e', 'enable error recovery') {|v| @options.error_recovery = true } + o.separator '' + o.separator 'Other options:' + o.on('-V', '--version', "output version information and exit") {|v| puts "lrama #{Lrama::VERSION}"; exit 0 } + o.on('-h', '--help', "display this help and exit") {|v| puts o; exit 0 } + o.on_tail + o.parse!(argv) + end + end + + ALIASED_REPORTS = { cex: :counterexamples }.freeze + VALID_REPORTS = %i[states itemsets lookaheads solved counterexamples rules terms verbose].freeze + + def validate_report(report) + h = { grammar: true } + return h if report.empty? + return {} if report == ['none'] + if report == ['all'] + VALID_REPORTS.each { |r| h[r] = true } + return h + end + + report.each do |r| + aliased = aliased_report_option(r) + if VALID_REPORTS.include?(aliased) + h[aliased] = true + else + raise "Invalid report option \"#{r}\"." + end + end + + return h + end + + def aliased_report_option(opt) + (ALIASED_REPORTS[opt.to_sym] || opt).to_sym + end + + VALID_TRACES = %w[ + locations scan parse automaton bitsets closure + grammar rules only-explicit-rules actions resource + sets muscles tools m4-early m4 skeleton time ielr cex + ].freeze + NOT_SUPPORTED_TRACES = %w[ + locations scan parse bitsets grammar resource + sets muscles tools m4-early m4 skeleton ielr cex + ].freeze + SUPPORTED_TRACES = VALID_TRACES - NOT_SUPPORTED_TRACES + + def validate_trace(trace) + h = {} + return h if trace.empty? || trace == ['none'] + all_traces = SUPPORTED_TRACES - %w[only-explicit-rules] + if trace == ['all'] + all_traces.each { |t| h[t.gsub(/-/, '_').to_sym] = true } + return h + end + + trace.each do |t| + if SUPPORTED_TRACES.include?(t) + h[t.gsub(/-/, '_').to_sym] = true + else + raise "Invalid trace option \"#{t}\"." + end + end + + return h + end + end +end diff --git a/tools/lrama/lib/lrama/options.rb b/tools/lrama/lib/lrama/options.rb new file mode 100644 index 0000000000..08f75a770f --- /dev/null +++ b/tools/lrama/lib/lrama/options.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +module Lrama + # Command line options. + class Options + attr_accessor :skeleton, :header, :header_file, + :report_file, :outfile, + :error_recovery, :grammar_file, + :trace_opts, :report_opts, + :diagnostic, :y, :debug, :define + + def initialize + @skeleton = "bison/yacc.c" + @define = {} + @header = false + @header_file = nil + @report_file = nil + @outfile = "y.tab.c" + @error_recovery = false + @grammar_file = nil + @trace_opts = nil + @report_opts = nil + @diagnostic = false + @y = STDIN + @debug = false + end + end +end diff --git a/tools/lrama/lib/lrama/output.rb b/tools/lrama/lib/lrama/output.rb new file mode 100644 index 0000000000..3c7316ac6d --- /dev/null +++ b/tools/lrama/lib/lrama/output.rb @@ -0,0 +1,459 @@ +# frozen_string_literal: true + +require "erb" +require "forwardable" +require_relative "report/duration" + +module Lrama + class Output + extend Forwardable + include Report::Duration + + attr_reader :grammar_file_path, :context, :grammar, :error_recovery, :include_header + + def_delegators "@context", :yyfinal, :yylast, :yyntokens, :yynnts, :yynrules, :yynstates, + :yymaxutok, :yypact_ninf, :yytable_ninf + + def_delegators "@grammar", :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol + + def initialize( + out:, output_file_path:, template_name:, grammar_file_path:, + context:, grammar:, header_out: nil, header_file_path: nil, error_recovery: false + ) + @out = out + @output_file_path = output_file_path + @template_name = template_name + @grammar_file_path = grammar_file_path + @header_out = header_out + @header_file_path = header_file_path + @context = context + @grammar = grammar + @error_recovery = error_recovery + @include_header = header_file_path ? header_file_path.sub("./", "") : nil + end + + if ERB.instance_method(:initialize).parameters.last.first == :key + def self.erb(input) + ERB.new(input, trim_mode: '-') + end + else + def self.erb(input) + ERB.new(input, nil, '-') + end + end + + def render_partial(file) + render_template(partial_file(file)) + end + + def render + report_duration(:render) do + tmp = eval_template(template_file, @output_file_path) + @out << tmp + + if @header_file_path + tmp = eval_template(header_template_file, @header_file_path) + + if @header_out + @header_out << tmp + else + File.write(@header_file_path, tmp) + end + end + end + end + + # A part of b4_token_enums + def token_enums + @context.yytokentype.map do |s_value, token_id, display_name| + s = sprintf("%s = %d%s", s_value, token_id, token_id == yymaxutok ? "" : ",") + + if display_name + sprintf(" %-30s /* %s */\n", s, display_name) + else + sprintf(" %s\n", s) + end + end.join + end + + # b4_symbol_enum + def symbol_enum + last_sym_number = @context.yysymbol_kind_t.last[1] + @context.yysymbol_kind_t.map do |s_value, sym_number, display_name| + s = sprintf("%s = %d%s", s_value, sym_number, (sym_number == last_sym_number) ? "" : ",") + + if display_name + sprintf(" %-40s /* %s */\n", s, display_name) + else + sprintf(" %s\n", s) + end + end.join + end + + def yytranslate + int_array_to_string(@context.yytranslate) + end + + def yytranslate_inverted + int_array_to_string(@context.yytranslate_inverted) + end + + def yyrline + int_array_to_string(@context.yyrline) + end + + def yytname + string_array_to_string(@context.yytname) + " YY_NULLPTR" + end + + # b4_int_type_for + def int_type_for(ary) + min = ary.min + max = ary.max + + case + when (-127 <= min && min <= 127) && (-127 <= max && max <= 127) + "yytype_int8" + when (0 <= min && min <= 255) && (0 <= max && max <= 255) + "yytype_uint8" + when (-32767 <= min && min <= 32767) && (-32767 <= max && max <= 32767) + "yytype_int16" + when (0 <= min && min <= 65535) && (0 <= max && max <= 65535) + "yytype_uint16" + else + "int" + end + end + + def symbol_actions_for_printer + @grammar.symbols.map do |sym| + next unless sym.printer + + <<-STR + case #{sym.enum_name}: /* #{sym.comment} */ +#line #{sym.printer.lineno} "#{@grammar_file_path}" + {#{sym.printer.translated_code(sym.tag)}} +#line [@oline@] [@ofile@] + break; + + STR + end.join + end + + def symbol_actions_for_destructor + @grammar.symbols.map do |sym| + next unless sym.destructor + + <<-STR + case #{sym.enum_name}: /* #{sym.comment} */ +#line #{sym.destructor.lineno} "#{@grammar_file_path}" + {#{sym.destructor.translated_code(sym.tag)}} +#line [@oline@] [@ofile@] + break; + + STR + end.join + end + + # b4_user_initial_action + def user_initial_action(comment = "") + return "" unless @grammar.initial_action + + <<-STR + #{comment} +#line #{@grammar.initial_action.line} "#{@grammar_file_path}" + {#{@grammar.initial_action.translated_code}} + STR + end + + def after_shift_function(comment = "") + return "" unless @grammar.after_shift + + <<-STR + #{comment} +#line #{@grammar.after_shift.line} "#{@grammar_file_path}" + {#{@grammar.after_shift.s_value}(#{parse_param_name});} +#line [@oline@] [@ofile@] + STR + end + + def before_reduce_function(comment = "") + return "" unless @grammar.before_reduce + + <<-STR + #{comment} +#line #{@grammar.before_reduce.line} "#{@grammar_file_path}" + {#{@grammar.before_reduce.s_value}(yylen#{user_args});} +#line [@oline@] [@ofile@] + STR + end + + def after_reduce_function(comment = "") + return "" unless @grammar.after_reduce + + <<-STR + #{comment} +#line #{@grammar.after_reduce.line} "#{@grammar_file_path}" + {#{@grammar.after_reduce.s_value}(yylen#{user_args});} +#line [@oline@] [@ofile@] + STR + end + + def after_shift_error_token_function(comment = "") + return "" unless @grammar.after_shift_error_token + + <<-STR + #{comment} +#line #{@grammar.after_shift_error_token.line} "#{@grammar_file_path}" + {#{@grammar.after_shift_error_token.s_value}(#{parse_param_name});} +#line [@oline@] [@ofile@] + STR + end + + def after_pop_stack_function(len, comment = "") + return "" unless @grammar.after_pop_stack + + <<-STR + #{comment} +#line #{@grammar.after_pop_stack.line} "#{@grammar_file_path}" + {#{@grammar.after_pop_stack.s_value}(#{len}#{user_args});} +#line [@oline@] [@ofile@] + STR + end + + def symbol_actions_for_error_token + @grammar.symbols.map do |sym| + next unless sym.error_token + + <<-STR + case #{sym.enum_name}: /* #{sym.comment} */ +#line #{sym.error_token.lineno} "#{@grammar_file_path}" + {#{sym.error_token.translated_code(sym.tag)}} +#line [@oline@] [@ofile@] + break; + + STR + end.join + end + + # b4_user_actions + def user_actions + action = @context.states.rules.map do |rule| + next unless rule.token_code + + code = rule.token_code + spaces = " " * (code.column - 1) + + <<-STR + case #{rule.id + 1}: /* #{rule.as_comment} */ +#line #{code.line} "#{@grammar_file_path}" +#{spaces}{#{rule.translated_code}} +#line [@oline@] [@ofile@] + break; + + STR + end.join + + action + <<-STR + +#line [@oline@] [@ofile@] + STR + end + + def omit_blanks(param) + param.strip + end + + # b4_parse_param + def parse_param + if @grammar.parse_param + omit_blanks(@grammar.parse_param) + else + "" + end + end + + def lex_param + if @grammar.lex_param + omit_blanks(@grammar.lex_param) + else + "" + end + end + + # b4_user_formals + def user_formals + if @grammar.parse_param + ", #{parse_param}" + else + "" + end + end + + # b4_user_args + def user_args + if @grammar.parse_param + ", #{parse_param_name}" + else + "" + end + end + + def extract_param_name(param) + param[/\b([a-zA-Z0-9_]+)(?=\s*\z)/] + end + + def parse_param_name + if @grammar.parse_param + extract_param_name(parse_param) + else + "" + end + end + + def lex_param_name + if @grammar.lex_param + extract_param_name(lex_param) + else + "" + end + end + + # b4_parse_param_use + def parse_param_use(val, loc) + str = <<-STR.dup + YY_USE (#{val}); + YY_USE (#{loc}); + STR + + if @grammar.parse_param + str << " YY_USE (#{parse_param_name});" + end + + str + end + + # b4_yylex_formals + def yylex_formals + ary = ["&yylval"] + ary << "&yylloc" if @grammar.locations + + if @grammar.lex_param + ary << lex_param_name + end + + "(#{ary.join(', ')})" + end + + # b4_table_value_equals + def table_value_equals(table, value, literal, symbol) + if literal < table.min || table.max < literal + "0" + else + "((#{value}) == #{symbol})" + end + end + + # b4_yyerror_args + def yyerror_args + ary = ["&yylloc"] + + if @grammar.parse_param + ary << parse_param_name + end + + "#{ary.join(', ')}" + end + + def template_basename + File.basename(template_file) + end + + def aux + @grammar.aux + end + + def int_array_to_string(ary) + last = ary.count - 1 + + ary.each_with_index.each_slice(10).map do |slice| + " " + slice.map { |e, i| sprintf("%6d%s", e, (i == last) ? "" : ",") }.join + end.join("\n") + end + + def spec_mapped_header_file + @header_file_path + end + + def b4_cpp_guard__b4_spec_mapped_header_file + if @header_file_path + "YY_YY_" + @header_file_path.gsub(/[^a-zA-Z_0-9]+/, "_").upcase + "_INCLUDED" + else + "" + end + end + + # b4_percent_code_get + def percent_code(name) + @grammar.percent_codes.select do |percent_code| + percent_code.name == name + end.map do |percent_code| + percent_code.code + end.join + end + + private + + def eval_template(file, path) + tmp = render_template(file) + replace_special_variables(tmp, path) + end + + def render_template(file) + erb = self.class.erb(File.read(file)) + erb.filename = file + erb.result_with_hash(context: @context, output: self) + end + + def template_file + File.join(template_dir, @template_name) + end + + def header_template_file + File.join(template_dir, "bison/yacc.h") + end + + def partial_file(file) + File.join(template_dir, file) + end + + def template_dir + File.expand_path('../../template', __dir__) + end + + def string_array_to_string(ary) + result = "" + tmp = " " + + ary.each do |s| + replaced = s.gsub('\\', '\\\\\\\\').gsub('"', '\\"') + if (tmp + replaced + " \"\",").length > 75 + result = "#{result}#{tmp}\n" + tmp = " \"#{replaced}\"," + else + tmp = "#{tmp} \"#{replaced}\"," + end + end + + result + tmp + end + + def replace_special_variables(str, ofile) + str.each_line.with_index(1).map do |line, i| + line.gsub!("[@oline@]", (i + 1).to_s) + line.gsub!("[@ofile@]", "\"#{ofile}\"") + line + end.join + end + end +end diff --git a/tools/lrama/lib/lrama/parser.rb b/tools/lrama/lib/lrama/parser.rb new file mode 100644 index 0000000000..177e784e5c --- /dev/null +++ b/tools/lrama/lib/lrama/parser.rb @@ -0,0 +1,2144 @@ +# +# DO NOT MODIFY!!!! +# This file is automatically generated by Racc 1.8.1 +# from Racc grammar file "parser.y". +# + +###### racc/parser.rb begin +unless $".find {|p| p.end_with?('/racc/parser.rb')} +$".push "#{__dir__}/racc/parser.rb" +self.class.module_eval(<<'...end racc/parser.rb/module_eval...', 'racc/parser.rb', 1) +#-- +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the same terms of ruby. +# +# As a special exception, when this code is copied by Racc +# into a Racc output file, you may use that output file +# without restriction. +#++ + +unless $".find {|p| p.end_with?('/racc/info.rb')} +$".push "#{__dir__}/racc/info.rb" + +module Racc + VERSION = '1.8.1' + Version = VERSION + Copyright = 'Copyright (c) 1999-2006 Minero Aoki' +end + +end + + +module Racc + class ParseError < StandardError; end +end +unless defined?(::ParseError) + ParseError = Racc::ParseError # :nodoc: +end + +# Racc is an LALR(1) parser generator. +# It is written in Ruby itself, and generates Ruby programs. +# +# == Command-line Reference +# +# racc [-ofilename] [--output-file=filename] +# [-erubypath] [--executable=rubypath] +# [-v] [--verbose] +# [-Ofilename] [--log-file=filename] +# [-g] [--debug] +# [-E] [--embedded] +# [-l] [--no-line-convert] +# [-c] [--line-convert-all] +# [-a] [--no-omit-actions] +# [-C] [--check-only] +# [-S] [--output-status] +# [--version] [--copyright] [--help] grammarfile +# +# [+grammarfile+] +# Racc grammar file. Any extension is permitted. +# [-o+outfile+, --output-file=+outfile+] +# A filename for output. default is <+filename+>.tab.rb +# [-O+filename+, --log-file=+filename+] +# Place logging output in file +filename+. +# Default log file name is <+filename+>.output. +# [-e+rubypath+, --executable=+rubypath+] +# output executable file(mode 755). where +path+ is the Ruby interpreter. +# [-v, --verbose] +# verbose mode. create +filename+.output file, like yacc's y.output file. +# [-g, --debug] +# add debug code to parser class. To display debugging information, +# use this '-g' option and set @yydebug true in parser class. +# [-E, --embedded] +# Output parser which doesn't need runtime files (racc/parser.rb). +# [-F, --frozen] +# Output parser which declares frozen_string_literals: true +# [-C, --check-only] +# Check syntax of racc grammar file and quit. +# [-S, --output-status] +# Print messages time to time while compiling. +# [-l, --no-line-convert] +# turns off line number converting. +# [-c, --line-convert-all] +# Convert line number of actions, inner, header and footer. +# [-a, --no-omit-actions] +# Call all actions, even if an action is empty. +# [--version] +# print Racc version and quit. +# [--copyright] +# Print copyright and quit. +# [--help] +# Print usage and quit. +# +# == Generating Parser Using Racc +# +# To compile Racc grammar file, simply type: +# +# $ racc parse.y +# +# This creates Ruby script file "parse.tab.y". The -o option can change the output filename. +# +# == Writing A Racc Grammar File +# +# If you want your own parser, you have to write a grammar file. +# A grammar file contains the name of your parser class, grammar for the parser, +# user code, and anything else. +# When writing a grammar file, yacc's knowledge is helpful. +# If you have not used yacc before, Racc is not too difficult. +# +# Here's an example Racc grammar file. +# +# class Calcparser +# rule +# target: exp { print val[0] } +# +# exp: exp '+' exp +# | exp '*' exp +# | '(' exp ')' +# | NUMBER +# end +# +# Racc grammar files resemble yacc files. +# But (of course), this is Ruby code. +# yacc's $$ is the 'result', $0, $1... is +# an array called 'val', and $-1, $-2... is an array called '_values'. +# +# See the {Grammar File Reference}[rdoc-ref:lib/racc/rdoc/grammar.en.rdoc] for +# more information on grammar files. +# +# == Parser +# +# Then you must prepare the parse entry method. There are two types of +# parse methods in Racc, Racc::Parser#do_parse and Racc::Parser#yyparse +# +# Racc::Parser#do_parse is simple. +# +# It's yyparse() of yacc, and Racc::Parser#next_token is yylex(). +# This method must returns an array like [TOKENSYMBOL, ITS_VALUE]. +# EOF is [false, false]. +# (TOKENSYMBOL is a Ruby symbol (taken from String#intern) by default. +# If you want to change this, see the grammar reference. +# +# Racc::Parser#yyparse is little complicated, but useful. +# It does not use Racc::Parser#next_token, instead it gets tokens from any iterator. +# +# For example, yyparse(obj, :scan) causes +# calling +obj#scan+, and you can return tokens by yielding them from +obj#scan+. +# +# == Debugging +# +# When debugging, "-v" or/and the "-g" option is helpful. +# +# "-v" creates verbose log file (.output). +# "-g" creates a "Verbose Parser". +# Verbose Parser prints the internal status when parsing. +# But it's _not_ automatic. +# You must use -g option and set +@yydebug+ to +true+ in order to get output. +# -g option only creates the verbose parser. +# +# === Racc reported syntax error. +# +# Isn't there too many "end"? +# grammar of racc file is changed in v0.10. +# +# Racc does not use '%' mark, while yacc uses huge number of '%' marks.. +# +# === Racc reported "XXXX conflicts". +# +# Try "racc -v xxxx.y". +# It causes producing racc's internal log file, xxxx.output. +# +# === Generated parsers does not work correctly +# +# Try "racc -g xxxx.y". +# This command let racc generate "debugging parser". +# Then set @yydebug=true in your parser. +# It produces a working log of your parser. +# +# == Re-distributing Racc runtime +# +# A parser, which is created by Racc, requires the Racc runtime module; +# racc/parser.rb. +# +# Ruby 1.8.x comes with Racc runtime module, +# you need NOT distribute Racc runtime files. +# +# If you want to include the Racc runtime module with your parser. +# This can be done by using '-E' option: +# +# $ racc -E -omyparser.rb myparser.y +# +# This command creates myparser.rb which `includes' Racc runtime. +# Only you must do is to distribute your parser file (myparser.rb). +# +# Note: parser.rb is ruby license, but your parser is not. +# Your own parser is completely yours. +module Racc + + unless defined?(Racc_No_Extensions) + Racc_No_Extensions = false # :nodoc: + end + + class Parser + + Racc_Runtime_Version = ::Racc::VERSION + Racc_Runtime_Core_Version_R = ::Racc::VERSION + + begin + if Object.const_defined?(:RUBY_ENGINE) and RUBY_ENGINE == 'jruby' + require 'jruby' + require 'racc/cparse-jruby.jar' + com.headius.racc.Cparse.new.load(JRuby.runtime, false) + else + require 'racc/cparse' + end + + unless new.respond_to?(:_racc_do_parse_c, true) + raise LoadError, 'old cparse.so' + end + if Racc_No_Extensions + raise LoadError, 'selecting ruby version of racc runtime core' + end + + Racc_Main_Parsing_Routine = :_racc_do_parse_c # :nodoc: + Racc_YY_Parse_Method = :_racc_yyparse_c # :nodoc: + Racc_Runtime_Core_Version = Racc_Runtime_Core_Version_C # :nodoc: + Racc_Runtime_Type = 'c' # :nodoc: + rescue LoadError + Racc_Main_Parsing_Routine = :_racc_do_parse_rb + Racc_YY_Parse_Method = :_racc_yyparse_rb + Racc_Runtime_Core_Version = Racc_Runtime_Core_Version_R + Racc_Runtime_Type = 'ruby' + end + + def Parser.racc_runtime_type # :nodoc: + Racc_Runtime_Type + end + + def _racc_setup + @yydebug = false unless self.class::Racc_debug_parser + @yydebug = false unless defined?(@yydebug) + if @yydebug + @racc_debug_out = $stderr unless defined?(@racc_debug_out) + @racc_debug_out ||= $stderr + end + arg = self.class::Racc_arg + arg[13] = true if arg.size < 14 + arg + end + + def _racc_init_sysvars + @racc_state = [0] + @racc_tstack = [] + @racc_vstack = [] + + @racc_t = nil + @racc_val = nil + + @racc_read_next = true + + @racc_user_yyerror = false + @racc_error_status = 0 + end + + # The entry point of the parser. This method is used with #next_token. + # If Racc wants to get token (and its value), calls next_token. + # + # Example: + # def parse + # @q = [[1,1], + # [2,2], + # [3,3], + # [false, '$']] + # do_parse + # end + # + # def next_token + # @q.shift + # end + class_eval <<~RUBY, __FILE__, __LINE__ + 1 + def do_parse + #{Racc_Main_Parsing_Routine}(_racc_setup(), false) + end + RUBY + + # The method to fetch next token. + # If you use #do_parse method, you must implement #next_token. + # + # The format of return value is [TOKEN_SYMBOL, VALUE]. + # +token-symbol+ is represented by Ruby's symbol by default, e.g. :IDENT + # for 'IDENT'. ";" (String) for ';'. + # + # The final symbol (End of file) must be false. + def next_token + raise NotImplementedError, "#{self.class}\#next_token is not defined" + end + + def _racc_do_parse_rb(arg, in_debug) + action_table, action_check, action_default, action_pointer, + _, _, _, _, + _, _, token_table, * = arg + + _racc_init_sysvars + tok = act = i = nil + + catch(:racc_end_parse) { + while true + if i = action_pointer[@racc_state[-1]] + if @racc_read_next + if @racc_t != 0 # not EOF + tok, @racc_val = next_token() + unless tok # EOF + @racc_t = 0 + else + @racc_t = (token_table[tok] or 1) # error token + end + racc_read_token(@racc_t, tok, @racc_val) if @yydebug + @racc_read_next = false + end + end + i += @racc_t + unless i >= 0 and + act = action_table[i] and + action_check[i] == @racc_state[-1] + act = action_default[@racc_state[-1]] + end + else + act = action_default[@racc_state[-1]] + end + while act = _racc_evalact(act, arg) + ; + end + end + } + end + + # Another entry point for the parser. + # If you use this method, you must implement RECEIVER#METHOD_ID method. + # + # RECEIVER#METHOD_ID is a method to get next token. + # It must 'yield' the token, which format is [TOKEN-SYMBOL, VALUE]. + class_eval <<~RUBY, __FILE__, __LINE__ + 1 + def yyparse(recv, mid) + #{Racc_YY_Parse_Method}(recv, mid, _racc_setup(), false) + end + RUBY + + def _racc_yyparse_rb(recv, mid, arg, c_debug) + action_table, action_check, action_default, action_pointer, + _, _, _, _, + _, _, token_table, * = arg + + _racc_init_sysvars + + catch(:racc_end_parse) { + until i = action_pointer[@racc_state[-1]] + while act = _racc_evalact(action_default[@racc_state[-1]], arg) + ; + end + end + recv.__send__(mid) do |tok, val| + unless tok + @racc_t = 0 + else + @racc_t = (token_table[tok] or 1) # error token + end + @racc_val = val + @racc_read_next = false + + i += @racc_t + unless i >= 0 and + act = action_table[i] and + action_check[i] == @racc_state[-1] + act = action_default[@racc_state[-1]] + end + while act = _racc_evalact(act, arg) + ; + end + + while !(i = action_pointer[@racc_state[-1]]) || + ! @racc_read_next || + @racc_t == 0 # $ + unless i and i += @racc_t and + i >= 0 and + act = action_table[i] and + action_check[i] == @racc_state[-1] + act = action_default[@racc_state[-1]] + end + while act = _racc_evalact(act, arg) + ; + end + end + end + } + end + + ### + ### common + ### + + def _racc_evalact(act, arg) + action_table, action_check, _, action_pointer, + _, _, _, _, + _, _, _, shift_n, + reduce_n, * = arg + nerr = 0 # tmp + + if act > 0 and act < shift_n + # + # shift + # + if @racc_error_status > 0 + @racc_error_status -= 1 unless @racc_t <= 1 # error token or EOF + end + @racc_vstack.push @racc_val + @racc_state.push act + @racc_read_next = true + if @yydebug + @racc_tstack.push @racc_t + racc_shift @racc_t, @racc_tstack, @racc_vstack + end + + elsif act < 0 and act > -reduce_n + # + # reduce + # + code = catch(:racc_jump) { + @racc_state.push _racc_do_reduce(arg, act) + false + } + if code + case code + when 1 # yyerror + @racc_user_yyerror = true # user_yyerror + return -reduce_n + when 2 # yyaccept + return shift_n + else + raise '[Racc Bug] unknown jump code' + end + end + + elsif act == shift_n + # + # accept + # + racc_accept if @yydebug + throw :racc_end_parse, @racc_vstack[0] + + elsif act == -reduce_n + # + # error + # + case @racc_error_status + when 0 + unless arg[21] # user_yyerror + nerr += 1 + on_error @racc_t, @racc_val, @racc_vstack + end + when 3 + if @racc_t == 0 # is $ + # We're at EOF, and another error occurred immediately after + # attempting auto-recovery + throw :racc_end_parse, nil + end + @racc_read_next = true + end + @racc_user_yyerror = false + @racc_error_status = 3 + while true + if i = action_pointer[@racc_state[-1]] + i += 1 # error token + if i >= 0 and + (act = action_table[i]) and + action_check[i] == @racc_state[-1] + break + end + end + throw :racc_end_parse, nil if @racc_state.size <= 1 + @racc_state.pop + @racc_vstack.pop + if @yydebug + @racc_tstack.pop + racc_e_pop @racc_state, @racc_tstack, @racc_vstack + end + end + return act + + else + raise "[Racc Bug] unknown action #{act.inspect}" + end + + racc_next_state(@racc_state[-1], @racc_state) if @yydebug + + nil + end + + def _racc_do_reduce(arg, act) + _, _, _, _, + goto_table, goto_check, goto_default, goto_pointer, + nt_base, reduce_table, _, _, + _, use_result, * = arg + + state = @racc_state + vstack = @racc_vstack + tstack = @racc_tstack + + i = act * -3 + len = reduce_table[i] + reduce_to = reduce_table[i+1] + method_id = reduce_table[i+2] + void_array = [] + + tmp_t = tstack[-len, len] if @yydebug + tmp_v = vstack[-len, len] + tstack[-len, len] = void_array if @yydebug + vstack[-len, len] = void_array + state[-len, len] = void_array + + # tstack must be updated AFTER method call + if use_result + vstack.push __send__(method_id, tmp_v, vstack, tmp_v[0]) + else + vstack.push __send__(method_id, tmp_v, vstack) + end + tstack.push reduce_to + + racc_reduce(tmp_t, reduce_to, tstack, vstack) if @yydebug + + k1 = reduce_to - nt_base + if i = goto_pointer[k1] + i += state[-1] + if i >= 0 and (curstate = goto_table[i]) and goto_check[i] == k1 + return curstate + end + end + goto_default[k1] + end + + # This method is called when a parse error is found. + # + # ERROR_TOKEN_ID is an internal ID of token which caused error. + # You can get string representation of this ID by calling + # #token_to_str. + # + # ERROR_VALUE is a value of error token. + # + # value_stack is a stack of symbol values. + # DO NOT MODIFY this object. + # + # This method raises ParseError by default. + # + # If this method returns, parsers enter "error recovering mode". + def on_error(t, val, vstack) + raise ParseError, sprintf("parse error on value %s (%s)", + val.inspect, token_to_str(t) || '?') + end + + # Enter error recovering mode. + # This method does not call #on_error. + def yyerror + throw :racc_jump, 1 + end + + # Exit parser. + # Return value is +Symbol_Value_Stack[0]+. + def yyaccept + throw :racc_jump, 2 + end + + # Leave error recovering mode. + def yyerrok + @racc_error_status = 0 + end + + # For debugging output + def racc_read_token(t, tok, val) + @racc_debug_out.print 'read ' + @racc_debug_out.print tok.inspect, '(', racc_token2str(t), ') ' + @racc_debug_out.puts val.inspect + @racc_debug_out.puts + end + + def racc_shift(tok, tstack, vstack) + @racc_debug_out.puts "shift #{racc_token2str tok}" + racc_print_stacks tstack, vstack + @racc_debug_out.puts + end + + def racc_reduce(toks, sim, tstack, vstack) + out = @racc_debug_out + out.print 'reduce ' + if toks.empty? + out.print ' ' + else + toks.each {|t| out.print ' ', racc_token2str(t) } + end + out.puts " --> #{racc_token2str(sim)}" + racc_print_stacks tstack, vstack + @racc_debug_out.puts + end + + def racc_accept + @racc_debug_out.puts 'accept' + @racc_debug_out.puts + end + + def racc_e_pop(state, tstack, vstack) + @racc_debug_out.puts 'error recovering mode: pop token' + racc_print_states state + racc_print_stacks tstack, vstack + @racc_debug_out.puts + end + + def racc_next_state(curstate, state) + @racc_debug_out.puts "goto #{curstate}" + racc_print_states state + @racc_debug_out.puts + end + + def racc_print_stacks(t, v) + out = @racc_debug_out + out.print ' [' + t.each_index do |i| + out.print ' (', racc_token2str(t[i]), ' ', v[i].inspect, ')' + end + out.puts ' ]' + end + + def racc_print_states(s) + out = @racc_debug_out + out.print ' [' + s.each {|st| out.print ' ', st } + out.puts ' ]' + end + + def racc_token2str(tok) + self.class::Racc_token_to_s_table[tok] or + raise "[Racc Bug] can't convert token #{tok} to string" + end + + # Convert internal ID of token symbol to the string. + def token_to_str(t) + self.class::Racc_token_to_s_table[t] + end + + end + +end + +...end racc/parser.rb/module_eval... +end +###### racc/parser.rb end +module Lrama + class Parser < Racc::Parser + +module_eval(<<'...end parser.y/module_eval...', 'parser.y', 428) + +include Lrama::Report::Duration + +def initialize(text, path, debug = false, define = {}) + @grammar_file = Lrama::Lexer::GrammarFile.new(path, text) + @yydebug = debug + @rule_counter = Lrama::Grammar::Counter.new(0) + @midrule_action_counter = Lrama::Grammar::Counter.new(1) + @define = define +end + +def parse + report_duration(:parse) do + @lexer = Lrama::Lexer.new(@grammar_file) + @grammar = Lrama::Grammar.new(@rule_counter, @define) + @precedence_number = 0 + reset_precs + do_parse + @grammar + end +end + +def next_token + @lexer.next_token +end + +def on_error(error_token_id, error_value, value_stack) + if error_value.is_a?(Lrama::Lexer::Token) + location = error_value.location + value = "'#{error_value.s_value}'" + else + location = @lexer.location + value = error_value.inspect + end + + error_message = "parse error on value #{value} (#{token_to_str(error_token_id) || '?'})" + + raise_parse_error(error_message, location) +end + +def on_action_error(error_message, error_value) + if error_value.is_a?(Lrama::Lexer::Token) + location = error_value.location + else + location = @lexer.location + end + + raise_parse_error(error_message, location) +end + +private + +def reset_precs + @prec_seen = false + @code_after_prec = false +end + +def begin_c_declaration(end_symbol) + @lexer.status = :c_declaration + @lexer.end_symbol = end_symbol +end + +def end_c_declaration + @lexer.status = :initial + @lexer.end_symbol = nil +end + +def raise_parse_error(error_message, location) + raise ParseError, location.generate_error_message(error_message) +end +...end parser.y/module_eval... +##### State transition tables begin ### + +racc_action_table = [ + 89, 49, 90, 167, 49, 101, 173, 49, 101, 167, + 49, 101, 173, 6, 101, 80, 49, 49, 48, 48, + 41, 76, 76, 49, 49, 48, 48, 42, 76, 76, + 49, 49, 48, 48, 101, 96, 113, 49, 87, 48, + 150, 101, 96, 151, 45, 171, 169, 170, 151, 176, + 170, 91, 169, 170, 81, 176, 170, 20, 24, 25, + 26, 27, 28, 29, 30, 31, 87, 32, 33, 34, + 35, 36, 37, 38, 39, 49, 4, 48, 5, 101, + 96, 181, 182, 183, 128, 20, 24, 25, 26, 27, + 28, 29, 30, 31, 46, 32, 33, 34, 35, 36, + 37, 38, 39, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 53, 20, 24, 25, 26, 27, 28, 29, + 30, 31, 53, 32, 33, 34, 35, 36, 37, 38, + 39, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 44, 20, 24, 25, 26, 27, 28, 29, 30, 31, + 53, 32, 33, 34, 35, 36, 37, 38, 39, 49, + 4, 48, 5, 101, 96, 49, 49, 48, 48, 101, + 101, 49, 49, 48, 48, 101, 101, 49, 49, 48, + 197, 101, 101, 49, 49, 197, 48, 101, 101, 49, + 49, 197, 48, 101, 181, 182, 183, 128, 204, 210, + 217, 205, 205, 205, 49, 49, 48, 48, 49, 49, + 48, 48, 49, 49, 48, 48, 181, 182, 183, 116, + 117, 56, 53, 53, 53, 53, 53, 62, 63, 64, + 65, 66, 68, 68, 68, 82, 53, 53, 104, 108, + 108, 115, 122, 123, 125, 128, 129, 133, 139, 140, + 141, 142, 144, 145, 101, 154, 139, 157, 154, 161, + 162, 68, 164, 165, 172, 177, 154, 184, 128, 188, + 154, 190, 128, 154, 199, 154, 128, 68, 165, 206, + 165, 68, 68, 215, 128, 68 ] + +racc_action_check = [ + 47, 153, 47, 153, 159, 153, 159, 178, 159, 178, + 189, 178, 189, 1, 189, 39, 35, 36, 35, 36, + 5, 35, 36, 37, 38, 37, 38, 6, 37, 38, + 59, 74, 59, 74, 59, 59, 74, 60, 45, 60, + 138, 60, 60, 138, 9, 156, 153, 153, 156, 159, + 159, 47, 178, 178, 39, 189, 189, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 83, 45, 45, 45, + 45, 45, 45, 45, 45, 61, 0, 61, 0, 61, + 61, 166, 166, 166, 166, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 11, 83, 83, 83, 83, 83, + 83, 83, 83, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 13, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 14, 3, 3, 3, 3, 3, 3, 3, + 3, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 15, 8, 8, 8, 8, 8, 8, 8, 8, 97, + 2, 97, 2, 97, 97, 71, 108, 71, 108, 71, + 108, 109, 169, 109, 169, 109, 169, 176, 184, 176, + 184, 176, 184, 190, 205, 190, 205, 190, 205, 206, + 12, 206, 12, 206, 174, 174, 174, 174, 196, 201, + 214, 196, 201, 214, 69, 76, 69, 76, 104, 105, + 104, 105, 111, 113, 111, 113, 198, 198, 198, 81, + 81, 16, 17, 20, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 40, 51, 56, 67, 70, + 72, 80, 84, 85, 86, 87, 93, 107, 115, 116, + 117, 118, 127, 128, 134, 140, 141, 143, 144, 145, + 146, 150, 151, 152, 158, 163, 165, 167, 168, 171, + 172, 173, 175, 177, 187, 188, 192, 193, 195, 197, + 200, 202, 204, 209, 210, 216 ] + +racc_action_pointer = [ + 66, 13, 150, 90, nil, 13, 27, nil, 118, 35, + nil, 88, 187, 63, 73, 101, 216, 173, nil, nil, + 174, nil, nil, nil, 175, 176, 177, 222, 223, 224, + 225, 226, 224, 225, 226, 13, 14, 20, 21, 10, + 233, nil, nil, nil, nil, 34, nil, -5, nil, nil, + nil, 187, nil, nil, nil, nil, 188, nil, nil, 27, + 34, 72, nil, nil, nil, nil, nil, 230, nil, 201, + 231, 162, 232, nil, 28, nil, 202, nil, nil, nil, + 200, 215, nil, 62, 233, 221, 222, 191, nil, nil, + nil, nil, nil, 244, nil, nil, nil, 156, nil, nil, + nil, nil, nil, nil, 205, 206, nil, 241, 163, 168, + nil, 209, nil, 210, nil, 243, 206, 209, 240, nil, + nil, nil, nil, nil, nil, nil, nil, 209, 248, nil, + nil, nil, nil, nil, 247, nil, nil, nil, -2, nil, + 208, 251, nil, 255, 211, 204, 210, nil, nil, nil, + 253, 257, 217, -2, nil, nil, 3, nil, 218, 1, + nil, nil, nil, 222, nil, 219, 30, 226, 214, 169, + nil, 226, 223, 230, 143, 218, 174, 226, 4, nil, + nil, nil, nil, nil, 175, nil, nil, 272, 228, 7, + 180, nil, 222, 269, nil, 232, 156, 238, 165, nil, + 234, 157, 273, nil, 274, 181, 186, nil, nil, 233, + 230, nil, nil, nil, 158, nil, 277, nil, nil ] + +racc_action_default = [ + -1, -128, -1, -3, -10, -128, -128, -2, -3, -128, + -16, -128, -128, -128, -128, -128, -128, -128, -24, -25, + -128, -32, -33, -34, -128, -128, -128, -128, -128, -128, + -128, -128, -50, -50, -50, -128, -128, -128, -128, -128, + -128, -13, 219, -4, -26, -128, -17, -123, -93, -94, + -122, -14, -19, -85, -20, -21, -128, -23, -31, -128, + -128, -128, -38, -39, -40, -41, -42, -43, -51, -128, + -44, -128, -45, -46, -88, -90, -128, -47, -48, -49, + -128, -128, -11, -5, -7, -95, -128, -68, -18, -124, + -125, -126, -15, -128, -22, -27, -28, -29, -35, -83, + -84, -127, -36, -37, -128, -52, -54, -56, -128, -79, + -81, -88, -89, -128, -91, -128, -128, -128, -128, -6, + -8, -9, -120, -96, -97, -98, -69, -128, -128, -86, + -30, -55, -53, -57, -76, -82, -80, -92, -128, -62, + -66, -128, -12, -128, -66, -128, -128, -58, -77, -78, + -50, -128, -60, -64, -67, -70, -128, -121, -99, -100, + -102, -119, -87, -128, -63, -66, -68, -93, -68, -128, + -116, -128, -66, -93, -68, -68, -128, -66, -65, -71, + -72, -108, -109, -110, -128, -74, -75, -128, -66, -101, + -128, -103, -68, -50, -107, -59, -128, -93, -111, -117, + -61, -128, -50, -106, -50, -128, -128, -112, -113, -128, + -68, -104, -73, -114, -128, -118, -50, -115, -105 ] + +racc_goto_table = [ + 69, 109, 50, 152, 57, 127, 84, 58, 112, 160, + 114, 59, 60, 61, 86, 52, 54, 55, 98, 102, + 103, 159, 106, 110, 175, 74, 74, 74, 74, 138, + 9, 1, 3, 180, 7, 43, 120, 160, 109, 109, + 195, 192, 121, 94, 119, 112, 40, 137, 118, 189, + 47, 200, 86, 92, 175, 156, 130, 131, 132, 107, + 135, 136, 88, 196, 111, 207, 111, 70, 72, 201, + 73, 77, 78, 79, 67, 147, 134, 178, 148, 149, + 93, 146, 124, 166, 179, 214, 185, 158, 208, 174, + 187, 209, 191, 193, 107, 107, 143, nil, nil, 186, + nil, 111, nil, 111, nil, nil, 194, nil, 166, nil, + 202, nil, nil, nil, 198, nil, nil, nil, 163, 174, + 198, nil, nil, nil, nil, nil, nil, nil, 216, nil, + nil, nil, nil, nil, nil, 213, 198, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, 203, nil, nil, nil, nil, nil, nil, nil, nil, + 211, nil, 212, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, 218 ] + +racc_goto_check = [ + 27, 20, 29, 33, 15, 40, 8, 15, 46, 39, + 46, 15, 15, 15, 12, 16, 16, 16, 22, 22, + 22, 50, 28, 43, 38, 29, 29, 29, 29, 32, + 7, 1, 6, 36, 6, 7, 5, 39, 20, 20, + 33, 36, 9, 15, 8, 46, 10, 46, 11, 50, + 13, 33, 12, 16, 38, 32, 22, 28, 28, 29, + 43, 43, 14, 37, 29, 36, 29, 24, 24, 37, + 25, 25, 25, 25, 23, 30, 31, 34, 41, 42, + 44, 45, 48, 20, 40, 37, 40, 49, 51, 20, + 52, 53, 40, 40, 29, 29, 54, nil, nil, 20, + nil, 29, nil, 29, nil, nil, 20, nil, 20, nil, + 40, nil, nil, nil, 20, nil, nil, nil, 27, 20, + 20, nil, nil, nil, nil, nil, nil, nil, 40, nil, + nil, nil, nil, nil, nil, 20, 20, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, 27, nil, nil, nil, nil, nil, nil, nil, nil, + 27, nil, 27, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, 27 ] + +racc_goto_pointer = [ + nil, 31, nil, nil, nil, -48, 32, 27, -39, -42, + 42, -34, -31, 38, 15, -13, 2, nil, nil, nil, + -70, nil, -41, 42, 34, 35, nil, -32, -47, -10, + -59, -31, -86, -137, -88, nil, -133, -121, -135, -135, + -82, -56, -55, -48, 27, -48, -66, nil, -3, -57, + -123, -110, -80, -108, -26 ] + +racc_goto_default = [ + nil, nil, 2, 8, 83, nil, nil, nil, nil, nil, + nil, nil, 10, nil, nil, 51, nil, 21, 22, 23, + 95, 97, nil, nil, nil, nil, 105, 71, nil, 99, + nil, nil, nil, nil, 153, 126, nil, nil, 168, 155, + nil, 100, nil, nil, nil, nil, 75, 85, nil, nil, + nil, nil, nil, nil, nil ] + +racc_reduce_table = [ + 0, 0, :racc_error, + 0, 63, :_reduce_1, + 2, 63, :_reduce_2, + 0, 64, :_reduce_3, + 2, 64, :_reduce_4, + 1, 65, :_reduce_5, + 2, 65, :_reduce_6, + 0, 66, :_reduce_none, + 1, 66, :_reduce_none, + 5, 58, :_reduce_none, + 0, 67, :_reduce_10, + 0, 68, :_reduce_11, + 5, 59, :_reduce_12, + 2, 59, :_reduce_none, + 1, 73, :_reduce_14, + 2, 73, :_reduce_15, + 1, 60, :_reduce_none, + 2, 60, :_reduce_17, + 3, 60, :_reduce_18, + 2, 60, :_reduce_none, + 2, 60, :_reduce_20, + 2, 60, :_reduce_21, + 3, 60, :_reduce_22, + 2, 60, :_reduce_23, + 1, 60, :_reduce_24, + 1, 60, :_reduce_25, + 2, 60, :_reduce_none, + 1, 78, :_reduce_27, + 1, 78, :_reduce_28, + 1, 79, :_reduce_29, + 2, 79, :_reduce_30, + 2, 69, :_reduce_31, + 1, 69, :_reduce_none, + 1, 69, :_reduce_none, + 1, 69, :_reduce_none, + 3, 69, :_reduce_35, + 3, 69, :_reduce_36, + 3, 69, :_reduce_37, + 2, 69, :_reduce_38, + 2, 69, :_reduce_39, + 2, 69, :_reduce_40, + 2, 69, :_reduce_41, + 2, 69, :_reduce_42, + 2, 74, :_reduce_none, + 2, 74, :_reduce_44, + 2, 74, :_reduce_45, + 2, 74, :_reduce_46, + 2, 74, :_reduce_47, + 2, 74, :_reduce_48, + 2, 74, :_reduce_49, + 0, 84, :_reduce_none, + 1, 84, :_reduce_none, + 1, 85, :_reduce_52, + 2, 85, :_reduce_53, + 2, 80, :_reduce_54, + 3, 80, :_reduce_55, + 0, 88, :_reduce_none, + 1, 88, :_reduce_none, + 3, 83, :_reduce_58, + 8, 75, :_reduce_59, + 5, 76, :_reduce_60, + 8, 76, :_reduce_61, + 1, 89, :_reduce_62, + 3, 89, :_reduce_63, + 1, 90, :_reduce_64, + 3, 90, :_reduce_65, + 0, 96, :_reduce_none, + 1, 96, :_reduce_none, + 0, 97, :_reduce_none, + 1, 97, :_reduce_none, + 1, 91, :_reduce_70, + 3, 91, :_reduce_71, + 3, 91, :_reduce_72, + 6, 91, :_reduce_73, + 3, 91, :_reduce_74, + 3, 91, :_reduce_75, + 0, 99, :_reduce_none, + 1, 99, :_reduce_none, + 1, 87, :_reduce_78, + 1, 100, :_reduce_79, + 2, 100, :_reduce_80, + 2, 81, :_reduce_81, + 3, 81, :_reduce_82, + 1, 77, :_reduce_none, + 1, 77, :_reduce_none, + 0, 101, :_reduce_85, + 0, 102, :_reduce_86, + 5, 72, :_reduce_87, + 1, 103, :_reduce_88, + 2, 103, :_reduce_89, + 1, 82, :_reduce_90, + 2, 82, :_reduce_91, + 3, 82, :_reduce_92, + 1, 86, :_reduce_93, + 1, 86, :_reduce_94, + 0, 105, :_reduce_none, + 1, 105, :_reduce_none, + 2, 61, :_reduce_none, + 2, 61, :_reduce_none, + 4, 104, :_reduce_99, + 1, 106, :_reduce_100, + 3, 106, :_reduce_101, + 1, 107, :_reduce_102, + 3, 107, :_reduce_103, + 5, 107, :_reduce_104, + 7, 107, :_reduce_105, + 4, 107, :_reduce_106, + 3, 107, :_reduce_107, + 1, 93, :_reduce_108, + 1, 93, :_reduce_109, + 1, 93, :_reduce_110, + 0, 108, :_reduce_none, + 1, 108, :_reduce_none, + 2, 94, :_reduce_113, + 3, 94, :_reduce_114, + 4, 94, :_reduce_115, + 0, 109, :_reduce_116, + 0, 110, :_reduce_117, + 5, 95, :_reduce_118, + 3, 92, :_reduce_119, + 0, 111, :_reduce_120, + 3, 62, :_reduce_121, + 1, 70, :_reduce_none, + 0, 71, :_reduce_none, + 1, 71, :_reduce_none, + 1, 71, :_reduce_none, + 1, 71, :_reduce_none, + 1, 98, :_reduce_127 ] + +racc_reduce_n = 128 + +racc_shift_n = 219 + +racc_token_table = { + false => 0, + :error => 1, + :C_DECLARATION => 2, + :CHARACTER => 3, + :IDENT_COLON => 4, + :IDENTIFIER => 5, + :INTEGER => 6, + :STRING => 7, + :TAG => 8, + "%%" => 9, + "%{" => 10, + "%}" => 11, + "%require" => 12, + "%expect" => 13, + "%define" => 14, + "%param" => 15, + "%lex-param" => 16, + "%parse-param" => 17, + "%code" => 18, + "%initial-action" => 19, + "%no-stdlib" => 20, + "%locations" => 21, + ";" => 22, + "%union" => 23, + "%destructor" => 24, + "%printer" => 25, + "%error-token" => 26, + "%after-shift" => 27, + "%before-reduce" => 28, + "%after-reduce" => 29, + "%after-shift-error-token" => 30, + "%after-pop-stack" => 31, + "-temp-group" => 32, + "%token" => 33, + "%type" => 34, + "%nterm" => 35, + "%left" => 36, + "%right" => 37, + "%precedence" => 38, + "%nonassoc" => 39, + "%rule" => 40, + "(" => 41, + ")" => 42, + ":" => 43, + "%inline" => 44, + "," => 45, + "|" => 46, + "%empty" => 47, + "%prec" => 48, + "{" => 49, + "}" => 50, + "?" => 51, + "+" => 52, + "*" => 53, + "[" => 54, + "]" => 55, + "{...}" => 56 } + +racc_nt_base = 57 + +racc_use_result_var = true + +Racc_arg = [ + racc_action_table, + racc_action_check, + racc_action_default, + racc_action_pointer, + racc_goto_table, + racc_goto_check, + racc_goto_default, + racc_goto_pointer, + racc_nt_base, + racc_reduce_table, + racc_token_table, + racc_shift_n, + racc_reduce_n, + racc_use_result_var ] +Ractor.make_shareable(Racc_arg) if defined?(Ractor) + +Racc_token_to_s_table = [ + "$end", + "error", + "C_DECLARATION", + "CHARACTER", + "IDENT_COLON", + "IDENTIFIER", + "INTEGER", + "STRING", + "TAG", + "\"%%\"", + "\"%{\"", + "\"%}\"", + "\"%require\"", + "\"%expect\"", + "\"%define\"", + "\"%param\"", + "\"%lex-param\"", + "\"%parse-param\"", + "\"%code\"", + "\"%initial-action\"", + "\"%no-stdlib\"", + "\"%locations\"", + "\";\"", + "\"%union\"", + "\"%destructor\"", + "\"%printer\"", + "\"%error-token\"", + "\"%after-shift\"", + "\"%before-reduce\"", + "\"%after-reduce\"", + "\"%after-shift-error-token\"", + "\"%after-pop-stack\"", + "\"-temp-group\"", + "\"%token\"", + "\"%type\"", + "\"%nterm\"", + "\"%left\"", + "\"%right\"", + "\"%precedence\"", + "\"%nonassoc\"", + "\"%rule\"", + "\"(\"", + "\")\"", + "\":\"", + "\"%inline\"", + "\",\"", + "\"|\"", + "\"%empty\"", + "\"%prec\"", + "\"{\"", + "\"}\"", + "\"?\"", + "\"+\"", + "\"*\"", + "\"[\"", + "\"]\"", + "\"{...}\"", + "$start", + "input", + "prologue_declaration", + "bison_declaration", + "rules_or_grammar_declaration", + "epilogue_declaration", + "\"-many@prologue_declaration\"", + "\"-many@bison_declaration\"", + "\"-many1@rules_or_grammar_declaration\"", + "\"-option@epilogue_declaration\"", + "@1", + "@2", + "grammar_declaration", + "variable", + "value", + "param", + "\"-many1@param\"", + "symbol_declaration", + "rule_declaration", + "inline_declaration", + "symbol", + "\"-group@symbol|TAG\"", + "\"-many1@-group@symbol|TAG\"", + "token_declarations", + "symbol_declarations", + "token_declarations_for_precedence", + "token_declaration", + "\"-option@TAG\"", + "\"-many1@token_declaration\"", + "id", + "alias", + "\"-option@INTEGER\"", + "rule_args", + "rule_rhs_list", + "rule_rhs", + "named_ref", + "parameterizing_suffix", + "parameterizing_args", + "midrule_action", + "\"-option@%empty\"", + "\"-option@named_ref\"", + "string_as_id", + "\"-option@string_as_id\"", + "\"-many1@symbol\"", + "@3", + "@4", + "\"-many1@id\"", + "rules", + "\"-option@;\"", + "rhs_list", + "rhs", + "\"-option@parameterizing_suffix\"", + "@5", + "@6", + "@7" ] +Ractor.make_shareable(Racc_token_to_s_table) if defined?(Ractor) + +Racc_debug_parser = true + +##### State transition tables end ##### + +# reduce 0 omitted + +module_eval(<<'.,.,', 'parser.y', 11) + def _reduce_1(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 11) + def _reduce_2(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 11) + def _reduce_3(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 11) + def _reduce_4(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 11) + def _reduce_5(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 11) + def _reduce_6(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val + result + end +.,., + +# reduce 7 omitted + +# reduce 8 omitted + +# reduce 9 omitted + +module_eval(<<'.,.,', 'parser.y', 12) + def _reduce_10(val, _values, result) + begin_c_declaration("%}") + @grammar.prologue_first_lineno = @lexer.line + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 17) + def _reduce_11(val, _values, result) + end_c_declaration + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 21) + def _reduce_12(val, _values, result) + @grammar.prologue = val[2].s_value + + result + end +.,., + +# reduce 13 omitted + +module_eval(<<'.,.,', 'parser.y', 54) + def _reduce_14(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 54) + def _reduce_15(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val + result + end +.,., + +# reduce 16 omitted + +module_eval(<<'.,.,', 'parser.y', 26) + def _reduce_17(val, _values, result) + @grammar.expect = val[1] + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 27) + def _reduce_18(val, _values, result) + @grammar.define[val[1].s_value] = val[2]&.s_value + result + end +.,., + +# reduce 19 omitted + +module_eval(<<'.,.,', 'parser.y', 31) + def _reduce_20(val, _values, result) + val[1].each {|token| + @grammar.lex_param = Grammar::Code::NoReferenceCode.new(type: :lex_param, token_code: token).token_code.s_value + } + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 37) + def _reduce_21(val, _values, result) + val[1].each {|token| + @grammar.parse_param = Grammar::Code::NoReferenceCode.new(type: :parse_param, token_code: token).token_code.s_value + } + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 43) + def _reduce_22(val, _values, result) + @grammar.add_percent_code(id: val[1], code: val[2]) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 47) + def _reduce_23(val, _values, result) + @grammar.initial_action = Grammar::Code::InitialActionCode.new(type: :initial_action, token_code: val[1]) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 49) + def _reduce_24(val, _values, result) + @grammar.no_stdlib = true + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 50) + def _reduce_25(val, _values, result) + @grammar.locations = true + result + end +.,., + +# reduce 26 omitted + +module_eval(<<'.,.,', 'parser.y', 109) + def _reduce_27(val, _values, result) + result = val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 109) + def _reduce_28(val, _values, result) + result = val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 109) + def _reduce_29(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 109) + def _reduce_30(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 55) + def _reduce_31(val, _values, result) + @grammar.set_union( + Grammar::Code::NoReferenceCode.new(type: :union, token_code: val[1]), + val[1].line + ) + + result + end +.,., + +# reduce 32 omitted + +# reduce 33 omitted + +# reduce 34 omitted + +module_eval(<<'.,.,', 'parser.y', 65) + def _reduce_35(val, _values, result) + @grammar.add_destructor( + ident_or_tags: val[2].flatten, + token_code: val[1], + lineno: val[1].line + ) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 73) + def _reduce_36(val, _values, result) + @grammar.add_printer( + ident_or_tags: val[2].flatten, + token_code: val[1], + lineno: val[1].line + ) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 81) + def _reduce_37(val, _values, result) + @grammar.add_error_token( + ident_or_tags: val[2].flatten, + token_code: val[1], + lineno: val[1].line + ) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 89) + def _reduce_38(val, _values, result) + @grammar.after_shift = val[1] + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 93) + def _reduce_39(val, _values, result) + @grammar.before_reduce = val[1] + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 97) + def _reduce_40(val, _values, result) + @grammar.after_reduce = val[1] + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 101) + def _reduce_41(val, _values, result) + @grammar.after_shift_error_token = val[1] + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 105) + def _reduce_42(val, _values, result) + @grammar.after_pop_stack = val[1] + + result + end +.,., + +# reduce 43 omitted + +module_eval(<<'.,.,', 'parser.y', 111) + def _reduce_44(val, _values, result) + val[1].each {|hash| + hash[:tokens].each {|id| + @grammar.add_type(id: id, tag: hash[:tag]) + } + } + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 119) + def _reduce_45(val, _values, result) + val[1].each {|hash| + hash[:tokens].each {|id| + if @grammar.find_term_by_s_value(id.s_value) + on_action_error("symbol #{id.s_value} redeclared as a nonterminal", id) + else + @grammar.add_type(id: id, tag: hash[:tag]) + end + } + } + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 131) + def _reduce_46(val, _values, result) + val[1].each {|hash| + hash[:tokens].each {|id| + sym = @grammar.add_term(id: id) + @grammar.add_left(sym, @precedence_number) + } + } + @precedence_number += 1 + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 141) + def _reduce_47(val, _values, result) + val[1].each {|hash| + hash[:tokens].each {|id| + sym = @grammar.add_term(id: id) + @grammar.add_right(sym, @precedence_number) + } + } + @precedence_number += 1 + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 151) + def _reduce_48(val, _values, result) + val[1].each {|hash| + hash[:tokens].each {|id| + sym = @grammar.add_term(id: id) + @grammar.add_precedence(sym, @precedence_number) + } + } + @precedence_number += 1 + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 161) + def _reduce_49(val, _values, result) + val[1].each {|hash| + hash[:tokens].each {|id| + sym = @grammar.add_term(id: id) + @grammar.add_nonassoc(sym, @precedence_number) + } + } + @precedence_number += 1 + + result + end +.,., + +# reduce 50 omitted + +# reduce 51 omitted + +module_eval(<<'.,.,', 'parser.y', 184) + def _reduce_52(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 184) + def _reduce_53(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 172) + def _reduce_54(val, _values, result) + val[1].each {|token_declaration| + @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1], tag: val[0], replace: true) + } + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 178) + def _reduce_55(val, _values, result) + val[2].each {|token_declaration| + @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1], tag: val[1], replace: true) + } + + result + end +.,., + +# reduce 56 omitted + +# reduce 57 omitted + +module_eval(<<'.,.,', 'parser.y', 183) + def _reduce_58(val, _values, result) + result = val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 187) + def _reduce_59(val, _values, result) + rule = Grammar::ParameterizingRule::Rule.new(val[1].s_value, val[3], val[7], tag: val[5]) + @grammar.add_parameterizing_rule(rule) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 193) + def _reduce_60(val, _values, result) + rule = Grammar::ParameterizingRule::Rule.new(val[2].s_value, [], val[4], is_inline: true) + @grammar.add_parameterizing_rule(rule) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 198) + def _reduce_61(val, _values, result) + rule = Grammar::ParameterizingRule::Rule.new(val[2].s_value, val[4], val[7], is_inline: true) + @grammar.add_parameterizing_rule(rule) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 202) + def _reduce_62(val, _values, result) + result = [val[0]] + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 203) + def _reduce_63(val, _values, result) + result = val[0].append(val[2]) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 207) + def _reduce_64(val, _values, result) + builder = val[0] + result = [builder] + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 212) + def _reduce_65(val, _values, result) + builder = val[2] + result = val[0].append(builder) + + result + end +.,., + +# reduce 66 omitted + +# reduce 67 omitted + +# reduce 68 omitted + +# reduce 69 omitted + +module_eval(<<'.,.,', 'parser.y', 218) + def _reduce_70(val, _values, result) + reset_precs + result = Grammar::ParameterizingRule::Rhs.new + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 223) + def _reduce_71(val, _values, result) + token = val[1] + token.alias_name = val[2] + builder = val[0] + builder.symbols << token + result = builder + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 231) + def _reduce_72(val, _values, result) + builder = val[0] + builder.symbols << Lrama::Lexer::Token::InstantiateRule.new(s_value: val[2], location: @lexer.location, args: [val[1]]) + result = builder + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 237) + def _reduce_73(val, _values, result) + builder = val[0] + builder.symbols << Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[3], lhs_tag: val[5]) + result = builder + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 243) + def _reduce_74(val, _values, result) + user_code = val[1] + user_code.alias_name = val[2] + builder = val[0] + builder.user_code = user_code + result = builder + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 251) + def _reduce_75(val, _values, result) + sym = @grammar.find_symbol_by_id!(val[2]) + @prec_seen = true + builder = val[0] + builder.precedence_sym = sym + result = builder + + result + end +.,., + +# reduce 76 omitted + +# reduce 77 omitted + +module_eval(<<'.,.,', 'parser.y', 258) + def _reduce_78(val, _values, result) + result = val[0].s_value if val[0] + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 271) + def _reduce_79(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 271) + def _reduce_80(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 262) + def _reduce_81(val, _values, result) + result = if val[0] + [{tag: val[0], tokens: val[1]}] + else + [{tag: nil, tokens: val[1]}] + end + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 268) + def _reduce_82(val, _values, result) + result = val[0].append({tag: val[1], tokens: val[2]}) + result + end +.,., + +# reduce 83 omitted + +# reduce 84 omitted + +module_eval(<<'.,.,', 'parser.y', 274) + def _reduce_85(val, _values, result) + begin_c_declaration("}") + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 278) + def _reduce_86(val, _values, result) + end_c_declaration + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 282) + def _reduce_87(val, _values, result) + result = val[2] + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 290) + def _reduce_88(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 290) + def _reduce_89(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 285) + def _reduce_90(val, _values, result) + result = [{tag: nil, tokens: val[0]}] + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 286) + def _reduce_91(val, _values, result) + result = [{tag: val[0], tokens: val[1]}] + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 287) + def _reduce_92(val, _values, result) + result = val[0].append({tag: val[1], tokens: val[2]}) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 289) + def _reduce_93(val, _values, result) + on_action_error("ident after %prec", val[0]) if @prec_seen + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 290) + def _reduce_94(val, _values, result) + on_action_error("char after %prec", val[0]) if @prec_seen + result + end +.,., + +# reduce 95 omitted + +# reduce 96 omitted + +# reduce 97 omitted + +# reduce 98 omitted + +module_eval(<<'.,.,', 'parser.y', 298) + def _reduce_99(val, _values, result) + lhs = val[0] + lhs.alias_name = val[1] + val[3].each do |builder| + builder.lhs = lhs + builder.complete_input + @grammar.add_rule_builder(builder) + end + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 309) + def _reduce_100(val, _values, result) + builder = val[0] + if !builder.line + builder.line = @lexer.line - 1 + end + result = [builder] + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 317) + def _reduce_101(val, _values, result) + builder = val[2] + if !builder.line + builder.line = @lexer.line - 1 + end + result = val[0].append(builder) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 326) + def _reduce_102(val, _values, result) + reset_precs + result = @grammar.create_rule_builder(@rule_counter, @midrule_action_counter) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 331) + def _reduce_103(val, _values, result) + token = val[1] + token.alias_name = val[2] + builder = val[0] + builder.add_rhs(token) + result = builder + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 339) + def _reduce_104(val, _values, result) + token = Lrama::Lexer::Token::InstantiateRule.new(s_value: val[2], alias_name: val[3], location: @lexer.location, args: [val[1]], lhs_tag: val[4]) + builder = val[0] + builder.add_rhs(token) + builder.line = val[1].first_line + result = builder + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 347) + def _reduce_105(val, _values, result) + token = Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, alias_name: val[5], location: @lexer.location, args: val[3], lhs_tag: val[6]) + builder = val[0] + builder.add_rhs(token) + builder.line = val[1].first_line + result = builder + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 355) + def _reduce_106(val, _values, result) + user_code = val[1] + user_code.alias_name = val[2] + user_code.tag = val[3] + builder = val[0] + builder.user_code = user_code + result = builder + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 364) + def _reduce_107(val, _values, result) + sym = @grammar.find_symbol_by_id!(val[2]) + @prec_seen = true + builder = val[0] + builder.precedence_sym = sym + result = builder + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 371) + def _reduce_108(val, _values, result) + result = "option" + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 372) + def _reduce_109(val, _values, result) + result = "nonempty_list" + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 373) + def _reduce_110(val, _values, result) + result = "list" + result + end +.,., + +# reduce 111 omitted + +# reduce 112 omitted + +module_eval(<<'.,.,', 'parser.y', 377) + def _reduce_113(val, _values, result) + result = if val[1] + [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[0])] + else + [val[0]] + end + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 383) + def _reduce_114(val, _values, result) + result = val[0].append(val[2]) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 384) + def _reduce_115(val, _values, result) + result = [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[0].s_value, location: @lexer.location, args: val[2])] + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 388) + def _reduce_116(val, _values, result) + if @prec_seen + on_action_error("multiple User_code after %prec", val[0]) if @code_after_prec + @code_after_prec = true + end + begin_c_declaration("}") + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 396) + def _reduce_117(val, _values, result) + end_c_declaration + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 400) + def _reduce_118(val, _values, result) + result = val[2] + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 403) + def _reduce_119(val, _values, result) + result = val[1].s_value + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 407) + def _reduce_120(val, _values, result) + begin_c_declaration('\Z') + @grammar.epilogue_first_lineno = @lexer.line + 1 + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 412) + def _reduce_121(val, _values, result) + end_c_declaration + @grammar.epilogue = val[2].s_value + + result + end +.,., + +# reduce 122 omitted + +# reduce 123 omitted + +# reduce 124 omitted + +# reduce 125 omitted + +# reduce 126 omitted + +module_eval(<<'.,.,', 'parser.y', 423) + def _reduce_127(val, _values, result) + result = Lrama::Lexer::Token::Ident.new(s_value: val[0]) + result + end +.,., + +def _reduce_none(val, _values, result) + val[0] +end + + end # class Parser +end # module Lrama diff --git a/tools/lrama/lib/lrama/report.rb b/tools/lrama/lib/lrama/report.rb new file mode 100644 index 0000000000..890e5f1e8c --- /dev/null +++ b/tools/lrama/lib/lrama/report.rb @@ -0,0 +1,4 @@ +# frozen_string_literal: true + +require_relative 'report/duration' +require_relative 'report/profile' diff --git a/tools/lrama/lib/lrama/report/duration.rb b/tools/lrama/lib/lrama/report/duration.rb new file mode 100644 index 0000000000..fe09a0d028 --- /dev/null +++ b/tools/lrama/lib/lrama/report/duration.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +module Lrama + class Report + module Duration + def self.enable + @_report_duration_enabled = true + end + + def self.enabled? + !!@_report_duration_enabled + end + + def report_duration(method_name) + time1 = Time.now.to_f + result = yield + time2 = Time.now.to_f + + if Duration.enabled? + puts sprintf("%s %10.5f s", method_name, time2 - time1) + end + + return result + end + end + end +end diff --git a/tools/lrama/lib/lrama/report/profile.rb b/tools/lrama/lib/lrama/report/profile.rb new file mode 100644 index 0000000000..10488cf913 --- /dev/null +++ b/tools/lrama/lib/lrama/report/profile.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +module Lrama + class Report + module Profile + # See "Profiling Lrama" in README.md for how to use. + def self.report_profile + require "stackprof" + + StackProf.run(mode: :cpu, raw: true, out: 'tmp/stackprof-cpu-myapp.dump') do + yield + end + end + end + end +end diff --git a/tools/lrama/lib/lrama/state.rb b/tools/lrama/lib/lrama/state.rb new file mode 100644 index 0000000000..3008786ced --- /dev/null +++ b/tools/lrama/lib/lrama/state.rb @@ -0,0 +1,433 @@ +# frozen_string_literal: true + +require_relative "state/reduce" +require_relative "state/reduce_reduce_conflict" +require_relative "state/resolved_conflict" +require_relative "state/shift" +require_relative "state/shift_reduce_conflict" + +module Lrama + class State + attr_reader :id, :accessing_symbol, :kernels, :conflicts, :resolved_conflicts, + :default_reduction_rule, :closure, :items + attr_accessor :shifts, :reduces, :ielr_isocores, :lalr_isocore + + def initialize(id, accessing_symbol, kernels) + @id = id + @accessing_symbol = accessing_symbol + @kernels = kernels.freeze + @items = @kernels + # Manage relationships between items to state + # to resolve next state + @items_to_state = {} + @conflicts = [] + @resolved_conflicts = [] + @default_reduction_rule = nil + @predecessors = [] + @lalr_isocore = self + @ielr_isocores = [self] + @internal_dependencies = {} + @successor_dependencies = {} + @always_follows = {} + end + + def closure=(closure) + @closure = closure + @items = @kernels + @closure + end + + def non_default_reduces + reduces.reject do |reduce| + reduce.rule == @default_reduction_rule + end + end + + def compute_shifts_reduces + _shifts = {} + reduces = [] + items.each do |item| + # TODO: Consider what should be pushed + if item.end_of_rule? + reduces << Reduce.new(item) + else + key = item.next_sym + _shifts[key] ||= [] + _shifts[key] << item.new_by_next_position + end + end + + # It seems Bison 3.8.2 iterates transitions order by symbol number + shifts = _shifts.sort_by do |next_sym, new_items| + next_sym.number + end.map do |next_sym, new_items| + Shift.new(next_sym, new_items.flatten) + end + self.shifts = shifts.freeze + self.reduces = reduces.freeze + end + + def set_items_to_state(items, next_state) + @items_to_state[items] = next_state + end + + def set_look_ahead(rule, look_ahead) + reduce = reduces.find do |r| + r.rule == rule + end + + reduce.look_ahead = look_ahead + end + + def nterm_transitions + @nterm_transitions ||= transitions.select {|shift, _| shift.next_sym.nterm? } + end + + def term_transitions + @term_transitions ||= transitions.select {|shift, _| shift.next_sym.term? } + end + + def transitions + @transitions ||= shifts.map {|shift| [shift, @items_to_state[shift.next_items]] } + end + + def update_transition(shift, next_state) + set_items_to_state(shift.next_items, next_state) + next_state.append_predecessor(self) + clear_transitions_cache + end + + def clear_transitions_cache + @nterm_transitions = nil + @term_transitions = nil + @transitions = nil + end + + def selected_term_transitions + term_transitions.reject do |shift, next_state| + shift.not_selected + end + end + + # Move to next state by sym + def transition(sym) + result = nil + + if sym.term? + term_transitions.each do |shift, next_state| + term = shift.next_sym + result = next_state if term == sym + end + else + nterm_transitions.each do |shift, next_state| + nterm = shift.next_sym + result = next_state if nterm == sym + end + end + + raise "Can not transit by #{sym} #{self}" if result.nil? + + result + end + + def find_reduce_by_item!(item) + reduces.find do |r| + r.item == item + end || (raise "reduce is not found. #{item}") + end + + def default_reduction_rule=(default_reduction_rule) + @default_reduction_rule = default_reduction_rule + + reduces.each do |r| + if r.rule == default_reduction_rule + r.default_reduction = true + end + end + end + + def has_conflicts? + !@conflicts.empty? + end + + def sr_conflicts + @conflicts.select do |conflict| + conflict.type == :shift_reduce + end + end + + def rr_conflicts + @conflicts.select do |conflict| + conflict.type == :reduce_reduce + end + end + + def propagate_lookaheads(next_state) + next_state.kernels.map {|item| + lookahead_sets = + if item.position == 1 + goto_follow_set(item.lhs) + else + kernel = kernels.find {|k| k.predecessor_item_of?(item) } + item_lookahead_set[kernel] + end + + [item, lookahead_sets & next_state.lookahead_set_filters[item]] + }.to_h + end + + def lookaheads_recomputed + !@item_lookahead_set.nil? + end + + def compatible_lookahead?(filtered_lookahead) + !lookaheads_recomputed || + @lalr_isocore.annotation_list.all? {|token, actions| + a = dominant_contribution(token, actions, item_lookahead_set) + b = dominant_contribution(token, actions, filtered_lookahead) + a.nil? || b.nil? || a == b + } + end + + def lookahead_set_filters + kernels.map {|kernel| + [kernel, + @lalr_isocore.annotation_list.select {|token, actions| + token.term? && actions.any? {|action, contributions| + !contributions.nil? && contributions.key?(kernel) && contributions[kernel] + } + }.map {|token, _| token } + ] + }.to_h + end + + def dominant_contribution(token, actions, lookaheads) + a = actions.select {|action, contributions| + contributions.nil? || contributions.any? {|item, contributed| contributed && lookaheads[item].include?(token) } + }.map {|action, _| action } + return nil if a.empty? + a.reject {|action| + if action.is_a?(State::Shift) + action.not_selected + elsif action.is_a?(State::Reduce) + action.not_selected_symbols.include?(token) + end + } + end + + def inadequacy_list + return @inadequacy_list if @inadequacy_list + + shift_contributions = shifts.map {|shift| + [shift.next_sym, [shift]] + }.to_h + reduce_contributions = reduces.map {|reduce| + (reduce.look_ahead || []).map {|sym| + [sym, [reduce]] + }.to_h + }.reduce(Hash.new([])) {|hash, cont| + hash.merge(cont) {|_, a, b| a | b } + } + + list = shift_contributions.merge(reduce_contributions) {|_, a, b| a | b } + @inadequacy_list = list.select {|token, actions| token.term? && actions.size > 1 } + end + + def annotation_list + return @annotation_list if @annotation_list + + @annotation_list = annotate_manifestation + @annotation_list = @items_to_state.values.map {|next_state| next_state.annotate_predecessor(self) } + .reduce(@annotation_list) {|result, annotations| + result.merge(annotations) {|_, actions_a, actions_b| + if actions_a.nil? || actions_b.nil? + actions_a || actions_b + else + actions_a.merge(actions_b) {|_, contributions_a, contributions_b| + if contributions_a.nil? || contributions_b.nil? + next contributions_a || contributions_b + end + + contributions_a.merge(contributions_b) {|_, contributed_a, contributed_b| + contributed_a || contributed_b + } + } + end + } + } + end + + def annotate_manifestation + inadequacy_list.transform_values {|actions| + actions.map {|action| + if action.is_a?(Shift) + [action, nil] + elsif action.is_a?(Reduce) + if action.rule.empty_rule? + [action, lhs_contributions(action.rule.lhs, inadequacy_list.key(actions))] + else + contributions = kernels.map {|kernel| [kernel, kernel.rule == action.rule && kernel.end_of_rule?] }.to_h + [action, contributions] + end + end + }.to_h + } + end + + def annotate_predecessor(predecessor) + annotation_list.transform_values {|actions| + token = annotation_list.key(actions) + actions.transform_values {|inadequacy| + next nil if inadequacy.nil? + lhs_adequacy = kernels.any? {|kernel| + inadequacy[kernel] && kernel.position == 1 && predecessor.lhs_contributions(kernel.lhs, token).nil? + } + if lhs_adequacy + next nil + else + predecessor.kernels.map {|pred_k| + [pred_k, kernels.any? {|k| + inadequacy[k] && ( + pred_k.predecessor_item_of?(k) && predecessor.item_lookahead_set[pred_k].include?(token) || + k.position == 1 && predecessor.lhs_contributions(k.lhs, token)[pred_k] + ) + }] + }.to_h + end + } + } + end + + def lhs_contributions(sym, token) + shift, next_state = nterm_transitions.find {|sh, _| sh.next_sym == sym } + if always_follows(shift, next_state).include?(token) + nil + else + kernels.map {|kernel| [kernel, follow_kernel_items(shift, next_state, kernel) && item_lookahead_set[kernel].include?(token)] }.to_h + end + end + + def follow_kernel_items(shift, next_state, kernel) + queue = [[self, shift, next_state]] + until queue.empty? + st, sh, next_st = queue.pop + return true if kernel.next_sym == sh.next_sym && kernel.symbols_after_transition.all?(&:nullable) + st.internal_dependencies(sh, next_st).each {|v| queue << v } + end + false + end + + def item_lookahead_set + return @item_lookahead_set if @item_lookahead_set + + kernels.map {|item| + value = + if item.lhs.accept_symbol? + [] + elsif item.position > 1 + prev_items = predecessors_with_item(item) + prev_items.map {|st, i| st.item_lookahead_set[i] }.reduce([]) {|acc, syms| acc |= syms } + elsif item.position == 1 + prev_state = @predecessors.find {|p| p.shifts.any? {|shift| shift.next_sym == item.lhs } } + shift, next_state = prev_state.nterm_transitions.find {|shift, _| shift.next_sym == item.lhs } + prev_state.goto_follows(shift, next_state) + end + [item, value] + }.to_h + end + + def item_lookahead_set=(k) + @item_lookahead_set = k + end + + def predecessors_with_item(item) + result = [] + @predecessors.each do |pre| + pre.items.each do |i| + result << [pre, i] if i.predecessor_item_of?(item) + end + end + result + end + + def append_predecessor(prev_state) + @predecessors << prev_state + @predecessors.uniq! + end + + def goto_follow_set(nterm_token) + return [] if nterm_token.accept_symbol? + shift, next_state = @lalr_isocore.nterm_transitions.find {|sh, _| sh.next_sym == nterm_token } + + @kernels + .select {|kernel| follow_kernel_items(shift, next_state, kernel) } + .map {|kernel| item_lookahead_set[kernel] } + .reduce(always_follows(shift, next_state)) {|result, terms| result |= terms } + end + + def goto_follows(shift, next_state) + queue = internal_dependencies(shift, next_state) + predecessor_dependencies(shift, next_state) + terms = always_follows(shift, next_state) + until queue.empty? + st, sh, next_st = queue.pop + terms |= st.always_follows(sh, next_st) + st.internal_dependencies(sh, next_st).each {|v| queue << v } + st.predecessor_dependencies(sh, next_st).each {|v| queue << v } + end + terms + end + + def always_follows(shift, next_state) + return @always_follows[[shift, next_state]] if @always_follows[[shift, next_state]] + + queue = internal_dependencies(shift, next_state) + successor_dependencies(shift, next_state) + terms = [] + until queue.empty? + st, sh, next_st = queue.pop + terms |= next_st.term_transitions.map {|sh, _| sh.next_sym } + st.internal_dependencies(sh, next_st).each {|v| queue << v } + st.successor_dependencies(sh, next_st).each {|v| queue << v } + end + @always_follows[[shift, next_state]] = terms + end + + def internal_dependencies(shift, next_state) + return @internal_dependencies[[shift, next_state]] if @internal_dependencies[[shift, next_state]] + + syms = @items.select {|i| + i.next_sym == shift.next_sym && i.symbols_after_transition.all?(&:nullable) && i.position == 0 + }.map(&:lhs).uniq + @internal_dependencies[[shift, next_state]] = nterm_transitions.select {|sh, _| syms.include?(sh.next_sym) }.map {|goto| [self, *goto] } + end + + def successor_dependencies(shift, next_state) + return @successor_dependencies[[shift, next_state]] if @successor_dependencies[[shift, next_state]] + + @successor_dependencies[[shift, next_state]] = + next_state.nterm_transitions + .select {|next_shift, _| next_shift.next_sym.nullable } + .map {|transition| [next_state, *transition] } + end + + def predecessor_dependencies(shift, next_state) + state_items = [] + @kernels.select {|kernel| + kernel.next_sym == shift.next_sym && kernel.symbols_after_transition.all?(&:nullable) + }.each do |item| + queue = predecessors_with_item(item) + until queue.empty? + st, i = queue.pop + if i.position == 0 + state_items << [st, i] + else + st.predecessors_with_item(i).each {|v| queue << v } + end + end + end + + state_items.map {|state, item| + sh, next_st = state.nterm_transitions.find {|shi, _| shi.next_sym == item.lhs } + [state, sh, next_st] + } + end + end +end diff --git a/tools/lrama/lib/lrama/state/reduce.rb b/tools/lrama/lib/lrama/state/reduce.rb new file mode 100644 index 0000000000..54ab87b468 --- /dev/null +++ b/tools/lrama/lib/lrama/state/reduce.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +module Lrama + class State + class Reduce + # https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html + attr_reader :item, :look_ahead, :not_selected_symbols + attr_accessor :default_reduction + + def initialize(item) + @item = item + @look_ahead = nil + @not_selected_symbols = [] + end + + def rule + @item.rule + end + + def look_ahead=(look_ahead) + @look_ahead = look_ahead.freeze + end + + def add_not_selected_symbol(sym) + @not_selected_symbols << sym + end + + def selected_look_ahead + if look_ahead + look_ahead - @not_selected_symbols + else + [] + end + end + end + end +end diff --git a/tools/lrama/lib/lrama/state/reduce_reduce_conflict.rb b/tools/lrama/lib/lrama/state/reduce_reduce_conflict.rb new file mode 100644 index 0000000000..736d08376a --- /dev/null +++ b/tools/lrama/lib/lrama/state/reduce_reduce_conflict.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +module Lrama + class State + class ReduceReduceConflict < Struct.new(:symbols, :reduce1, :reduce2, keyword_init: true) + def type + :reduce_reduce + end + end + end +end diff --git a/tools/lrama/lib/lrama/state/resolved_conflict.rb b/tools/lrama/lib/lrama/state/resolved_conflict.rb new file mode 100644 index 0000000000..3bb3d1446e --- /dev/null +++ b/tools/lrama/lib/lrama/state/resolved_conflict.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +module Lrama + class State + # * symbol: A symbol under discussion + # * reduce: A reduce under discussion + # * which: For which a conflict is resolved. :shift, :reduce or :error (for nonassociative) + class ResolvedConflict < Struct.new(:symbol, :reduce, :which, :same_prec, keyword_init: true) + def report_message + s = symbol.display_name + r = reduce.rule.precedence_sym&.display_name + case + when which == :shift && same_prec + msg = "resolved as #{which} (%right #{s})" + when which == :shift + msg = "resolved as #{which} (#{r} < #{s})" + when which == :reduce && same_prec + msg = "resolved as #{which} (%left #{s})" + when which == :reduce + msg = "resolved as #{which} (#{s} < #{r})" + when which == :error + msg = "resolved as an #{which} (%nonassoc #{s})" + else + raise "Unknown direction. #{self}" + end + + "Conflict between rule #{reduce.rule.id} and token #{s} #{msg}." + end + end + end +end diff --git a/tools/lrama/lib/lrama/state/shift.rb b/tools/lrama/lib/lrama/state/shift.rb new file mode 100644 index 0000000000..81ef013a17 --- /dev/null +++ b/tools/lrama/lib/lrama/state/shift.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +module Lrama + class State + class Shift + attr_reader :next_sym, :next_items + attr_accessor :not_selected + + def initialize(next_sym, next_items) + @next_sym = next_sym + @next_items = next_items + end + end + end +end diff --git a/tools/lrama/lib/lrama/state/shift_reduce_conflict.rb b/tools/lrama/lib/lrama/state/shift_reduce_conflict.rb new file mode 100644 index 0000000000..fd66834539 --- /dev/null +++ b/tools/lrama/lib/lrama/state/shift_reduce_conflict.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +module Lrama + class State + class ShiftReduceConflict < Struct.new(:symbols, :shift, :reduce, keyword_init: true) + def type + :shift_reduce + end + end + end +end diff --git a/tools/lrama/lib/lrama/states.rb b/tools/lrama/lib/lrama/states.rb new file mode 100644 index 0000000000..fd8ded905f --- /dev/null +++ b/tools/lrama/lib/lrama/states.rb @@ -0,0 +1,595 @@ +# frozen_string_literal: true + +require "forwardable" +require_relative "report/duration" +require_relative "states/item" + +module Lrama + # States is passed to a template file + # + # "Efficient Computation of LALR(1) Look-Ahead Sets" + # https://dl.acm.org/doi/pdf/10.1145/69622.357187 + class States + extend Forwardable + include Lrama::Report::Duration + + def_delegators "@grammar", :symbols, :terms, :nterms, :rules, + :accept_symbol, :eof_symbol, :undef_symbol, :find_symbol_by_s_value! + + attr_reader :states, :reads_relation, :includes_relation, :lookback_relation + + def initialize(grammar, trace_state: false) + @grammar = grammar + @trace_state = trace_state + + @states = [] + + # `DR(p, A) = {t ∈ T | p -(A)-> r -(t)-> }` + # where p is state, A is nterm, t is term. + # + # `@direct_read_sets` is a hash whose + # key is [state.id, nterm.token_id], + # value is bitmap of term. + @direct_read_sets = {} + + # Reads relation on nonterminal transitions (pair of state and nterm) + # `(p, A) reads (r, C) iff p -(A)-> r -(C)-> and C =>* ε` + # where p, r are state, A, C are nterm. + # + # `@reads_relation` is a hash whose + # key is [state.id, nterm.token_id], + # value is array of [state.id, nterm.token_id]. + @reads_relation = {} + + # `Read(p, A) =s DR(p, A) ∪ ∪{Read(r, C) | (p, A) reads (r, C)}` + # + # `@read_sets` is a hash whose + # key is [state.id, nterm.token_id], + # value is bitmap of term. + @read_sets = {} + + # `(p, A) includes (p', B) iff B -> βAγ, γ =>* ε, p' -(β)-> p` + # where p, p' are state, A, B are nterm, β, γ is sequence of symbol. + # + # `@includes_relation` is a hash whose + # key is [state.id, nterm.token_id], + # value is array of [state.id, nterm.token_id]. + @includes_relation = {} + + # `(q, A -> ω) lookback (p, A) iff p -(ω)-> q` + # where p, q are state, A -> ω is rule, A is nterm, ω is sequence of symbol. + # + # `@lookback_relation` is a hash whose + # key is [state.id, rule.id], + # value is array of [state.id, nterm.token_id]. + @lookback_relation = {} + + # `Follow(p, A) =s Read(p, A) ∪ ∪{Follow(p', B) | (p, A) includes (p', B)}` + # + # `@follow_sets` is a hash whose + # key is [state.id, rule.id], + # value is bitmap of term. + @follow_sets = {} + + # `LA(q, A -> ω) = ∪{Follow(p, A) | (q, A -> ω) lookback (p, A)` + # + # `@la` is a hash whose + # key is [state.id, rule.id], + # value is bitmap of term. + @la = {} + end + + def compute + # Look Ahead Sets + report_duration(:compute_lr0_states) { compute_lr0_states } + report_duration(:compute_direct_read_sets) { compute_direct_read_sets } + report_duration(:compute_reads_relation) { compute_reads_relation } + report_duration(:compute_read_sets) { compute_read_sets } + report_duration(:compute_includes_relation) { compute_includes_relation } + report_duration(:compute_lookback_relation) { compute_lookback_relation } + report_duration(:compute_follow_sets) { compute_follow_sets } + report_duration(:compute_look_ahead_sets) { compute_look_ahead_sets } + + # Conflicts + report_duration(:compute_conflicts) { compute_conflicts } + + report_duration(:compute_default_reduction) { compute_default_reduction } + end + + def compute_ielr + report_duration(:split_states) { split_states } + report_duration(:compute_direct_read_sets) { compute_direct_read_sets } + report_duration(:compute_reads_relation) { compute_reads_relation } + report_duration(:compute_read_sets) { compute_read_sets } + report_duration(:compute_includes_relation) { compute_includes_relation } + report_duration(:compute_lookback_relation) { compute_lookback_relation } + report_duration(:compute_follow_sets) { compute_follow_sets } + report_duration(:compute_look_ahead_sets) { compute_look_ahead_sets } + report_duration(:compute_conflicts) { compute_conflicts } + + report_duration(:compute_default_reduction) { compute_default_reduction } + end + + def reporter + StatesReporter.new(self) + end + + def states_count + @states.count + end + + def direct_read_sets + @direct_read_sets.transform_values do |v| + bitmap_to_terms(v) + end + end + + def read_sets + @read_sets.transform_values do |v| + bitmap_to_terms(v) + end + end + + def follow_sets + @follow_sets.transform_values do |v| + bitmap_to_terms(v) + end + end + + def la + @la.transform_values do |v| + bitmap_to_terms(v) + end + end + + def sr_conflicts_count + @sr_conflicts_count ||= @states.flat_map(&:sr_conflicts).count + end + + def rr_conflicts_count + @rr_conflicts_count ||= @states.flat_map(&:rr_conflicts).count + end + + private + + def trace_state + if @trace_state + yield STDERR + end + end + + def create_state(accessing_symbol, kernels, states_created) + # A item can appear in some states, + # so need to use `kernels` (not `kernels.first`) as a key. + # + # For example... + # + # %% + # program: '+' strings_1 + # | '-' strings_2 + # ; + # + # strings_1: string_1 + # ; + # + # strings_2: string_1 + # | string_2 + # ; + # + # string_1: string + # ; + # + # string_2: string '+' + # ; + # + # string: tSTRING + # ; + # %% + # + # For these grammar, there are 2 states + # + # State A + # string_1: string • + # + # State B + # string_1: string • + # string_2: string • '+' + # + return [states_created[kernels], false] if states_created[kernels] + + state = State.new(@states.count, accessing_symbol, kernels) + @states << state + states_created[kernels] = state + + return [state, true] + end + + def setup_state(state) + # closure + closure = [] + visited = {} + queued = {} + items = state.kernels.dup + + items.each do |item| + queued[item] = true + end + + while (item = items.shift) do + visited[item] = true + + if (sym = item.next_sym) && sym.nterm? + @grammar.find_rules_by_symbol!(sym).each do |rule| + i = Item.new(rule: rule, position: 0) + next if queued[i] + closure << i + items << i + queued[i] = true + end + end + end + + state.closure = closure.sort_by {|i| i.rule.id } + + # Trace + trace_state do |out| + out << "Closure: input\n" + state.kernels.each do |item| + out << " #{item.display_rest}\n" + end + out << "\n\n" + out << "Closure: output\n" + state.items.each do |item| + out << " #{item.display_rest}\n" + end + out << "\n\n" + end + + # shift & reduce + state.compute_shifts_reduces + end + + def enqueue_state(states, state) + # Trace + previous = state.kernels.first.previous_sym + trace_state do |out| + out << sprintf("state_list_append (state = %d, symbol = %d (%s))\n", + @states.count, previous.number, previous.display_name) + end + + states << state + end + + def compute_lr0_states + # State queue + states = [] + states_created = {} + + state, _ = create_state(symbols.first, [Item.new(rule: @grammar.rules.first, position: 0)], states_created) + enqueue_state(states, state) + + while (state = states.shift) do + # Trace + # + # Bison 3.8.2 renders "(reached by "end-of-input")" for State 0 but + # I think it is not correct... + previous = state.kernels.first.previous_sym + trace_state do |out| + out << "Processing state #{state.id} (reached by #{previous.display_name})\n" + end + + setup_state(state) + + state.shifts.each do |shift| + new_state, created = create_state(shift.next_sym, shift.next_items, states_created) + state.set_items_to_state(shift.next_items, new_state) + if created + enqueue_state(states, new_state) + new_state.append_predecessor(state) + end + end + end + end + + def nterm_transitions + a = [] + + @states.each do |state| + state.nterm_transitions.each do |shift, next_state| + nterm = shift.next_sym + a << [state, nterm, next_state] + end + end + + a + end + + def compute_direct_read_sets + @states.each do |state| + state.nterm_transitions.each do |shift, next_state| + nterm = shift.next_sym + + ary = next_state.term_transitions.map do |shift, _| + shift.next_sym.number + end + + key = [state.id, nterm.token_id] + @direct_read_sets[key] = Bitmap.from_array(ary) + end + end + end + + def compute_reads_relation + @states.each do |state| + state.nterm_transitions.each do |shift, next_state| + nterm = shift.next_sym + next_state.nterm_transitions.each do |shift2, _next_state2| + nterm2 = shift2.next_sym + if nterm2.nullable + key = [state.id, nterm.token_id] + @reads_relation[key] ||= [] + @reads_relation[key] << [next_state.id, nterm2.token_id] + end + end + end + end + end + + def compute_read_sets + sets = nterm_transitions.map do |state, nterm, next_state| + [state.id, nterm.token_id] + end + + @read_sets = Digraph.new(sets, @reads_relation, @direct_read_sets).compute + end + + # Execute transition of state by symbols + # then return final state. + def transition(state, symbols) + symbols.each do |sym| + state = state.transition(sym) + end + + state + end + + def compute_includes_relation + @states.each do |state| + state.nterm_transitions.each do |shift, next_state| + nterm = shift.next_sym + @grammar.find_rules_by_symbol!(nterm).each do |rule| + i = rule.rhs.count - 1 + + while (i > -1) do + sym = rule.rhs[i] + + break if sym.term? + state2 = transition(state, rule.rhs[0...i]) + # p' = state, B = nterm, p = state2, A = sym + key = [state2.id, sym.token_id] + # TODO: need to omit if state == state2 ? + @includes_relation[key] ||= [] + @includes_relation[key] << [state.id, nterm.token_id] + break unless sym.nullable + i -= 1 + end + end + end + end + end + + def compute_lookback_relation + @states.each do |state| + state.nterm_transitions.each do |shift, next_state| + nterm = shift.next_sym + @grammar.find_rules_by_symbol!(nterm).each do |rule| + state2 = transition(state, rule.rhs) + # p = state, A = nterm, q = state2, A -> ω = rule + key = [state2.id, rule.id] + @lookback_relation[key] ||= [] + @lookback_relation[key] << [state.id, nterm.token_id] + end + end + end + end + + def compute_follow_sets + sets = nterm_transitions.map do |state, nterm, next_state| + [state.id, nterm.token_id] + end + + @follow_sets = Digraph.new(sets, @includes_relation, @read_sets).compute + end + + def compute_look_ahead_sets + @states.each do |state| + rules.each do |rule| + ary = @lookback_relation[[state.id, rule.id]] + next unless ary + + ary.each do |state2_id, nterm_token_id| + # q = state, A -> ω = rule, p = state2, A = nterm + follows = @follow_sets[[state2_id, nterm_token_id]] + + next if follows == 0 + + key = [state.id, rule.id] + @la[key] ||= 0 + look_ahead = @la[key] | follows + @la[key] |= look_ahead + + # No risk of conflict when + # * the state only has single reduce + # * the state only has nterm_transitions (GOTO) + next if state.reduces.count == 1 && state.term_transitions.count == 0 + + state.set_look_ahead(rule, bitmap_to_terms(look_ahead)) + end + end + end + end + + def bitmap_to_terms(bit) + ary = Bitmap.to_array(bit) + ary.map do |i| + @grammar.find_symbol_by_number!(i) + end + end + + def compute_conflicts + compute_shift_reduce_conflicts + compute_reduce_reduce_conflicts + end + + def compute_shift_reduce_conflicts + states.each do |state| + state.shifts.each do |shift| + state.reduces.each do |reduce| + sym = shift.next_sym + + next unless reduce.look_ahead + next unless reduce.look_ahead.include?(sym) + + # Shift/Reduce conflict + shift_prec = sym.precedence + reduce_prec = reduce.item.rule.precedence + + # Can resolve only when both have prec + unless shift_prec && reduce_prec + state.conflicts << State::ShiftReduceConflict.new(symbols: [sym], shift: shift, reduce: reduce) + next + end + + case + when shift_prec < reduce_prec + # Reduce is selected + state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :reduce) + shift.not_selected = true + next + when shift_prec > reduce_prec + # Shift is selected + state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :shift) + reduce.add_not_selected_symbol(sym) + next + end + + # shift_prec == reduce_prec, then check associativity + case sym.precedence.type + when :precedence + # %precedence only specifies precedence and not specify associativity + # then a conflict is unresolved if precedence is same. + state.conflicts << State::ShiftReduceConflict.new(symbols: [sym], shift: shift, reduce: reduce) + next + when :right + # Shift is selected + state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :shift, same_prec: true) + reduce.add_not_selected_symbol(sym) + next + when :left + # Reduce is selected + state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :reduce, same_prec: true) + shift.not_selected = true + next + when :nonassoc + # Can not resolve + # + # nonassoc creates "run-time" error, precedence creates "compile-time" error. + # Then omit both the shift and reduce. + # + # https://www.gnu.org/software/bison/manual/html_node/Using-Precedence.html + state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :error) + shift.not_selected = true + reduce.add_not_selected_symbol(sym) + else + raise "Unknown precedence type. #{sym}" + end + end + end + end + end + + def compute_reduce_reduce_conflicts + states.each do |state| + count = state.reduces.count + + (0...count).each do |i| + reduce1 = state.reduces[i] + next if reduce1.look_ahead.nil? + + ((i+1)...count).each do |j| + reduce2 = state.reduces[j] + next if reduce2.look_ahead.nil? + + intersection = reduce1.look_ahead & reduce2.look_ahead + + unless intersection.empty? + state.conflicts << State::ReduceReduceConflict.new(symbols: intersection, reduce1: reduce1, reduce2: reduce2) + end + end + end + end + end + + def compute_default_reduction + states.each do |state| + next if state.reduces.empty? + # Do not set, if conflict exist + next unless state.conflicts.empty? + # Do not set, if shift with `error` exists. + next if state.shifts.map(&:next_sym).include?(@grammar.error_symbol) + + state.default_reduction_rule = state.reduces.map do |r| + [r.rule, r.rule.id, (r.look_ahead || []).count] + end.min_by do |rule, rule_id, count| + [-count, rule_id] + end.first + end + end + + def split_states + @states.each do |state| + state.transitions.each do |shift, next_state| + compute_state(state, shift, next_state) + end + end + end + + def merge_lookaheads(state, filtered_lookaheads) + return if state.kernels.all? {|item| (filtered_lookaheads[item] - state.item_lookahead_set[item]).empty? } + + state.item_lookahead_set = state.item_lookahead_set.merge {|_, v1, v2| v1 | v2 } + state.transitions.each do |shift, next_state| + next if next_state.lookaheads_recomputed + compute_state(state, shift, next_state) + end + end + + def compute_state(state, shift, next_state) + filtered_lookaheads = state.propagate_lookaheads(next_state) + s = next_state.ielr_isocores.find {|st| st.compatible_lookahead?(filtered_lookaheads) } + + if s.nil? + s = next_state.ielr_isocores.last + new_state = State.new(@states.count, s.accessing_symbol, s.kernels) + new_state.closure = s.closure + new_state.compute_shifts_reduces + s.transitions.each do |sh, next_state| + new_state.set_items_to_state(sh.next_items, next_state) + end + @states << new_state + new_state.lalr_isocore = s + s.ielr_isocores << new_state + s.ielr_isocores.each do |st| + st.ielr_isocores = s.ielr_isocores + end + new_state.item_lookahead_set = filtered_lookaheads + state.update_transition(shift, new_state) + elsif(!s.lookaheads_recomputed) + s.item_lookahead_set = filtered_lookaheads + else + state.update_transition(shift, s) + merge_lookaheads(s, filtered_lookaheads) + end + end + end +end diff --git a/tools/lrama/lib/lrama/states/item.rb b/tools/lrama/lib/lrama/states/item.rb new file mode 100644 index 0000000000..e89cb9695b --- /dev/null +++ b/tools/lrama/lib/lrama/states/item.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +# TODO: Validate position is not over rule rhs + +require "forwardable" + +module Lrama + class States + class Item < Struct.new(:rule, :position, keyword_init: true) + extend Forwardable + + def_delegators "rule", :lhs, :rhs + + # Optimization for States#setup_state + def hash + [rule_id, position].hash + end + + def rule_id + rule.id + end + + def empty_rule? + rule.empty_rule? + end + + def number_of_rest_symbols + rhs.count - position + end + + def next_sym + rhs[position] + end + + def next_next_sym + rhs[position + 1] + end + + def previous_sym + rhs[position - 1] + end + + def end_of_rule? + rhs.count == position + end + + def beginning_of_rule? + position == 0 + end + + def start_item? + rule.initial_rule? && beginning_of_rule? + end + + def new_by_next_position + Item.new(rule: rule, position: position + 1) + end + + def symbols_before_dot # steep:ignore + rhs[0...position] + end + + def symbols_after_dot # steep:ignore + rhs[position..-1] + end + + def symbols_after_transition + rhs[position+1..-1] + end + + def to_s + "#{lhs.id.s_value}: #{display_name}" + end + + def display_name + r = rhs.map(&:display_name).insert(position, "•").join(" ") + "#{r} (rule #{rule_id})" + end + + # Right after position + def display_rest + r = symbols_after_dot.map(&:display_name).join(" ") + ". #{r} (rule #{rule_id})" + end + + def predecessor_item_of?(other_item) + rule == other_item.rule && position == other_item.position - 1 + end + end + end +end diff --git a/tools/lrama/lib/lrama/states_reporter.rb b/tools/lrama/lib/lrama/states_reporter.rb new file mode 100644 index 0000000000..64ff4de100 --- /dev/null +++ b/tools/lrama/lib/lrama/states_reporter.rb @@ -0,0 +1,362 @@ +# frozen_string_literal: true + +module Lrama + class StatesReporter + include Lrama::Report::Duration + + def initialize(states) + @states = states + end + + def report(io, **options) + report_duration(:report) do + _report(io, **options) + end + end + + private + + def _report(io, grammar: false, rules: false, terms: false, states: false, itemsets: false, lookaheads: false, solved: false, counterexamples: false, verbose: false) + report_unused_rules(io) if rules + report_unused_terms(io) if terms + report_conflicts(io) + report_grammar(io) if grammar + report_states(io, itemsets, lookaheads, solved, counterexamples, verbose) + end + + def report_unused_terms(io) + look_aheads = @states.states.each do |state| + state.reduces.flat_map do |reduce| + reduce.look_ahead unless reduce.look_ahead.nil? + end + end + + next_terms = @states.states.flat_map do |state| + state.shifts.map(&:next_sym).select(&:term?) + end + + unused_symbols = @states.terms.select do |term| + !(look_aheads + next_terms).include?(term) + end + + unless unused_symbols.empty? + io << "#{unused_symbols.count} Unused Terms\n\n" + unused_symbols.each_with_index do |term, index| + io << sprintf("%5d %s\n", index, term.id.s_value) + end + io << "\n\n" + end + end + + def report_unused_rules(io) + used_rules = @states.rules.flat_map(&:rhs) + + unused_rules = @states.rules.map(&:lhs).select do |rule| + !used_rules.include?(rule) && rule.token_id != 0 + end + + unless unused_rules.empty? + io << "#{unused_rules.count} Unused Rules\n\n" + unused_rules.each_with_index do |rule, index| + io << sprintf("%5d %s\n", index, rule.display_name) + end + io << "\n\n" + end + end + + def report_conflicts(io) + has_conflict = false + + @states.states.each do |state| + messages = [] + cs = state.conflicts.group_by(&:type) + if cs[:shift_reduce] + messages << "#{cs[:shift_reduce].count} shift/reduce" + end + + if cs[:reduce_reduce] + messages << "#{cs[:reduce_reduce].count} reduce/reduce" + end + + unless messages.empty? + has_conflict = true + io << "State #{state.id} conflicts: #{messages.join(', ')}\n" + end + end + + if has_conflict + io << "\n\n" + end + end + + def report_grammar(io) + io << "Grammar\n" + last_lhs = nil + + @states.rules.each do |rule| + if rule.empty_rule? + r = "ε" + else + r = rule.rhs.map(&:display_name).join(" ") + end + + if rule.lhs == last_lhs + io << sprintf("%5d %s| %s\n", rule.id, " " * rule.lhs.display_name.length, r) + else + io << "\n" + io << sprintf("%5d %s: %s\n", rule.id, rule.lhs.display_name, r) + end + + last_lhs = rule.lhs + end + io << "\n\n" + end + + def report_states(io, itemsets, lookaheads, solved, counterexamples, verbose) + if counterexamples + cex = Counterexamples.new(@states) + end + + @states.states.each do |state| + # Report State + io << "State #{state.id}\n\n" + + # Report item + last_lhs = nil + list = itemsets ? state.items : state.kernels + list.sort_by {|i| [i.rule_id, i.position] }.each do |item| + if item.empty_rule? + r = "ε •" + else + r = item.rhs.map(&:display_name).insert(item.position, "•").join(" ") + end + if item.lhs == last_lhs + l = " " * item.lhs.id.s_value.length + "|" + else + l = item.lhs.id.s_value + ":" + end + la = "" + if lookaheads && item.end_of_rule? + reduce = state.find_reduce_by_item!(item) + look_ahead = reduce.selected_look_ahead + unless look_ahead.empty? + la = " [#{look_ahead.map(&:display_name).join(", ")}]" + end + end + last_lhs = item.lhs + + io << sprintf("%5i %s %s%s\n", item.rule_id, l, r, la) + end + io << "\n" + + # Report shifts + tmp = state.term_transitions.reject do |shift, _| + shift.not_selected + end.map do |shift, next_state| + [shift.next_sym, next_state.id] + end + max_len = tmp.map(&:first).map(&:display_name).map(&:length).max + tmp.each do |term, state_id| + io << " #{term.display_name.ljust(max_len)} shift, and go to state #{state_id}\n" + end + io << "\n" unless tmp.empty? + + # Report error caused by %nonassoc + nl = false + tmp = state.resolved_conflicts.select do |resolved| + resolved.which == :error + end.map do |error| + error.symbol.display_name + end + max_len = tmp.map(&:length).max + tmp.each do |name| + nl = true + io << " #{name.ljust(max_len)} error (nonassociative)\n" + end + io << "\n" unless tmp.empty? + + # Report reduces + nl = false + max_len = state.non_default_reduces.flat_map(&:look_ahead).compact.map(&:display_name).map(&:length).max || 0 + max_len = [max_len, "$default".length].max if state.default_reduction_rule + ary = [] + + state.non_default_reduces.each do |reduce| + reduce.look_ahead.each do |term| + ary << [term, reduce] + end + end + + ary.sort_by do |term, reduce| + term.number + end.each do |term, reduce| + rule = reduce.item.rule + io << " #{term.display_name.ljust(max_len)} reduce using rule #{rule.id} (#{rule.lhs.display_name})\n" + nl = true + end + + if (r = state.default_reduction_rule) + nl = true + s = "$default".ljust(max_len) + + if r.initial_rule? + io << " #{s} accept\n" + else + io << " #{s} reduce using rule #{r.id} (#{r.lhs.display_name})\n" + end + end + io << "\n" if nl + + # Report nonterminal transitions + tmp = [] + max_len = 0 + state.nterm_transitions.each do |shift, next_state| + nterm = shift.next_sym + tmp << [nterm, next_state.id] + max_len = [max_len, nterm.id.s_value.length].max + end + tmp.uniq! + tmp.sort_by! do |nterm, state_id| + nterm.number + end + tmp.each do |nterm, state_id| + io << " #{nterm.id.s_value.ljust(max_len)} go to state #{state_id}\n" + end + io << "\n" unless tmp.empty? + + if solved + # Report conflict resolutions + state.resolved_conflicts.each do |resolved| + io << " #{resolved.report_message}\n" + end + io << "\n" unless state.resolved_conflicts.empty? + end + + if counterexamples && state.has_conflicts? + # Report counterexamples + examples = cex.compute(state) + examples.each do |example| + label0 = example.type == :shift_reduce ? "shift/reduce" : "reduce/reduce" + label1 = example.type == :shift_reduce ? "Shift derivation" : "First Reduce derivation" + label2 = example.type == :shift_reduce ? "Reduce derivation" : "Second Reduce derivation" + + io << " #{label0} conflict on token #{example.conflict_symbol.id.s_value}:\n" + io << " #{example.path1_item}\n" + io << " #{example.path2_item}\n" + io << " #{label1}\n" + example.derivations1.render_strings_for_report.each do |str| + io << " #{str}\n" + end + io << " #{label2}\n" + example.derivations2.render_strings_for_report.each do |str| + io << " #{str}\n" + end + end + end + + if verbose + # Report direct_read_sets + io << " [Direct Read sets]\n" + direct_read_sets = @states.direct_read_sets + @states.nterms.each do |nterm| + terms = direct_read_sets[[state.id, nterm.token_id]] + next unless terms + next if terms.empty? + + str = terms.map {|sym| sym.id.s_value }.join(", ") + io << " read #{nterm.id.s_value} shift #{str}\n" + end + io << "\n" + + # Report reads_relation + io << " [Reads Relation]\n" + @states.nterms.each do |nterm| + a = @states.reads_relation[[state.id, nterm.token_id]] + next unless a + + a.each do |state_id2, nterm_id2| + n = @states.nterms.find {|n| n.token_id == nterm_id2 } + io << " (State #{state_id2}, #{n.id.s_value})\n" + end + end + io << "\n" + + # Report read_sets + io << " [Read sets]\n" + read_sets = @states.read_sets + @states.nterms.each do |nterm| + terms = read_sets[[state.id, nterm.token_id]] + next unless terms + next if terms.empty? + + terms.each do |sym| + io << " #{sym.id.s_value}\n" + end + end + io << "\n" + + # Report includes_relation + io << " [Includes Relation]\n" + @states.nterms.each do |nterm| + a = @states.includes_relation[[state.id, nterm.token_id]] + next unless a + + a.each do |state_id2, nterm_id2| + n = @states.nterms.find {|n| n.token_id == nterm_id2 } + io << " (State #{state.id}, #{nterm.id.s_value}) -> (State #{state_id2}, #{n.id.s_value})\n" + end + end + io << "\n" + + # Report lookback_relation + io << " [Lookback Relation]\n" + @states.rules.each do |rule| + a = @states.lookback_relation[[state.id, rule.id]] + next unless a + + a.each do |state_id2, nterm_id2| + n = @states.nterms.find {|n| n.token_id == nterm_id2 } + io << " (Rule: #{rule.display_name}) -> (State #{state_id2}, #{n.id.s_value})\n" + end + end + io << "\n" + + # Report follow_sets + io << " [Follow sets]\n" + follow_sets = @states.follow_sets + @states.nterms.each do |nterm| + terms = follow_sets[[state.id, nterm.token_id]] + + next unless terms + + terms.each do |sym| + io << " #{nterm.id.s_value} -> #{sym.id.s_value}\n" + end + end + io << "\n" + + # Report LA + io << " [Look-Ahead Sets]\n" + tmp = [] + max_len = 0 + @states.rules.each do |rule| + syms = @states.la[[state.id, rule.id]] + next unless syms + + tmp << [rule, syms] + max_len = ([max_len] + syms.map {|s| s.id.s_value.length }).max + end + tmp.each do |rule, syms| + syms.each do |sym| + io << " #{sym.id.s_value.ljust(max_len)} reduce using rule #{rule.id} (#{rule.lhs.id.s_value})\n" + end + end + io << "\n" unless tmp.empty? + end + + # End of Report State + io << "\n" + end + end + end +end diff --git a/tools/lrama/lib/lrama/trace_reporter.rb b/tools/lrama/lib/lrama/trace_reporter.rb new file mode 100644 index 0000000000..bcf1ef1e50 --- /dev/null +++ b/tools/lrama/lib/lrama/trace_reporter.rb @@ -0,0 +1,45 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class TraceReporter + # @rbs (Lrama::Grammar grammar) -> void + def initialize(grammar) + @grammar = grammar + end + + # @rbs (**Hash[Symbol, bool] options) -> void + def report(**options) + _report(**options) + end + + private + + # @rbs rules: (bool rules, bool actions, bool only_explicit_rules, **untyped _) -> void + def _report(rules: false, actions: false, only_explicit_rules: false, **_) + report_rules if rules && !only_explicit_rules + report_only_explicit_rules if only_explicit_rules + report_actions if actions + end + + # @rbs () -> void + def report_rules + puts "Grammar rules:" + @grammar.rules.each { |rule| puts rule.display_name } + end + + # @rbs () -> void + def report_only_explicit_rules + puts "Grammar rules:" + @grammar.rules.each do |rule| + puts rule.display_name_without_action if rule.lhs.first_set.any? + end + end + + # @rbs () -> void + def report_actions + puts "Grammar rules with actions:" + @grammar.rules.each { |rule| puts rule.with_actions } + end + end +end diff --git a/tools/lrama/lib/lrama/version.rb b/tools/lrama/lib/lrama/version.rb new file mode 100644 index 0000000000..12ece5a8f2 --- /dev/null +++ b/tools/lrama/lib/lrama/version.rb @@ -0,0 +1,5 @@ +# frozen_string_literal: true + +module Lrama + VERSION = "0.7.0".freeze +end diff --git a/tools/lrama/template/bison/_yacc.h b/tools/lrama/template/bison/_yacc.h new file mode 100644 index 0000000000..34ed6d81f5 --- /dev/null +++ b/tools/lrama/template/bison/_yacc.h @@ -0,0 +1,71 @@ +<%# b4_shared_declarations -%> + <%-# b4_cpp_guard_open([b4_spec_mapped_header_file]) -%> + <%- if output.spec_mapped_header_file -%> +#ifndef <%= output.b4_cpp_guard__b4_spec_mapped_header_file %> +# define <%= output.b4_cpp_guard__b4_spec_mapped_header_file %> + <%- end -%> + <%-# b4_declare_yydebug & b4_YYDEBUG_define -%> +/* Debug traces. */ +#ifndef YYDEBUG +# define YYDEBUG 0 +#endif +#if YYDEBUG && !defined(yydebug) +extern int yydebug; +#endif +<%= output.percent_code("requires") %> + + <%-# b4_token_enums_defines -%> +/* Token kinds. */ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + enum yytokentype + { +<%= output.token_enums -%> + }; + typedef enum yytokentype yytoken_kind_t; +#endif + + <%-# b4_declare_yylstype -%> + <%-# b4_value_type_define -%> +/* Value type. */ +#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED +union YYSTYPE +{ +#line <%= output.grammar.union.lineno %> "<%= output.grammar_file_path %>" +<%= output.grammar.union.braces_less_code %> +#line [@oline@] [@ofile@] + +}; +typedef union YYSTYPE YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1 +# define YYSTYPE_IS_DECLARED 1 +#endif + + <%-# b4_location_type_define -%> +/* Location type. */ +#if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED +typedef struct YYLTYPE YYLTYPE; +struct YYLTYPE +{ + int first_line; + int first_column; + int last_line; + int last_column; +}; +# define YYLTYPE_IS_DECLARED 1 +# define YYLTYPE_IS_TRIVIAL 1 +#endif + + + + + <%-# b4_declare_yyerror_and_yylex. Not supported -%> + <%-# b4_declare_yyparse -%> +int yyparse (<%= output.parse_param %>); + + +<%= output.percent_code("provides") %> + <%-# b4_cpp_guard_close([b4_spec_mapped_header_file]) -%> + <%- if output.spec_mapped_header_file -%> +#endif /* !<%= output.b4_cpp_guard__b4_spec_mapped_header_file %> */ + <%- end -%> diff --git a/tools/lrama/template/bison/yacc.c b/tools/lrama/template/bison/yacc.c new file mode 100644 index 0000000000..6edd59a0d5 --- /dev/null +++ b/tools/lrama/template/bison/yacc.c @@ -0,0 +1,2068 @@ +<%# b4_generated_by -%> +/* A Bison parser, made by Lrama <%= Lrama::VERSION %>. */ + +<%# b4_copyright -%> +/* Bison implementation for Yacc-like parsers in C + + Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2021 Free Software Foundation, + Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +/* C LALR(1) parser skeleton written by Richard Stallman, by + simplifying the original so-called "semantic" parser. */ + +<%# b4_disclaimer -%> +/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual, + especially those whose name start with YY_ or yy_. They are + private implementation details that can be changed or removed. */ + +/* All symbols defined below should begin with yy or YY, to avoid + infringing on user name space. This should be done even for local + variables, as they might otherwise be expanded by user macros. + There are some unavoidable exceptions within include files to + define necessary library symbols; they are noted "INFRINGES ON + USER NAME SPACE" below. */ + +<%# b4_identification -%> +/* Identify Bison output, and Bison version. */ +#define YYBISON 30802 + +/* Bison version string. */ +#define YYBISON_VERSION "3.8.2" + +/* Skeleton name. */ +#define YYSKELETON_NAME "<%= output.template_basename %>" + +/* Pure parsers. */ +#define YYPURE 1 + +/* Push parsers. */ +#define YYPUSH 0 + +/* Pull parsers. */ +#define YYPULL 1 + + +<%# b4_user_pre_prologue -%> +<%- if output.aux.prologue -%> +/* First part of user prologue. */ +#line <%= output.aux.prologue_first_lineno %> "<%= output.grammar_file_path %>" +<%= output.aux.prologue %> +#line [@oline@] [@ofile@] +<%- end -%> + +<%# b4_cast_define -%> +# ifndef YY_CAST +# ifdef __cplusplus +# define YY_CAST(Type, Val) static_cast (Val) +# define YY_REINTERPRET_CAST(Type, Val) reinterpret_cast (Val) +# else +# define YY_CAST(Type, Val) ((Type) (Val)) +# define YY_REINTERPRET_CAST(Type, Val) ((Type) (Val)) +# endif +# endif +<%# b4_null_define -%> +# ifndef YY_NULLPTR +# if defined __cplusplus +# if 201103L <= __cplusplus +# define YY_NULLPTR nullptr +# else +# define YY_NULLPTR 0 +# endif +# else +# define YY_NULLPTR ((void*)0) +# endif +# endif + +<%# b4_header_include_if -%> +<%- if output.include_header -%> +#include "<%= output.include_header %>" +<%- else -%> +/* Use api.header.include to #include this header + instead of duplicating it here. */ +<%= output.render_partial("bison/_yacc.h") %> +<%- end -%> +<%# b4_declare_symbol_enum -%> +/* Symbol kind. */ +enum yysymbol_kind_t +{ +<%= output.symbol_enum -%> +}; +typedef enum yysymbol_kind_t yysymbol_kind_t; + + + + +<%# b4_user_post_prologue -%> +<%# b4_c99_int_type_define -%> +#ifdef short +# undef short +#endif + +/* On compilers that do not define __PTRDIFF_MAX__ etc., make sure + and (if available) are included + so that the code can choose integer types of a good width. */ + +#ifndef __PTRDIFF_MAX__ +# include /* INFRINGES ON USER NAME SPACE */ +# if defined __STDC_VERSION__ && 199901 <= __STDC_VERSION__ +# include /* INFRINGES ON USER NAME SPACE */ +# define YY_STDINT_H +# endif +#endif + +/* Narrow types that promote to a signed type and that can represent a + signed or unsigned integer of at least N bits. In tables they can + save space and decrease cache pressure. Promoting to a signed type + helps avoid bugs in integer arithmetic. */ + +#ifdef __INT_LEAST8_MAX__ +typedef __INT_LEAST8_TYPE__ yytype_int8; +#elif defined YY_STDINT_H +typedef int_least8_t yytype_int8; +#else +typedef signed char yytype_int8; +#endif + +#ifdef __INT_LEAST16_MAX__ +typedef __INT_LEAST16_TYPE__ yytype_int16; +#elif defined YY_STDINT_H +typedef int_least16_t yytype_int16; +#else +typedef short yytype_int16; +#endif + +/* Work around bug in HP-UX 11.23, which defines these macros + incorrectly for preprocessor constants. This workaround can likely + be removed in 2023, as HPE has promised support for HP-UX 11.23 + (aka HP-UX 11i v2) only through the end of 2022; see Table 2 of + . */ +#ifdef __hpux +# undef UINT_LEAST8_MAX +# undef UINT_LEAST16_MAX +# define UINT_LEAST8_MAX 255 +# define UINT_LEAST16_MAX 65535 +#endif + +#if defined __UINT_LEAST8_MAX__ && __UINT_LEAST8_MAX__ <= __INT_MAX__ +typedef __UINT_LEAST8_TYPE__ yytype_uint8; +#elif (!defined __UINT_LEAST8_MAX__ && defined YY_STDINT_H \ + && UINT_LEAST8_MAX <= INT_MAX) +typedef uint_least8_t yytype_uint8; +#elif !defined __UINT_LEAST8_MAX__ && UCHAR_MAX <= INT_MAX +typedef unsigned char yytype_uint8; +#else +typedef short yytype_uint8; +#endif + +#if defined __UINT_LEAST16_MAX__ && __UINT_LEAST16_MAX__ <= __INT_MAX__ +typedef __UINT_LEAST16_TYPE__ yytype_uint16; +#elif (!defined __UINT_LEAST16_MAX__ && defined YY_STDINT_H \ + && UINT_LEAST16_MAX <= INT_MAX) +typedef uint_least16_t yytype_uint16; +#elif !defined __UINT_LEAST16_MAX__ && USHRT_MAX <= INT_MAX +typedef unsigned short yytype_uint16; +#else +typedef int yytype_uint16; +#endif + +<%# b4_sizes_types_define -%> +#ifndef YYPTRDIFF_T +# if defined __PTRDIFF_TYPE__ && defined __PTRDIFF_MAX__ +# define YYPTRDIFF_T __PTRDIFF_TYPE__ +# define YYPTRDIFF_MAXIMUM __PTRDIFF_MAX__ +# elif defined PTRDIFF_MAX +# ifndef ptrdiff_t +# include /* INFRINGES ON USER NAME SPACE */ +# endif +# define YYPTRDIFF_T ptrdiff_t +# define YYPTRDIFF_MAXIMUM PTRDIFF_MAX +# else +# define YYPTRDIFF_T long +# define YYPTRDIFF_MAXIMUM LONG_MAX +# endif +#endif + +#ifndef YYSIZE_T +# ifdef __SIZE_TYPE__ +# define YYSIZE_T __SIZE_TYPE__ +# elif defined size_t +# define YYSIZE_T size_t +# elif defined __STDC_VERSION__ && 199901 <= __STDC_VERSION__ +# include /* INFRINGES ON USER NAME SPACE */ +# define YYSIZE_T size_t +# else +# define YYSIZE_T unsigned +# endif +#endif + +#define YYSIZE_MAXIMUM \ + YY_CAST (YYPTRDIFF_T, \ + (YYPTRDIFF_MAXIMUM < YY_CAST (YYSIZE_T, -1) \ + ? YYPTRDIFF_MAXIMUM \ + : YY_CAST (YYSIZE_T, -1))) + +#define YYSIZEOF(X) YY_CAST (YYPTRDIFF_T, sizeof (X)) + + +/* Stored state numbers (used for stacks). */ +typedef <%= output.int_type_for([output.yynstates - 1]) %> yy_state_t; + +/* State numbers in computations. */ +typedef int yy_state_fast_t; + +#ifndef YY_ +# if defined YYENABLE_NLS && YYENABLE_NLS +# if ENABLE_NLS +# include /* INFRINGES ON USER NAME SPACE */ +# define YY_(Msgid) dgettext ("bison-runtime", Msgid) +# endif +# endif +# ifndef YY_ +# define YY_(Msgid) Msgid +# endif +#endif + + +<%# b4_attribute_define -%> +#ifndef YY_ATTRIBUTE_PURE +# if defined __GNUC__ && 2 < __GNUC__ + (96 <= __GNUC_MINOR__) +# define YY_ATTRIBUTE_PURE __attribute__ ((__pure__)) +# else +# define YY_ATTRIBUTE_PURE +# endif +#endif + +#ifndef YY_ATTRIBUTE_UNUSED +# if defined __GNUC__ && 2 < __GNUC__ + (7 <= __GNUC_MINOR__) +# define YY_ATTRIBUTE_UNUSED __attribute__ ((__unused__)) +# else +# define YY_ATTRIBUTE_UNUSED +# endif +#endif + +/* Suppress unused-variable warnings by "using" E. */ +#if ! defined lint || defined __GNUC__ +# define YY_USE(E) ((void) (E)) +#else +# define YY_USE(E) /* empty */ +#endif + +/* Suppress an incorrect diagnostic about yylval being uninitialized. */ +#if defined __GNUC__ && ! defined __ICC && 406 <= __GNUC__ * 100 + __GNUC_MINOR__ +# if __GNUC__ * 100 + __GNUC_MINOR__ < 407 +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"") +# else +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"") \ + _Pragma ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +# endif +# define YY_IGNORE_MAYBE_UNINITIALIZED_END \ + _Pragma ("GCC diagnostic pop") +#else +# define YY_INITIAL_VALUE(Value) Value +#endif +#ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN +# define YY_IGNORE_MAYBE_UNINITIALIZED_END +#endif +#ifndef YY_INITIAL_VALUE +# define YY_INITIAL_VALUE(Value) /* Nothing. */ +#endif + +#if defined __cplusplus && defined __GNUC__ && ! defined __ICC && 6 <= __GNUC__ +# define YY_IGNORE_USELESS_CAST_BEGIN \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wuseless-cast\"") +# define YY_IGNORE_USELESS_CAST_END \ + _Pragma ("GCC diagnostic pop") +#endif +#ifndef YY_IGNORE_USELESS_CAST_BEGIN +# define YY_IGNORE_USELESS_CAST_BEGIN +# define YY_IGNORE_USELESS_CAST_END +#endif + + +#define YY_ASSERT(E) ((void) (0 && (E))) + +#if 1 + +/* The parser invokes alloca or malloc; define the necessary symbols. */ + +# ifdef YYSTACK_USE_ALLOCA +# if YYSTACK_USE_ALLOCA +# ifdef __GNUC__ +# define YYSTACK_ALLOC __builtin_alloca +# elif defined __BUILTIN_VA_ARG_INCR +# include /* INFRINGES ON USER NAME SPACE */ +# elif defined _AIX +# define YYSTACK_ALLOC __alloca +# elif defined _MSC_VER +# include /* INFRINGES ON USER NAME SPACE */ +# define alloca _alloca +# else +# define YYSTACK_ALLOC alloca +# if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS +# include /* INFRINGES ON USER NAME SPACE */ + /* Use EXIT_SUCCESS as a witness for stdlib.h. */ +# ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +# endif +# endif +# endif +# endif +# endif + +# ifdef YYSTACK_ALLOC + /* Pacify GCC's 'empty if-body' warning. */ +# define YYSTACK_FREE(Ptr) do { /* empty */; } while (0) +# ifndef YYSTACK_ALLOC_MAXIMUM + /* The OS might guarantee only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + invoke alloca (N) if N exceeds 4096. Use a slightly smaller number + to allow for a few compiler-allocated temporary stack slots. */ +# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */ +# endif +# else +# define YYSTACK_ALLOC YYMALLOC +# define YYSTACK_FREE YYFREE +# ifndef YYSTACK_ALLOC_MAXIMUM +# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM +# endif +# if (defined __cplusplus && ! defined EXIT_SUCCESS \ + && ! ((defined YYMALLOC || defined malloc) \ + && (defined YYFREE || defined free))) +# include /* INFRINGES ON USER NAME SPACE */ +# ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +# endif +# endif +# ifndef YYMALLOC +# define YYMALLOC malloc +# if ! defined malloc && ! defined EXIT_SUCCESS +void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# ifndef YYFREE +# define YYFREE free +# if ! defined free && ! defined EXIT_SUCCESS +void free (void *); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# endif +#endif /* 1 */ + +#if (! defined yyoverflow \ + && (! defined __cplusplus \ + || (defined YYLTYPE_IS_TRIVIAL && YYLTYPE_IS_TRIVIAL \ + && defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) + +/* A type that is properly aligned for any stack member. */ +union yyalloc +{ + yy_state_t yyss_alloc; + YYSTYPE yyvs_alloc; + YYLTYPE yyls_alloc; +}; + +/* The size of the maximum gap between one aligned stack and the next. */ +# define YYSTACK_GAP_MAXIMUM (YYSIZEOF (union yyalloc) - 1) + +/* The size of an array large to enough to hold all stacks, each with + N elements. */ +# define YYSTACK_BYTES(N) \ + ((N) * (YYSIZEOF (yy_state_t) + YYSIZEOF (YYSTYPE) \ + + YYSIZEOF (YYLTYPE)) \ + + 2 * YYSTACK_GAP_MAXIMUM) + +# define YYCOPY_NEEDED 1 + +/* Relocate STACK from its old location to the new one. The + local variables YYSIZE and YYSTACKSIZE give the old and new number of + elements in the stack, and YYPTR gives the new location of the + stack. Advance YYPTR to a properly aligned location for the next + stack. */ +# define YYSTACK_RELOCATE(Stack_alloc, Stack) \ + do \ + { \ + YYPTRDIFF_T yynewbytes; \ + YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \ + Stack = &yyptr->Stack_alloc; \ + yynewbytes = yystacksize * YYSIZEOF (*Stack) + YYSTACK_GAP_MAXIMUM; \ + yyptr += yynewbytes / YYSIZEOF (*yyptr); \ + } \ + while (0) + +#endif + +#if defined YYCOPY_NEEDED && YYCOPY_NEEDED +/* Copy COUNT objects from SRC to DST. The source and destination do + not overlap. */ +# ifndef YYCOPY +# if defined __GNUC__ && 1 < __GNUC__ +# define YYCOPY(Dst, Src, Count) \ + __builtin_memcpy (Dst, Src, YY_CAST (YYSIZE_T, (Count)) * sizeof (*(Src))) +# else +# define YYCOPY(Dst, Src, Count) \ + do \ + { \ + YYPTRDIFF_T yyi; \ + for (yyi = 0; yyi < (Count); yyi++) \ + (Dst)[yyi] = (Src)[yyi]; \ + } \ + while (0) +# endif +# endif +#endif /* !YYCOPY_NEEDED */ + +/* YYFINAL -- State number of the termination state. */ +#define YYFINAL <%= output.yyfinal %> +/* YYLAST -- Last index in YYTABLE. */ +#define YYLAST <%= output.yylast %> + +/* YYNTOKENS -- Number of terminals. */ +#define YYNTOKENS <%= output.yyntokens %> +/* YYNNTS -- Number of nonterminals. */ +#define YYNNTS <%= output.yynnts %> +/* YYNRULES -- Number of rules. */ +#define YYNRULES <%= output.yynrules %> +/* YYNSTATES -- Number of states. */ +#define YYNSTATES <%= output.yynstates %> + +/* YYMAXUTOK -- Last valid token kind. */ +#define YYMAXUTOK <%= output.yymaxutok %> + + +/* YYTRANSLATE(TOKEN-NUM) -- Symbol number corresponding to TOKEN-NUM + as returned by yylex, with out-of-bounds checking. */ +#define YYTRANSLATE(YYX) \ + (0 <= (YYX) && (YYX) <= YYMAXUTOK \ + ? YY_CAST (yysymbol_kind_t, yytranslate[YYX]) \ + : YYSYMBOL_YYUNDEF) + +/* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM + as returned by yylex. */ +static const <%= output.int_type_for(output.context.yytranslate) %> yytranslate[] = +{ +<%= output.yytranslate %> +}; + +<%- if output.error_recovery -%> +/* YYTRANSLATE_INVERTED[SYMBOL-NUM] -- Token number corresponding to SYMBOL-NUM */ +static const <%= output.int_type_for(output.context.yytranslate_inverted) %> yytranslate_inverted[] = +{ +<%= output.yytranslate_inverted %> +}; +<%- end -%> +#if YYDEBUG +/* YYRLINE[YYN] -- Source line where rule number YYN was defined. */ +static const <%= output.int_type_for(output.context.yyrline) %> yyrline[] = +{ +<%= output.yyrline %> +}; +#endif + +/** Accessing symbol of state STATE. */ +#define YY_ACCESSING_SYMBOL(State) YY_CAST (yysymbol_kind_t, yystos[State]) + +#if 1 +/* The user-facing name of the symbol whose (internal) number is + YYSYMBOL. No bounds checking. */ +static const char *yysymbol_name (yysymbol_kind_t yysymbol) YY_ATTRIBUTE_UNUSED; + +/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. + First, the terminals, then, starting at YYNTOKENS, nonterminals. */ +static const char *const yytname[] = +{ +<%= output.yytname %> +}; + +static const char * +yysymbol_name (yysymbol_kind_t yysymbol) +{ + return yytname[yysymbol]; +} +#endif + +#define YYPACT_NINF (<%= output.yypact_ninf %>) + +#define yypact_value_is_default(Yyn) \ + <%= output.table_value_equals(output.context.yypact, "Yyn", output.yypact_ninf, "YYPACT_NINF") %> + +#define YYTABLE_NINF (<%= output.yytable_ninf %>) + +#define yytable_value_is_error(Yyn) \ + <%= output.table_value_equals(output.context.yytable, "Yyn", output.yytable_ninf, "YYTABLE_NINF") %> + +<%# b4_parser_tables_define -%> +/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing + STATE-NUM. */ +static const <%= output.int_type_for(output.context.yypact) %> yypact[] = +{ +<%= output.int_array_to_string(output.context.yypact) %> +}; + +/* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM. + Performed when YYTABLE does not specify something else to do. Zero + means the default is an error. */ +static const <%= output.int_type_for(output.context.yydefact) %> yydefact[] = +{ +<%= output.int_array_to_string(output.context.yydefact) %> +}; + +/* YYPGOTO[NTERM-NUM]. */ +static const <%= output.int_type_for(output.context.yypgoto) %> yypgoto[] = +{ +<%= output.int_array_to_string(output.context.yypgoto) %> +}; + +/* YYDEFGOTO[NTERM-NUM]. */ +static const <%= output.int_type_for(output.context.yydefgoto) %> yydefgoto[] = +{ +<%= output.int_array_to_string(output.context.yydefgoto) %> +}; + +/* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If + positive, shift that token. If negative, reduce the rule whose + number is the opposite. If YYTABLE_NINF, syntax error. */ +static const <%= output.int_type_for(output.context.yytable) %> yytable[] = +{ +<%= output.int_array_to_string(output.context.yytable) %> +}; + +static const <%= output.int_type_for(output.context.yycheck) %> yycheck[] = +{ +<%= output.int_array_to_string(output.context.yycheck) %> +}; + +/* YYSTOS[STATE-NUM] -- The symbol kind of the accessing symbol of + state STATE-NUM. */ +static const <%= output.int_type_for(output.context.yystos) %> yystos[] = +{ +<%= output.int_array_to_string(output.context.yystos) %> +}; + +/* YYR1[RULE-NUM] -- Symbol kind of the left-hand side of rule RULE-NUM. */ +static const <%= output.int_type_for(output.context.yyr1) %> yyr1[] = +{ +<%= output.int_array_to_string(output.context.yyr1) %> +}; + +/* YYR2[RULE-NUM] -- Number of symbols on the right-hand side of rule RULE-NUM. */ +static const <%= output.int_type_for(output.context.yyr2) %> yyr2[] = +{ +<%= output.int_array_to_string(output.context.yyr2) %> +}; + + +enum { YYENOMEM = -2 }; + +#define yyerrok (yyerrstatus = 0) +#define yyclearin (yychar = YYEMPTY) + +#define YYACCEPT goto yyacceptlab +#define YYABORT goto yyabortlab +#define YYERROR goto yyerrorlab +#define YYNOMEM goto yyexhaustedlab + + +#define YYRECOVERING() (!!yyerrstatus) + +#define YYBACKUP(Token, Value) \ + do \ + if (yychar == YYEMPTY) \ + { \ + yychar = (Token); \ + yylval = (Value); \ + YYPOPSTACK (yylen); \ + yystate = *yyssp; \ + goto yybackup; \ + } \ + else \ + { \ + yyerror (<%= output.yyerror_args %>, YY_("syntax error: cannot back up")); \ + YYERROR; \ + } \ + while (0) + +/* Backward compatibility with an undocumented macro. + Use YYerror or YYUNDEF. */ +#define YYERRCODE YYUNDEF + +<%# b4_yylloc_default_define -%> +/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N]. + If N is 0, then set CURRENT to the empty location which ends + the previous symbol: RHS[0] (always defined). */ + +#ifndef YYLLOC_DEFAULT +# define YYLLOC_DEFAULT(Current, Rhs, N) \ + do \ + if (N) \ + { \ + (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \ + (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \ + (Current).last_line = YYRHSLOC (Rhs, N).last_line; \ + (Current).last_column = YYRHSLOC (Rhs, N).last_column; \ + } \ + else \ + { \ + (Current).first_line = (Current).last_line = \ + YYRHSLOC (Rhs, 0).last_line; \ + (Current).first_column = (Current).last_column = \ + YYRHSLOC (Rhs, 0).last_column; \ + } \ + while (0) +#endif + +#define YYRHSLOC(Rhs, K) ((Rhs)[K]) + + +/* Enable debugging if requested. */ +#if YYDEBUG + +# ifndef YYFPRINTF +# include /* INFRINGES ON USER NAME SPACE */ +# define YYFPRINTF fprintf +# endif + +# define YYDPRINTF(Args) \ +do { \ + if (yydebug) \ + YYFPRINTF Args; \ +} while (0) + + +<%# b4_yylocation_print_define -%> +/* YYLOCATION_PRINT -- Print the location on the stream. + This macro was not mandated originally: define only if we know + we won't break user code: when these are the locations we know. */ + +# ifndef YYLOCATION_PRINT + +# if defined YY_LOCATION_PRINT + + /* Temporary convenience wrapper in case some people defined the + undocumented and private YY_LOCATION_PRINT macros. */ +# define YYLOCATION_PRINT(File, Loc<%= output.user_args %>) YY_LOCATION_PRINT(File, *(Loc)<%= output.user_args %>) + +# elif defined YYLTYPE_IS_TRIVIAL && YYLTYPE_IS_TRIVIAL + +/* Print *YYLOCP on YYO. Private, do not rely on its existence. */ + +YY_ATTRIBUTE_UNUSED +static int +yy_location_print_ (FILE *yyo, YYLTYPE const * const yylocp) +{ + int res = 0; + int end_col = 0 != yylocp->last_column ? yylocp->last_column - 1 : 0; + if (0 <= yylocp->first_line) + { + res += YYFPRINTF (yyo, "%d", yylocp->first_line); + if (0 <= yylocp->first_column) + res += YYFPRINTF (yyo, ".%d", yylocp->first_column); + } + if (0 <= yylocp->last_line) + { + if (yylocp->first_line < yylocp->last_line) + { + res += YYFPRINTF (yyo, "-%d", yylocp->last_line); + if (0 <= end_col) + res += YYFPRINTF (yyo, ".%d", end_col); + } + else if (0 <= end_col && yylocp->first_column < end_col) + res += YYFPRINTF (yyo, "-%d", end_col); + } + return res; +} + +# define YYLOCATION_PRINT yy_location_print_ + + /* Temporary convenience wrapper in case some people defined the + undocumented and private YY_LOCATION_PRINT macros. */ +# define YY_LOCATION_PRINT(File, Loc<%= output.user_args %>) YYLOCATION_PRINT(File, &(Loc)<%= output.user_args %>) + +# else + +# define YYLOCATION_PRINT(File, Loc<%= output.user_args %>) ((void) 0) + /* Temporary convenience wrapper in case some people defined the + undocumented and private YY_LOCATION_PRINT macros. */ +# define YY_LOCATION_PRINT YYLOCATION_PRINT + +# endif +# endif /* !defined YYLOCATION_PRINT */ + + +# define YY_SYMBOL_PRINT(Title, Kind, Value, Location<%= output.user_args %>) \ +do { \ + if (yydebug) \ + { \ + YYFPRINTF (stderr, "%s ", Title); \ + yy_symbol_print (stderr, \ + Kind, Value, Location<%= output.user_args %>); \ + YYFPRINTF (stderr, "\n"); \ + } \ +} while (0) + + +<%# b4_yy_symbol_print_define -%> +/*-----------------------------------. +| Print this symbol's value on YYO. | +`-----------------------------------*/ + +static void +yy_symbol_value_print (FILE *yyo, + yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep, YYLTYPE const * const yylocationp<%= output.user_formals %>) +{ + FILE *yyoutput = yyo; +<%= output.parse_param_use("yyoutput", "yylocationp") %> + if (!yyvaluep) + return; + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN +<%# b4_symbol_actions(printer) -%> +switch (yykind) + { +<%= output.symbol_actions_for_printer -%> + default: + break; + } + YY_IGNORE_MAYBE_UNINITIALIZED_END +} + + +/*---------------------------. +| Print this symbol on YYO. | +`---------------------------*/ + +static void +yy_symbol_print (FILE *yyo, + yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep, YYLTYPE const * const yylocationp<%= output.user_formals %>) +{ + YYFPRINTF (yyo, "%s %s (", + yykind < YYNTOKENS ? "token" : "nterm", yysymbol_name (yykind)); + + YYLOCATION_PRINT (yyo, yylocationp<%= output.user_args %>); + YYFPRINTF (yyo, ": "); + yy_symbol_value_print (yyo, yykind, yyvaluep, yylocationp<%= output.user_args %>); + YYFPRINTF (yyo, ")"); +} + +/*------------------------------------------------------------------. +| yy_stack_print -- Print the state stack from its BOTTOM up to its | +| TOP (included). | +`------------------------------------------------------------------*/ + +static void +yy_stack_print (yy_state_t *yybottom, yy_state_t *yytop<%= output.user_formals %>) +{ + YYFPRINTF (stderr, "Stack now"); + for (; yybottom <= yytop; yybottom++) + { + int yybot = *yybottom; + YYFPRINTF (stderr, " %d", yybot); + } + YYFPRINTF (stderr, "\n"); +} + +# define YY_STACK_PRINT(Bottom, Top<%= output.user_args %>) \ +do { \ + if (yydebug) \ + yy_stack_print ((Bottom), (Top)<%= output.user_args %>); \ +} while (0) + + +/*------------------------------------------------. +| Report that the YYRULE is going to be reduced. | +`------------------------------------------------*/ + +static void +yy_reduce_print (yy_state_t *yyssp, YYSTYPE *yyvsp, YYLTYPE *yylsp, + int yyrule<%= output.user_formals %>) +{ + int yylno = yyrline[yyrule]; + int yynrhs = yyr2[yyrule]; + int yyi; + YYFPRINTF (stderr, "Reducing stack by rule %d (line %d):\n", + yyrule - 1, yylno); + /* The symbols being reduced. */ + for (yyi = 0; yyi < yynrhs; yyi++) + { + YYFPRINTF (stderr, " $%d = ", yyi + 1); + yy_symbol_print (stderr, + YY_ACCESSING_SYMBOL (+yyssp[yyi + 1 - yynrhs]), + &yyvsp[(yyi + 1) - (yynrhs)], + &(yylsp[(yyi + 1) - (yynrhs)])<%= output.user_args %>); + YYFPRINTF (stderr, "\n"); + } +} + +# define YY_REDUCE_PRINT(Rule<%= output.user_args %>) \ +do { \ + if (yydebug) \ + yy_reduce_print (yyssp, yyvsp, yylsp, Rule<%= output.user_args %>); \ +} while (0) + +/* Nonzero means print parse trace. It is left uninitialized so that + multiple parsers can coexist. */ +#ifndef yydebug +int yydebug; +#endif +#else /* !YYDEBUG */ +# define YYDPRINTF(Args) ((void) 0) +# define YY_SYMBOL_PRINT(Title, Kind, Value, Location<%= output.user_args %>) +# define YY_STACK_PRINT(Bottom, Top<%= output.user_args %>) +# define YY_REDUCE_PRINT(Rule<%= output.user_args %>) +#endif /* !YYDEBUG */ + + +/* YYINITDEPTH -- initial size of the parser's stacks. */ +#ifndef YYINITDEPTH +# define YYINITDEPTH 200 +#endif + +/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only + if the built-in stack extension method is used). + + Do not make this value too large; the results are undefined if + YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH) + evaluated with infinite-precision integer arithmetic. */ + +#ifndef YYMAXDEPTH +# define YYMAXDEPTH 10000 +#endif + + +/* Context of a parse error. */ +typedef struct +{ + yy_state_t *yyssp; + yysymbol_kind_t yytoken; + YYLTYPE *yylloc; +} yypcontext_t; + +/* Put in YYARG at most YYARGN of the expected tokens given the + current YYCTX, and return the number of tokens stored in YYARG. If + YYARG is null, return the number of expected tokens (guaranteed to + be less than YYNTOKENS). Return YYENOMEM on memory exhaustion. + Return 0 if there are more than YYARGN expected tokens, yet fill + YYARG up to YYARGN. */ +static int +yypcontext_expected_tokens (const yypcontext_t *yyctx, + yysymbol_kind_t yyarg[], int yyargn) +{ + /* Actual size of YYARG. */ + int yycount = 0; + int yyn = yypact[+*yyctx->yyssp]; + if (!yypact_value_is_default (yyn)) + { + /* Start YYX at -YYN if negative to avoid negative indexes in + YYCHECK. In other words, skip the first -YYN actions for + this state because they are default actions. */ + int yyxbegin = yyn < 0 ? -yyn : 0; + /* Stay within bounds of both yycheck and yytname. */ + int yychecklim = YYLAST - yyn + 1; + int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS; + int yyx; + for (yyx = yyxbegin; yyx < yyxend; ++yyx) + if (yycheck[yyx + yyn] == yyx && yyx != YYSYMBOL_YYerror + && !yytable_value_is_error (yytable[yyx + yyn])) + { + if (!yyarg) + ++yycount; + else if (yycount == yyargn) + return 0; + else + yyarg[yycount++] = YY_CAST (yysymbol_kind_t, yyx); + } + } + if (yyarg && yycount == 0 && 0 < yyargn) + yyarg[0] = YYSYMBOL_YYEMPTY; + return yycount; +} + + + + +#ifndef yystrlen +# if defined __GLIBC__ && defined _STRING_H +# define yystrlen(S) (YY_CAST (YYPTRDIFF_T, strlen (S))) +# else +/* Return the length of YYSTR. */ +static YYPTRDIFF_T +yystrlen (const char *yystr) +{ + YYPTRDIFF_T yylen; + for (yylen = 0; yystr[yylen]; yylen++) + continue; + return yylen; +} +# endif +#endif + +#ifndef yystpcpy +# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE +# define yystpcpy stpcpy +# else +/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in + YYDEST. */ +static char * +yystpcpy (char *yydest, const char *yysrc) +{ + char *yyd = yydest; + const char *yys = yysrc; + + while ((*yyd++ = *yys++) != '\0') + continue; + + return yyd - 1; +} +# endif +#endif + +#ifndef yytnamerr +/* Copy to YYRES the contents of YYSTR after stripping away unnecessary + quotes and backslashes, so that it's suitable for yyerror. The + heuristic is that double-quoting is unnecessary unless the string + contains an apostrophe, a comma, or backslash (other than + backslash-backslash). YYSTR is taken from yytname. If YYRES is + null, do not copy; instead, return the length of what the result + would have been. */ +static YYPTRDIFF_T +yytnamerr (char *yyres, const char *yystr) +{ + if (*yystr == '"') + { + YYPTRDIFF_T yyn = 0; + char const *yyp = yystr; + for (;;) + switch (*++yyp) + { + case '\'': + case ',': + goto do_not_strip_quotes; + + case '\\': + if (*++yyp != '\\') + goto do_not_strip_quotes; + else + goto append; + + append: + default: + if (yyres) + yyres[yyn] = *yyp; + yyn++; + break; + + case '"': + if (yyres) + yyres[yyn] = '\0'; + return yyn; + } + do_not_strip_quotes: ; + } + + if (yyres) + return yystpcpy (yyres, yystr) - yyres; + else + return yystrlen (yystr); +} +#endif + + +static int +yy_syntax_error_arguments (const yypcontext_t *yyctx, + yysymbol_kind_t yyarg[], int yyargn) +{ + /* Actual size of YYARG. */ + int yycount = 0; + /* There are many possibilities here to consider: + - If this state is a consistent state with a default action, then + the only way this function was invoked is if the default action + is an error action. In that case, don't check for expected + tokens because there are none. + - The only way there can be no lookahead present (in yychar) is if + this state is a consistent state with a default action. Thus, + detecting the absence of a lookahead is sufficient to determine + that there is no unexpected or expected token to report. In that + case, just report a simple "syntax error". + - Don't assume there isn't a lookahead just because this state is a + consistent state with a default action. There might have been a + previous inconsistent state, consistent state with a non-default + action, or user semantic action that manipulated yychar. + - Of course, the expected token list depends on states to have + correct lookahead information, and it depends on the parser not + to perform extra reductions after fetching a lookahead from the + scanner and before detecting a syntax error. Thus, state merging + (from LALR or IELR) and default reductions corrupt the expected + token list. However, the list is correct for canonical LR with + one exception: it will still contain any token that will not be + accepted due to an error action in a later state. + */ + if (yyctx->yytoken != YYSYMBOL_YYEMPTY) + { + int yyn; + if (yyarg) + yyarg[yycount] = yyctx->yytoken; + ++yycount; + yyn = yypcontext_expected_tokens (yyctx, + yyarg ? yyarg + 1 : yyarg, yyargn - 1); + if (yyn == YYENOMEM) + return YYENOMEM; + else + yycount += yyn; + } + return yycount; +} + +/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message + about the unexpected token YYTOKEN for the state stack whose top is + YYSSP. + + Return 0 if *YYMSG was successfully written. Return -1 if *YYMSG is + not large enough to hold the message. In that case, also set + *YYMSG_ALLOC to the required number of bytes. Return YYENOMEM if the + required number of bytes is too large to store. */ +static int +yysyntax_error (YYPTRDIFF_T *yymsg_alloc, char **yymsg, + const yypcontext_t *yyctx<%= output.user_formals %>) +{ + enum { YYARGS_MAX = 5 }; + /* Internationalized format string. */ + const char *yyformat = YY_NULLPTR; + /* Arguments of yyformat: reported tokens (one for the "unexpected", + one per "expected"). */ + yysymbol_kind_t yyarg[YYARGS_MAX]; + /* Cumulated lengths of YYARG. */ + YYPTRDIFF_T yysize = 0; + + /* Actual size of YYARG. */ + int yycount = yy_syntax_error_arguments (yyctx, yyarg, YYARGS_MAX); + if (yycount == YYENOMEM) + return YYENOMEM; + + switch (yycount) + { +#define YYCASE_(N, S) \ + case N: \ + yyformat = S; \ + break + default: /* Avoid compiler warnings. */ + YYCASE_(0, YY_("syntax error")); + YYCASE_(1, YY_("syntax error, unexpected %s")); + YYCASE_(2, YY_("syntax error, unexpected %s, expecting %s")); + YYCASE_(3, YY_("syntax error, unexpected %s, expecting %s or %s")); + YYCASE_(4, YY_("syntax error, unexpected %s, expecting %s or %s or %s")); + YYCASE_(5, YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s")); +#undef YYCASE_ + } + + /* Compute error message size. Don't count the "%s"s, but reserve + room for the terminator. */ + yysize = yystrlen (yyformat) - 2 * yycount + 1; + { + int yyi; + for (yyi = 0; yyi < yycount; ++yyi) + { + YYPTRDIFF_T yysize1 + = yysize + yytnamerr (YY_NULLPTR, yytname[yyarg[yyi]]); + if (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM) + yysize = yysize1; + else + return YYENOMEM; + } + } + + if (*yymsg_alloc < yysize) + { + *yymsg_alloc = 2 * yysize; + if (! (yysize <= *yymsg_alloc + && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM)) + *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM; + return -1; + } + + /* Avoid sprintf, as that infringes on the user's name space. + Don't have undefined behavior even if the translation + produced a string with the wrong number of "%s"s. */ + { + char *yyp = *yymsg; + int yyi = 0; + while ((*yyp = *yyformat) != '\0') + if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount) + { + yyp += yytnamerr (yyp, yytname[yyarg[yyi++]]); + yyformat += 2; + } + else + { + ++yyp; + ++yyformat; + } + } + return 0; +} + +<%# b4_yydestruct_define %> +/*-----------------------------------------------. +| Release the memory associated to this symbol. | +`-----------------------------------------------*/ + +static void +yydestruct (const char *yymsg, + yysymbol_kind_t yykind, YYSTYPE *yyvaluep, YYLTYPE *yylocationp<%= output.user_formals %>) +{ +<%= output.parse_param_use("yyvaluep", "yylocationp") %> + if (!yymsg) + yymsg = "Deleting"; + YY_SYMBOL_PRINT (yymsg, yykind, yyvaluep, yylocationp<%= output.user_args %>); + + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + switch (yykind) + { +<%= output.symbol_actions_for_destructor -%> + default: + break; + } + YY_IGNORE_MAYBE_UNINITIALIZED_END +} + + + +<%- if output.error_recovery -%> +#ifndef YYMAXREPAIR +# define YYMAXREPAIR(<%= output.parse_param_name %>) (3) +#endif + +#ifndef YYERROR_RECOVERY_ENABLED +# define YYERROR_RECOVERY_ENABLED(<%= output.parse_param_name %>) (1) +#endif + +enum yy_repair_type { + inserting, + deleting, + shifting, +}; + +struct yy_repair { + enum yy_repair_type type; + yysymbol_kind_t term; +}; +typedef struct yy_repair yy_repair; + +struct yy_repairs { + /* For debug */ + int id; + /* For breadth-first traversing */ + struct yy_repairs *next; + YYPTRDIFF_T stack_length; + /* Bottom of states */ + yy_state_t *states; + /* Top of states */ + yy_state_t *state; + /* repair length */ + int repair_length; + /* */ + struct yy_repairs *prev_repair; + struct yy_repair repair; +}; +typedef struct yy_repairs yy_repairs; + +struct yy_term { + yysymbol_kind_t kind; + YYSTYPE value; + YYLTYPE location; +}; +typedef struct yy_term yy_term; + +struct yy_repair_terms { + int id; + int length; + yy_term terms[]; +}; +typedef struct yy_repair_terms yy_repair_terms; + +static void +yy_error_token_initialize (yysymbol_kind_t yykind, YYSTYPE * const yyvaluep, YYLTYPE * const yylocationp<%= output.user_formals %>) +{ + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN +switch (yykind) + { +<%= output.symbol_actions_for_error_token -%> + default: + break; + } + YY_IGNORE_MAYBE_UNINITIALIZED_END +} + +static yy_repair_terms * +yy_create_repair_terms(yy_repairs *reps<%= output.user_formals %>) +{ + yy_repairs *r = reps; + yy_repair_terms *rep_terms; + int count = 0; + + while (r->prev_repair) + { + count++; + r = r->prev_repair; + } + + rep_terms = (yy_repair_terms *) YYMALLOC (sizeof (yy_repair_terms) + sizeof (yy_term) * count); + rep_terms->id = reps->id; + rep_terms->length = count; + + r = reps; + while (r->prev_repair) + { + rep_terms->terms[count-1].kind = r->repair.term; + count--; + r = r->prev_repair; + } + + return rep_terms; +} + +static void +yy_print_repairs(yy_repairs *reps<%= output.user_formals %>) +{ + yy_repairs *r = reps; + + YYDPRINTF ((stderr, + "id: %d, repair_length: %d, repair_state: %d, prev_repair_id: %d\n", + reps->id, reps->repair_length, *reps->state, reps->prev_repair->id)); + + while (r->prev_repair) + { + YYDPRINTF ((stderr, "%s ", yysymbol_name (r->repair.term))); + r = r->prev_repair; + } + + YYDPRINTF ((stderr, "\n")); +} + +static void +yy_print_repair_terms(yy_repair_terms *rep_terms<%= output.user_formals %>) +{ + for (int i = 0; i < rep_terms->length; i++) + YYDPRINTF ((stderr, "%s ", yysymbol_name (rep_terms->terms[i].kind))); + + YYDPRINTF ((stderr, "\n")); +} + +static void +yy_free_repairs(yy_repairs *reps<%= output.user_formals %>) +{ + while (reps) + { + yy_repairs *r = reps; + reps = reps->next; + YYFREE (r->states); + YYFREE (r); + } +} + +static int +yy_process_repairs(yy_repairs *reps, yysymbol_kind_t token) +{ + int yyn; + int yystate = *reps->state; + int yylen = 0; + yysymbol_kind_t yytoken = token; + + goto yyrecover_backup; + +yyrecover_newstate: + // TODO: check reps->stack_length + reps->state += 1; + *reps->state = (yy_state_t) yystate; + + +yyrecover_backup: + yyn = yypact[yystate]; + if (yypact_value_is_default (yyn)) + goto yyrecover_default; + + /* "Reading a token" */ + if (yytoken == YYSYMBOL_YYEMPTY) + return 1; + + yyn += yytoken; + if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) + goto yyrecover_default; + yyn = yytable[yyn]; + if (yyn <= 0) + { + if (yytable_value_is_error (yyn)) + goto yyrecover_errlab; + yyn = -yyn; + goto yyrecover_reduce; + } + + /* shift */ + yystate = yyn; + yytoken = YYSYMBOL_YYEMPTY; + goto yyrecover_newstate; + + +yyrecover_default: + yyn = yydefact[yystate]; + if (yyn == 0) + goto yyrecover_errlab; + goto yyrecover_reduce; + + +yyrecover_reduce: + yylen = yyr2[yyn]; + /* YYPOPSTACK */ + reps->state -= yylen; + yylen = 0; + + { + const int yylhs = yyr1[yyn] - YYNTOKENS; + const int yyi = yypgoto[yylhs] + *reps->state; + yystate = (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *reps->state + ? yytable[yyi] + : yydefgoto[yylhs]); + } + + goto yyrecover_newstate; + +yyrecover_errlab: + return 0; +} + +static yy_repair_terms * +yyrecover(yy_state_t *yyss, yy_state_t *yyssp, int yychar<%= output.user_formals %>) +{ + yysymbol_kind_t yytoken = YYTRANSLATE (yychar); + yy_repair_terms *rep_terms = YY_NULLPTR; + int count = 0; + + yy_repairs *head = (yy_repairs *) YYMALLOC (sizeof (yy_repairs)); + yy_repairs *current = head; + yy_repairs *tail = head; + YYPTRDIFF_T stack_length = yyssp - yyss + 1; + + head->id = count; + head->next = 0; + head->stack_length = stack_length; + head->states = (yy_state_t *) YYMALLOC (sizeof (yy_state_t) * (stack_length)); + head->state = head->states + (yyssp - yyss); + YYCOPY (head->states, yyss, stack_length); + head->repair_length = 0; + head->prev_repair = 0; + + stack_length = (stack_length * 2 > 100) ? (stack_length * 2) : 100; + count++; + + while (current) + { + int yystate = *current->state; + int yyn = yypact[yystate]; + /* See also: yypcontext_expected_tokens */ + if (!yypact_value_is_default (yyn)) + { + int yyxbegin = yyn < 0 ? -yyn : 0; + int yychecklim = YYLAST - yyn + 1; + int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS; + int yyx; + for (yyx = yyxbegin; yyx < yyxend; ++yyx) + { + if (yyx != YYSYMBOL_YYerror) + { + if (current->repair_length + 1 > YYMAXREPAIR(<%= output.parse_param_name %>)) + continue; + + yy_repairs *reps = (yy_repairs *) YYMALLOC (sizeof (yy_repairs)); + reps->id = count; + reps->next = 0; + reps->stack_length = stack_length; + reps->states = (yy_state_t *) YYMALLOC (sizeof (yy_state_t) * (stack_length)); + reps->state = reps->states + (current->state - current->states); + YYCOPY (reps->states, current->states, current->state - current->states + 1); + reps->repair_length = current->repair_length + 1; + reps->prev_repair = current; + reps->repair.type = inserting; + reps->repair.term = (yysymbol_kind_t) yyx; + + /* Process PDA assuming next token is yyx */ + if (! yy_process_repairs (reps, (yysymbol_kind_t)yyx)) + { + YYFREE (reps); + continue; + } + + tail->next = reps; + tail = reps; + count++; + + if (yyx == yytoken) + { + rep_terms = yy_create_repair_terms (current<%= output.user_args %>); + YYDPRINTF ((stderr, "repair_terms found. id: %d, length: %d\n", rep_terms->id, rep_terms->length)); + yy_print_repairs (current<%= output.user_args %>); + yy_print_repair_terms (rep_terms<%= output.user_args %>); + + goto done; + } + + YYDPRINTF ((stderr, + "New repairs is enqueued. count: %d, yystate: %d, yyx: %d\n", + count, yystate, yyx)); + yy_print_repairs (reps<%= output.user_args %>); + } + } + } + + current = current->next; + } + +done: + + yy_free_repairs(head<%= output.user_args %>); + + if (!rep_terms) + { + YYDPRINTF ((stderr, "repair_terms not found\n")); + } + + return rep_terms; +} +<%- end -%> + + + +/*----------. +| yyparse. | +`----------*/ + +int +yyparse (<%= output.parse_param %>) +{ +<%# b4_declare_scanner_communication_variables -%> +/* Lookahead token kind. */ +int yychar; + + +/* The semantic value of the lookahead symbol. */ +/* Default value used for initialization, for pacifying older GCCs + or non-GCC compilers. */ +#ifdef __cplusplus +static const YYSTYPE yyval_default = {}; +(void) yyval_default; +#else +YY_INITIAL_VALUE (static const YYSTYPE yyval_default;) +#endif +YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); + +/* Location data for the lookahead symbol. */ +static const YYLTYPE yyloc_default +# if defined YYLTYPE_IS_TRIVIAL && YYLTYPE_IS_TRIVIAL + = { 1, 1, 1, 1 } +# endif +; +YYLTYPE yylloc = yyloc_default; + +<%# b4_declare_parser_state_variables -%> + /* Number of syntax errors so far. */ + int yynerrs = 0; + YY_USE (yynerrs); /* Silence compiler warning. */ + + yy_state_fast_t yystate = 0; + /* Number of tokens to shift before error messages enabled. */ + int yyerrstatus = 0; + + /* Refer to the stacks through separate pointers, to allow yyoverflow + to reallocate them elsewhere. */ + + /* Their size. */ + YYPTRDIFF_T yystacksize = YYINITDEPTH; + + /* The state stack: array, bottom, top. */ + yy_state_t yyssa[YYINITDEPTH]; + yy_state_t *yyss = yyssa; + yy_state_t *yyssp = yyss; + + /* The semantic value stack: array, bottom, top. */ + YYSTYPE yyvsa[YYINITDEPTH]; + YYSTYPE *yyvs = yyvsa; + YYSTYPE *yyvsp = yyvs; + + /* The location stack: array, bottom, top. */ + YYLTYPE yylsa[YYINITDEPTH]; + YYLTYPE *yyls = yylsa; + YYLTYPE *yylsp = yyls; + + int yyn; + /* The return value of yyparse. */ + int yyresult; + /* Lookahead symbol kind. */ + yysymbol_kind_t yytoken = YYSYMBOL_YYEMPTY; + /* The variables used to return semantic value and location from the + action routines. */ + YYSTYPE yyval; + YYLTYPE yyloc; + + /* The locations where the error started and ended. */ + YYLTYPE yyerror_range[3]; +<%- if output.error_recovery -%> + yy_repair_terms *rep_terms = 0; + yy_term term_backup; + int rep_terms_index; + int yychar_backup; +<%- end -%> + + /* Buffer for error messages, and its allocated size. */ + char yymsgbuf[128]; + char *yymsg = yymsgbuf; + YYPTRDIFF_T yymsg_alloc = sizeof yymsgbuf; + +#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N), yylsp -= (N)) + + /* The number of symbols on the RHS of the reduced rule. + Keep to zero when no symbol should be popped. */ + int yylen = 0; + + YYDPRINTF ((stderr, "Starting parse\n")); + + yychar = YYEMPTY; /* Cause a token to be read. */ + + +<%# b4_user_initial_action -%> +<%= output.user_initial_action("/* User initialization code. */") %> +#line [@oline@] [@ofile@] + + yylsp[0] = yylloc; + goto yysetstate; + + +/*------------------------------------------------------------. +| yynewstate -- push a new state, which is found in yystate. | +`------------------------------------------------------------*/ +yynewstate: + /* In all cases, when you get here, the value and location stacks + have just been pushed. So pushing a state here evens the stacks. */ + yyssp++; + + +/*--------------------------------------------------------------------. +| yysetstate -- set current state (the top of the stack) to yystate. | +`--------------------------------------------------------------------*/ +yysetstate: + YYDPRINTF ((stderr, "Entering state %d\n", yystate)); + YY_ASSERT (0 <= yystate && yystate < YYNSTATES); + YY_IGNORE_USELESS_CAST_BEGIN + *yyssp = YY_CAST (yy_state_t, yystate); + YY_IGNORE_USELESS_CAST_END + YY_STACK_PRINT (yyss, yyssp<%= output.user_args %>); + + if (yyss + yystacksize - 1 <= yyssp) +#if !defined yyoverflow && !defined YYSTACK_RELOCATE + YYNOMEM; +#else + { + /* Get the current used size of the three stacks, in elements. */ + YYPTRDIFF_T yysize = yyssp - yyss + 1; + +# if defined yyoverflow + { + /* Give user a chance to reallocate the stack. Use copies of + these so that the &'s don't force the real ones into + memory. */ + yy_state_t *yyss1 = yyss; + YYSTYPE *yyvs1 = yyvs; + YYLTYPE *yyls1 = yyls; + + /* Each stack pointer address is followed by the size of the + data in use in that stack, in bytes. This used to be a + conditional around just the two extra args, but that might + be undefined if yyoverflow is a macro. */ + yyoverflow (YY_("memory exhausted"), + &yyss1, yysize * YYSIZEOF (*yyssp), + &yyvs1, yysize * YYSIZEOF (*yyvsp), + &yyls1, yysize * YYSIZEOF (*yylsp), + &yystacksize); + yyss = yyss1; + yyvs = yyvs1; + yyls = yyls1; + } +# else /* defined YYSTACK_RELOCATE */ + /* Extend the stack our own way. */ + if (YYMAXDEPTH <= yystacksize) + YYNOMEM; + yystacksize *= 2; + if (YYMAXDEPTH < yystacksize) + yystacksize = YYMAXDEPTH; + + { + yy_state_t *yyss1 = yyss; + union yyalloc *yyptr = + YY_CAST (union yyalloc *, + YYSTACK_ALLOC (YY_CAST (YYSIZE_T, YYSTACK_BYTES (yystacksize)))); + if (! yyptr) + YYNOMEM; + YYSTACK_RELOCATE (yyss_alloc, yyss); + YYSTACK_RELOCATE (yyvs_alloc, yyvs); + YYSTACK_RELOCATE (yyls_alloc, yyls); +# undef YYSTACK_RELOCATE + if (yyss1 != yyssa) + YYSTACK_FREE (yyss1); + } +# endif + + yyssp = yyss + yysize - 1; + yyvsp = yyvs + yysize - 1; + yylsp = yyls + yysize - 1; + + YY_IGNORE_USELESS_CAST_BEGIN + YYDPRINTF ((stderr, "Stack size increased to %ld\n", + YY_CAST (long, yystacksize))); + YY_IGNORE_USELESS_CAST_END + + if (yyss + yystacksize - 1 <= yyssp) + YYABORT; + } +#endif /* !defined yyoverflow && !defined YYSTACK_RELOCATE */ + + + if (yystate == YYFINAL) + YYACCEPT; + + goto yybackup; + + +/*-----------. +| yybackup. | +`-----------*/ +yybackup: + /* Do appropriate processing given the current state. Read a + lookahead token if we need one and don't already have one. */ + + /* First try to decide what to do without reference to lookahead token. */ + yyn = yypact[yystate]; + if (yypact_value_is_default (yyn)) + goto yydefault; + + /* Not known => get a lookahead token if don't already have one. */ + +<%- if output.error_recovery -%> + if (YYERROR_RECOVERY_ENABLED(<%= output.parse_param_name %>)) + { + if (yychar == YYEMPTY && rep_terms) + { + + if (rep_terms_index < rep_terms->length) + { + YYDPRINTF ((stderr, "An error recovery token is used\n")); + yy_term term = rep_terms->terms[rep_terms_index]; + yytoken = term.kind; + yylval = term.value; + yylloc = term.location; + yychar = yytranslate_inverted[yytoken]; + YY_SYMBOL_PRINT ("Next error recovery token is", yytoken, &yylval, &yylloc<%= output.user_args %>); + rep_terms_index++; + } + else + { + YYDPRINTF ((stderr, "Error recovery is completed\n")); + yytoken = term_backup.kind; + yylval = term_backup.value; + yylloc = term_backup.location; + yychar = yychar_backup; + YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc<%= output.user_args %>); + + YYFREE (rep_terms); + rep_terms = 0; + yychar_backup = 0; + } + } + } +<%- end -%> + /* YYCHAR is either empty, or end-of-input, or a valid lookahead. */ + if (yychar == YYEMPTY) + { + YYDPRINTF ((stderr, "Reading a token\n")); + yychar = yylex <%= output.yylex_formals %>; + } + + if (yychar <= <%= output.eof_symbol.id.s_value %>) + { + yychar = <%= output.eof_symbol.id.s_value %>; + yytoken = <%= output.eof_symbol.enum_name %>; + YYDPRINTF ((stderr, "Now at end of input.\n")); + } + else if (yychar == <%= output.error_symbol.id.s_value %>) + { + /* The scanner already issued an error message, process directly + to error recovery. But do not keep the error token as + lookahead, it is too special and may lead us to an endless + loop in error recovery. */ + yychar = <%= output.undef_symbol.id.s_value %>; + yytoken = <%= output.error_symbol.enum_name %>; + yyerror_range[1] = yylloc; + goto yyerrlab1; + } + else + { + yytoken = YYTRANSLATE (yychar); + YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc<%= output.user_args %>); + } + + /* If the proper action on seeing token YYTOKEN is to reduce or to + detect an error, take that action. */ + yyn += yytoken; + if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) + goto yydefault; + yyn = yytable[yyn]; + if (yyn <= 0) + { + if (yytable_value_is_error (yyn)) + goto yyerrlab; + yyn = -yyn; + goto yyreduce; + } + + /* Count tokens shifted since error; after three, turn off error + status. */ + if (yyerrstatus) + yyerrstatus--; + + /* Shift the lookahead token. */ + YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc<%= output.user_args %>); + yystate = yyn; + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + *++yyvsp = yylval; + YY_IGNORE_MAYBE_UNINITIALIZED_END + *++yylsp = yylloc; +<%= output.after_shift_function("/* %after-shift code. */") %> + + /* Discard the shifted token. */ + yychar = YYEMPTY; + goto yynewstate; + + +/*-----------------------------------------------------------. +| yydefault -- do the default action for the current state. | +`-----------------------------------------------------------*/ +yydefault: + yyn = yydefact[yystate]; + if (yyn == 0) + goto yyerrlab; + goto yyreduce; + + +/*-----------------------------. +| yyreduce -- do a reduction. | +`-----------------------------*/ +yyreduce: + /* yyn is the number of a rule to reduce with. */ + yylen = yyr2[yyn]; + + /* If YYLEN is nonzero, implement the default value of the action: + '$$ = $1'. + + Otherwise, the following line sets YYVAL to garbage. + This behavior is undocumented and Bison + users should not rely upon it. Assigning to YYVAL + unconditionally makes the parser a bit smaller, and it avoids a + GCC warning that YYVAL may be used uninitialized. */ + yyval = yyvsp[1-yylen]; +<%= output.before_reduce_function("/* %before-reduce function. */") %> + + /* Default location. */ + YYLLOC_DEFAULT (yyloc, (yylsp - yylen), yylen); + yyerror_range[1] = yyloc; + YY_REDUCE_PRINT (yyn<%= output.user_args %>); + switch (yyn) + { +<%= output.user_actions -%> + + default: break; + } + /* User semantic actions sometimes alter yychar, and that requires + that yytoken be updated with the new translation. We take the + approach of translating immediately before every use of yytoken. + One alternative is translating here after every semantic action, + but that translation would be missed if the semantic action invokes + YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or + if it invokes YYBACKUP. In the case of YYABORT or YYACCEPT, an + incorrect destructor might then be invoked immediately. In the + case of YYERROR or YYBACKUP, subsequent parser actions might lead + to an incorrect destructor call or verbose syntax error message + before the lookahead is translated. */ + YY_SYMBOL_PRINT ("-> $$ =", YY_CAST (yysymbol_kind_t, yyr1[yyn]), &yyval, &yyloc<%= output.user_args %>); + + YYPOPSTACK (yylen); +<%= output.after_reduce_function("/* %after-reduce function. */") %> + yylen = 0; + + *++yyvsp = yyval; + *++yylsp = yyloc; + + /* Now 'shift' the result of the reduction. Determine what state + that goes to, based on the state we popped back to and the rule + number reduced by. */ + { + const int yylhs = yyr1[yyn] - YYNTOKENS; + const int yyi = yypgoto[yylhs] + *yyssp; + yystate = (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyssp + ? yytable[yyi] + : yydefgoto[yylhs]); + } + + goto yynewstate; + + +/*--------------------------------------. +| yyerrlab -- here on detecting error. | +`--------------------------------------*/ +yyerrlab: + /* Make sure we have latest lookahead translation. See comments at + user semantic actions for why this is necessary. */ + yytoken = yychar == YYEMPTY ? YYSYMBOL_YYEMPTY : YYTRANSLATE (yychar); + /* If not already recovering from an error, report this error. */ + if (!yyerrstatus) + { + ++yynerrs; + { + yypcontext_t yyctx + = {yyssp, yytoken, &yylloc}; + char const *yymsgp = YY_("syntax error"); + int yysyntax_error_status; + yysyntax_error_status = yysyntax_error (&yymsg_alloc, &yymsg, &yyctx<%= output.user_args %>); + if (yysyntax_error_status == 0) + yymsgp = yymsg; + else if (yysyntax_error_status == -1) + { + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); + yymsg = YY_CAST (char *, + YYSTACK_ALLOC (YY_CAST (YYSIZE_T, yymsg_alloc))); + if (yymsg) + { + yysyntax_error_status + = yysyntax_error (&yymsg_alloc, &yymsg, &yyctx<%= output.user_args %>); + yymsgp = yymsg; + } + else + { + yymsg = yymsgbuf; + yymsg_alloc = sizeof yymsgbuf; + yysyntax_error_status = YYENOMEM; + } + } + yyerror (<%= output.yyerror_args %>, yymsgp); + if (yysyntax_error_status == YYENOMEM) + YYNOMEM; + } + } + + yyerror_range[1] = yylloc; + if (yyerrstatus == 3) + { + /* If just tried and failed to reuse lookahead token after an + error, discard it. */ + + if (yychar <= <%= output.eof_symbol.id.s_value %>) + { + /* Return failure if at end of input. */ + if (yychar == <%= output.eof_symbol.id.s_value %>) + YYABORT; + } + else + { + yydestruct ("Error: discarding", + yytoken, &yylval, &yylloc<%= output.user_args %>); + yychar = YYEMPTY; + } + } + + /* Else will try to reuse lookahead token after shifting the error + token. */ + goto yyerrlab1; + + +/*---------------------------------------------------. +| yyerrorlab -- error raised explicitly by YYERROR. | +`---------------------------------------------------*/ +yyerrorlab: + /* Pacify compilers when the user code never invokes YYERROR and the + label yyerrorlab therefore never appears in user code. */ + if (0) + YYERROR; + ++yynerrs; + + /* Do not reclaim the symbols of the rule whose action triggered + this YYERROR. */ + YYPOPSTACK (yylen); +<%= output.after_pop_stack_function("yylen", "/* %after-pop-stack function. */") %> + yylen = 0; + YY_STACK_PRINT (yyss, yyssp<%= output.user_args %>); + yystate = *yyssp; + goto yyerrlab1; + + +/*-------------------------------------------------------------. +| yyerrlab1 -- common code for both syntax error and YYERROR. | +`-------------------------------------------------------------*/ +yyerrlab1: +<%- if output.error_recovery -%> + if (YYERROR_RECOVERY_ENABLED(<%= output.parse_param_name %>)) + { + rep_terms = yyrecover (yyss, yyssp, yychar<%= output.user_args %>); + if (rep_terms) + { + for (int i = 0; i < rep_terms->length; i++) + { + yy_term *term = &rep_terms->terms[i]; + yy_error_token_initialize (term->kind, &term->value, &term->location<%= output.user_args %>); + } + + yychar_backup = yychar; + /* Can be packed into (the tail of) rep_terms? */ + term_backup.kind = yytoken; + term_backup.value = yylval; + term_backup.location = yylloc; + rep_terms_index = 0; + yychar = YYEMPTY; + + goto yybackup; + } + } +<%- end -%> + yyerrstatus = 3; /* Each real token shifted decrements this. */ + + /* Pop stack until we find a state that shifts the error token. */ + for (;;) + { + yyn = yypact[yystate]; + if (!yypact_value_is_default (yyn)) + { + yyn += YYSYMBOL_YYerror; + if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYSYMBOL_YYerror) + { + yyn = yytable[yyn]; + if (0 < yyn) + break; + } + } + + /* Pop the current state because it cannot handle the error token. */ + if (yyssp == yyss) + YYABORT; + + yyerror_range[1] = *yylsp; + yydestruct ("Error: popping", + YY_ACCESSING_SYMBOL (yystate), yyvsp, yylsp<%= output.user_args %>); + YYPOPSTACK (1); +<%= output.after_pop_stack_function(1, "/* %after-pop-stack function. */") %> + yystate = *yyssp; + YY_STACK_PRINT (yyss, yyssp<%= output.user_args %>); + } + + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + *++yyvsp = yylval; + YY_IGNORE_MAYBE_UNINITIALIZED_END + + yyerror_range[2] = yylloc; + ++yylsp; + YYLLOC_DEFAULT (*yylsp, yyerror_range, 2); + + /* Shift the error token. */ + YY_SYMBOL_PRINT ("Shifting", YY_ACCESSING_SYMBOL (yyn), yyvsp, yylsp<%= output.user_args %>); +<%= output.after_shift_error_token_function("/* %after-shift-error-token code. */") %> + + yystate = yyn; + goto yynewstate; + + +/*-------------------------------------. +| yyacceptlab -- YYACCEPT comes here. | +`-------------------------------------*/ +yyacceptlab: + yyresult = 0; + goto yyreturnlab; + + +/*-----------------------------------. +| yyabortlab -- YYABORT comes here. | +`-----------------------------------*/ +yyabortlab: + yyresult = 1; + goto yyreturnlab; + + +/*-----------------------------------------------------------. +| yyexhaustedlab -- YYNOMEM (memory exhaustion) comes here. | +`-----------------------------------------------------------*/ +yyexhaustedlab: + yyerror (<%= output.yyerror_args %>, YY_("memory exhausted")); + yyresult = 2; + goto yyreturnlab; + + +/*----------------------------------------------------------. +| yyreturnlab -- parsing is finished, clean up and return. | +`----------------------------------------------------------*/ +yyreturnlab: + if (yychar != YYEMPTY) + { + /* Make sure we have latest lookahead translation. See comments at + user semantic actions for why this is necessary. */ + yytoken = YYTRANSLATE (yychar); + yydestruct ("Cleanup: discarding lookahead", + yytoken, &yylval, &yylloc<%= output.user_args %>); + } + /* Do not reclaim the symbols of the rule whose action triggered + this YYABORT or YYACCEPT. */ + YYPOPSTACK (yylen); + YY_STACK_PRINT (yyss, yyssp<%= output.user_args %>); + while (yyssp != yyss) + { + yydestruct ("Cleanup: popping", + YY_ACCESSING_SYMBOL (+*yyssp), yyvsp, yylsp<%= output.user_args %>); + YYPOPSTACK (1); + } +#ifndef yyoverflow + if (yyss != yyssa) + YYSTACK_FREE (yyss); +#endif + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); + return yyresult; +} + +<%# b4_percent_code_get([[epilogue]]) -%> +<%- if output.aux.epilogue -%> +#line <%= output.aux.epilogue_first_lineno - 1 %> "<%= output.grammar_file_path %>" +<%= output.aux.epilogue -%> +<%- end -%> + diff --git a/tools/lrama/template/bison/yacc.h b/tools/lrama/template/bison/yacc.h new file mode 100644 index 0000000000..848dbf5961 --- /dev/null +++ b/tools/lrama/template/bison/yacc.h @@ -0,0 +1,40 @@ +<%# b4_generated_by -%> +/* A Bison parser, made by Lrama <%= Lrama::VERSION %>. */ + +<%# b4_copyright -%> +/* Bison interface for Yacc-like parsers in C + + Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2021 Free Software Foundation, + Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +<%# b4_disclaimer -%> +/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual, + especially those whose name start with YY_ or yy_. They are + private implementation details that can be changed or removed. */ +<%= output.render_partial("bison/_yacc.h") %>